diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 index fe883a6b25..d987ca5a6b 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -1,5 +1,5 @@ diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f -index 880769442..5a3da931f 100644 +index 0a3dfa449..8f4aaecd6 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -484,23 +484,140 @@ C @@ -77,9 +77,9 @@ index 880769442..5a3da931f 100644 + STOP + ENDIF + IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) -+ CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, -+ & HEL_RAND, COL_RAND, 0, OUT2, -+ & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering ++ CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering ++ & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, ++ & SELECTED_HEL2, SELECTED_COL2 ) + FIRST = .FALSE. +c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) + IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -96,9 +96,9 @@ index 880769442..5a3da931f 100644 + ENDIF + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + IF ( .NOT. MULTI_CHANNEL ) THEN -+ CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, -+ & HEL_RAND, COL_RAND, 0, OUT2, -+ & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled ++ CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled ++ & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, ++ & SELECTED_HEL2, SELECTED_COL2 ) + ELSE + IF( SDE_STRAT.NE.1 ) THEN + WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy @@ -284,7 +284,7 @@ index 71fbf2b25..0f1d199fc 100644 open(unit=lun,file=tempname,status='old',ERR=20) fopened=.true. diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f -index 3ac962688..daea73a6d 100644 +index 817af778b..0c2ce6ec4 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -72,7 +72,10 @@ C diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common index e19a7d2054..a6cda7dcfb 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common @@ -38,7 +38,7 @@ index 617f10b93..00c73099a 100644 +cleanall: cleanSource # THIS IS THE ONE + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile -index 348c283be..65369d610 100644 +index 348c283be..d572486c2 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile @@ -1,6 +1,28 @@ @@ -297,10 +297,10 @@ index 348c283be..65369d610 100644 +distclean: cleanall # Clean all fortran and cudacpp builds as well as the googletest installation + $(MAKE) -f $(CUDACPP_MAKEFILE) distclean diff --git b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py -index ebbc1ac1d..a88d60b28 100755 +index fb7efa87c..5fd170d18 100755 --- 
b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py +++ a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py -@@ -385,8 +385,20 @@ class gensym(object): +@@ -391,8 +391,20 @@ class gensym(object): done = True if not done: raise Exception('Parsing error in gensym: %s' % stdout) @@ -324,7 +324,7 @@ index ebbc1ac1d..a88d60b28 100755 self.submit_to_cluster(job_list) job_list = {} diff --git b/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py a/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py -index 389b93ab8..d72270289 100755 +index 8c509e83f..cb6bf4ca5 100755 --- b/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py +++ a/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py @@ -3614,8 +3614,20 @@ Beware that this can be dangerous for local multicore runs.""") diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f index b83da36661..e0801181a7 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/testmisc.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/testmisc.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 
11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index 9d02c4be5b..38978865ff 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -554,9 +554,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -573,9 +573,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! 
multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f index f8acb795d8..772339d0ac 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/testmisc.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f index f8acb795d8..772339d0ac 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index c4acdbe58d..f9e2335de4 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -542,9 +542,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -561,9 +561,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! 
multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index f8acb795d8..772339d0ac 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/testmisc.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f index f8acb795d8..772339d0ac 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. +C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. 
C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. 
+ } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index c4acdbe58d..f9e2335de4 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -542,9 +542,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -561,9 +561,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! 
multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index f8acb795d8..772339d0ac 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index d5066ef315..29cee23b2e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -542,9 +542,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 
0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -561,9 +561,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
+ * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/testmisc.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index e878a2d212..c9ca1538d3 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -542,9 +542,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -561,9 +561,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! 
multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/testmisc.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index ad23759d13..208149fcf6 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -542,9 +542,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -561,9 +561,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! 
multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/testmisc.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index a0e7e538b7..7c94a0776f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -542,9 +542,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -561,9 +561,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! 
multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f index 0a2b1165de..90c412d4a5 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/testmisc.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f index 0a2b1165de..90c412d4a5 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 1e300c123d..2c11f53b89 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -586,9 +586,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -605,9 +605,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! 
multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 402b25367b..d829a73049 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -586,9 +586,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 
0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -605,9 +605,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
+ * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/testmisc.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i + 1; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
+ * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i + 1; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/fcheck_sa.f b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/fcheck_sa.f index adecad4d4a..3c59869dc5 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/fcheck_sa.f +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i + 1; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index 11c8fc430d..7bd8ec493e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -542,9 +542,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -561,9 +561,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! 
multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f index f8acb795d8..772339d0ac 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index 7039f6678e..c4e476d6c0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -597,9 +597,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 
0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -616,9 +616,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f index f8acb795d8..772339d0ac 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index e878a2d212..c9ca1538d3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -542,9 +542,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -561,9 +561,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 1e300c123d..2c11f53b89 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -586,9 +586,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! 
exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -605,9 +605,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! 
not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 402b25367b..d829a73049 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -586,9 +586,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -605,9 +605,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index a669e02e8d..0eb22610bf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -597,9 +597,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! 
exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -616,9 +616,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f index 90a5621e77..e6b0fd4d2a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! 
not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index e735357659..6a17e242b2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -542,9 +542,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -561,9 +561,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index c379c67380..a952958df8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -575,9 +575,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! 
exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -594,9 +594,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! 
not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index fbb6dc13d2..a41c6f876a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -586,9 +586,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -605,9 +605,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index 712c8840ed..700cdbece2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -586,9 +586,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! 
exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -605,9 +605,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! 
not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index 3401b8a79b..bc898ac10e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -613,9 +613,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -632,9 +632,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index c0d78a041e..3db88ba2c3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -685,9 +685,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! 
exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -704,9 +704,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! 
not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 4e54a1064f..8988ba6c1d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -597,9 +597,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -616,9 +616,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index aba833493c..37b6741d5b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -685,9 +685,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! 
exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -704,9 +704,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! 
not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index 600420eafc..4f5f2bb65a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -597,9 +597,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -616,9 +616,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index 15316e0702..598e4f55b8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -597,9 +597,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! 
exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -616,9 +616,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! 
not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index a686672159..dd3cd5c8a4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -613,9 +613,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -632,9 +632,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 
0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index 039bbe194b..ef5dde5b56 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -597,9 +597,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled for helicity filtering + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -616,9 +616,9 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 IF ( .NOT. MULTI_CHANNEL ) THEN - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, - & HEL_RAND, COL_RAND, 0, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 0: multi channel disabled + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & SELECTED_HEL2, SELECTED_COL2 ) ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! 
multi channel single-diagram enhancement strategy diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f index 20bc3318cb..a4739bea17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f @@ -19,8 +19,6 @@ PROGRAM FCHECK_SA DOUBLE PRECISION GS(NEVTMAX) DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used - INTEGER*4 CHANID - PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 DOUBLE PRECISION MES(NEVTMAX) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used @@ -64,8 +62,8 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO - CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc index 22ce3f5115..27ce14277f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc @@ -99,14 +99,39 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); #endif } + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++, without multi-channel mode. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + } + /** * Retrieve the number of good helicities for helicity filtering in the Bridge. * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc index e2115de6ec..0c319d8e7c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc @@ -36,7 +36,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -56,6 +56,31 @@ C END SUBROUTINE FBRIDGESEQUENCE END INTERFACE +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL + END INTERFACE + C C Retrieve the number of good helicities for helicity filtering in the Bridge. C - PBRIDGE: the memory address of the C++ Bridge diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/testmisc.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/testmisc.cc index ba9e59a8a3..ac0b049e60 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/testmisc.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/testmisc.cc @@ -235,4 +235,64 @@ TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testmisc ) } //-------------------------------------------------------------------------- + + // Boolean vector (mask) times FP vector + /* + // From https://github.com/madgraph5/madgraph4gpu/issues/765#issuecomment-1853672838 + channelids_sv = CHANNEL_ACCESS::kernelAccess( pchannelIds ); // the 4 channels in the SIMD vector + bool_sv mask_sv = ( channelids_sv == 1 ); + numerators_sv += mask_sv * cxabs2( amp_sv[0] ); + if( pchannelIds != nullptr ) denominators_sv += cxabs2( amp_sv[0] ); + */ + { + typedef bool_sv test_int_sv; // defined as scalar_or_vector of long int (FPTYPE=double) or int (FPTYPE=float) + test_int_sv channelids0_sv{}; // mimic 
CHANNEL_ACCESS::kernelAccess( pchannelIds ) + test_int_sv channelids1_sv{}; // mimic CHANNEL_ACCESS::kernelAccess( pchannelIds ) + fptype_sv absamp0_sv{}; // mimic cxabs2( amp_sv[0] ) + fptype_sv absamp1_sv{}; // mimic cxabs2( amp_sv[0] ) +#ifdef MGONGPU_CPPSIMD + for( int i = 0; i < neppV; i++ ) + { + channelids0_sv[i] = i; // 0123 + channelids1_sv[i] = i; // 1234 + absamp0_sv[i] = 10. + i; // 10. 11. 12. 13. + absamp1_sv[i] = 11. + i; // 11. 12. 13. 14. + } +#else + channelids0_sv = 0; + channelids1_sv = 1; + absamp0_sv = 10.; + absamp1_sv = 11.; +#endif + bool_sv mask0_sv = ( channelids0_sv % 2 == 0 ); // even channels 0123 -> TFTF (1010) + bool_sv mask1_sv = ( channelids1_sv % 2 == 0 ); // even channels 1234 -> FTFT (0101) + constexpr fptype_sv fpZERO_sv{}; // 0000 + //fptype_sv numerators0_sv = mask0_sv * absamp0_sv; // invalid operands to binary * ('__vector(4) long int' and '__vector(4) double') + fptype_sv numerators0_sv = fpternary( mask0_sv, absamp0_sv, fpZERO_sv ); // equivalent to "mask0_sv * absamp0_sv" + fptype_sv numerators1_sv = fpternary( mask1_sv, absamp1_sv, fpZERO_sv ); // equivalent to "mask1_sv * absamp1_sv" +#ifdef MGONGPU_CPPSIMD + //std::cout << "numerators0_sv: " << numerators0_sv << std::endl; + //std::cout << "numerators1_sv: " << numerators1_sv << std::endl; + for( int i = 0; i < neppV; i++ ) + { + // Values of numerators0_sv: 10.*1 11.*0 12.*1 13.*0 + if( channelids0_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators0_sv[i] == ( 10. + i ) ); + else // odd channels + EXPECT_TRUE( numerators0_sv[i] == 0. ); + // Values of numerators1_sv: 11.*0 12.*1 13.*0 14.*1 + if( channelids1_sv[i] % 2 == 0 ) // even channels + EXPECT_TRUE( numerators1_sv[i] == ( 11. + i ) ); + else // odd channels + EXPECT_TRUE( numerators1_sv[i] == 0. ); + } +#else + // Values of numerators0_sv: 10.*1 + EXPECT_TRUE( numerators0_sv == 10. ); + // Values of numerators1_sv: 11.*0 + EXPECT_TRUE( numerators1_sv == 0. 
); +#endif + } + + //-------------------------------------------------------------------------- }