diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 7f2a4d315867c..6bade1363b7ce 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -197,7 +197,7 @@ if(NOT GPUCA_CUDA_COMPILE_MODE STREQUAL "rdc") set_target_properties(${targetName} PROPERTIES LINKER_LANGUAGE CXX) endif() -if(NOT ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") +if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR CONFIG_O2_EXTENSIONS) add_library(GPUTrackingCUDAExternalProvider OBJECT GPUReconstructionCUDAExternalProvider.cu) add_library(O2::GPUTrackingCUDAExternalProvider ALIAS GPUTrackingCUDAExternalProvider) set_property(TARGET GPUTrackingCUDAExternalProvider PROPERTY CUDA_SEPARABLE_COMPILATION ON) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index d8383e870f7ac..b291bf735aee3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -23,6 +23,9 @@ #ifdef GPUCA_HAVE_O2HEADERS #include "Framework/SHA1.h" #endif +#include +#include +#include using namespace GPUCA_NAMESPACE::gpu; @@ -62,11 +65,27 @@ int GPUReconstructionCUDA::genRTC(std::string& filename, unsigned int& nCompile) nCompile = mProcessingSettings.rtc.compilePerKernel ? kernels.size() : 1; bool cacheLoaded = false; + int fd = 0; if (mProcessingSettings.rtc.cacheOutput) { + if (mProcessingSettings.RTCcacheFolder != ".") { + std::filesystem::create_directories(mProcessingSettings.RTCcacheFolder); + } #ifndef GPUCA_HAVE_O2HEADERS throw std::runtime_error("Cannot use RTC cache without O2 headers"); #else - FILE* fp = fopen("rtc.cuda.cache", "rb"); + if (mProcessingSettings.rtc.cacheMutex) { + mode_t mask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + fd = open((mProcessingSettings.RTCcacheFolder + "/cache.lock").c_str(), O_RDWR | O_CREAT | O_CLOEXEC, mask); + if (fd == -1) { + throw std::runtime_error("Error opening rtc cache mutex lock file"); + } + fchmod(fd, mask); + if (lockf(fd, F_LOCK, 0)) { + throw std::runtime_error("Error locking rtc cache mutex file"); + } + } + + FILE* fp = fopen((mProcessingSettings.RTCcacheFolder + "/rtc.cuda.cache").c_str(), "rb"); char sharead[20]; if (fp) { size_t len; @@ -100,6 +119,7 @@ int GPUReconstructionCUDA::genRTC(std::string& filename, unsigned int& nCompile) break; } GPUSettingsProcessingRTC cachedSettings; + static_assert(std::is_trivially_copyable_v == true, "GPUSettingsProcessingRTC must be POD"); if (fread(&cachedSettings, sizeof(cachedSettings), 1, fp) != 1) { throw std::runtime_error("Cache file corrupt"); } @@ -183,7 +203,7 @@ int GPUReconstructionCUDA::genRTC(std::string& filename, unsigned int& nCompile) } #ifdef GPUCA_HAVE_O2HEADERS if (mProcessingSettings.rtc.cacheOutput) { - FILE* fp = fopen("rtc.cuda.cache", "w+b"); + FILE* fp = fopen((mProcessingSettings.RTCcacheFolder + "/rtc.cuda.cache").c_str(), "w+b"); if (fp == nullptr) { throw std::runtime_error("Cannot open cache file for writing"); } @@ -221,6 +241,12 @@ int GPUReconstructionCUDA::genRTC(std::string& filename, unsigned int& nCompile) } #endif } + if (mProcessingSettings.rtc.cacheOutput && mProcessingSettings.rtc.cacheMutex) { + if (lockf(fd, F_ULOCK, 0)) { + throw std::runtime_error("Error unlocking RTC cache mutex file"); + } + close(fd); + } #endif return 0; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index c4e818c31dc0c..151eb5ae3930a 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -260,7 +260,7 @@ if(NOT GPUCA_HIP_COMPILE_MODE STREQUAL "rdc") target_link_options(${targetName} PRIVATE $<$:-fno-gpu-rdc>) endif() -if(NOT ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") +if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR CONFIG_O2_EXTENSIONS) add_library(GPUTrackingHIPExternalProvider OBJECT ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPExternalProvider.hip) add_library(O2::GPUTrackingHIPExternalProvider ALIAS GPUTrackingHIPExternalProvider) target_compile_options(GPUTrackingHIPExternalProvider PRIVATE $<$:-fgpu-rdc>) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 982aaaa5ed69d..3175b088d8440 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -191,6 +191,7 @@ AddSubConfig(GPUSettingsRecTRD, trd) AddHelp("help", 'h') EndConfig() +#ifndef __OPENCL__ // Settings steering the processing once the device was selected, only available on the host BeginSubConfig(GPUSettingsProcessingRTC, rtc, configStandalone.proc, "RTC", 0, "Processing settings", proc_rtc) AddOption(cacheOutput, bool, false, "", 0, "Cache RTC compilation results") @@ -198,6 +199,7 @@ AddOption(optConstexpr, bool, true, "", 0, "Replace constant variables by static AddOption(compilePerKernel, bool, true, "", 0, "Run one RTC compilation per kernel") AddOption(enable, bool, false, "", 0, "Use RTC to optimize GPU code") AddOption(runTest, int, 0, "", 0, "Do not run the actual benchmark, but just test RTC compilation (1 full test, 2 test only compilation)") +AddOption(cacheMutex, bool, true, "", 0, "Use a file lock to serialize access to the cache folder") AddHelp("help", 'h') EndConfig() @@ -276,11 +278,13 @@ AddOption(tpcSingleSector, int, -1, "", 0, "Restrict TPC processing to a single AddOption(tpcDownscaledEdx, unsigned char, 0, "", 0, "If != 0, downscale dEdx processing (if enabled) to x %") AddOption(tpcMaxAttachedClustersPerSectorRow, unsigned int, 51000, "", 0, "Maximum number of TPC attached clusters which can be decoded per SectorRow") AddOption(tpcUseOldCPUDecoding, bool, false, "", 0, "Enable old CPU-based TPC decoding") +AddOption(RTCcacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the cache file is stored") AddVariable(eventDisplay, GPUCA_NAMESPACE::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) AddSubConfig(GPUSettingsProcessingParam, param) AddHelp("help", 'h') EndConfig() +#endif // __OPENCL__ #ifndef GPUCA_GPUCODE_DEVICE // Light settings concerning the event display (can be changed without rebuilding vertices) diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 4e3cff32c5bc9..92199f06a5f68 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -297,8 +297,8 @@ int SetupReconstruction() GPUSettingsGRP grp = rec->GetGRPSettings(); GPUSettingsRec recSet; GPUSettingsProcessing procSet; - memcpy((void*)&recSet, (void*)&configStandalone.rec, sizeof(GPUSettingsRec)); - memcpy((void*)&procSet, (void*)&configStandalone.proc, sizeof(GPUSettingsProcessing)); + recSet = configStandalone.rec; + procSet = configStandalone.proc; GPURecoStepConfiguration steps; if (configStandalone.eventGenerator) { diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 2da1f13de517e..30fe2850ff3eb 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -83,6 +83,7 @@ function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) endif() list(APPEND O2_GPU_KERNEL_TEMPLATE_FILES "${TMP_KERNEL_CLASS_FILE}.cxx") list(REMOVE_DUPLICATES O2_GPU_KERNEL_TEMPLATE_FILES) + list(FILTER O2_GPU_KERNEL_TEMPLATE_FILES EXCLUDE REGEX "^-$") list(TRANSFORM O2_GPU_KERNEL_TEMPLATE_FILES APPEND "\"") list(TRANSFORM O2_GPU_KERNEL_TEMPLATE_FILES PREPEND "#include \"") list(JOIN O2_GPU_KERNEL_TEMPLATE_FILES "\n" O2_GPU_KERNEL_TEMPLATE_FILES) diff --git a/GPU/Workflow/src/GPUWorkflowSpec.cxx b/GPU/Workflow/src/GPUWorkflowSpec.cxx index 36ce6da419d4b..af65de4ce5c48 100644 --- a/GPU/Workflow/src/GPUWorkflowSpec.cxx +++ b/GPU/Workflow/src/GPUWorkflowSpec.cxx @@ -321,11 +321,11 @@ void GPURecoWorkflowSpec::init(InitContext& ic) mode_t mask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; fd = open("/tmp/o2_gpu_memlock_mutex.lock", O_RDWR | O_CREAT | O_CLOEXEC, mask); if (fd == -1) { - throw std::runtime_error("Error opening lock file"); + throw std::runtime_error("Error opening memlock mutex lock file"); } fchmod(fd, mask); if (lockf(fd, F_LOCK, 0)) { - throw std::runtime_error("Error locking file"); + throw std::runtime_error("Error locking memlock mutex file"); } } std::chrono::time_point start, end; @@ -342,7 +342,7 @@ void GPURecoWorkflowSpec::init(InitContext& ic) } if (mConfParam->mutexMemReg) { if (lockf(fd, F_ULOCK, 0)) { - throw std::runtime_error("Error unlocking file"); + throw std::runtime_error("Error unlocking memlock mutex file"); } close(fd); } diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 302f5ab93209f..bd244fcaaecbe 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -111,7 +111,7 @@ if(ENABLE_CUDA) endif() endif() if(CMAKE_CUDA_COMPILER) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"${O2_GPU_CMAKE_CXX_FLAGS_NOSTD}\" --expt-relaxed-constexpr --extended-lambda --allow-unsupported-compiler -Xptxas -v -Wno-attributes") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"${O2_GPU_CMAKE_CXX_FLAGS_NOSTD}\" --expt-relaxed-constexpr --extended-lambda --allow-unsupported-compiler -Xptxas -v -Xcompiler -Wno-attributes") if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.3") string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe --diag_suppress=20257") # TODO: Cleanup endif() diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index f67c5c01e81f1..5b5d84c413abe 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -320,6 +320,7 @@ if has_detector_calib PHS && workflow_has_parameter CALIB; then fi [[ ${O2_GPU_DOUBLE_PIPELINE:-$EPNSYNCMODE} == 1 ]] && GPU_CONFIG+=" --enableDoublePipeline" +[[ ${O2_GPU_RTC:-0} == 1 ]] && GPU_CONFIG_KEY+="GPU_proc_rtc.enable=1;GPU_proc_rtc.cacheOutput=1;GPU_proc.RTCcacheFolder=/tmp/o2_gpu_rtc_cache;" ( workflow_has_parameter AOD || [[ -z "$DISABLE_ROOT_OUTPUT" ]] || needs_root_output o2-emcal-cell-writer-workflow ) && has_detector EMC && RAW_EMC_SUBSPEC=" --subspecification 1 " has_detector_reco MID && has_detector_matching MCHMID && MFTMCHConf="FwdMatching.useMIDMatch=true;" || MFTMCHConf="FwdMatching.useMIDMatch=false;" diff --git a/prodtests/full-system-test/start_tmux.sh b/prodtests/full-system-test/start_tmux.sh index e56514196afe3..22b0ce2ddcd2a 100755 --- a/prodtests/full-system-test/start_tmux.sh +++ b/prodtests/full-system-test/start_tmux.sh @@ -97,19 +97,16 @@ fi FST_SLEEP0=0 FST_SLEEP1=0 -FST_SLEEP2=45 +FST_SLEEP2=30 if [[ -z $SHM_MANAGER_SHMID ]]; then rm -f /dev/shm/*fmq* if [[ `ls /dev/shm/*fmq* 2> /dev/null | wc -l` != "0" ]]; then echo "FMQ SHM files left which cannot be deleted, please clean up!" exit 1 fi -else - FST_SLEEP0=0 - FST_SLEEP1=0 - FST_SLEEP2=30 fi [[ ! -z $FST_TMUX_DD_WAIT ]] && FST_SLEEP2=$FST_TMUX_DD_WAIT +[[ ${O2_GPU_RTC:-0} == 1 ]] && FST_SLEEP2=60 if workflow_has_parameter CALIB_PROXIES; then CALIB_COMMAND="$GEN_TOPO_MYDIR/aggregator-workflow.sh"