diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/CalibdEdxCorrection.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/CalibdEdxCorrection.h index 22ee80992f432..8a731a61c8a2d 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/CalibdEdxCorrection.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/CalibdEdxCorrection.h @@ -49,9 +49,9 @@ class CalibdEdxCorrection } CalibdEdxCorrection(std::string_view fileName) { loadFromFile(fileName); } #else - CalibdEdxCorrection() CON_DEFAULT; + CalibdEdxCorrection() = default; #endif - ~CalibdEdxCorrection() CON_DEFAULT; + ~CalibdEdxCorrection() = default; GPUd() float getCorrection(const StackID& stack, ChargeType charge, float tgl = 0, float snp = 0) const { diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h index a996f59f51f9e..f3070d456afb1 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h @@ -76,7 +76,7 @@ struct ClusterNative { GPUd() static float unpackPad(uint16_t pad) { return float(pad) * (1.f / scalePadPacked); } GPUd() static float unpackTime(uint32_t time) { return float(time) * (1.f / scaleTimePacked); } - GPUdDefault() ClusterNative() CON_DEFAULT; + GPUdDefault() ClusterNative() = default; GPUd() ClusterNative(uint32_t time, uint8_t flags, uint16_t pad, uint8_t sigmaTime, uint8_t sigmaPad, uint16_t qmax, uint16_t qtot) : padPacked(pad), sigmaTimePacked(sigmaTime), sigmaPadPacked(sigmaPad), qMax(qmax), qTot(qtot) { setTimePackedFlags(time, flags); diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/CompressedClusters.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/CompressedClusters.h index 46da2da2a702e..18ad5c6819344 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/CompressedClusters.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/CompressedClusters.h @@ 
-77,8 +77,8 @@ struct CompressedClustersOffsets : public CompressedClustersPtrs_x class TimeStamp { public: - GPUhdDefault() TimeStamp() CON_DEFAULT; - GPUhdDefault() ~TimeStamp() CON_DEFAULT; + GPUhdDefault() TimeStamp() = default; + GPUhdDefault() ~TimeStamp() = default; GPUdi() TimeStamp(T time) { mTimeStamp = time; } GPUhdi() T getTimeStamp() const { return mTimeStamp; } GPUdi() void setTimeStamp(T t) { mTimeStamp = t; } diff --git a/Detectors/Base/include/DetectorsBase/MatCell.h b/Detectors/Base/include/DetectorsBase/MatCell.h index 88143ddf44b03..40c5fd3db1f69 100644 --- a/Detectors/Base/include/DetectorsBase/MatCell.h +++ b/Detectors/Base/include/DetectorsBase/MatCell.h @@ -31,7 +31,7 @@ struct MatCell { float meanX2X0; ///< fraction of radiaton lenght GPUd() MatCell() : meanRho(0.f), meanX2X0(0.f) {} - GPUdDefault() MatCell(const MatCell& src) CON_DEFAULT; + GPUdDefault() MatCell(const MatCell& src) = default; GPUd() void set(const MatCell& c) { @@ -55,7 +55,7 @@ struct MatBudget : MatCell { float length; ///< length in material GPUd() MatBudget() : length(0.f) {} - GPUdDefault() MatBudget(const MatBudget& src) CON_DEFAULT; + GPUdDefault() MatBudget(const MatBudget& src) = default; GPUd() void scale(float scale) { diff --git a/Detectors/Base/include/DetectorsBase/MatLayerCyl.h b/Detectors/Base/include/DetectorsBase/MatLayerCyl.h index 869234e03f6c1..ca015fa457a1a 100644 --- a/Detectors/Base/include/DetectorsBase/MatLayerCyl.h +++ b/Detectors/Base/include/DetectorsBase/MatLayerCyl.h @@ -56,8 +56,8 @@ class MatLayerCyl : public o2::gpu::FlatObject #ifndef GPUCA_GPUCODE MatLayerCyl(); - MatLayerCyl(const MatLayerCyl& src) CON_DELETE; - ~MatLayerCyl() CON_DEFAULT; + MatLayerCyl(const MatLayerCyl& src) = delete; + ~MatLayerCyl() = default; #endif #ifndef GPUCA_ALIGPUCODE // this part is unvisible on GPU version diff --git a/Detectors/Base/include/DetectorsBase/MatLayerCylSet.h b/Detectors/Base/include/DetectorsBase/MatLayerCylSet.h index 
83fed8caf42eb..c74ce365d378f 100644 --- a/Detectors/Base/include/DetectorsBase/MatLayerCylSet.h +++ b/Detectors/Base/include/DetectorsBase/MatLayerCylSet.h @@ -52,9 +52,9 @@ class MatLayerCylSet : public o2::gpu::FlatObject public: #ifndef GPUCA_GPUCODE - MatLayerCylSet() CON_DEFAULT; - ~MatLayerCylSet() CON_DEFAULT; - MatLayerCylSet(const MatLayerCylSet& src) CON_DELETE; + MatLayerCylSet() = default; + ~MatLayerCylSet() = default; + MatLayerCylSet(const MatLayerCylSet& src) = delete; #endif GPUd() const MatLayerCylSetLayout* get() const { return reinterpret_cast(mFlatBufferPtr); } diff --git a/Detectors/Base/include/DetectorsBase/Ray.h b/Detectors/Base/include/DetectorsBase/Ray.h index 304ad5f00b03f..a72208c41af0d 100644 --- a/Detectors/Base/include/DetectorsBase/Ray.h +++ b/Detectors/Base/include/DetectorsBase/Ray.h @@ -49,7 +49,7 @@ class Ray GPUd() Ray() : mP{0.f}, mD{0.f}, mDistXY2(0.f), mDistXY2i(0.f), mDistXYZ(0.f), mXDxPlusYDy(0.f), mXDxPlusYDyRed(0.f), mXDxPlusYDy2(0.f), mR02(0.f), mR12(0.f) { } - GPUdDefault() ~Ray() CON_DEFAULT; + GPUdDefault() ~Ray() = default; #ifndef GPUCA_ALIGPUCODE // this part is unvisible on GPU version Ray(const math_utils::Point3D point0, const math_utils::Point3D point1); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 67a515df1c730..19edef6c40346 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -1215,14 +1215,14 @@ void processNeighboursHandler(const int startLayer, thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out nCurrentCells + 1, // num_items - 0)); + 0)); // NOLINT: failure in clang-tidy discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes)); gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage temp_storage_bytes, // temp_storage_bytes 
thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out nCurrentCells + 1, // num_items - 0)); + 0)); // NOLINT: failure in clang-tidy thrust::device_vector updatedCellIds(foundSeedsTable.back()) /*, lastCellIds(foundSeedsTable.back())*/; thrust::device_vector updatedCellSeeds(foundSeedsTable.back()) /*, lastCellSeeds(foundSeedsTable.back())*/; diff --git a/GPU/Common/GPUCommonConstants.h b/GPU/Common/GPUCommonConstants.h index f45aa05ed00ca..c6dfedc14ab7e 100644 --- a/GPU/Common/GPUCommonConstants.h +++ b/GPU/Common/GPUCommonConstants.h @@ -17,11 +17,9 @@ #include "GPUCommonDef.h" -#if !defined(__OPENCL1__) namespace GPUCA_NAMESPACE::gpu::gpu_common_constants { -static CONSTEXPR const float kCLight = 0.000299792458f; // TODO: Duplicate of MathConstants, fix this when OpenCL1 is removed +static CONSTEXPR const float kCLight = 0.000299792458f; // TODO: Duplicate of MathConstants, fix this now that we use only OpenCL CPP } -#endif #endif diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index ac3d7279fbaf4..2b3164d16d981 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -30,17 +30,7 @@ //Some GPU configuration settings, must be included first #include "GPUCommonDefSettings.h" -#if !defined(__OPENCL1__) && (!(defined(__CINT__) || defined(__ROOTCINT__)) || defined(__CLING__)) && defined(__cplusplus) && __cplusplus >= 201103L - #define GPUCA_NOCOMPAT // C++11 + No old ROOT5 + No old OpenCL - #ifndef __OPENCL__ - #define GPUCA_NOCOMPAT_ALLOPENCL // + No OpenCL at all - #endif - #ifndef __CINT__ - #define GPUCA_NOCOMPAT_ALLCINT // + No ROOT CINT at all - #endif -#endif - -#if !(defined(__CINT__) || defined(__ROOTCINT__) || defined(__CLING__) || defined(__ROOTCLING__) || defined(G__ROOT)) // No GPU code for ROOT +#if !(defined(__CLING__) || defined(__ROOTCLING__) || defined(G__ROOT)) // No GPU code for ROOT #if defined(__CUDACC__) || defined(__OPENCL__) || defined(__HIPCC__) || 
defined(__OPENCL_HOST__) #define GPUCA_GPUCODE // Compiled by GPU compiler #endif @@ -50,26 +40,11 @@ #endif #endif -// Definitions for C++11 features not supported by CINT / OpenCL -#ifdef GPUCA_NOCOMPAT - #define CON_DELETE = delete - #define CON_DEFAULT = default - #define GPUCA_CPP11_INIT(...) __VA_ARGS__ - #if defined(__cplusplus) && __cplusplus >= 201703L - #define CONSTEXPR constexpr - #else - #define CONSTEXPR - #endif +// Definitions for C++11 features +#if defined(__cplusplus) && __cplusplus >= 201703L + #define CONSTEXPR constexpr #else - #define CON_DELETE - #define CON_DEFAULT #define CONSTEXPR - #define GPUCA_CPP11_INIT(...) -#endif -#if defined(__ROOT__) && !defined(GPUCA_NOCOMPAT) - #define VOLATILE // ROOT5 has a problem with volatile in CINT -#else - #define VOLATILE volatile #endif // Set AliRoot / O2 namespace @@ -82,7 +57,7 @@ #define GPUCA_NAMESPACE o2 #endif -#if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL1__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) || (defined(__OPENCLCPP__) && defined(GPUCA_OPENCLCPP_NO_CONSTANT_MEMORY)) +#if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) #define GPUCA_NO_CONSTANT_MEMORY #elif defined(__CUDACC__) || defined(__HIPCC__) #define GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM diff --git a/GPU/Common/GPUCommonDefAPI.h b/GPU/Common/GPUCommonDefAPI.h index 124a29ecc7a37..23e16c75f098f 100644 --- a/GPU/Common/GPUCommonDefAPI.h +++ b/GPU/Common/GPUCommonDefAPI.h @@ -95,7 +95,7 @@ #define GPUprivate() __private #define GPUgeneric() __generic #define GPUconstexprref() GPUconstexpr() - #if defined(__OPENCLCPP__) && !defined(__clang__) + #if defined(__OPENCL__) && !defined(__clang__) #define GPUbarrier() work_group_barrier(mem_fence::global | mem_fence::local); #define 
GPUbarrierWarp() #define GPUAtomic(type) atomic @@ -103,7 +103,7 @@ #else #define GPUbarrier() barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE) #define GPUbarrierWarp() - #if defined(__OPENCLCPP__) && defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS) + #if defined(__OPENCL__) && defined(GPUCA_OPENCL_CLANG_C11_ATOMICS) namespace GPUCA_NAMESPACE { namespace gpu { template struct oclAtomic; template <> struct oclAtomic {typedef atomic_uint t;}; @@ -114,14 +114,14 @@ #define GPUAtomic(type) volatile type #endif #endif - #if !defined(__OPENCLCPP__) // Other special defines for OpenCL 1 + #if !defined(__OPENCL__) // Other special defines for OpenCL 1 #define GPUCA_USE_TEMPLATE_ADDRESS_SPACES // TODO: check if we can make this (partially, where it is already implemented) compatible with OpenCL CPP #define GPUsharedref() GPUshared() #define GPUglobalref() GPUglobal() #undef GPUgeneric #define GPUgeneric() #endif - #if (!defined(__OPENCLCPP__) || !defined(GPUCA_NO_CONSTANT_MEMORY)) + #if (!defined(__OPENCL__) || !defined(GPUCA_NO_CONSTANT_MEMORY)) #define GPUconstantref() GPUconstant() #endif #elif defined(__HIPCC__) //Defines for HIP diff --git a/GPU/Common/GPUCommonDefSettings.h b/GPU/Common/GPUCommonDefSettings.h index 6a4ef86125a3f..860ca8792eb88 100644 --- a/GPU/Common/GPUCommonDefSettings.h +++ b/GPU/Common/GPUCommonDefSettings.h @@ -22,12 +22,11 @@ #error Please include GPUCommonDef.h! 
#endif -//#define GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS // Use C11 atomic instead of old style atomics for OpenCL C++ in clang (OpenCL 2.2 C++ will use C++11 atomics irrespectively) +//#define GPUCA_OPENCL_CLANG_C11_ATOMICS // Use C11 atomic instead of old style atomics for OpenCL C++ in clang (OpenCL 2.2 C++ will use C++11 atomics irrespectively) //#define GPUCA_CUDA_NO_CONSTANT_MEMORY // Do not use constant memory for CUDA //#define GPUCA_HIP_NO_CONSTANT_MEMORY // Do not use constant memory for HIP -//#define GPUCA_OPENCL_NO_CONSTANT_MEMORY // Do not use constant memory for OpenCL 1.2 -#define GPUCA_OPENCLCPP_NO_CONSTANT_MEMORY // Do not use constant memory for OpenCL C++ - MANDATORY as OpenCL cannot cast between __constant and __generic yet! +#define GPUCA_OPENCL_NO_CONSTANT_MEMORY // Do not use constant memory for OpenCL C++ - MANDATORY as OpenCL cannot cast between __constant and __generic yet! // clang-format on diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 0e5db743d0c57..ec1c3d54096a3 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -31,12 +31,10 @@ #include #endif -#if !defined(__OPENCL1__) namespace GPUCA_NAMESPACE { namespace gpu { -#endif class GPUCommonMath { @@ -147,12 +145,6 @@ class GPUCommonMath template GPUd() CONSTEXPR static T nextMultipleOf(T val); -#ifdef GPUCA_NOCOMPAT - GPUdi() static float Sum2() // Needed for legacy C++, For >=17 the below if constexpr handles the case - { - return 0.f; - } - template GPUdi() static float Sum2(float w, Args... args) { @@ -163,7 +155,6 @@ class GPUCommonMath } return 0; } -#endif private: template @@ -289,7 +280,7 @@ GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c) GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x) { -#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL1__) +#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) return x == 0 ? 
32 : CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available #else for (int32_t i = 31; i >= 0; i--) { @@ -303,7 +294,7 @@ GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x) GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) { -#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && (!defined(__OPENCL__) /* !defined(__OPENCL1__)*/) // TODO: exclude only OPENCLC (not CPP) when reported SPIR-V bug is fixed +#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL__) // TODO: remove OPENCL when reported SPIR-V bug is fixed // use builtin if available return CHOICE(__builtin_popcount(x), __popc(x), __builtin_popcount(x)); #else @@ -439,22 +430,19 @@ GPUhdi() int32_t GPUCommonMath::Abs(int32_t x) GPUhdi() float GPUCommonMath::Copysign(float x, float y) { -#if defined(__OPENCLCPP__) +#if defined(__OPENCL__) return copysign(x, y); #elif defined(GPUCA_GPUCODE) && !defined(__OPENCL__) return copysignf(x, y); -#elif defined(__cplusplus) && __cplusplus >= 201103L - return std::copysignf(x, y); #else - x = GPUCommonMath::Abs(x); - return (y >= 0) ? 
x : -x; + return std::copysignf(x, y); #endif // GPUCA_GPUCODE } template GPUdi() uint32_t GPUCommonMath::AtomicExchInternal(S* addr, T val) { -#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS)) +#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS)) return ::atomic_exchange(addr, val); #elif defined(GPUCA_GPUCODE) && defined(__OPENCL__) return ::atomic_xchg(addr, val); @@ -472,7 +460,7 @@ GPUdi() uint32_t GPUCommonMath::AtomicExchInternal(S* addr, T val) template GPUdi() bool GPUCommonMath::AtomicCASInternal(S* addr, T cmp, T val) { -#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS)) +#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS)) return ::atomic_compare_exchange(addr, cmp, val) == cmp; #elif defined(GPUCA_GPUCODE) && defined(__OPENCL__) return ::atomic_cmpxchg(addr, cmp, val) == cmp; @@ -488,7 +476,7 @@ GPUdi() bool GPUCommonMath::AtomicCASInternal(S* addr, T cmp, T val) template GPUdi() uint32_t GPUCommonMath::AtomicAddInternal(S* addr, T val) { -#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS)) +#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS)) return ::atomic_fetch_add(addr, val); #elif defined(GPUCA_GPUCODE) && defined(__OPENCL__) return ::atomic_add(addr, val); @@ -504,7 +492,7 @@ GPUdi() uint32_t GPUCommonMath::AtomicAddInternal(S* addr, T val) template GPUdi() void GPUCommonMath::AtomicMaxInternal(S* addr, T val) { -#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS)) +#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || 
defined(GPUCA_OPENCL_CLANG_C11_ATOMICS)) ::atomic_fetch_max(addr, val); #elif defined(GPUCA_GPUCODE) && defined(__OPENCL__) ::atomic_max(addr, val); @@ -520,7 +508,7 @@ GPUdi() void GPUCommonMath::AtomicMaxInternal(S* addr, T val) template GPUdi() void GPUCommonMath::AtomicMinInternal(S* addr, T val) { -#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS)) +#if defined(GPUCA_GPUCODE) && defined(__OPENCL__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CLANG_C11_ATOMICS)) ::atomic_fetch_min(addr, val); #elif defined(GPUCA_GPUCODE) && defined(__OPENCL__) ::atomic_min(addr, val); @@ -533,7 +521,7 @@ GPUdi() void GPUCommonMath::AtomicMinInternal(S* addr, T val) #endif // GPUCA_GPUCODE } -#if (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__ROOTCINT__) && !defined(G__ROOT) +#if (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(G__ROOT) #define GPUCA_HAVE_ATOMIC_MINMAX_FLOAT template <> GPUdii() void GPUCommonMath::AtomicMaxInternal(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val) @@ -563,9 +551,7 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt #undef CHOICE -#if !defined(__OPENCL1__) -} -} -#endif +} // namespace gpu +} // namespace GPUCA_NAMESPACE #endif // GPUCOMMONMATH_H diff --git a/GPU/Common/GPUCommonTypeTraits.h b/GPU/Common/GPUCommonTypeTraits.h index 88fcc9b838a65..6d72565d1f1fb 100644 --- a/GPU/Common/GPUCommonTypeTraits.h +++ b/GPU/Common/GPUCommonTypeTraits.h @@ -21,7 +21,7 @@ #ifndef GPUCA_GPUCODE_COMPILEKERNELS #include #endif -#elif !defined(__OPENCL1__) +#else // We just reimplement some type traits in std for the GPU namespace std { diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index df797f4c79419..26c7726d13ca6 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -20,21 +20,8 @@ #include "GPUDataTypes.h" #include "GPUErrors.h" 
-// Dummies for stuff not supported in legacy code (ROOT 5 / OPENCL1.2) -#if defined(GPUCA_NOCOMPAT_ALLCINT) #include "GPUTPCGMMerger.h" -#else -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ -class GPUTPCGMMerger -{ -}; -} // namespace gpu -} // namespace GPUCA_NAMESPACE -#endif -#if defined(GPUCA_NOCOMPAT_ALLCINT) && (!defined(GPUCA_GPUCODE) || !defined(GPUCA_ALIROOT_LIB)) +#if (!defined(GPUCA_GPUCODE) || !defined(GPUCA_ALIROOT_LIB)) #include "GPUTRDTracker.h" #else #include "GPUTRDDef.h" @@ -52,7 +39,7 @@ class GPUTRDTracker_t #endif // Dummies for stuff not suppored in legacy code, or for what requires O2 headers while not available -#if defined(GPUCA_NOCOMPAT_ALLCINT) && (!defined(GPUCA_GPUCODE) || !defined(GPUCA_ALIROOT_LIB)) && defined(GPUCA_HAVE_O2HEADERS) +#if (!defined(GPUCA_GPUCODE) || !defined(GPUCA_ALIROOT_LIB)) && defined(GPUCA_HAVE_O2HEADERS) #include "GPUTPCConvert.h" #include "GPUTPCCompression.h" #include "GPUTPCDecompression.h" @@ -71,12 +58,10 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() struct GPUConstantMem { - MEM_CONSTANT(GPUParam) - param; - MEM_GLOBAL(GPUTPCTracker) - tpcTrackers[GPUCA_NSLICES]; + GPUParam param; + GPUTPCTracker + tpcTrackers[GPUCA_NSLICES]; GPUTPCConvert tpcConverter; GPUTPCCompression tpcCompressor; GPUTPCDecompression tpcDecompressor; @@ -95,7 +80,7 @@ struct GPUConstantMem { GPUKernelDebugOutput debugOutput; #endif -#if defined(GPUCA_HAVE_O2HEADERS) && defined(GPUCA_NOCOMPAT) +#if defined(GPUCA_HAVE_O2HEADERS) template GPUd() auto& getTRDTracker(); #else // GPUCA_HAVE_O2HEADERS @@ -107,7 +92,7 @@ struct GPUConstantMem { #endif // !GPUCA_HAVE_O2HEADERS }; -#if defined(GPUCA_HAVE_O2HEADERS) && defined(GPUCA_NOCOMPAT) +#if defined(GPUCA_HAVE_O2HEADERS) template <> GPUdi() auto& GPUConstantMem::getTRDTracker<0>() { @@ -120,7 +105,6 @@ GPUdi() auto& GPUConstantMem::getTRDTracker<1>() } #endif -#ifdef GPUCA_NOCOMPAT union GPUConstantMemCopyable { #if !defined(__OPENCL__) || defined(__OPENCL_HOST__) GPUh() 
GPUConstantMemCopyable() {} // NOLINT: We want an empty constructor, not a default one @@ -134,9 +118,8 @@ union GPUConstantMemCopyable { #endif GPUConstantMem v; }; -#endif -#if defined(GPUCA_GPUCODE) && defined(GPUCA_NOCOMPAT) +#if defined(GPUCA_GPUCODE) static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + sizeof(uint4) - 1); #endif } // namespace gpu @@ -150,7 +133,7 @@ namespace gpu { // Must be placed here, to avoid circular header dependency -GPUdi() GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * GPUProcessor::GetConstantMem() const +GPUdi() GPUconstantref() const GPUConstantMem* GPUProcessor::GetConstantMem() const { #if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) return &GPUCA_CONSMEM; @@ -159,7 +142,7 @@ GPUdi() GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * GPUProcessor::GetC #endif } -GPUdi() GPUconstantref() const MEM_CONSTANT(GPUParam) & GPUProcessor::Param() const +GPUdi() GPUconstantref() const GPUParam& GPUProcessor::Param() const { return GetConstantMem()->param; } diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.cxx b/GPU/GPUTracking/Base/GPUGeneralKernels.cxx index 8fc60bae6dbe9..44faf09112e5e 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.cxx +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.cxx @@ -17,7 +17,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUMemClean16::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& GPUrestrict() processors, GPUglobalref() void* ptr, uint64_t size) +GPUdii() void GPUMemClean16::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, GPUglobalref() void* ptr, uint64_t size) { const uint64_t stride = get_global_size(0); int4 i0; @@ -30,7 +30,7 @@ GPUdii() void 
GPUMemClean16::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_ } template <> -GPUdii() void GPUitoa::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& GPUrestrict() processors, GPUglobalref() int32_t* ptr, uint64_t size) +GPUdii() void GPUitoa::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, GPUglobalref() int32_t* ptr, uint64_t size) { const uint64_t stride = get_global_size(0); for (uint64_t i = get_global_id(0); i < size; i += stride) { diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index 44314e3393589..9829fe350fde1 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -36,7 +36,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() struct GPUConstantMem; class GPUKernelTemplate @@ -50,7 +49,6 @@ class GPUKernelTemplate step4 = 4, step5 = 5 }; - MEM_CLASS_PRE() struct GPUSharedMemory { }; @@ -82,24 +80,16 @@ class GPUKernelTemplate #endif }; - typedef GPUconstantref() MEM_CONSTANT(GPUConstantMem) processorType; + typedef GPUconstantref() GPUConstantMem processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return &processors; } -#ifdef GPUCA_NOCOMPAT template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& processors, Args... args) + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, Args... 
args) { } -#else - template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& processors) - { - } -#endif }; // Clean memory, ptr multiple of 16, size will be extended to multiple of 16 @@ -108,7 +98,7 @@ class GPUMemClean16 : public GPUKernelTemplate public: GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size); }; // Fill with incrementing sequnce of integers @@ -117,7 +107,7 @@ class GPUitoa : public GPUKernelTemplate public: GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); }; } // namespace gpu diff --git a/GPU/GPUTracking/Base/GPUMemoryResource.h b/GPU/GPUTracking/Base/GPUMemoryResource.h index 6d8125251800f..143a0ead26235 100644 --- a/GPU/GPUTracking/Base/GPUMemoryResource.h +++ b/GPU/GPUTracking/Base/GPUMemoryResource.h @@ -23,7 +23,6 @@ namespace GPUCA_NAMESPACE namespace gpu { -#ifdef GPUCA_NOCOMPAT_ALLOPENCL struct GPUMemoryReuse { enum Type : int32_t { NONE = 0, @@ -50,7 +49,6 @@ struct GPUMemoryReuse 
{ Type type = NONE; ID id = 0; }; -#endif class GPUMemoryResource { @@ -81,7 +79,7 @@ class GPUMemoryResource GPUMemoryResource(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), MemoryType type, const char* name = "") : mProcessor(proc), mPtr(nullptr), mPtrDevice(nullptr), mSetPointers(setPtr), mName(name), mSize(0), mOverrideSize(0), mReuse(-1), mType(type) { } - GPUMemoryResource(const GPUMemoryResource&) CON_DEFAULT; + GPUMemoryResource(const GPUMemoryResource&) = default; void* SetPointers(void* ptr) { diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index 661ae830ca6f3..39b5a18c51eff 100644 --- a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -229,9 +229,7 @@ void GPUParam::LoadClusterErrors(bool Print) if (Print) { typedef std::numeric_limits flt; std::cout << std::scientific; -#if __cplusplus >= 201103L std::cout << std::setprecision(flt::max_digits10 + 2); -#endif std::cout << "ParamS0Par[2][3][7]=" << std::endl; std::cout << " { " << std::endl; for (int32_t i = 0; i < 2; i++) { diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index ce9ac30b7c35b..1a3ff9065dc94 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -22,7 +22,7 @@ #include "GPUTPCGeometry.h" #include "GPUTPCGMPolynomialField.h" -#if !defined(GPUCA_GPUCODE) && defined(GPUCA_NOCOMPAT) +#if !defined(GPUCA_GPUCODE) namespace o2::base { template @@ -78,8 +78,6 @@ struct GPUParam_t { }; } // namespace internal -#if !(defined(__CINT__) || defined(__ROOTCINT__)) || defined(__CLING__) // Hide from ROOT 5 CINT -MEM_CLASS_PRE() struct GPUParam : public internal::GPUParam_t { #ifndef GPUCA_GPUCODE @@ -116,7 +114,6 @@ struct GPUParam : public internal::GPUParam_t GPUd() bool rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, float trackSigmaY) const; }; -#endif } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/Base/GPUParam.inc 
b/GPU/GPUTracking/Base/GPUParam.inc index 41ed3c8f203cb..0b32067f8980c 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -17,17 +17,14 @@ #include "GPUParam.h" #include "GPUTPCGMMergedTrackHit.h" -#if !defined(__OPENCL1__) #include "GPUTPCClusterOccupancyMap.h" -#endif namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::Slice2Global(int32_t iSlice, float x, float y, float z, float* X, float* Y, float* Z) const +GPUdi() void GPUParam::Slice2Global(int32_t iSlice, float x, float y, float z, float* X, float* Y, float* Z) const { // conversion of coordinates sector->global *X = x * SliceParam[iSlice].CosAlpha - y * SliceParam[iSlice].SinAlpha; @@ -35,8 +32,7 @@ GPUdi() void MEM_LG(GPUParam)::Slice2Global(int32_t iSlice, float x, float y, fl *Z = z; } -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::Global2Slice(int32_t iSlice, float X, float Y, float Z, float* x, float* y, float* z) const +GPUdi() void GPUParam::Global2Slice(int32_t iSlice, float X, float Y, float Z, float* x, float* y, float* z) const { // conversion of coordinates global->sector *x = X * SliceParam[iSlice].CosAlpha + Y * SliceParam[iSlice].SinAlpha; @@ -46,8 +42,7 @@ GPUdi() void MEM_LG(GPUParam)::Global2Slice(int32_t iSlice, float X, float Y, fl #ifdef GPUCA_TPC_GEOMETRY_O2 -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const +GPUdi() void GPUParam::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const { const int32_t rowType = tpcGeometry.GetROC(iRow); z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); @@ -62,10 +57,9 @@ GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(uint8_t sector, int32_t ErrZ2 = GetClusterErrorSeeding(1, rowType, z, angleZ2, unscaledMult); // Returns Err2 } 
-MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult) const // Note, returns Err2 despite the name not containing 2 +GPUdi() float GPUParam::GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult) const // Note, returns Err2 despite the name not containing 2 { - MakeType(const float*) c = ParamErrors[yz][type]; // Note: c[0] = p[0]^2, c[1] = p[1]^2 * padHeight, c[2] = p[2]^2 / tpcLength / padHeight, c[3] = p[3]^2 * clusterErrorOccupancyScaler^2 + const float* c = ParamErrors[yz][type]; // Note: c[0] = p[0]^2, c[1] = p[1]^2 * padHeight, c[2] = p[2]^2 / tpcLength / padHeight, c[3] = p[3]^2 * clusterErrorOccupancyScaler^2 float v = c[0] + c[1] * angle2 + c[2] * zDiff + c[3] * (unscaledMult * unscaledMult); v = CAMath::Abs(v); v *= yz ? rec.tpc.clusterError2CorrectionZ : rec.tpc.clusterError2CorrectionY; @@ -73,10 +67,9 @@ GPUdi() float MEM_LG(GPUParam)::GetClusterErrorSeeding(int32_t yz, int32_t type, return v; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetClusterError2(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult, float scaledInvAvgCharge, float scaledInvCharge) const +GPUdi() float GPUParam::GetClusterError2(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult, float scaledInvAvgCharge, float scaledInvCharge) const { - MakeType(const float*) c = ParamErrors[yz][type]; // Note: c[0] = p[0]^2, c[1] = p[1]^2 * padHeight, c[2] = p[2]^2 / tpcLength / padHeight, c[3] = p[3]^2 * clusterErrorOccupancyScaler^2 + const float* c = ParamErrors[yz][type]; // Note: c[0] = p[0]^2, c[1] = p[1]^2 * padHeight, c[2] = p[2]^2 / tpcLength / padHeight, c[3] = p[3]^2 * clusterErrorOccupancyScaler^2 float v = c[0] + c[1] * angle2 * scaledInvAvgCharge + c[2] * zDiff * scaledInvCharge + c[3] * (unscaledMult * unscaledMult) * (scaledInvAvgCharge * scaledInvAvgCharge); v = CAMath::Abs(v); v *= yz ? 
rec.tpc.clusterError2CorrectionZ : rec.tpc.clusterError2CorrectionY; @@ -84,8 +77,7 @@ GPUdi() float MEM_LG(GPUParam)::GetClusterError2(int32_t yz, int32_t type, float return v; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorIFC2(float x, float y, float z, bool sideC) const +GPUdi() float GPUParam::GetSystematicClusterErrorIFC2(float x, float y, float z, bool sideC) const { float sysErr = 0.f; const float kMaxExpArg = 9.f; // limit r-dumped error to this exp. argument @@ -118,8 +110,7 @@ GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorIFC2(float x, float y, return sysErr; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorC122(float x, float y, uint8_t sector) const +GPUdi() float GPUParam::GetSystematicClusterErrorC122(float x, float y, uint8_t sector) const { const float dx = x - 83.f; if (dx > occupancyTotal * rec.tpc.sysClusErrorC12Box) { @@ -133,17 +124,15 @@ GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorC122(float x, float y, #else // GPUCA_TPC_GEOMETRY_O2 -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float scaledMult) const +GPUdi() float GPUParam::GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float scaledMult) const { - MakeType(const float*) c = ParamErrorsSeeding0[yz][type]; + const float* c = ParamErrorsSeeding0[yz][type]; float v = c[0] + c[1] * zDiff + c[2] * angle2; v = CAMath::Abs(v); return v; } -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const +GPUdi() void GPUParam::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const { int32_t rowType = tpcGeometry.GetROC(iRow); z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); @@ -158,10 +147,9 @@ GPUdi() 
void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(uint8_t sector, int32_t ErrZ2 = ErrZ2 * ErrZ2 * rec.tpc.clusterError2CorrectionZ + rec.tpc.clusterError2AdditionalZ; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetClusterError2(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult, float avgInvCharge, float invCharge) const +GPUdi() float GPUParam::GetClusterError2(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult, float avgInvCharge, float invCharge) const { - MakeType(const float*) c = ParamS0Par[yz][type]; + const float* c = ParamS0Par[yz][type]; float v = c[0] + c[1] * zDiff + c[2] * angle2 + c[3] * zDiff * zDiff + c[4] * angle2 * angle2 + c[5] * zDiff * angle2; v = CAMath::Abs(v); if (v < 0.0001f) { @@ -172,22 +160,19 @@ GPUdi() float MEM_LG(GPUParam)::GetClusterError2(int32_t yz, int32_t type, float return v; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorIFC2(float trackX, float trackY, float z, bool sideC) const +GPUdi() float GPUParam::GetSystematicClusterErrorIFC2(float trackX, float trackY, float z, bool sideC) const { return 0; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorC122(float trackX, float trackY, uint8_t sector) const +GPUdi() float GPUParam::GetSystematicClusterErrorC122(float trackX, float trackY, uint8_t sector) const { return 0; } #endif // !GPUCA_TPC_GEOMETRY_O2 -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::GetClusterErrors2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float avgInvCharge, float invCharge, float& ErrY2, float& ErrZ2) const +GPUdi() void GPUParam::GetClusterErrors2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float avgInvCharge, float invCharge, float& ErrY2, float& ErrZ2) const { const int32_t rowType = tpcGeometry.GetROC(iRow); z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); @@ -204,8 +189,7 @@ GPUdi() void 
MEM_LG(GPUParam)::GetClusterErrors2(uint8_t sector, int32_t iRow, f ErrZ2 = GetClusterError2(1, rowType, z, angleZ2, unscaledMult, scaledInvAvgCharge, scaledInvCharge); } -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::UpdateClusterError2ByState(int16_t clusterState, float& ErrY2, float& ErrZ2) const +GPUdi() void GPUParam::UpdateClusterError2ByState(int16_t clusterState, float& ErrY2, float& ErrZ2) const { if (clusterState & GPUTPCGMMergedTrackHit::flagEdge) { ErrY2 += rec.tpc.extraClusterErrorEdgeY2; @@ -225,22 +209,16 @@ GPUdi() void MEM_LG(GPUParam)::UpdateClusterError2ByState(int16_t clusterState, } } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetUnscaledMult(float time) const +GPUdi() float GPUParam::GetUnscaledMult(float time) const { -#if !defined(__OPENCL1__) if (!occupancyMap) { return 0.f; } const uint32_t bin = CAMath::Max(0.f, time / rec.tpc.occupancyMapTimeBins); return occupancyMap[bin]; -#else - return 0.f; -#endif } -MEM_CLASS_PRE() -GPUdi() bool MEM_LG(GPUParam)::rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, float trackSigmaY) const +GPUdi() bool GPUParam::rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, float trackSigmaY) const { return CAMath::Abs(uncorrectedY) > (tpcGeometry.NPads(iRow) - 1) * 0.5f * tpcGeometry.PadWidth(iRow) + rec.tpc.rejectEdgeClustersMargin + trackSigmaY * rec.tpc.rejectEdgeClustersSigmaMargin; } diff --git a/GPU/GPUTracking/Base/GPUProcessor.h b/GPU/GPUTracking/Base/GPUProcessor.h index 95b56a5c4cd28..eb635ae210b73 100644 --- a/GPU/GPUTracking/Base/GPUProcessor.h +++ b/GPU/GPUTracking/Base/GPUProcessor.h @@ -29,9 +29,7 @@ namespace gpu { struct GPUTrackingInOutPointers; class GPUReconstruction; -MEM_CLASS_PRE() struct GPUParam; -MEM_CLASS_PRE() struct GPUConstantMem; class GPUProcessor @@ -48,12 +46,12 @@ class GPUProcessor #ifndef GPUCA_GPUCODE GPUProcessor(); ~GPUProcessor(); - GPUProcessor(const GPUProcessor&) CON_DELETE; - GPUProcessor& operator=(const GPUProcessor&) CON_DELETE; + 
GPUProcessor(const GPUProcessor&) = delete; + GPUProcessor& operator=(const GPUProcessor&) = delete; #endif - GPUd() GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * GetConstantMem() const; // Body in GPUConstantMem.h to avoid circular headers - GPUd() GPUconstantref() const MEM_CONSTANT(GPUParam) & Param() const; // ... + GPUd() GPUconstantref() const GPUConstantMem* GetConstantMem() const; // Body in GPUConstantMem.h to avoid circular headers + GPUd() GPUconstantref() const GPUParam& Param() const; // ... GPUd() void raiseError(uint32_t code, uint32_t param1 = 0, uint32_t param2 = 0, uint32_t param3 = 0) const; const GPUReconstruction& GetRec() const { return *mRec; } @@ -152,7 +150,7 @@ class GPUProcessor GPUReconstruction* mRec; ProcessorType mGPUProcessorType; GPUProcessor* mLinkedProcessor; - GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mConstantMem; + GPUconstantref() const GPUConstantMem* mConstantMem; private: bool mAllocateAndInitializeLate; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index efad0b41fd571..6951646dff840 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -390,7 +390,7 @@ class GPUReconstruction void* mGPULib; void* mGPUEntry; }; - static std::shared_ptr sLibCUDA, sLibHIP, sLibOCL, sLibOCL2; + static std::shared_ptr sLibCUDA, sLibHIP, sLibOCL; static GPUReconstruction* GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend& cfg); }; diff --git a/GPU/GPUTracking/Base/GPUReconstructionAvailableBackends.template.h b/GPU/GPUTracking/Base/GPUReconstructionAvailableBackends.template.h index 77c57533ba541..35892db121f50 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionAvailableBackends.template.h +++ b/GPU/GPUTracking/Base/GPUReconstructionAvailableBackends.template.h @@ -14,5 +14,4 @@ #cmakedefine CUDA_ENABLED #cmakedefine HIP_ENABLED -#cmakedefine OPENCL1_ENABLED -#cmakedefine OPENCL2_ENABLED +#cmakedefine OPENCL_ENABLED 
diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index 70eedd0ca86d1..c9155c1cb8f60 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -28,7 +28,6 @@ using namespace GPUCA_NAMESPACE::gpu; #endif #include -MEM_CLASS_PRE() class GPUTPCRow; #define SemLockName "AliceHLTTPCGPUTrackerInitLockSem" diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index 9746250ea3bd3..5a3f02efe7e3b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -25,7 +25,7 @@ namespace GPUCA_NAMESPACE { namespace gpu { -#if !(defined(__CINT__) || defined(__ROOTCINT__) || defined(__CLING__) || defined(__ROOTCLING__) || defined(G__ROOT)) +#if !(defined(__CLING__) || defined(__ROOTCLING__) || defined(G__ROOT)) extern template class GPUReconstructionKernels; #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h b/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h index 8194214a180e4..1def09c61e606 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h @@ -15,14 +15,14 @@ // No header protection, this may be used multiple times #include "GPUReconstructionKernelMacros.h" -#if !defined(GPUCA_OPENCL1) && (!defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE)) -#define GPUCA_KRNL_NOOCL1 +#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) +#define GPUCA_KRNL_NOALIROOT #endif // clang-format off $,> // clang-format on -#ifdef GPUCA_KRNL_NOOCL1 -#undef GPUCA_KRNL_NOOCL1 +#ifdef GPUCA_KRNL_NOALIROOT +#undef GPUCA_KRNL_NOALIROOT #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index de6d5d079cd00..295e6e1a5d9b7 100644 --- 
a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -59,7 +59,7 @@ #else #define GPUCA_KRNLGPU_SINGLE(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ { \ - GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::MEM_LOCAL(GPUSharedMemory) smem; \ + GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ GPUCA_M_STRIP_FIRST(x_class)::template Thread(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[iSlice_internal] GPUCA_M_STRIP(x_forward)); \ } #endif @@ -76,7 +76,7 @@ const int32_t nSliceBlockOffset = get_num_groups(0) * iSlice_internal / nSliceCount; \ const int32_t sliceBlockId = get_group_id(0) - nSliceBlockOffset; \ const int32_t sliceGridDim = get_num_groups(0) * (iSlice_internal + 1) / nSliceCount - get_num_groups(0) * (iSlice_internal) / nSliceCount; \ - GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::MEM_LOCAL(GPUSharedMemory) smem; \ + GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ GPUCA_M_STRIP_FIRST(x_class)::template Thread(sliceGridDim, get_local_size(0), sliceBlockId, get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[firstSlice + iSlice_internal] GPUCA_M_STRIP(x_forward)); \ } #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx index d4d7b12dc8cc6..f3749c160c3ff 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx @@ -103,12 +103,8 @@ std::shared_ptr* GPUReconstruction::GetLibrary return &sLibHIP; #endif } else if (type == DeviceType::OCL) { -#ifdef OPENCL1_ENABLED +#ifdef OPENCL_ENABLED return &sLibOCL; -#endif - } else if (type == DeviceType::OCL2) { -#ifdef OPENCL2_ENABLED - return &sLibOCL2; #endif } else { 
GPUError("Error: Invalid device type %u", (uint32_t)type); @@ -133,7 +129,6 @@ GPUReconstruction* GPUReconstruction::CreateInstance(const char* type, bool forc std::shared_ptr GPUReconstruction::sLibCUDA(new GPUReconstruction::LibraryLoader("lib" LIBRARY_PREFIX "GPUTrackingCUDA" LIBRARY_EXTENSION, "GPUReconstruction_Create_CUDA")); std::shared_ptr GPUReconstruction::sLibHIP(new GPUReconstruction::LibraryLoader("lib" LIBRARY_PREFIX "GPUTrackingHIP" LIBRARY_EXTENSION, "GPUReconstruction_Create_HIP")); std::shared_ptr GPUReconstruction::sLibOCL(new GPUReconstruction::LibraryLoader("lib" LIBRARY_PREFIX "GPUTrackingOCL" LIBRARY_EXTENSION, "GPUReconstruction_Create_OCL")); -std::shared_ptr GPUReconstruction::sLibOCL2(new GPUReconstruction::LibraryLoader("lib" LIBRARY_PREFIX "GPUTrackingOCL2" LIBRARY_EXTENSION, "GPUReconstruction_Create_OCL2")); GPUReconstruction::LibraryLoader::LibraryLoader(const char* lib, const char* func) : mLibName(lib), mFuncName(func), mGPULib(nullptr), mGPUEntry(nullptr) {} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 9f043915efb19..dd35a23d67c21 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -625,7 +625,7 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies() int32_t maxBlocks = 0, threads = 0, suggestedBlocks = 0, nRegs = 0, sMem = 0; GPUFailedMsg(cudaSetDevice(mDeviceId)); for (uint32_t i = 0; i < mInternals->kernelFunctions.size(); i++) { - GPUFailedMsg(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); + GPUFailedMsg(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); // NOLINT: failure in clang-tidy GPUFailedMsg(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); GPUFailedMsg(cuFuncGetAttribute(&nRegs, 
CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); GPUFailedMsg(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); diff --git a/GPU/GPUTracking/Base/opencl-common/CMakeLists.txt b/GPU/GPUTracking/Base/opencl-common/CMakeLists.txt deleted file mode 100644 index 5e49b7a81a85b..0000000000000 --- a/GPU/GPUTracking/Base/opencl-common/CMakeLists.txt +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2019-2020 CERN and copyright holders of ALICE O2. -# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -# All rights not expressly granted are reserved. -# -# This software is distributed under the terms of the GNU General Public -# License v3 (GPL Version 3), copied verbatim in the file "COPYING". -# -# In applying this license CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. - -set(MODULE GPUTrackingOpenCLCommon) - -set(SRCS GPUReconstructionOCL.cxx) -set(HDRS GPUReconstructionOCL.h GPUReconstructionOCLInternals.h) - -if(ALIGPU_BUILD_TYPE STREQUAL "O2") - o2_add_library(${MODULE} - SOURCES ${SRCS} - PUBLIC_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_LIST_DIR} - PUBLIC_LINK_LIBRARIES OpenCL::OpenCL O2::GPUTracking - TARGETVARNAME targetName) - - target_compile_definitions(${targetName} PRIVATE $) - # the compile_defitions are not propagated automatically on purpose (they are - # declared PRIVATE) so we are not leaking them outside of the GPU** - # directories - - install(FILES ${HDRS} DESTINATION include/GPU) -endif() - -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - # Generate the dictionary - get_directory_property(incdirs INCLUDE_DIRECTORIES) - generate_dictionary("Ali${MODULE}" "" "GPUReconstructionOCL.h" "${incdirs} .") - - # Generate the ROOT map - generate_rootmap("Ali${MODULE}" "" "") - - # Add a library to the project using the specified source files - 
add_library_tested(Ali${MODULE} SHARED ${SRCS} G__Ali${MODULE}.cxx) - # AMD OpenCL run-time and driver - target_link_libraries(Ali${MODULE} PUBLIC OpenCL AliGPUTracking) - - # Installation - install(TARGETS Ali${MODULE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - - install(FILES ${HDRS} DESTINATION include) -endif() - -if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - add_library(${MODULE} SHARED ${SRCS}) - target_link_libraries(${MODULE} GPUTracking OpenCL) - target_include_directories(${MODULE} PUBLIC ${CMAKE_CURRENT_LIST_DIR}) - install(TARGETS ${MODULE}) -endif() diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 1ad9041f70997..58d0e5f40b593 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -12,17 +12,7 @@ set(MODULE GPUTrackingOCL) enable_language(ASM) -# AMD APP SDK required for OpenCL tracker as it's using specific extensions -# (currently) not provided by other vendors - -if(NOT AMDAPPSDKROOT) - message( - FATAL_ERROR - "AMDAPPSDKROOT not set. Please install AMD APP SDK and set $AMDAPPSDKROOT or disable ENABLE_OPENCL1." 
- ) -endif() - -message(STATUS "Building GPUTracking with OpenCL 1.2 support") +message(STATUS "Building GPUTracking with OpenCL support") # convenience variables if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") @@ -30,53 +20,72 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") else() set(GPUDIR ${CMAKE_SOURCE_DIR}/GPU/GPUTracking) endif() -set(CL_SRC ${GPUDIR}/Base/opencl-common/GPUReconstructionOCL.cl) -set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCL1Code.bin) +set(CL_SRC ${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl) +set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode) -# build the OpenCL compile wrapper : -# -# * checks the correct vendor implementation (AMD) -# * builds binary code (blob) for the found platform(s) -add_executable(opencl_compiler - ${GPUDIR}/utils/makefile_opencl_compiler.cxx) -target_link_libraries(opencl_compiler PUBLIC OpenCL::OpenCL) -set_property(TARGET opencl_compiler - PROPERTY RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) - -if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - set(OPENCL_HEADER_FILTER "${CMAKE_SOURCE_DIR}") +set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021) +if(NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH}) + set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) else() - set(OPENCL_HEADER_FILTER "${CMAKE_SOURCE_DIR}/GPU") +set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt) +endif() +set(OCL_DEFINECL "-D$,$-D>" + "-I$,EXCLUDE,^/usr/include/?>,$-I>" + -I${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src + -I${CMAKE_SOURCE_DIR}/Detectors/Base/src + -I${CMAKE_SOURCE_DIR}/DataFormats/Reconstruction/src +) + +set(SRCS GPUReconstructionOCL.cxx) +set(HDRS GPUReconstructionOCL.h GPUReconstructionOCLInternals.h) + +if(OPENCL_ENABLED_SPIRV) # BUILD OpenCL intermediate code for SPIR-V target + # executes clang to create llvm IL code + # Add -fintegrated-objemitter once we switch to clang >= 17 + cmake_path(GET LLVM_SPIRV 
PARENT_PATH TMP_LLVM_SPIRV_PATH) + add_custom_command( + OUTPUT ${CL_BIN}.spirv + COMMAND ${CMAKE_COMMAND} -E env "PATH=${TMP_LLVM_SPIRV_PATH}:\$$PATH" ${LLVM_CLANG} + -O0 + --target=spirv64 + -fno-integrated-objemitter + -ferror-limit=1000 -Wno-invalid-constexpr -Wno-unused-command-line-argument + ${OCL_FLAGS} + ${OCL_DEFINECL} + -o ${CL_BIN}.spirv -c ${CL_SRC} + MAIN_DEPENDENCY ${CL_SRC} + IMPLICIT_DEPENDS CXX ${CL_SRC} + COMMAND_EXPAND_LISTS + COMMENT "Compiling OpenCL CL source file ${CL_SRC} to SPIRV ${CL_BIN}.spirv") + + create_binary_resource(${CL_BIN}.spirv ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.spirv.o) + set(SRCS ${SRCS} ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.spirv.o) +endif() + +if(OPENCL_ENABLED) # BUILD OpenCL source code for runtime compilation target + # executes clang to preprocess + add_custom_command( + OUTPUT ${CL_BIN}.src + COMMAND ${LLVM_CLANG} + -Wno-unused-command-line-argument + ${OCL_FLAGS} + ${OCL_DEFINECL} + -cl-no-stdinc + -nostdinc + -E ${CL_SRC} > ${CL_BIN}.src + MAIN_DEPENDENCY ${CL_SRC} + IMPLICIT_DEPENDS CXX ${CL_SRC} + COMMAND_EXPAND_LISTS + COMMENT "Preparing OpenCL CL source file for run time compilation ${CL_BIN}.src") + + create_binary_resource(${CL_BIN}.src ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.src.o) + set(SRCS ${SRCS} ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.src.o) endif() -set(OPENCL_HEADER_FILTER "^${OPENCL_HEADER_FILTER}|^${CMAKE_BINARY_DIR}.*include_gpu_onthefly") - -# executes OpenCL compiler wrapper to build binary object -add_custom_command( - OUTPUT ${CL_BIN} - COMMAND LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$ - $ - -output-file - ${CL_BIN} - ${CL_SRC} - -- - "-D$,$-D>" - "-I$,EXCLUDE,^/usr>,INCLUDE,${OPENCL_HEADER_FILTER}>,$-I>" - -x clc++ - MAIN_DEPENDENCY ${CL_SRC} - IMPLICIT_DEPENDS CXX ${CL_SRC} - COMMAND_EXPAND_LISTS - COMMENT "Compiling OpenCL1 CL source file ${CL_SRC}") - -create_binary_resource(${CL_BIN} ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.o) 
- -set(SRCS GPUReconstructionOCL1.cxx - ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.o) -set(HDRS GPUReconstructionOCL1.h GPUReconstructionOCL1Internals.h) if(ALIGPU_BUILD_TYPE STREQUAL "O2") o2_add_library(${MODULE} SOURCES ${SRCS} - PUBLIC_LINK_LIBRARIES O2::GPUTrackingOpenCLCommon + PUBLIC_LINK_LIBRARIES OpenCL::OpenCL O2::GPUTracking TARGETVARNAME targetName) target_compile_definitions(${targetName} PRIVATE $) @@ -90,23 +99,30 @@ endif() if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") # Generate the dictionary get_directory_property(incdirs INCLUDE_DIRECTORIES) - generate_dictionary("Ali${MODULE}" "" "GPUReconstructionOCL1.h" "${incdirs} .") + generate_dictionary("Ali${MODULE}" "" "GPUReconstructionOCL.h" "${incdirs} .") # Generate the ROOT map generate_rootmap("Ali${MODULE}" "" "") # Add a library to the project using the specified source files add_library_tested(Ali${MODULE} SHARED ${SRCS} G__Ali${MODULE}.cxx) - target_link_libraries(Ali${MODULE} PUBLIC AliGPUTrackingOpenCLCommon) + target_link_libraries(Ali${MODULE} PUBLIC OpenCL AliGPUTracking) # Installation install(TARGETS Ali${MODULE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) install(FILES ${HDRS} DESTINATION include) + set(targetName Ali${MODULE}) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_library(${MODULE} SHARED ${SRCS}) - target_link_libraries(${MODULE} GPUTrackingOpenCLCommon) + target_link_libraries(${MODULE} GPUTracking OpenCL) install(TARGETS ${MODULE}) + set(targetName ${MODULE}) +endif() + +if(OPENCL_ENABLED_SPIRV) + target_compile_definitions(${targetName} PRIVATE OPENCL_ENABLED_SPIRV) endif() +target_compile_definitions(${targetName} PRIVATE OCL_FLAGS=$) diff --git a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl similarity index 78% rename from GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl rename to GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index 672c4b63eb476..863cd82cb56eb 100644 
--- a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -15,14 +15,12 @@ // clang-format off #define __OPENCL__ #if defined(__cplusplus) && __cplusplus >= 201703L - #define __OPENCLCPP__ -#else - #define __OPENCL1__ + #define __OPENCL__ #endif #define GPUCA_GPUTYPE_OPENCL -#ifdef __OPENCLCPP__ - #ifdef GPUCA_OPENCLCPP_NO_CONSTANT_MEMORY +#ifdef __OPENCL__ + #ifdef GPUCA_OPENCL_NO_CONSTANT_MEMORY #define GPUCA_NO_CONSTANT_MEMORY #endif #pragma OPENCL EXTENSION cl_khr_fp64 : enable // Allow double precision variables @@ -57,9 +55,6 @@ #define M_PI 3.1415926535f #endif #else - #ifdef GPUCA_OPENCL_NO_CONSTANT_MEMORY - #define GPUCA_NO_CONSTANT_MEMORY - #endif #define nullptr NULL #define NULL (0x0) #endif @@ -77,39 +72,16 @@ typedef signed char int8_t; #undef assert #endif #define assert(param) -#ifndef __OPENCLCPP__ -#define static_assert(...) -#define GPUCA_OPENCL1 -#endif #include "GPUConstantMem.h" -#ifdef __OPENCLCPP__ #include "GPUReconstructionIncludesDeviceAll.h" -#else // Workaround, since OpenCL1 cannot digest all files -#include "GPUTPCTrackParam.cxx" -#include "GPUTPCTrack.cxx" -#include "GPUTPCGrid.cxx" -#include "GPUTPCRow.cxx" -#include "GPUTPCTracker.cxx" - -#include "GPUGeneralKernels.cxx" -#include "GPUErrors.cxx" - -#include "GPUTPCTrackletSelector.cxx" -#include "GPUTPCNeighboursFinder.cxx" -#include "GPUTPCNeighboursCleaner.cxx" -#include "GPUTPCStartHitsFinder.cxx" -#include "GPUTPCStartHitsSorter.cxx" -#include "GPUTPCTrackletConstructor.cxx" -#include "GPUTPCGlobalTracking.cxx" -#endif // if (gpu_mem != pTracker.GPUParametersConst()->gpumem) return; //TODO! #define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) #define GPUCA_KRNL_LOAD_single(...) GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) #define GPUCA_KRNL_LOAD_multi(...) 
GPUCA_KRNLGPU_MULTI(__VA_ARGS__) -#define GPUCA_CONSMEM_PTR GPUglobal() char *gpu_mem, GPUconstant() MEM_CONSTANT(GPUConstantMem) * pConstant, +#define GPUCA_CONSMEM_PTR GPUglobal() char *gpu_mem, GPUconstant() GPUConstantMem* pConstant, #define GPUCA_CONSMEM (*pConstant) #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx similarity index 76% rename from GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cxx rename to GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index d5b10afeb68f2..f05780e86fe62 100644 --- a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -33,11 +33,19 @@ using namespace GPUCA_NAMESPACE::gpu; } #define GPUCA_KRNL(x_class, x_attributes, ...) GPUCA_KRNL_PROP(x_class, x_attributes) -#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionOCL +#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionOCLBackend #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL -GPUReconstructionOCL::GPUReconstructionOCL(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof(GPUReconstructionDeviceBase)) +#include "utils/qGetLdBinarySymbols.h" +QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_src); +#ifdef OPENCL_ENABLED_SPIRV +QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_spirv); +#endif + +GPUReconstruction* GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL(cfg); } + +GPUReconstructionOCLBackend::GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof(GPUReconstructionDeviceBase)) { if (mMaster == nullptr) { mInternals = new GPUReconstructionOCLInternals; @@ -45,7 +53,7 @@ GPUReconstructionOCL::GPUReconstructionOCL(const GPUSettingsDeviceBackend& cfg) mDeviceBackendSettings.deviceType = DeviceType::OCL; } 
-GPUReconstructionOCL::~GPUReconstructionOCL() +GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() { Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit if (mMaster == nullptr) { @@ -53,7 +61,7 @@ GPUReconstructionOCL::~GPUReconstructionOCL() } } -int32_t GPUReconstructionOCL::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line) +int32_t GPUReconstructionOCLBackend::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line) { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error == CL_SUCCESS) { @@ -63,7 +71,7 @@ int32_t GPUReconstructionOCL::GPUFailedMsgAI(const int64_t error, const char* fi return 1; } -void GPUReconstructionOCL::GPUFailedMsgA(const int64_t error, const char* file, int32_t line) +void GPUReconstructionOCLBackend::GPUFailedMsgA(const int64_t error, const char* file, int32_t line) { if (GPUFailedMsgAI(error, file, line)) { static bool runningCallbacks = false; @@ -75,12 +83,12 @@ void GPUReconstructionOCL::GPUFailedMsgA(const int64_t error, const char* file, } } -void GPUReconstructionOCL::UpdateAutomaticProcessingSettings() +void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() { GPUCA_GPUReconstructionUpdateDefaults(); } -int32_t GPUReconstructionOCL::InitDevice_Runtime() +int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() { if (mMaster == nullptr) { cl_int ocl_error; @@ -386,7 +394,7 @@ int32_t GPUReconstructionOCL::InitDevice_Runtime() return (0); } -int32_t GPUReconstructionOCL::ExitDevice_Runtime() +int32_t GPUReconstructionOCLBackend::ExitDevice_Runtime() { // Uninitialize OPENCL SynchronizeGPU(); @@ -418,7 +426,7 @@ int32_t GPUReconstructionOCL::ExitDevice_Runtime() return (0); } -size_t GPUReconstructionOCL::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents) +size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, 
const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents) { if (evList == nullptr) { nEvents = 0; @@ -442,7 +450,7 @@ size_t GPUReconstructionOCL::GPUMemCpy(void* dst, const void* src, size_t size, return size; } -size_t GPUReconstructionOCL::WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream, deviceEvent* ev) +size_t GPUReconstructionOCLBackend::WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream, deviceEvent* ev) { if (stream == -1) { SynchronizeGPU(); @@ -454,11 +462,11 @@ size_t GPUReconstructionOCL::WriteToConstantMemory(size_t offset, const void* sr return size; } -void GPUReconstructionOCL::ReleaseEvent(deviceEvent ev) { GPUFailedMsg(clReleaseEvent(ev.get())); } +void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUFailedMsg(clReleaseEvent(ev.get())); } -void GPUReconstructionOCL::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } +void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } -int32_t GPUReconstructionOCL::DoStuckProtection(int32_t stream, deviceEvent event) +int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEvent event) { if (mProcessingSettings.stuckProtection) { cl_int tmp = 0; @@ -479,25 +487,25 @@ int32_t GPUReconstructionOCL::DoStuckProtection(int32_t stream, deviceEvent even return 0; } -void GPUReconstructionOCL::SynchronizeGPU() +void GPUReconstructionOCLBackend::SynchronizeGPU() { for (int32_t i = 0; i < mNStreams; i++) { GPUFailedMsg(clFinish(mInternals->command_queue[i])); } } -void GPUReconstructionOCL::SynchronizeStream(int32_t stream) { GPUFailedMsg(clFinish(mInternals->command_queue[stream])); } +void 
GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { GPUFailedMsg(clFinish(mInternals->command_queue[stream])); } -void GPUReconstructionOCL::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUFailedMsg(clWaitForEvents(nEvents, evList->getEventList())); } +void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUFailedMsg(clWaitForEvents(nEvents, evList->getEventList())); } -void GPUReconstructionOCL::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) +void GPUReconstructionOCLBackend::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { if (nEvents) { GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); } } -bool GPUReconstructionOCL::IsEventDone(deviceEvent* evList, int32_t nEvents) +bool GPUReconstructionOCLBackend::IsEventDone(deviceEvent* evList, int32_t nEvents) { cl_int eventdone; for (int32_t i = 0; i < nEvents; i++) { @@ -509,7 +517,7 @@ bool GPUReconstructionOCL::IsEventDone(deviceEvent* evList, int32_t nEvents) return true; } -int32_t GPUReconstructionOCL::GPUDebug(const char* state, int32_t stream, bool force) +int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, bool force) { // Wait for OPENCL-Kernel to finish and check for OPENCL errors afterwards, in case of debugmode if (!force && mProcessingSettings.debugLevel <= 0) { @@ -525,3 +533,108 @@ int32_t GPUReconstructionOCL::GPUDebug(const char* state, int32_t stream, bool f } return (0); } + +template +int32_t GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args) +{ + cl_kernel k = args.s.y.num > 1 ? getKernelObject() : getKernelObject(); + return std::apply([this, &args, &k](auto&... vals) { return runKernelBackendInternal(args.s, k, vals...); }, args.v); +} + +template +S& GPUReconstructionOCLBackend::getKernelObject() +{ + static uint32_t krnl = FindKernel(MULTI ? 
2 : 1); + return mInternals->kernels[krnl].first; +} + +int32_t GPUReconstructionOCLBackend::GetOCLPrograms() +{ + char platform_version[256] = {}; + GPUFailedMsg(clGetPlatformInfo(mInternals->platform, CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr)); + float ver = 0; + sscanf(platform_version, "OpenCL %f", &ver); + + cl_int ocl_error; + + const char* ocl_flags = GPUCA_M_STR(OCL_FLAGS); + +#ifdef OPENCL_ENABLED_SPIRV // clang-format off + if (ver >= 2.2f && !GetProcessingSettings().oclCompileFromSources) { + GPUInfo("Reading OpenCL program from SPIR-V IL (Platform version %4.2f)", ver); + mInternals->program = clCreateProgramWithIL(mInternals->context, _binary_GPUReconstructionOCLCode_spirv_start, _binary_GPUReconstructionOCLCode_spirv_len, &ocl_error); + ocl_flags = ""; + } else +#endif // clang-format on + { + GPUInfo("Compiling OpenCL program from sources (Platform version %4.2f)", ver); + size_t program_sizes[1] = {_binary_GPUReconstructionOCLCode_src_len}; + char* programs_sources[1] = {_binary_GPUReconstructionOCLCode_src_start}; + mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error); + } + + if (GPUFailedMsgI(ocl_error)) { + GPUError("Error creating OpenCL program from binary"); + return 1; + } + + if (GPUFailedMsgI(clBuildProgram(mInternals->program, 1, &mInternals->device, ocl_flags, nullptr, nullptr))) { + cl_build_status status; + if (GPUFailedMsgI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { + size_t log_size; + clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size); + std::unique_ptr build_log(new char[log_size + 1]); + clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, log_size, build_log.get(), nullptr); + build_log[log_size] = 0; + 
GPUError("Build Log:\n\n%s\n", build_log.get()); + } + return 1; + } + +#define GPUCA_KRNL(...) \ + GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) +#define GPUCA_KRNL_LOAD_single(x_class, ...) \ + if (AddKernel(false)) { \ + return 1; \ + } +#define GPUCA_KRNL_LOAD_multi(x_class, ...) \ + if (AddKernel(true)) { \ + return 1; \ + } +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL +#undef GPUCA_KRNL_LOAD_single +#undef GPUCA_KRNL_LOAD_multi + + return 0; +} + +bool GPUReconstructionOCLBackend::CheckPlatform(uint32_t i) +{ + char platform_version[64] = {}, platform_vendor[64] = {}; + clGetPlatformInfo(mInternals->platforms[i], CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr); + clGetPlatformInfo(mInternals->platforms[i], CL_PLATFORM_VENDOR, sizeof(platform_vendor), platform_vendor, nullptr); + float ver1 = 0; + sscanf(platform_version, "OpenCL %f", &ver1); + if (ver1 >= 2.2f) { + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("OpenCL 2.2 capable platform found"); + } + return true; + } + + if (strcmp(platform_vendor, "Advanced Micro Devices, Inc.") == 0 && ver1 >= 2.0f) { + float ver2 = 0; + const char* pos = strchr(platform_version, '('); + if (pos) { + sscanf(pos, "(%f)", &ver2); + } + if ((ver1 >= 2.f && ver2 >= 2000.f) || ver1 >= 2.1f) { + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("AMD ROCm OpenCL Platform found"); + } + return true; + } + } + return false; +} diff --git a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h similarity index 81% rename from GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.h rename to GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 6abe1045b550a..4d0c51e65a517 100644 --- a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -18,7 +18,7 @@ #include "GPUReconstructionDeviceBase.h" #ifdef _WIN32 -extern "C" __declspec(dllexport) 
GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_OCLconst GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); +extern "C" __declspec(dllexport) GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_OCL(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); #else extern "C" GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_OCL(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); #endif @@ -27,13 +27,14 @@ namespace GPUCA_NAMESPACE::gpu { struct GPUReconstructionOCLInternals; -class GPUReconstructionOCL : public GPUReconstructionDeviceBase +class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase { public: - ~GPUReconstructionOCL() override; - GPUReconstructionOCL(const GPUSettingsDeviceBackend& cfg); + ~GPUReconstructionOCLBackend() override; protected: + GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend& cfg); + int32_t InitDevice_Runtime() override; int32_t ExitDevice_Runtime() override; void UpdateAutomaticProcessingSettings() override; @@ -54,8 +55,6 @@ class GPUReconstructionOCL : public GPUReconstructionDeviceBase void ReleaseEvent(deviceEvent ev) override; void RecordMarker(deviceEvent* ev, int32_t stream) override; - virtual int32_t GetOCLPrograms() = 0; - virtual bool CheckPlatform(uint32_t i) = 0; virtual bool ContextForAllPlatforms() { return false; } template @@ -68,8 +67,17 @@ class GPUReconstructionOCL : public GPUReconstructionDeviceBase gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); GPUReconstructionOCLInternals* mInternals; + + template + int32_t runKernelBackend(const krnlSetupArgs& args); + template + S& getKernelObject(); + + int32_t GetOCLPrograms(); + bool CheckPlatform(uint32_t i); }; +using GPUReconstructionOCL = GPUReconstructionKernels; } // namespace GPUCA_NAMESPACE::gpu #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.cxx deleted file mode 100644 index 
3f84ab0f6ac15..0000000000000 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.cxx +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionOCL1.cxx -/// \author David Rohr - -#define GPUCA_GPUTYPE_OPENCL -#define __OPENCL_HOST__ - -#include "GPUReconstructionOCL1.h" -#include "GPUReconstructionOCL1Internals.h" -#include "GPUReconstructionIncludes.h" - -using namespace GPUCA_NAMESPACE::gpu; - -#include -#include -#include -#include - -#include "utils/opencl_obtain_program.h" -#include "utils/qGetLdBinarySymbols.h" -QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCL1Code_bin); - -GPUReconstruction* GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL1(cfg); } - -GPUReconstructionOCL1Backend::GPUReconstructionOCL1Backend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionOCL(cfg) -{ -} - -template -int32_t GPUReconstructionOCL1Backend::runKernelBackend(const krnlSetupArgs& args) -{ - cl_kernel k = args.s.y.num > 1 ? getKernelObject() : getKernelObject(); - return std::apply([this, &args, &k](auto&... vals) { return runKernelBackendInternal(args.s, k, vals...); }, args.v); -} - -template -S& GPUReconstructionOCL1Backend::getKernelObject() -{ - static uint32_t krnl = FindKernel(MULTI ? 
2 : 1); - return mInternals->kernels[krnl].first; -} - -int32_t GPUReconstructionOCL1Backend::GetOCLPrograms() -{ - cl_uint count; - if (GPUFailedMsgI(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &count))) { - GPUError("Error getting OPENCL Device Count"); - return (1); - } - - if (_makefiles_opencl_obtain_program_helper(mInternals->context, count, mInternals->devices.get(), &mInternals->program, _binary_GPUReconstructionOCL1Code_bin_start)) { - clReleaseContext(mInternals->context); - GPUError("Could not obtain OpenCL progarm"); - return 1; - } - -#define GPUCA_OPENCL1 -#define GPUCA_KRNL(...) \ - GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(x_class, ...) \ - if (AddKernel(false)) { \ - return 1; \ - } -#define GPUCA_KRNL_LOAD_multi(x_class, ...) \ - if (AddKernel(true)) { \ - return 1; \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL -#undef GPUCA_OPENCL1 -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi - - return 0; -} - -bool GPUReconstructionOCL1Backend::CheckPlatform(uint32_t i) -{ - char platform_version[64] = {}, platform_vendor[64] = {}; - clGetPlatformInfo(mInternals->platforms[i], CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr); - clGetPlatformInfo(mInternals->platforms[i], CL_PLATFORM_VENDOR, sizeof(platform_vendor), platform_vendor, nullptr); - if (strcmp(platform_vendor, "Advanced Micro Devices, Inc.") == 0 && strstr(platform_version, "OpenCL 2.0 AMD-APP (") != nullptr) { - float ver = 0; - sscanf(platform_version, "OpenCL 2.0 AMD-APP (%f)", &ver); - if (ver < 2000.f) { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("AMD APP OpenCL Platform found"); - } - return true; - } - } - return false; -} diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.h deleted file mode 100644 index c9a3b89a79cd1..0000000000000 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.h +++ 
/dev/null @@ -1,52 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionOCL1.h -/// \author David Rohr - -#ifndef GPURECONSTRUCTIONOCL1_H -#define GPURECONSTRUCTIONOCL1_H - -#include "GPUReconstructionOCL.h" - -#ifdef _WIN32 -extern "C" __declspec(dllexport) GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_OCL(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); -#else -extern "C" GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_OCL(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); -#endif - -namespace GPUCA_NAMESPACE::gpu -{ -struct GPUReconstructionOCL1Internals; - -class GPUReconstructionOCL1Backend : public GPUReconstructionOCL -{ - public: - ~GPUReconstructionOCL1Backend() override = default; - - protected: - GPUReconstructionOCL1Backend(const GPUSettingsDeviceBackend& cfg); - - template - int32_t runKernelBackend(const krnlSetupArgs& args); - template - S& getKernelObject(); - - RecoStepField AvailableGPURecoSteps() override { return (RecoStep::TPCSliceTracking); } - bool ContextForAllPlatforms() override { return true; } - bool CheckPlatform(uint32_t i) override; - int32_t GetOCLPrograms() override; -}; - -using GPUReconstructionOCL1 = GPUReconstructionKernels; -} // namespace GPUCA_NAMESPACE::gpu - -#endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1Internals.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1Internals.h deleted file mode 100644 index 
997a108ac26d0..0000000000000 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1Internals.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionOCL1Internals.h -/// \author David Rohr, Sergey Gorbunov - -#ifndef GPUTPCGPUTRACKEROPENCLINTERNALS1_H -#define GPUTPCGPUTRACKEROPENCLINTERNALS1_H - -#include "GPUReconstructionOCLInternals.h" - -namespace GPUCA_NAMESPACE::gpu -{ - -struct GPUReconstructionOCL1Internals : public GPUReconstructionOCLInternals { -}; - -} // namespace GPUCA_NAMESPACE::gpu - -#endif diff --git a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCLInternals.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h similarity index 96% rename from GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCLInternals.h rename to GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h index 182bef9f9d739..fdcd7ff7f12c9 100644 --- a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCLInternals.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h @@ -173,7 +173,7 @@ struct GPUReconstructionOCLInternals { }; template -inline int32_t GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, K& k, const Args&... args) +inline int32_t GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, K& k, const Args&... 
args) { auto& x = _xyz.x; auto& y = _xyz.y; @@ -208,7 +208,7 @@ inline int32_t GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTim } template -int32_t GPUReconstructionOCL::AddKernel(bool multi) +int32_t GPUReconstructionOCLBackend::AddKernel(bool multi) { std::string name(GetKernelName()); if (multi) { @@ -227,7 +227,7 @@ int32_t GPUReconstructionOCL::AddKernel(bool multi) } template -inline uint32_t GPUReconstructionOCL::FindKernel(int32_t num) +inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num) { std::string name(GetKernelName()); if (num > 1) { diff --git a/GPU/GPUTracking/Base/opencl2/CMakeLists.txt b/GPU/GPUTracking/Base/opencl2/CMakeLists.txt deleted file mode 100644 index 5030b7ab3b94f..0000000000000 --- a/GPU/GPUTracking/Base/opencl2/CMakeLists.txt +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2019-2020 CERN and copyright holders of ALICE O2. -# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -# All rights not expressly granted are reserved. -# -# This software is distributed under the terms of the GNU General Public -# License v3 (GPL Version 3), copied verbatim in the file "COPYING". -# -# In applying this license CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. 
- -set(MODULE GPUTrackingOCL2) -enable_language(ASM) - -message(STATUS "Building GPUTracking with OpenCL 2 support") - -# convenience variables -if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - set(GPUDIR ${CMAKE_SOURCE_DIR}/../) -else() - set(GPUDIR ${CMAKE_SOURCE_DIR}/GPU/GPUTracking) -endif() -set(CL_SRC ${GPUDIR}/Base/opencl-common/GPUReconstructionOCL.cl) -set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCL2Code) - -set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021) -if(NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH}) - set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) -else() -set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt) -endif() -set(OCL_DEFINECL "-D$,$-D>" - "-I$,EXCLUDE,^/usr/include/?>,$-I>" - -I${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src - -I${CMAKE_SOURCE_DIR}/Detectors/Base/src - -I${CMAKE_SOURCE_DIR}/DataFormats/Reconstruction/src -) - -set(SRCS GPUReconstructionOCL2.cxx) -set(HDRS GPUReconstructionOCL2.h GPUReconstructionOCL2Internals.h) - -if(OPENCL2_ENABLED_SPIRV) # BUILD OpenCL2 intermediate code for SPIR-V target - # executes clang to create llvm IL code - # Add -fintegrated-objemitter once we switch to clang >= 17 - cmake_path(GET LLVM_SPIRV PARENT_PATH TMP_LLVM_SPIRV_PATH) - add_custom_command( - OUTPUT ${CL_BIN}.spirv - COMMAND ${CMAKE_COMMAND} -E env "PATH=${TMP_LLVM_SPIRV_PATH}:\$$PATH" ${LLVM_CLANG} - -O0 - --target=spirv64 - -fno-integrated-objemitter - -ferror-limit=1000 -Wno-invalid-constexpr -Wno-unused-command-line-argument - ${OCL_FLAGS} - ${OCL_DEFINECL} - -o ${CL_BIN}.spirv -c ${CL_SRC} - MAIN_DEPENDENCY ${CL_SRC} - IMPLICIT_DEPENDS CXX ${CL_SRC} - COMMAND_EXPAND_LISTS - COMMENT "Compiling OpenCL2 CL source file ${CL_SRC} to SPIRV ${CL_BIN}.spirv") - - create_binary_resource(${CL_BIN}.spirv ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.spirv.o) - set(SRCS ${SRCS} 
${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.spirv.o) -endif() - -if(OPENCL2_ENABLED) # BUILD OpenCL2 source code for runtime compilation target - # executes clang to preprocess - add_custom_command( - OUTPUT ${CL_BIN}.src - COMMAND ${LLVM_CLANG} - -Wno-unused-command-line-argument - ${OCL_FLAGS} - ${OCL_DEFINECL} - -cl-no-stdinc - -nostdinc - -E ${CL_SRC} > ${CL_BIN}.src - MAIN_DEPENDENCY ${CL_SRC} - IMPLICIT_DEPENDS CXX ${CL_SRC} - COMMAND_EXPAND_LISTS - COMMENT "Preparing OpenCL2 CL source file for run time compilation ${CL_BIN}.src") - - create_binary_resource(${CL_BIN}.src ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.src.o) - set(SRCS ${SRCS} ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.src.o) -endif() - -if(ALIGPU_BUILD_TYPE STREQUAL "O2") - o2_add_library(${MODULE} - SOURCES ${SRCS} - PUBLIC_LINK_LIBRARIES O2::GPUTrackingOpenCLCommon - TARGETVARNAME targetName) - - target_compile_definitions(${targetName} PRIVATE $) - # the compile_defitions are not propagated automatically on purpose (they are - # declared PRIVATE) so we are not leaking them outside of the GPU** - # directories - - install(FILES ${HDRS} DESTINATION include/GPU) -endif() - -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - # Generate the dictionary - get_directory_property(incdirs INCLUDE_DIRECTORIES) - generate_dictionary("Ali${MODULE}" "" "GPUReconstructionOCL2.h" "${incdirs} .") - - # Generate the ROOT map - generate_rootmap("Ali${MODULE}" "" "") - - # Add a library to the project using the specified source files - add_library_tested(Ali${MODULE} SHARED ${SRCS} G__Ali${MODULE}.cxx) - target_link_libraries(Ali${MODULE} PUBLIC AliGPUTrackingOpenCLCommon) - - # Installation - install(TARGETS Ali${MODULE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - - install(FILES ${HDRS} DESTINATION include) - set(targetName Ali${MODULE}) -endif() - -if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - add_library(${MODULE} SHARED ${SRCS}) - target_link_libraries(${MODULE} 
GPUTrackingOpenCLCommon) - install(TARGETS ${MODULE}) - set(targetName ${MODULE}) -endif() - -if(OPENCL2_ENABLED_SPIRV) - target_compile_definitions(${targetName} PRIVATE OPENCL2_ENABLED_SPIRV) -endif() -target_compile_definitions(${targetName} PRIVATE OCL_FLAGS=$) diff --git a/GPU/GPUTracking/Base/opencl2/GPUReconstructionOCL2.cxx b/GPU/GPUTracking/Base/opencl2/GPUReconstructionOCL2.cxx deleted file mode 100644 index 435e69e91f5fe..0000000000000 --- a/GPU/GPUTracking/Base/opencl2/GPUReconstructionOCL2.cxx +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUReconstructionOCL2.cxx -/// \author David Rohr - -#define GPUCA_GPUTYPE_OPENCL -#define __OPENCL_HOST__ - -#include "GPUReconstructionOCL2.h" -#include "GPUReconstructionOCL2Internals.h" -#include "GPUReconstructionIncludes.h" - -using namespace GPUCA_NAMESPACE::gpu; - -#include -#include -#include -#include - -#include "utils/qGetLdBinarySymbols.h" -QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCL2Code_src); -#ifdef OPENCL2_ENABLED_SPIRV -QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCL2Code_spirv); -#endif - -GPUReconstruction* GPUReconstruction_Create_OCL2(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL2(cfg); } - -GPUReconstructionOCL2Backend::GPUReconstructionOCL2Backend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionOCL(cfg) -{ -} - -template -int32_t GPUReconstructionOCL2Backend::runKernelBackend(const krnlSetupArgs& args) -{ - cl_kernel k = args.s.y.num > 1 ? getKernelObject() : getKernelObject(); - return std::apply([this, &args, &k](auto&... vals) { return runKernelBackendInternal(args.s, k, vals...); }, args.v); -} - -template -S& GPUReconstructionOCL2Backend::getKernelObject() -{ - static uint32_t krnl = FindKernel(MULTI ? 
2 : 1); - return mInternals->kernels[krnl].first; -} - -int32_t GPUReconstructionOCL2Backend::GetOCLPrograms() -{ - char platform_version[256] = {}; - GPUFailedMsg(clGetPlatformInfo(mInternals->platform, CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr)); - float ver = 0; - sscanf(platform_version, "OpenCL %f", &ver); - - cl_int ocl_error; - - const char* ocl_flags = GPUCA_M_STR(OCL_FLAGS); - -#ifdef OPENCL2_ENABLED_SPIRV // clang-format off - if (ver >= 2.2f && !GetProcessingSettings().oclCompileFromSources) { - GPUInfo("Reading OpenCL program from SPIR-V IL (Platform version %4.2f)", ver); - mInternals->program = clCreateProgramWithIL(mInternals->context, _binary_GPUReconstructionOCL2Code_spirv_start, _binary_GPUReconstructionOCL2Code_spirv_len, &ocl_error); - ocl_flags = ""; - } else -#endif // clang-format on - { - GPUInfo("Compiling OpenCL program from sources (Platform version %4.2f)", ver); - size_t program_sizes[1] = {_binary_GPUReconstructionOCL2Code_src_len}; - char* programs_sources[1] = {_binary_GPUReconstructionOCL2Code_src_start}; - mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error); - } - - if (GPUFailedMsgI(ocl_error)) { - GPUError("Error creating OpenCL program from binary"); - return 1; - } - - if (GPUFailedMsgI(clBuildProgram(mInternals->program, 1, &mInternals->device, ocl_flags, nullptr, nullptr))) { - cl_build_status status; - if (GPUFailedMsgI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { - size_t log_size; - clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size); - std::unique_ptr build_log(new char[log_size + 1]); - clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, log_size, build_log.get(), nullptr); - build_log[log_size] = 0; - 
GPUError("Build Log:\n\n%s\n", build_log.get()); - } - return 1; - } - -#define GPUCA_KRNL(...) \ - GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(x_class, ...) \ - if (AddKernel(false)) { \ - return 1; \ - } -#define GPUCA_KRNL_LOAD_multi(x_class, ...) \ - if (AddKernel(true)) { \ - return 1; \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi - - return 0; -} - -bool GPUReconstructionOCL2Backend::CheckPlatform(uint32_t i) -{ - char platform_version[64] = {}, platform_vendor[64] = {}; - clGetPlatformInfo(mInternals->platforms[i], CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr); - clGetPlatformInfo(mInternals->platforms[i], CL_PLATFORM_VENDOR, sizeof(platform_vendor), platform_vendor, nullptr); - float ver1 = 0; - sscanf(platform_version, "OpenCL %f", &ver1); - if (ver1 >= 2.2f) { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("OpenCL 2.2 capable platform found"); - } - return true; - } - - if (strcmp(platform_vendor, "Advanced Micro Devices, Inc.") == 0 && ver1 >= 2.0f) { - float ver2 = 0; - const char* pos = strchr(platform_version, '('); - if (pos) { - sscanf(pos, "(%f)", &ver2); - } - if ((ver1 >= 2.f && ver2 >= 2000.f) || ver1 >= 2.1f) { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("AMD ROCm OpenCL Platform found"); - } - return true; - } - } - return false; -} diff --git a/GPU/GPUTracking/Base/opencl2/GPUReconstructionOCL2.h b/GPU/GPUTracking/Base/opencl2/GPUReconstructionOCL2.h deleted file mode 100644 index 8ce73df32b701..0000000000000 --- a/GPU/GPUTracking/Base/opencl2/GPUReconstructionOCL2.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionOCL2.h -/// \author David Rohr - -#ifndef GPURECONSTRUCTIONOCL2_H -#define GPURECONSTRUCTIONOCL2_H - -#include "GPUReconstructionOCL.h" - -#ifdef _WIN32 -extern "C" __declspec(dllexport) GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_OCL2(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); -#else -extern "C" GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_OCL2(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); -#endif - -namespace GPUCA_NAMESPACE::gpu -{ -struct GPUReconstructionOCL2Internals; - -class GPUReconstructionOCL2Backend : public GPUReconstructionOCL -{ - public: - ~GPUReconstructionOCL2Backend() override = default; - - protected: - GPUReconstructionOCL2Backend(const GPUSettingsDeviceBackend& cfg); - - template - int32_t runKernelBackend(const krnlSetupArgs& args); - template - S& getKernelObject(); - - int32_t GetOCLPrograms() override; - bool CheckPlatform(uint32_t i) override; -}; - -using GPUReconstructionOCL2 = GPUReconstructionKernels; -} // namespace GPUCA_NAMESPACE::gpu - -#endif diff --git a/GPU/GPUTracking/Base/opencl2/GPUReconstructionOCL2Internals.h b/GPU/GPUTracking/Base/opencl2/GPUReconstructionOCL2Internals.h deleted file mode 100644 index 8debdc47be8e8..0000000000000 --- a/GPU/GPUTracking/Base/opencl2/GPUReconstructionOCL2Internals.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionOCL2Internals.h -/// \author David Rohr, Sergey Gorbunov - -#ifndef GPUTPCGPUTRACKEROPENCLINTERNALS2_H -#define GPUTPCGPUTRACKEROPENCLINTERNALS2_H - -#include "GPUReconstructionOCLInternals.h" - -namespace GPUCA_NAMESPACE::gpu -{ - -struct GPUReconstructionOCL2Internals : public GPUReconstructionOCLInternals { -}; - -} // namespace GPUCA_NAMESPACE::gpu - -#endif diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index dd3480cae86bd..b04a8c12dd598 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -22,7 +22,7 @@ endif() include(cmake/helpers.cmake) if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - if(ENABLE_CUDA OR ENABLE_OPENCL1 OR ENABLE_OPENCL2 OR ENABLE_HIP) + if(ENABLE_CUDA OR ENABLE_OPENCL OR ENABLE_HIP) include(FeatureSummary) find_package(O2GPU) else() @@ -137,7 +137,6 @@ set(HDRS_INSTALL Definitions/GPUDefGPUParameters.h Definitions/GPUDef.h Definitions/GPUDefMacros.h - Definitions/GPUDefOpenCL12Templates.h Definitions/GPULogging.h Definitions/GPUSettingsList.h Global/GPUChainTrackingDefs.h @@ -414,9 +413,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/utils ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/cuda ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/hip - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/opencl-common ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/opencl - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/opencl2 ${CMAKE_SOURCE_DIR}/GPU/TPCFastTransformation) alice_usevc() @@ -523,22 +520,16 @@ endif() target_compile_options(${targetName} PRIVATE -Wno-instantiation-after-specialization) # Add CMake recipes 
for GPU Tracking librararies -if(CUDA_ENABLED OR OPENCL1_ENABLED OR OPENCL2_ENABLED OR HIP_ENABLED) +if(CUDA_ENABLED OR OPENCL_ENABLED OR HIP_ENABLED) if(CMAKE_SYSTEM_NAME MATCHES Darwin) message(WARNING "GPU Tracking disabled on MacOS") else() if(CUDA_ENABLED) add_subdirectory(Base/cuda) endif() - if(OPENCL1_ENABLED OR OPENCL2_ENABLED) - add_subdirectory(Base/opencl-common) - endif() - if(OPENCL1_ENABLED) + if(OPENCL_ENABLED) add_subdirectory(Base/opencl) endif() - if(OPENCL2_ENABLED) - add_subdirectory(Base/opencl2) - endif() if(HIP_ENABLED) add_subdirectory(Base/hip) endif() diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxContainer.h b/GPU/GPUTracking/DataTypes/CalibdEdxContainer.h index 152bb67daacc5..5781984b33222 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxContainer.h +++ b/GPU/GPUTracking/DataTypes/CalibdEdxContainer.h @@ -61,17 +61,17 @@ class CalibdEdxContainer : public o2::gpu::FlatObject public: /// Default constructor: creates an empty uninitialized object #ifndef GPUCA_GPUCODE - CalibdEdxContainer() CON_DEFAULT; + CalibdEdxContainer() = default; #endif /// Copy constructor: disabled to avoid ambiguity. Use cloneFromObject() instead - CalibdEdxContainer(const CalibdEdxContainer&) CON_DELETE; + CalibdEdxContainer(const CalibdEdxContainer&) = delete; /// Assignment operator: disabled to avoid ambiguity. 
Use cloneFromObject() instead - CalibdEdxContainer& operator=(const CalibdEdxContainer&) CON_DELETE; + CalibdEdxContainer& operator=(const CalibdEdxContainer&) = delete; /// Destructor - ~CalibdEdxContainer() CON_DEFAULT; + ~CalibdEdxContainer() = default; /// \return returns the topology correction for the cluster charge /// \param region region of the TPC diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.h b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.h index ff053e1f4bf48..20d53ff80a9c8 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.h +++ b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.h @@ -54,10 +54,10 @@ class CalibdEdxTrackTopologyPol : public o2::gpu::FlatObject /// \parma name name of the object CalibdEdxTrackTopologyPol(std::string_view fileName, std::string_view name = "CalibdEdxTrackTopologyPol") { loadFromFile(fileName.data(), name.data()); }; /// Default constructor: creates an empty uninitialized object - CalibdEdxTrackTopologyPol() CON_DEFAULT; + CalibdEdxTrackTopologyPol() = default; /// destructor - ~CalibdEdxTrackTopologyPol() CON_DEFAULT; + ~CalibdEdxTrackTopologyPol() = default; #endif #ifdef GPUCA_HAVE_O2HEADERS diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h index d9d4b9e35592d..9d7cc1d3b8dfc 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h +++ b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h @@ -76,7 +76,7 @@ class CalibdEdxTrackTopologySpline : public o2::gpu::FlatObject #if !defined(GPUCA_GPUCODE) /// Default constructor - CalibdEdxTrackTopologySpline() CON_DEFAULT; + CalibdEdxTrackTopologySpline() = default; /// constructor with initialization of the splines from file /// \param dEdxSplinesFile path to root file containing the splines @@ -92,13 +92,13 @@ class CalibdEdxTrackTopologySpline : public o2::gpu::FlatObject #else /// Disable constructors for the GPU implementation - 
CalibdEdxTrackTopologySpline() CON_DELETE; - CalibdEdxTrackTopologySpline(const CalibdEdxTrackTopologySpline&) CON_DELETE; - CalibdEdxTrackTopologySpline& operator=(const CalibdEdxTrackTopologySpline&) CON_DELETE; + CalibdEdxTrackTopologySpline() = delete; + CalibdEdxTrackTopologySpline(const CalibdEdxTrackTopologySpline&) = delete; + CalibdEdxTrackTopologySpline& operator=(const CalibdEdxTrackTopologySpline&) = delete; #endif /// Destructor - ~CalibdEdxTrackTopologySpline() CON_DEFAULT; + ~CalibdEdxTrackTopologySpline() = default; /// _____________ FlatObject functionality, see FlatObject class for description ____________ diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index d3b88f0239c7b..d252bb39857c2 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -21,11 +21,8 @@ // Please add complex data types required on the host but not GPU to GPUHostDataTypes.h and forward-declare! #ifndef GPUCA_GPUCODE_DEVICE #include -#ifdef GPUCA_NOCOMPAT_ALLOPENCL -#include #endif -#endif -#ifdef GPUCA_NOCOMPAT +#include "GPUCommonTypeTraits.h" #include "GPUTRDDef.h" struct AliHLTTPCClusterMCLabel; @@ -43,7 +40,6 @@ namespace constants } // namespace constants } // namespace tpc } // namespace o2 -#endif namespace o2 { @@ -58,13 +54,9 @@ class MatLayerCylSet; } // namespace base namespace track { -#ifdef GPUCA_NOCOMPAT template class TrackParametrizationWithError; using TrackParCov = TrackParametrizationWithError; -#else -class TrackParCov; -#endif } // namespace track namespace trd { @@ -114,20 +106,11 @@ namespace GPUCA_NAMESPACE { namespace gpu { -#ifdef GPUCA_NOCOMPAT_ALLOPENCL #include "utils/bitfield.h" #define ENUM_CLASS class #define ENUM_UINT : uint32_t #define GPUCA_RECO_STEP GPUDataTypes::RecoStep -#else -#define ENUM_CLASS -#define ENUM_UINT -#define GPUCA_RECO_STEP GPUDataTypes -#endif -#if defined(__OPENCL1__) -MEM_CLASS_PRE() // Macro with some template magic for 
OpenCL 1.2 -#endif class GPUTPCTrack; class GPUTPCHitId; class GPUTPCGMMergedTrack; @@ -150,8 +133,7 @@ class GPUDataTypes CPU = 1, CUDA = 2, HIP = 3, - OCL = 4, - OCL2 = 5 }; + OCL = 4 }; enum ENUM_CLASS GeneralStep { Prepare = 1, QA = 2 }; @@ -176,32 +158,25 @@ class GPUDataTypes TPCRaw = 64, ITSClusters = 128, ITSTracks = 256 }; - -#ifdef GPUCA_NOCOMPAT_ALLOPENCL - static constexpr const char* const DEVICE_TYPE_NAMES[] = {"INVALID", "CPU", "CUDA", "HIP", "OCL", "OCL2"}; +#ifndef __OPENCL__ + static constexpr const char* const DEVICE_TYPE_NAMES[] = {"INVALID", "CPU", "CUDA", "HIP", "OCL"}; static constexpr const char* const RECO_STEP_NAMES[] = {"TPC Transformation", "TPC Sector Tracking", "TPC Track Merging and Fit", "TPC Compression", "TRD Tracking", "ITS Tracking", "TPC dEdx Computation", "TPC Cluster Finding", "TPC Decompression", "Global Refit"}; static constexpr const char* const GENERAL_STEP_NAMES[] = {"Prepare", "QA"}; - typedef bitfield RecoStepField; - typedef bitfield InOutTypeField; constexpr static int32_t N_RECO_STEPS = sizeof(GPUDataTypes::RECO_STEP_NAMES) / sizeof(GPUDataTypes::RECO_STEP_NAMES[0]); constexpr static int32_t N_GENERAL_STEPS = sizeof(GPUDataTypes::GENERAL_STEP_NAMES) / sizeof(GPUDataTypes::GENERAL_STEP_NAMES[0]); #endif -#ifdef GPUCA_NOCOMPAT + typedef bitfield RecoStepField; + typedef bitfield InOutTypeField; static constexpr uint32_t NSLICES = 36; -#endif static DeviceType GetDeviceType(const char* type); }; -#ifdef GPUCA_NOCOMPAT_ALLOPENCL struct GPURecoStepConfiguration { GPUDataTypes::RecoStepField steps = 0; GPUDataTypes::RecoStepField stepsGPUMask = GPUDataTypes::RecoStep::AllRecoSteps; GPUDataTypes::InOutTypeField inputs = 0; GPUDataTypes::InOutTypeField outputs = 0; }; -#endif - -#ifdef GPUCA_NOCOMPAT template struct DefaultPtr { @@ -347,12 +322,6 @@ struct GPUTrackingInOutPointers { // Common const GPUSettingsTF* settingsTF = nullptr; }; -#else -struct GPUTrackingInOutPointers { -}; -struct GPUCalibObjectsConst { -}; -#endif 
#undef ENUM_CLASS #undef ENUM_UINT diff --git a/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h b/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h index 1015b31fe6556..810e4dd58ca0e 100644 --- a/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h @@ -17,7 +17,7 @@ // Pull in several O2 headers with basic data types, or load a header with empty fake classes if O2 headers not available -#if defined(GPUCA_HAVE_O2HEADERS) && !defined(__OPENCL1__) +#if defined(GPUCA_HAVE_O2HEADERS) #include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/Digit.h" #include "DetectorsBase/MatLayerCylSet.h" @@ -27,8 +27,6 @@ #include "GPUO2FakeClasses.h" #endif -#if !defined(__OPENCL1__) #include "GPUdEdxInfo.h" -#endif #endif diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index b967a7ce42620..499287dc3200d 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -45,12 +45,9 @@ class GPUSettings RejectionStrategyA = 1, RejectionStrategyB = 2 }; -#if !defined(__OPENCL1__) static CONSTEXPR const uint32_t TPC_MAX_TF_TIME_BIN = ((256 * 3564 + 2 * 8 - 2) / 8); -#endif }; -#ifdef GPUCA_NOCOMPAT // Settings describing the global run parameters struct GPUSettingsGRP { // All new members must be sizeof(int32_t) resp. sizeof(float) for alignment reasons!, default value for newly added members for old data will be 0. 
@@ -81,7 +78,6 @@ struct GPUSettingsDeviceBackend { uint8_t forceDeviceType = 1; // Fail if device initialization fails, otherwise falls back to CPU GPUReconstruction* master = nullptr; // GPUReconstruction master object }; -#endif } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.cxx b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.cxx index cb367a0f4b416..37d32ed4c1bc5 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.cxx +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.cxx @@ -29,9 +29,7 @@ void GPUTPCGMPolynomialField::Print() const const double kCLight = gpu_common_constants::kCLight; typedef std::numeric_limits flt; cout << std::scientific; -#if __cplusplus >= 201103L cout << std::setprecision(flt::max_digits10 + 2); -#endif cout << " nominal field " << mNominalBz << " [kG * (2.99792458E-4 GeV/c/kG/cm)]" << " == " << mNominalBz / kCLight << " [kG]" << endl; diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h index 09193e76b9382..88294b2b06c25 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h @@ -29,7 +29,6 @@ namespace gpu class GPUTPCGMPolynomialField { public: -#if !defined(__OPENCL1__) GPUTPCGMPolynomialField() : mNominalBz(0.f) { Reset(); @@ -75,11 +74,6 @@ class GPUTPCGMPolynomialField const float* GetCoefmItsBx() const { return mItsBx; } const float* GetCoefmItsBy() const { return mItsBy; } const float* GetCoefmItsBz() const { return mItsBz; } -#else -#define NTPCM 10 -#define NTRDM 20 -#define NITSM 10 -#endif private: float mNominalBz; // nominal constant field value in [kG * 2.99792458E-4 GeV/c/cm] @@ -94,8 +88,6 @@ class GPUTPCGMPolynomialField float mItsBz[NITSM]; }; -#if !defined(__OPENCL1__) - inline void GPUTPCGMPolynomialField::Reset() { mNominalBz = 0.f; @@ -297,7 +289,6 @@ GPUdi() float 
GPUTPCGMPolynomialField::GetFieldItsBz(float x, float y, float z) return bz; } -#endif // __OPENCL__ } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index 515905abe48b5..fcafa34547828 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -34,38 +34,33 @@ namespace gpu // Should be unified, but cannot take the contants from the official headers for now, since we want it to be constexpr class GPUTPCGeometry // TODO: Make values constexpr { -#if defined(__OPENCL1__) - GPUTPCGeometry(); // Fake constructor declaration for OpenCL due to static members, does not exist! -#endif #ifdef GPUCA_TPC_GEOMETRY_O2 - const float mX[GPUCA_ROW_COUNT] GPUCA_CPP11_INIT(= {85.225f, 85.975f, 86.725f, 87.475f, 88.225f, 88.975f, 89.725f, 90.475f, 91.225f, 91.975f, 92.725f, 93.475f, 94.225f, 94.975f, 95.725f, 96.475f, 97.225f, 97.975f, 98.725f, 99.475f, 100.225f, 100.975f, - 101.725f, 102.475f, 103.225f, 103.975f, 104.725f, 105.475f, 106.225f, 106.975f, 107.725f, 108.475f, 109.225f, 109.975f, 110.725f, 111.475f, 112.225f, 112.975f, 113.725f, 114.475f, 115.225f, 115.975f, 116.725f, 117.475f, - 118.225f, 118.975f, 119.725f, 120.475f, 121.225f, 121.975f, 122.725f, 123.475f, 124.225f, 124.975f, 125.725f, 126.475f, 127.225f, 127.975f, 128.725f, 129.475f, 130.225f, 130.975f, 131.725f, 135.2f, 136.2f, 137.2f, - 138.2f, 139.2f, 140.2f, 141.2f, 142.2f, 143.2f, 144.2f, 145.2f, 146.2f, 147.2f, 148.2f, 149.2f, 150.2f, 151.2f, 152.2f, 153.2f, 154.2f, 155.2f, 156.2f, 157.2f, 158.2f, 159.2f, - 160.2f, 161.2f, 162.2f, 163.2f, 164.2f, 165.2f, 166.2f, 167.2f, 168.2f, 171.4f, 172.6f, 173.8f, 175.f, 176.2f, 177.4f, 178.6f, 179.8f, 181.f, 182.2f, 183.4f, 184.6f, 185.8f, - 187.f, 188.2f, 189.4f, 190.6f, 191.8f, 193.f, 194.2f, 195.4f, 196.6f, 197.8f, 199.f, 200.2f, 201.4f, 202.6f, 203.8f, 205.f, 206.2f, 209.65f, 211.15f, 212.65f, 214.15f, 215.65f, - 217.15f, 
218.65f, 220.15f, 221.65f, 223.15f, 224.65f, 226.15f, 227.65f, 229.15f, 230.65f, 232.15f, 233.65f, 235.15f, 236.65f, 238.15f, 239.65f, 241.15f, 242.65f, 244.15f, 245.65f}); - - const uint8_t mNPads[GPUCA_ROW_COUNT] GPUCA_CPP11_INIT(= {66, 66, 66, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 84, 84, 86, 86, 86, 88, 88, 88, - 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 92, 92, 92, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, - 82, 84, 84, 84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 102, 102, 102, 104, 104, 104, 106, 110, - 110, 112, 112, 112, 114, 114, 114, 116, 116, 116, 118, 118, 118, 118, 118, 120, 120, 122, 122, 124, 124, 124, 126, 126, 128, 128, 128, 130, 130, 132, 132, 132, 134, 134, 136, 136, 138, 138}); - - const uint8_t mRegion[GPUCA_ROW_COUNT] GPUCA_CPP11_INIT(= {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}); - const uint8_t mRegionRows[10] GPUCA_CPP11_INIT(= {17, 15, 16, 15, 18, 16, 16, 14, 13, 12}); - const uint8_t mRegionStart[10] GPUCA_CPP11_INIT(= {0, 17, 32, 48, 63, 81, 97, 113, 127, 140}); - - const uint8_t mSampaMapping[10] GPUCA_CPP11_INIT(= {0, 0, 1, 1, 2, 3, 3, 4, 4, 2}); - const uint8_t mChannelOffset[10] GPUCA_CPP11_INIT(= {0, 16, 0, 16, 0, 0, 16, 0, 16, 16}); - const uint8_t mSectorFECOffset[5] GPUCA_CPP11_INIT(= {0, 15, 15 + 18, 15 + 18 + 18, 15 + 18 + 18 + 20}); - - const float mPadHeight[10] GPUCA_CPP11_INIT(= {.75f, .75f, .75f, .75f, 1.f, 1.f, 1.2f, 1.2f, 
1.5f, 1.5f}); - const float mPadWidth[10] GPUCA_CPP11_INIT(= {.416f, .420f, .420f, .436f, .6f, .6f, .608f, .588f, .604f, .607f}); - -#if !defined(__OPENCL1__) - static CONSTEXPR float FACTOR_T2Z GPUCA_CPP11_INIT(= 250.f / 512.f); // Used in compression, must remain constant at 250cm, 512 time bins! -#endif + const float mX[GPUCA_ROW_COUNT] = {85.225f, 85.975f, 86.725f, 87.475f, 88.225f, 88.975f, 89.725f, 90.475f, 91.225f, 91.975f, 92.725f, 93.475f, 94.225f, 94.975f, 95.725f, 96.475f, 97.225f, 97.975f, 98.725f, 99.475f, 100.225f, 100.975f, + 101.725f, 102.475f, 103.225f, 103.975f, 104.725f, 105.475f, 106.225f, 106.975f, 107.725f, 108.475f, 109.225f, 109.975f, 110.725f, 111.475f, 112.225f, 112.975f, 113.725f, 114.475f, 115.225f, 115.975f, 116.725f, 117.475f, + 118.225f, 118.975f, 119.725f, 120.475f, 121.225f, 121.975f, 122.725f, 123.475f, 124.225f, 124.975f, 125.725f, 126.475f, 127.225f, 127.975f, 128.725f, 129.475f, 130.225f, 130.975f, 131.725f, 135.2f, 136.2f, 137.2f, + 138.2f, 139.2f, 140.2f, 141.2f, 142.2f, 143.2f, 144.2f, 145.2f, 146.2f, 147.2f, 148.2f, 149.2f, 150.2f, 151.2f, 152.2f, 153.2f, 154.2f, 155.2f, 156.2f, 157.2f, 158.2f, 159.2f, + 160.2f, 161.2f, 162.2f, 163.2f, 164.2f, 165.2f, 166.2f, 167.2f, 168.2f, 171.4f, 172.6f, 173.8f, 175.f, 176.2f, 177.4f, 178.6f, 179.8f, 181.f, 182.2f, 183.4f, 184.6f, 185.8f, + 187.f, 188.2f, 189.4f, 190.6f, 191.8f, 193.f, 194.2f, 195.4f, 196.6f, 197.8f, 199.f, 200.2f, 201.4f, 202.6f, 203.8f, 205.f, 206.2f, 209.65f, 211.15f, 212.65f, 214.15f, 215.65f, + 217.15f, 218.65f, 220.15f, 221.65f, 223.15f, 224.65f, 226.15f, 227.65f, 229.15f, 230.65f, 232.15f, 233.65f, 235.15f, 236.65f, 238.15f, 239.65f, 241.15f, 242.65f, 244.15f, 245.65f}; + + const uint8_t mNPads[GPUCA_ROW_COUNT] = {66, 66, 66, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 84, 84, 86, 86, 86, 88, 88, 88, + 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 92, 92, 92, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 
100, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, + 82, 84, 84, 84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 102, 102, 102, 104, 104, 104, 106, 110, + 110, 112, 112, 112, 114, 114, 114, 116, 116, 116, 118, 118, 118, 118, 118, 120, 120, 122, 122, 124, 124, 124, 126, 126, 128, 128, 128, 130, 130, 132, 132, 132, 134, 134, 136, 136, 138, 138}; + + const uint8_t mRegion[GPUCA_ROW_COUNT] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}; + const uint8_t mRegionRows[10] = {17, 15, 16, 15, 18, 16, 16, 14, 13, 12}; + const uint8_t mRegionStart[10] = {0, 17, 32, 48, 63, 81, 97, 113, 127, 140}; + + const uint8_t mSampaMapping[10] = {0, 0, 1, 1, 2, 3, 3, 4, 4, 2}; + const uint8_t mChannelOffset[10] = {0, 16, 0, 16, 0, 0, 16, 0, 16, 16}; + const uint8_t mSectorFECOffset[5] = {0, 15, 15 + 18, 15 + 18 + 18, 15 + 18 + 18 + 20}; + + const float mPadHeight[10] = {.75f, .75f, .75f, .75f, 1.f, 1.f, 1.2f, 1.2f, 1.5f, 1.5f}; + const float mPadWidth[10] = {.416f, .420f, .420f, .436f, .6f, .6f, .608f, .588f, .604f, .607f}; + + static CONSTEXPR float FACTOR_T2Z = 250.f / 512.f; // Used in compression, must remain constant at 250cm, 512 time bins! 
public: GPUd() int32_t GetRegion(int32_t row) const { return mRegion[row]; } @@ -79,25 +74,23 @@ class GPUTPCGeometry // TODO: Make values constexpr GPUd() int32_t EndOROC1() const { return 97; } GPUd() int32_t EndOROC2() const { return 127; } #else - const float mX[GPUCA_ROW_COUNT] GPUCA_CPP11_INIT(= {85.195f, 85.945f, 86.695f, 87.445f, 88.195f, 88.945f, 89.695f, 90.445f, 91.195f, 91.945f, 92.695f, 93.445f, 94.195f, 94.945f, 95.695f, 96.445f, 97.195f, 97.945f, 98.695f, 99.445f, 100.195f, 100.945f, 101.695f, - 102.445f, 103.195f, 103.945f, 104.695f, 105.445f, 106.195f, 106.945f, 107.695f, 108.445f, 109.195f, 109.945f, 110.695f, 111.445f, 112.195f, 112.945f, 113.695f, 114.445f, 115.195f, 115.945f, 116.695f, 117.445f, 118.195f, 118.945f, - 119.695f, 120.445f, 121.195f, 121.945f, 122.695f, 123.445f, 124.195f, 124.945f, 125.695f, 126.445f, 127.195f, 127.945f, 128.695f, 129.445f, 130.195f, 130.945f, 131.695f, 135.180f, 136.180f, 137.180f, 138.180f, 139.180f, 140.180f, - 141.180f, 142.180f, 143.180f, 144.180f, 145.180f, 146.180f, 147.180f, 148.180f, 149.180f, 150.180f, 151.180f, 152.180f, 153.180f, 154.180f, 155.180f, 156.180f, 157.180f, 158.180f, 159.180f, 160.180f, 161.180f, 162.180f, 163.180f, - 164.180f, 165.180f, 166.180f, 167.180f, 168.180f, 169.180f, 170.180f, 171.180f, 172.180f, 173.180f, 174.180f, 175.180f, 176.180f, 177.180f, 178.180f, 179.180f, 180.180f, 181.180f, 182.180f, 183.180f, 184.180f, 185.180f, 186.180f, - 187.180f, 188.180f, 189.180f, 190.180f, 191.180f, 192.180f, 193.180f, 194.180f, 195.180f, 196.180f, 197.180f, 198.180f, 199.430f, 200.930f, 202.430f, 203.930f, 205.430f, 206.930f, 208.430f, 209.930f, 211.430f, 212.930f, 214.430f, - 215.930f, 217.430f, 218.930f, 220.430f, 221.930f, 223.430f, 224.930f, 226.430f, 227.930f, 229.430f, 230.930f, 232.430f, 233.930f, 235.430f, 236.930f, 238.430f, 239.930f, 241.430f, 242.930f, 244.430f, 245.930f}); - - const uint8_t mNPads[GPUCA_ROW_COUNT] GPUCA_CPP11_INIT(= {68, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 
74, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 92, 92, 92, - 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 108, 108, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, 82, 84, 84, - 84, 86, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 96, 96, 96, 96, 98, 98, 98, 100, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 106, 108, - 108, 108, 110, 110, 110, 110, 112, 112, 114, 114, 114, 116, 116, 118, 118, 120, 120, 122, 122, 122, 124, 124, 126, 126, 128, 128, 130, 130, 130, 132, 132, 134, 134, 136, 136, 138, 138, 138, 140}); - - const float mPadHeight[3] GPUCA_CPP11_INIT(= {.75f, 1.f, 1.5f}); - const float mPadWidth[3] GPUCA_CPP11_INIT(= {.4f, .6f, .6f}); - -#if !defined(__OPENCL1__) - static CONSTEXPR float FACTOR_T2Z GPUCA_CPP11_INIT(= 250.f / 1024.f); // Used in compression, must remain constant at 250cm, 1024 time bins! -#endif + const float mX[GPUCA_ROW_COUNT] = {85.195f, 85.945f, 86.695f, 87.445f, 88.195f, 88.945f, 89.695f, 90.445f, 91.195f, 91.945f, 92.695f, 93.445f, 94.195f, 94.945f, 95.695f, 96.445f, 97.195f, 97.945f, 98.695f, 99.445f, 100.195f, 100.945f, 101.695f, + 102.445f, 103.195f, 103.945f, 104.695f, 105.445f, 106.195f, 106.945f, 107.695f, 108.445f, 109.195f, 109.945f, 110.695f, 111.445f, 112.195f, 112.945f, 113.695f, 114.445f, 115.195f, 115.945f, 116.695f, 117.445f, 118.195f, 118.945f, + 119.695f, 120.445f, 121.195f, 121.945f, 122.695f, 123.445f, 124.195f, 124.945f, 125.695f, 126.445f, 127.195f, 127.945f, 128.695f, 129.445f, 130.195f, 130.945f, 131.695f, 135.180f, 136.180f, 137.180f, 138.180f, 139.180f, 140.180f, + 141.180f, 142.180f, 143.180f, 144.180f, 145.180f, 146.180f, 147.180f, 148.180f, 149.180f, 150.180f, 151.180f, 152.180f, 153.180f, 154.180f, 155.180f, 156.180f, 157.180f, 158.180f, 159.180f, 160.180f, 161.180f, 162.180f, 163.180f, + 164.180f, 165.180f, 166.180f, 167.180f, 168.180f, 169.180f, 170.180f, 
171.180f, 172.180f, 173.180f, 174.180f, 175.180f, 176.180f, 177.180f, 178.180f, 179.180f, 180.180f, 181.180f, 182.180f, 183.180f, 184.180f, 185.180f, 186.180f, + 187.180f, 188.180f, 189.180f, 190.180f, 191.180f, 192.180f, 193.180f, 194.180f, 195.180f, 196.180f, 197.180f, 198.180f, 199.430f, 200.930f, 202.430f, 203.930f, 205.430f, 206.930f, 208.430f, 209.930f, 211.430f, 212.930f, 214.430f, + 215.930f, 217.430f, 218.930f, 220.430f, 221.930f, 223.430f, 224.930f, 226.430f, 227.930f, 229.430f, 230.930f, 232.430f, 233.930f, 235.430f, 236.930f, 238.430f, 239.930f, 241.430f, 242.930f, 244.430f, 245.930f}; + + const uint8_t mNPads[GPUCA_ROW_COUNT] = {68, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 92, 92, 92, + 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 108, 108, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, 82, 84, 84, + 84, 86, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 96, 96, 96, 96, 98, 98, 98, 100, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 106, 108, + 108, 108, 110, 110, 110, 110, 112, 112, 114, 114, 114, 116, 116, 118, 118, 120, 120, 122, 122, 122, 124, 124, 126, 126, 128, 128, 130, 130, 130, 132, 132, 134, 134, 136, 136, 138, 138, 138, 140}; + + const float mPadHeight[3] = {.75f, 1.f, 1.5f}; + const float mPadWidth[3] = {.4f, .6f, .6f}; + + static CONSTEXPR float FACTOR_T2Z = 250.f / 1024.f; // Used in compression, must remain constant at 250cm, 1024 time bins! public: GPUd() int32_t GetRegion(int32_t row) const { return (row < 63 ? 0 : row < 63 + 64 ? 
1 : 2); } @@ -109,9 +102,8 @@ class GPUTPCGeometry // TODO: Make values constexpr GPUd() int32_t EndOROC2() const { return GPUCA_ROW_COUNT; } #endif private: -#if !defined(__OPENCL1__) - static CONSTEXPR float FACTOR_Z2T GPUCA_CPP11_INIT(= 1.f / FACTOR_T2Z); -#endif + static CONSTEXPR float FACTOR_Z2T = 1.f / FACTOR_T2Z; + public: GPUd() static CONSTEXPR float TPCLength() { return 250.f - 0.275f; } GPUd() float Row2X(int32_t row) const { return (mX[row]); } @@ -120,7 +112,6 @@ class GPUTPCGeometry // TODO: Make values constexpr GPUd() float PadWidth(int32_t row) const { return (mPadWidth[GetRegion(row)]); } GPUd() uint8_t NPads(int32_t row) const { return mNPads[row]; } -#if !defined(__OPENCL1__) GPUd() float LinearPad2Y(int32_t slice, int32_t row, float pad) const { const float u = (pad - 0.5f * mNPads[row]) * PadWidth(row); @@ -144,7 +135,6 @@ class GPUTPCGeometry // TODO: Make values constexpr const float v = (slice >= GPUCA_NSLICES / 2) ? -z : z; return (250.f - v) * FACTOR_Z2T; // Used in compression, must remain constant at 250cm } -#endif }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h index 437dd32154beb..21080499f6443 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h @@ -36,16 +36,6 @@ class GlobalTrackID; } // namespace o2 //_____________________________________________________________________________ -#if (defined(__CINT__) || defined(__ROOTCINT__)) && !defined(__CLING__) -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ -template -class GPUTRDTrack_t; -} // namespace gpu -} // namespace GPUCA_NAMESPACE -#else #if (!defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB)) || defined(GPUCA_HAVE_O2HEADERS) #include "GPUTRDInterfaceO2Track.h" #endif @@ -150,6 +140,4 @@ class GPUTRDTrack_t : public T } // namespace gpu } // namespace GPUCA_NAMESPACE -#endif // !((defined(__CINT__) || defined(__ROOTCINT__)) && 
!defined(__CLING__)) - #endif // GPUTRDTRACK_H diff --git a/GPU/GPUTracking/Definitions/GPUDef.h b/GPU/GPUTracking/Definitions/GPUDef.h index 38784b1ded80e..f01e3e6d38332 100644 --- a/GPU/GPUTracking/Definitions/GPUDef.h +++ b/GPU/GPUTracking/Definitions/GPUDef.h @@ -19,13 +19,12 @@ #include "GPUCommonDef.h" #include "GPUDefConstantsAndSettings.h" #include "GPUDefGPUParameters.h" -#include "GPUDefOpenCL12Templates.h" #include "GPUCommonRtypes.h" // Macros for masking ptrs in OpenCL kernel calls as uint64_t (The API only allows us to pass buffer objects) #ifdef __OPENCL__ #define GPUPtr1(a, b) uint64_t b - #ifdef __OPENCLCPP__ + #ifdef __OPENCL__ #define GPUPtr2(a, b) ((__generic a) (a) b) #else #define GPUPtr2(a, b) ((__global a) (a) b) @@ -42,7 +41,7 @@ #endif #ifdef GPUCA_GPUCODE - #define CA_MAKE_SHARED_REF(vartype, varname, varglobal, varshared) const GPUsharedref() MEM_LOCAL(vartype) & __restrict__ varname = varshared; + #define CA_MAKE_SHARED_REF(vartype, varname, varglobal, varshared) const GPUsharedref() vartype& __restrict__ varname = varshared; #define CA_SHARED_STORAGE(storage) storage #define CA_SHARED_CACHE(target, src, size) \ static_assert((size) % sizeof(int32_t) == 0, "Invalid shared cache size"); \ @@ -53,7 +52,7 @@ CA_SHARED_CACHE(target, src, size) \ GPUsharedref() const reftype* __restrict__ ref = (target) #else - #define CA_MAKE_SHARED_REF(vartype, varname, varglobal, varshared) const GPUglobalref() MEM_GLOBAL(vartype) & __restrict__ varname = varglobal; + #define CA_MAKE_SHARED_REF(vartype, varname, varglobal, varshared) const GPUglobalref() vartype & __restrict__ varname = varglobal; #define CA_SHARED_STORAGE(storage) #define CA_SHARED_CACHE(target, src, size) #define CA_SHARED_CACHE_REF(target, src, size, reftype, ref) GPUglobalref() const reftype* __restrict__ ref = src diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index 7693ee8553b77..1c8134f11efda 100644 
--- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -51,7 +51,7 @@ #if defined(GPUCA_NSLICES) || defined(GPUCA_ROW_COUNT) #error GPUCA_NSLICES or GPUCA_ROW_COUNT already defined, do not include GPUTPCGeometry.h before! #endif -#if defined(GPUCA_HAVE_O2HEADERS) && defined(GPUCA_TPC_GEOMETRY_O2) && !defined(__OPENCL1__) && !(defined(ROOT_VERSION_CODE) && ROOT_VERSION_CODE < 393216) +#if defined(GPUCA_HAVE_O2HEADERS) && defined(GPUCA_TPC_GEOMETRY_O2) && !(defined(ROOT_VERSION_CODE) && ROOT_VERSION_CODE < 393216) //Use definitions from the O2 headers if available for nicer code and type safety #include "DataFormatsTPC/Constants.h" #define GPUCA_NSLICES o2::tpc::constants::MAXSECTOR diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 3852d37f6facf..7cd41e1a4f846 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -617,13 +617,11 @@ // #define GPUCA_KERNEL_DEBUGGER_OUTPUT // Some assertions to make sure out parameters are not invalid -#ifdef GPUCA_NOCOMPAT static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); #ifdef GPUCA_GPUCODE static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); #endif -#endif // Derived parameters #ifdef GPUCA_USE_TEXTURES diff --git a/GPU/GPUTracking/Definitions/GPUDefOpenCL12Templates.h b/GPU/GPUTracking/Definitions/GPUDefOpenCL12Templates.h deleted file mode 100644 index f65e670399f34..0000000000000 --- a/GPU/GPUTracking/Definitions/GPUDefOpenCL12Templates.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
-// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUDefOpenCL12Templates.h -/// \author David Rohr, Sergey Gorbunov - -// clang-format off -#ifndef GPUDEFOPENCL12TEMPLATES_H -#define GPUDEFOPENCL12TEMPLATES_H - -// Special macros for OpenCL rev. 1.2 (encode address space in template parameter) -enum LocalOrGlobal { Mem_Local, Mem_Global, Mem_Constant, Mem_Plain }; -#if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_USE_TEMPLATE_ADDRESS_SPACES) - template struct MakeTypeHelper; - template struct MakeTypeHelper { typedef L type; }; - template struct MakeTypeHelper { typedef G type; }; - template struct MakeTypeHelper { typedef C type; }; - template struct MakeTypeHelper { typedef P type; }; - #define MakeType(base_type) typename MakeTypeHelper::type - #define MEM_CLASS_PRE() template - #define MEM_CLASS_PRE_TEMPLATE(t) template - #define MEM_LG(type) type - #define MEM_CLASS_PRE2() template - #define MEM_CLASS_PRE2_TEMPLATE(t) template - #define MEM_LG2(type) type - #define MEM_CLASS_PRE12() template template - #define MEM_CLASS_PRE23() template - #define MEM_LG3(type) type - #define MEM_CLASS_PRE234() template - #define MEM_LG4(type) type - #define MEM_GLOBAL(type) type - #define MEM_LOCAL(type) type - #define MEM_LOCAL_TEMPLATE(type, t) type - #define MEM_CONSTANT(type) type - #define MEM_PLAIN(type) type - #define MEM_TEMPLATE() template - #define MEM_TYPE(type) T - #define MEM_TEMPLATE2() template - #define MEM_TYPE2(type) T2 - #define MEM_TEMPLATE3() template - #define MEM_TYPE3(type) T3 - #define MEM_TEMPLATE4() 
template - #define MEM_TYPE4(type) T4 -#else - #define MakeType(base_type) base_type - #define MEM_CLASS_PRE() - #define MEM_CLASS_PRE_TEMPLATE(t) template - #define MEM_LG(type) type - #define MEM_CLASS_PRE2() - #define MEM_CLASS_PRE2_TEMPLATE(t) template - #define MEM_LG2(type) type - #define MEM_CLASS_PRE12() - #define MEM_CLASS_PRE23() - #define MEM_LG3(type) type - #define MEM_CLASS_PRE234() - #define MEM_LG4(type) type - #define MEM_GLOBAL(type) type - #define MEM_LOCAL(type) type - #define MEM_LOCAL_TEMPLATE(type, t) type - #define MEM_CONSTANT(type) type - #define MEM_PLAIN(type) type - #define MEM_TEMPLATE() - #define MEM_TYPE(type) type - #define MEM_TEMPLATE2() - #define MEM_TYPE2(type) type - #define MEM_TEMPLATE3() - #define MEM_TYPE3(type) type - #define MEM_TEMPLATE4() - #define MEM_TYPE4(type) type -#endif - -#if defined(GPUCA_NO_CONSTANT_MEMORY) - #undef MEM_CONSTANT - #define MEM_CONSTANT(type) MEM_GLOBAL(type) -#endif - -#endif // GPUDEFOPENCL12TEMPLATES_H -// clang-format on diff --git a/GPU/GPUTracking/Definitions/GPULogging.h b/GPU/GPUTracking/Definitions/GPULogging.h index f3c6c019f593b..79f888501745f 100644 --- a/GPU/GPUTracking/Definitions/GPULogging.h +++ b/GPU/GPUTracking/Definitions/GPULogging.h @@ -17,15 +17,7 @@ #include "GPUCommonDef.h" // clang-format off -#if !defined(GPUCA_NOCOMPAT) - // Cannot do anything for ROOT5CINT / OpenCL1, so just disable - #define GPUInfo(...) - #define GPUImportant(...) - #define GPUWarning(...) - #define GPUAlarm(...) - #define GPUError(...) - #define GPUFatal(...) -#elif defined(GPUCA_GPUCODE_DEVICE) && !defined(GPUCA_GPU_DEBUG_PRINT) +#if defined(GPUCA_GPUCODE_DEVICE) && !defined(GPUCA_GPU_DEBUG_PRINT) // Compile-time disable for performance-reasons #define GPUInfo(...) #define GPUImportant(...) @@ -73,19 +65,11 @@ } #define GPUAlarm(...) GPUWarning(__VA_ARGS__) #define GPUError(...) GPUWarning(__VA_ARGS__) - #ifdef GPUCA_NOCOMPAT - #define GPUFatal(string, ...) 
\ - { \ - fprintf(stderr, string "\n", ##__VA_ARGS__); \ - throw std::exception(); \ - } - #else - #define GPUFatal(string, ...) \ - { \ - fprintf(stderr, string "\n", __VA_ARGS__); \ - exit(1); \ - } - #endif + #define GPUFatal(string, ...) \ + { \ + fprintf(stderr, string "\n", ##__VA_ARGS__); \ + throw std::exception(); \ + } #endif #elif defined(GPUCA_ALIROOT_LIB) // Forward to HLT Logging functions for AliRoot diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 07cd320140909..b7881bd61978c 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -473,7 +473,7 @@ EndConfig() BeginConfig(GPUSettingsStandalone, configStandalone) AddOption(runGPU, uint8_t, 1, "", 'g', "Use GPU for processing", message("GPU processing enabled"), set(2)) AddOptionSet(runGPU, uint8_t, 0, "", 'c', "Use CPU for processing", message("CPU enabled")) -AddOption(gpuType, std::string, "AUTO", "", 0, "GPU type (CUDA / HIP / OCL / OCL2) or CPU or AUTO") +AddOption(gpuType, std::string, "AUTO", "", 0, "GPU type (CUDA / HIP / OCL) or CPU or AUTO") AddOption(runGPUforce, bool, true, "", 0, "Force usage of the specified GPU device type, no CPU fallback") AddOption(noprompt, bool, true, "", 0, "Do prompt for keypress before exiting") AddOption(continueOnError, bool, false, "", 0, "Continue processing after an error") @@ -537,7 +537,7 @@ AddOption(constBz, bool, false, "", 0, "force constant Bz for tests") AddOption(setMaxTimeBin, int32_t, -2, "", 0, "maximum time bin of continuous data, 0 for triggered events, -1 for automatic continuous mode, -2 for automatic continuous / triggered") AddOption(overrideNHbfPerTF, int32_t, 0, "", 0, "Overrides the number of HBF per TF if != 0") AddOption(overrideTPCTimeBinCur, int32_t, 0, "", 0, "Overrides TPC time bin cut if > 0") -AddOption(deviceType, std::string, "CPU", "", 0, "Device type, CPU | CUDA | HIP | OCL1 | OCL2") +AddOption(deviceType, 
std::string, "CPU", "", 0, "Device type, CPU | CUDA | HIP | OCL") AddOption(forceDeviceType, bool, true, "", 0, "force device type, otherwise allows fall-back to CPU") AddOption(synchronousProcessing, bool, false, "", 0, "Apply performance shortcuts for synchronous processing, disable unneeded steps") AddOption(dump, int32_t, 0, "", 0, "Dump events for standalone benchmark: 1 = dump events, 2 = dump events and skip processing in workflow") diff --git a/GPU/GPUTracking/GPUTrackingLinkDef_AliRoot.h b/GPU/GPUTracking/GPUTrackingLinkDef_AliRoot.h index 266228dd79ff6..8974b4bd89460 100644 --- a/GPU/GPUTracking/GPUTrackingLinkDef_AliRoot.h +++ b/GPU/GPUTracking/GPUTrackingLinkDef_AliRoot.h @@ -12,7 +12,7 @@ /// \file GPUTrackingLinkDef_AliRoot.h /// \author David Rohr -#if defined(__CINT__) || defined(__CLING__) +#if defined(__CLING__) #pragma link off all globals; #pragma link off all classes; diff --git a/GPU/GPUTracking/Global/AliHLTGPUDumpComponent.h b/GPU/GPUTracking/Global/AliHLTGPUDumpComponent.h index 4174b6b8aab46..a4977e6859968 100644 --- a/GPU/GPUTracking/Global/AliHLTGPUDumpComponent.h +++ b/GPU/GPUTracking/Global/AliHLTGPUDumpComponent.h @@ -42,8 +42,8 @@ class AliHLTGPUDumpComponent : public AliHLTProcessor AliHLTGPUDumpComponent(); - AliHLTGPUDumpComponent(const AliHLTGPUDumpComponent&) CON_DELETE; - AliHLTGPUDumpComponent& operator=(const AliHLTGPUDumpComponent&) CON_DELETE; + AliHLTGPUDumpComponent(const AliHLTGPUDumpComponent&) = delete; + AliHLTGPUDumpComponent& operator=(const AliHLTGPUDumpComponent&) = delete; virtual ~AliHLTGPUDumpComponent(); diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 6eb20f3093b2f..9ca370d6cc308 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -293,7 +293,7 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega // Synchronization and Locks eventStruct* mEvents = nullptr; - VOLATILE 
int32_t mSliceSelectorReady = 0; + volatile int32_t mSliceSelectorReady = 0; std::array mWriteOutputDone; std::vector mOutputQueue; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index ff4133d9b2ce3..8eddab63df35c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -567,7 +567,6 @@ int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers) } #endif -// TODO: Clusterizer not working with OCL1 (Clusterizer on CPU, Tracking on GPU) int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) { if (param().rec.fwdTPCDigitsAsClusters) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index b3b23270f566f..af5f7d177e6cc 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -63,9 +63,9 @@ class GPUTPCGMMerger : public GPUProcessor { public: GPUTPCGMMerger(); - ~GPUTPCGMMerger() CON_DEFAULT; - GPUTPCGMMerger(const GPUTPCGMMerger&) CON_DELETE; - const GPUTPCGMMerger& operator=(const GPUTPCGMMerger&) const CON_DELETE; + ~GPUTPCGMMerger() = default; + GPUTPCGMMerger(const GPUTPCGMMerger&) = delete; + const GPUTPCGMMerger& operator=(const GPUTPCGMMerger&) const = delete; static CONSTEXPR const int32_t NSLICES = GPUCA_NSLICES; //* N slices struct memory { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h b/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h index c77ef99e3790c..c65e6df6c320f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h @@ -34,7 +34,7 @@ namespace gpu class GPUTPCGMPhysicalTrackModel { public: - GPUdDefault() GPUTPCGMPhysicalTrackModel() CON_DEFAULT; + GPUdDefault() GPUTPCGMPhysicalTrackModel() = default; GPUd() GPUTPCGMPhysicalTrackModel(const GPUTPCGMTrackParam& t); GPUd() void Set(const 
GPUTPCGMTrackParam& t); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h index 59f9cfcacb3d6..a58c3485321fd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h @@ -39,7 +39,7 @@ class GPUTPCGMPolynomialFieldManager k2kG, k5kG }; // known fitted polynomial fields, stored in constants - GPUTPCGMPolynomialFieldManager() CON_DEFAULT; + GPUTPCGMPolynomialFieldManager() = default; /* Get appropriate pre-calculated polynomial field for the given field value nominalFieldkG */ diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index c6a188ced5435..0a35875764ae5 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -65,7 +65,7 @@ class GPUTPCGMPropagator rejectInterReject = 3 }; - GPUdDefault() GPUTPCGMPropagator() CON_DEFAULT; + GPUdDefault() GPUTPCGMPropagator() = default; struct MaterialCorrection { GPUhd() MaterialCorrection() : radLen(28811.7f), rho(1.025e-3f), radLenInv(1.f / radLen), DLMax(0.f), EP2(0.f), sigmadE2(0.f), k22(0.f), k33(0.f), k43(0.f), k44(0.f) {} diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 8220b743dde0e..8cca91c0a0033 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -431,7 +431,7 @@ template GPUdni() int32_t GPUTrackingRefit::RefitTrackioPtrs.mergedTrackHitStates; mPclusterNative = v->ioPtrs.clustersNative; diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.h b/GPU/GPUTracking/Refit/GPUTrackingRefit.h index 2cc414bbc2d81..bb45709d08165 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.h +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.h @@ -48,9 +48,7 @@ namespace o2::gpu class CorrectionMapsHelper; class GPUTPCGMTrackParam; class GPUTPCGMMergedTrack; -MEM_CLASS_PRE() struct 
GPUConstantMem; -MEM_CLASS_PRE() struct GPUParam; struct GPUTPCGMMergedTrackHit; @@ -58,13 +56,13 @@ class GPUTrackingRefit { public: void SetClusterStateArray(const uint8_t* v) { mPclusterState = v; } - void SetPtrsFromGPUConstantMem(const GPUConstantMem* v, MEM_CONSTANT(GPUParam) * p = nullptr); + void SetPtrsFromGPUConstantMem(const GPUConstantMem* v, GPUParam* p = nullptr); void SetPropagator(const o2::base::Propagator* v) { mPpropagator = v; } void SetClusterNative(const o2::tpc::ClusterNativeAccess* v) { mPclusterNative = v; } void SetTrackHits(const GPUTPCGMMergedTrackHit* v) { mPtrackHits = v; } void SetTrackHitReferences(const uint32_t* v) { mPtrackHitReferences = v; } void SetFastTransformHelper(const CorrectionMapsHelper* v) { mPfastTransformHelper = v; } - void SetGPUParam(const MEM_CONSTANT(GPUParam) * v) { mPparam = v; } + void SetGPUParam(const GPUParam* v) { mPparam = v; } GPUd() int32_t RefitTrackAsGPU(GPUTPCGMMergedTrack& trk, bool outward = false, bool resetCov = false) { return RefitTrack(trk, outward, resetCov); } GPUd() int32_t RefitTrackAsTrackParCov(GPUTPCGMMergedTrack& trk, bool outward = false, bool resetCov = false) { return RefitTrack(trk, outward, resetCov); } GPUd() int32_t RefitTrackAsGPU(o2::tpc::TrackTPC& trk, bool outward = false, bool resetCov = false) { return RefitTrack(trk, outward, resetCov); } @@ -97,7 +95,7 @@ class GPUTrackingRefit const GPUTPCGMMergedTrackHit* mPtrackHits = nullptr; // Ptr to hits for GPUTPCGMMergedTrack tracks const uint32_t* mPtrackHitReferences = nullptr; // Ptr to hits for TrackTPC tracks const CorrectionMapsHelper* mPfastTransformHelper = nullptr; // Ptr to TPC fast transform object helper - const MEM_CONSTANT(GPUParam) * mPparam = nullptr; // Ptr to GPUParam + const GPUParam* mPparam = nullptr; // Ptr to GPUParam template GPUd() int32_t RefitTrack(T& trk, bool outward, bool resetCov); template diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h 
b/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h index 28fa54544e292..c2fc7e58061da 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h @@ -21,7 +21,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTrackParam; /** @@ -31,7 +30,6 @@ class GPUTPCTrackParam; * used in output of the GPUTPCTracker slice tracker. * This class is used for transfer between tracker and merger and does not contain the covariance matrice */ -MEM_CLASS_PRE() struct GPUTPCBaseTrackParam { GPUd() float X() const { return mX; } GPUd() float Y() const { return mP[0]; } @@ -60,8 +58,8 @@ struct GPUTPCBaseTrackParam { GPUd() float GetKappa(float Bz) const { return -mP[4] * Bz; } - GPUhd() MakeType(const float*) Par() const { return mP; } - GPUd() const MakeType(float*) GetPar() const { return mP; } + GPUhd() const float* Par() const { return mP; } + GPUd() const float* GetPar() const { return mP; } GPUd() float GetPar(int32_t i) const { return (mP[i]); } GPUhd() void SetPar(int32_t i, float v) { mP[i] = v; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx index 3ddedd702f784..5c3e473aab0c9 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx @@ -19,7 +19,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCCreateSliceData::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCCreateSliceData::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { tracker.Data().InitFromClusterData(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem(), tracker.ISlice(), s.tmp); } diff --git 
a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h index 9a64d04e7ca6d..916891c2035ef 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h @@ -33,15 +33,14 @@ class GPUTPCCreateSliceData : public GPUKernelTemplate float tmp[4]; }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx index c1a3c685947d6..c86249fbb6f77 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx @@ -22,9 +22,7 @@ using namespace GPUCA_NAMESPACE::gpu; -#if !defined(__OPENCL1__) - -GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, const GPUTPCTracker& GPUrestrict() sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) +GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& GPUrestrict() sliceSource, int32_t 
iTrack, int32_t rowIndex, float angle, int32_t direction) { /*for (int32_t j = 0;j < Tracks()[j].NHits();j++) { @@ -120,7 +118,7 @@ GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tra return (nHits >= tracker.Param().rec.tpc.globalTrackingMinHits); } -GPUd() void GPUTPCGlobalTracking::PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, GPUTPCTracker& GPUrestrict() sliceTarget, bool right) +GPUd() void GPUTPCGlobalTracking::PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& GPUrestrict() sliceTarget, bool right) { for (int32_t i = iBlock * nThreads + iThread; i < tracker.CommonMemory()->nLocalTracks; i += nThreads * nBlocks) { { @@ -160,9 +158,9 @@ GPUd() void GPUTPCGlobalTracking::PerformGlobalTracking(int32_t nBlocks, int32_t } template <> -GPUdii() void GPUTPCGlobalTracking::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCGlobalTracking::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) { - CA_SHARED_CACHE(&smem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(MEM_PLAIN(GPUTPCRow))); + CA_SHARED_CACHE(&smem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); GPUbarrier(); if (tracker.NHitsTotal() == 0) { @@ -200,13 +198,12 @@ GPUd() void GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(uint32_t iSlice, right += GPUDataTypes::NSLICES / 2; } } -#endif // !__OPENCL1__ template <> -GPUdii() void GPUTPCGlobalTrackingCopyNumbers::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() 
MEM_LOCAL(GPUSharedMemory) & smem, processorType& GPUrestrict() tracker, int32_t n) +GPUdii() void GPUTPCGlobalTrackingCopyNumbers::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker, int32_t n) { for (int32_t i = get_global_id(0); i < n; i += get_global_size(0)) { - GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() trk = (&tracker)[i]; + GPUconstantref() GPUTPCTracker& GPUrestrict() trk = (&tracker)[i]; trk.CommonMemory()->nLocalTracks = trk.CommonMemory()->nTracks; trk.CommonMemory()->nLocalTrackHits = trk.CommonMemory()->nTrackHits; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h index 9d732a582b1c4..367b4314814fe 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h @@ -22,47 +22,43 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; -#if !defined(__OPENCL1__) class GPUTPCGlobalTracking : public GPUKernelTemplate { public: struct GPUSharedMemory { - CA_SHARED_STORAGE(MEM_LG(GPUTPCRow) mRows[GPUCA_ROW_COUNT]); + CA_SHARED_STORAGE(GPUTPCRow mRows[GPUCA_ROW_COUNT]); }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& 
smem, processorType& tracker); GPUd() static int32_t GlobalTrackingSliceOrder(int32_t iSlice); GPUd() static void GlobalTrackingSliceLeftRight(uint32_t iSlice, uint32_t& left, uint32_t& right); private: - GPUd() static int32_t PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, const GPUTPCTracker& sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); - GPUd() static void PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, GPUTPCTracker& sliceTarget, bool right); + GPUd() static int32_t PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); + GPUd() static void PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& sliceTarget, bool right); }; -#endif class GPUTPCGlobalTrackingCopyNumbers : public GPUKernelTemplate { public: - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker, int32_t n); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker, int32_t n); }; } // namespace gpu 
diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx index 00fceaf8a5874..56d2e88db1c28 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx @@ -20,8 +20,7 @@ using namespace GPUCA_NAMESPACE::gpu; #include #endif -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCGrid)::CreateEmpty() +GPUd() void GPUTPCGrid::CreateEmpty() { // Create an empty grid mYMin = 0.f; @@ -37,8 +36,7 @@ GPUd() void MEM_LG(GPUTPCGrid)::CreateEmpty() mStepZInv = 1.f; } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCGrid)::Create(float yMin, float yMax, float zMin, float zMax, int32_t ny, int32_t nz) +GPUd() void GPUTPCGrid::Create(float yMin, float yMax, float zMin, float zMax, int32_t ny, int32_t nz) { //* Create the grid mYMin = yMin; @@ -59,8 +57,7 @@ GPUd() void MEM_LG(GPUTPCGrid)::Create(float yMin, float yMax, float zMin, float mZMax = mZMin + mNz * sz; } -MEM_CLASS_PRE() -GPUd() int32_t MEM_LG(GPUTPCGrid)::GetBin(float Y, float Z) const +GPUd() int32_t GPUTPCGrid::GetBin(float Y, float Z) const { //* get the bin pointer const int32_t yBin = static_cast((Y - mYMin) * mStepYInv); @@ -73,8 +70,7 @@ GPUd() int32_t MEM_LG(GPUTPCGrid)::GetBin(float Y, float Z) const return bin; } -MEM_CLASS_PRE() -GPUd() int32_t MEM_LG(GPUTPCGrid)::GetBinBounded(float Y, float Z) const +GPUd() int32_t GPUTPCGrid::GetBinBounded(float Y, float Z) const { //* get the bin pointer const int32_t yBin = static_cast((Y - mYMin) * mStepYInv); @@ -89,8 +85,7 @@ GPUd() int32_t MEM_LG(GPUTPCGrid)::GetBinBounded(float Y, float Z) const return bin; } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCGrid)::GetBin(float Y, float Z, int32_t* const bY, int32_t* const bZ) const +GPUd() void GPUTPCGrid::GetBin(float Y, float Z, int32_t* const bY, int32_t* const bZ) const { //* get the bin pointer @@ -114,8 +109,7 @@ GPUd() void MEM_LG(GPUTPCGrid)::GetBin(float Y, float Z, int32_t* const bY, int3 *bZ = (uint32_t)bbZ; } -MEM_CLASS_PRE() -GPUd() 
void MEM_LG(GPUTPCGrid)::GetBinArea(float Y, float Z, float dy, float dz, int32_t& bin, int32_t& ny, int32_t& nz) const +GPUd() void GPUTPCGrid::GetBinArea(float Y, float Z, float dy, float dz, int32_t& bin, int32_t& ny, int32_t& nz) const { Y -= mYMin; int32_t by = (int32_t)((Y - dy) * mStepYInv); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h index a069282e2a0a9..a3cd7916f0e6d 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h @@ -29,7 +29,6 @@ namespace gpu * used by GPUTPCTracker to speed-up the hit operations * grid axis are named Z,Y to be similar to TPC row coordinates. */ -MEM_CLASS_PRE() class GPUTPCGrid { public: diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx index 7842a57f47794..9293801f5f5f9 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx @@ -18,7 +18,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCNeighboursCleaner::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCNeighboursCleaner::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { // * // * kill link to the neighbour if the neighbour is not pointed to the cluster @@ -38,9 +38,9 @@ GPUdii() void GPUTPCNeighboursCleaner::Thread<0>(int32_t /*nBlocks*/, int32_t nT #ifdef GPUCA_GPUCODE int32_t Up = s.mIRowUp; int32_t Dn = s.mIRowDn; - GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row = tracker.Row(s.mIRow); - GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() rowUp = tracker.Row(Up); - GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() rowDn = 
tracker.Row(Dn); + GPUglobalref() const GPUTPCRow& GPUrestrict() row = tracker.Row(s.mIRow); + GPUglobalref() const GPUTPCRow& GPUrestrict() rowUp = tracker.Row(Up); + GPUglobalref() const GPUTPCRow& GPUrestrict() rowDn = tracker.Row(Dn); #else const GPUTPCRow& GPUrestrict() row = tracker.Row(s.mIRow); const GPUTPCRow& GPUrestrict() rowUp = tracker.Row(s.mIRowUp); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h index 26e85907bc6ab..23c1e21e87ab0 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h @@ -23,7 +23,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; /** @@ -33,7 +32,6 @@ class GPUTPCTracker; class GPUTPCNeighboursCleaner : public GPUKernelTemplate { public: - MEM_CLASS_PRE() struct GPUSharedMemory { int32_t mIRow; // current row index int32_t mIRowUp; // current row index @@ -41,15 +39,14 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate int32_t mNHits; // number of hits }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git 
a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx index b7cfccfa15408..69d05fc3176b4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx @@ -20,12 +20,12 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { //* find neighbours #ifdef GPUCA_GPUCODE - for (uint32_t i = iThread; i < sizeof(MEM_PLAIN(GPUTPCRow)) / sizeof(int32_t); i += nThreads) { + for (uint32_t i = iThread; i < sizeof(GPUTPCRow) / sizeof(int32_t); i += nThreads) { reinterpret_cast(&s.mRow)[i] = reinterpret_cast(&tracker.SliceDataRows()[iBlock])[i]; if (iBlock >= 2 && iBlock < GPUCA_ROW_COUNT - 2) { reinterpret_cast(&s.mRowUp)[i] = reinterpret_cast(&tracker.SliceDataRows()[iBlock + 2])[i]; @@ -33,13 +33,13 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } } GPUbarrier(); - const GPUsharedref() MEM_LOCAL(GPUTPCRow) & GPUrestrict() row = s.mRow; - const GPUsharedref() MEM_LOCAL(GPUTPCRow) & GPUrestrict() rowUp = s.mRowUp; - const GPUsharedref() MEM_LOCAL(GPUTPCRow) & GPUrestrict() rowDn = s.mRowDown; + const GPUsharedref() GPUTPCRow& GPUrestrict() row = s.mRow; + const GPUsharedref() GPUTPCRow& GPUrestrict() rowUp = s.mRowUp; + const GPUsharedref() GPUTPCRow& GPUrestrict() rowDn = s.mRowDown; #else - const GPUglobalref() MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row = tracker.mData.mRows[iBlock]; - const GPUglobalref() MEM_GLOBAL(GPUTPCRow) & GPUrestrict() rowUp = tracker.mData.mRows[iBlock + 2]; - const GPUglobalref() 
MEM_GLOBAL(GPUTPCRow) & GPUrestrict() rowDn = tracker.mData.mRows[iBlock - 2]; + const GPUglobalref() GPUTPCRow& GPUrestrict() row = tracker.mData.mRows[iBlock]; + const GPUglobalref() GPUTPCRow& GPUrestrict() rowUp = tracker.mData.mRows[iBlock + 2]; + const GPUglobalref() GPUTPCRow& GPUrestrict() rowDn = tracker.mData.mRows[iBlock - 2]; #endif if (iThread == 0) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h index 7174286fde948..a121a0f14eb67 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h @@ -24,7 +24,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; /** @@ -34,7 +33,6 @@ class GPUTPCTracker; class GPUTPCNeighboursFinder : public GPUKernelTemplate { public: - MEM_CLASS_PRE() struct GPUSharedMemory { int32_t mNHits; // n hits float mUpDx; // x distance to the next row @@ -49,19 +47,17 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate float mA2[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; calink mB[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; #endif - MEM_LG(GPUTPCRow) - mRow, mRowUp, mRowDown; + GPUTPCRow mRow, mRowUp, mRowDown; }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void 
Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCRow.h b/GPU/GPUTracking/SliceTracker/GPUTPCRow.h index ed25e18e90c46..7c8e96c8352a8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCRow.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCRow.h @@ -29,10 +29,8 @@ namespace gpu * It is the internal class of the GPUTPCTracker algorithm. * */ -MEM_CLASS_PRE() class GPUTPCRow { - MEM_CLASS_PRE2() friend class GPUTPCSliceData; public: @@ -46,7 +44,7 @@ class GPUTPCRow } GPUhd() float X() const { return mX; } GPUhd() float MaxY() const { return mMaxY; } - GPUhd() MakeType(const MEM_LG(GPUTPCGrid) &) Grid() const { return mGrid; } + GPUhd() const GPUTPCGrid& Grid() const { return mGrid; } GPUhd() float Hy0() const { return mHy0; } GPUhd() float Hz0() const { return mHz0; } @@ -66,8 +64,7 @@ class GPUTPCRow int32_t mNHits; // number of hits float mX; // X coordinate of the row float mMaxY; // maximal Y coordinate of the row - MEM_LG(GPUTPCGrid) - mGrid; // grid of hits + GPUTPCGrid mGrid; // grid of hits // hit packing: float mHy0; // offset diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx index 99088a1e99c53..ba5da49ff6ff9 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx @@ -28,8 +28,8 @@ template <> GPUdii() void GPUTPCSectorDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) { const uint32_t iRow = iBlock; - const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row = tracker.Data().Row(iRow); - const MEM_GLOBAL(GPUTPCGrid) & GPUrestrict() grid = row.Grid(); + const GPUTPCRow& GPUrestrict() row = 
tracker.Data().Row(iRow); + const GPUTPCGrid& GPUrestrict() grid = row.Grid(); for (uint32_t i = iThread; i < grid.N(); i += nThreads) { uint32_t jMin = tracker.Data().FirstHitInBin(row, i); uint32_t jMax = tracker.Data().FirstHitInBin(row, i + 1); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx index 6c456a28918ab..5177c48b6a834 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx @@ -32,7 +32,7 @@ using namespace GPUCA_NAMESPACE::gpu; #ifndef GPUCA_GPUCODE -void GPUTPCSliceData::InitializeRows(const MEM_CONSTANT(GPUParam) & p) +void GPUTPCSliceData::InitializeRows(const GPUParam& p) { // initialisation of rows for (int32_t i = 0; i < GPUCA_ROW_COUNT + 1; ++i) { @@ -109,7 +109,7 @@ void* GPUTPCSliceData::SetPointersRows(void* mem) #endif -GPUd() void GPUTPCSliceData::GetMaxNBins(GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ) +GPUd() void GPUTPCSliceData::GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ) { maxY = row->mMaxY * 2.f / GPUCA_MIN_BIN_SIZE + 1; maxZ = (mem->param.continuousMaxTimeBin > 0 ? 
(mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : mem->param.tpcGeometry.TPCLength()) + 50; @@ -121,7 +121,7 @@ GPUd() uint32_t GPUTPCSliceData::GetGridSize(uint32_t nHits, uint32_t nRows) return 128 * nRows + 4 * nHits; } -GPUdi() void GPUTPCSliceData::CreateGrid(GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax) +GPUdi() void GPUTPCSliceData::CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax) { float dz = zMax - zMin; float tfFactor = 1.f; @@ -172,7 +172,7 @@ GPUdii() void GPUTPCSliceData::SetRowGridEmpty(GPUTPCRow& GPUrestrict() row) } } -GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * GPUrestrict() mem, int32_t iSlice, float* tmpMinMax) +GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* GPUrestrict() mem, int32_t iSlice, float* tmpMinMax) { #ifdef GPUCA_GPUCODE constexpr bool EarlyTransformWithoutClusterNative = false; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h index a75cba8dd861b..c45c35c667f25 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h @@ -28,15 +28,14 @@ namespace gpu struct GPUTPCClusterData; class GPUTPCHit; -MEM_CLASS_PRE() class GPUTPCSliceData { public: GPUTPCSliceData() : mNumberOfHits(0), mNumberOfHitsPlusAlign(0), mClusterIdOffset(0), mGPUTextureBase(nullptr), mRows(nullptr), mLinkUpData(nullptr), mLinkDownData(nullptr), mClusterData(nullptr) {} #ifndef GPUCA_GPUCODE_DEVICE - ~GPUTPCSliceData() CON_DEFAULT; - void InitializeRows(const 
MEM_CONSTANT(GPUParam) & p); + ~GPUTPCSliceData() = default; + void InitializeRows(const GPUParam& p); void SetMaxData(); void SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset); void* SetPointersInput(void* mem, bool idsOnGPU, bool sliceDataOnGPU); @@ -47,7 +46,7 @@ class GPUTPCSliceData void* SetPointersRows(void* mem); #endif - GPUd() int32_t InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mem, int32_t iSlice, float* tmpMinMax); + GPUd() int32_t InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* mem, int32_t iSlice, float* tmpMinMax); /** * Return the number of hits in this slice. @@ -61,39 +60,26 @@ class GPUTPCSliceData * * The links values give the hit index in the row above/below. Or -1 if there is no link. */ - MEM_TEMPLATE() - GPUd() calink HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex) const; - MEM_TEMPLATE() - GPUd() calink HitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex) const; - - MEM_TEMPLATE() - GPUhdi() GPUglobalref() const cahit2* HitData(const MEM_TYPE(GPUTPCRow) & row) const { return &mHitData[row.mHitNumberOffset]; } - MEM_TEMPLATE() - GPUhdi() GPUglobalref() cahit2* HitData(const MEM_TYPE(GPUTPCRow) & row) { return &mHitData[row.mHitNumberOffset]; } + GPUd() calink HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const; + GPUd() calink HitLinkDownData(const GPUTPCRow& row, const calink& hitIndex) const; + + GPUhdi() GPUglobalref() const cahit2* HitData(const GPUTPCRow& row) const { return &mHitData[row.mHitNumberOffset]; } + GPUhdi() GPUglobalref() cahit2* HitData(const GPUTPCRow& row) { return &mHitData[row.mHitNumberOffset]; } GPUhd() GPUglobalref() const cahit2* HitData() const { return (mHitData); } - MEM_TEMPLATE() - GPUdi() GPUglobalref() const calink* HitLinkUpData(const 
MEM_TYPE(GPUTPCRow) & row) const { return &mLinkUpData[row.mHitNumberOffset]; } - MEM_TEMPLATE() - GPUdi() GPUglobalref() calink* HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row) { return &mLinkUpData[row.mHitNumberOffset]; } - MEM_TEMPLATE() - GPUdi() GPUglobalref() const calink* HitLinkDownData(const MEM_TYPE(GPUTPCRow) & row) const { return &mLinkDownData[row.mHitNumberOffset]; } - MEM_TEMPLATE() - GPUdi() GPUglobalref() const calink* FirstHitInBin(const MEM_TYPE(GPUTPCRow) & row) const { return &mFirstHitInBin[row.mFirstHitInBinOffset]; } - - MEM_TEMPLATE() - GPUd() void SetHitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex, const calink& value); - MEM_TEMPLATE() - GPUd() void SetHitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex, const calink& value); + GPUdi() GPUglobalref() const calink* HitLinkUpData(const GPUTPCRow& row) const { return &mLinkUpData[row.mHitNumberOffset]; } + GPUdi() GPUglobalref() calink* HitLinkUpData(const GPUTPCRow& row) { return &mLinkUpData[row.mHitNumberOffset]; } + GPUdi() GPUglobalref() const calink* HitLinkDownData(const GPUTPCRow& row) const { return &mLinkDownData[row.mHitNumberOffset]; } + GPUdi() GPUglobalref() const calink* FirstHitInBin(const GPUTPCRow& row) const { return &mFirstHitInBin[row.mFirstHitInBinOffset]; } + + GPUd() void SetHitLinkUpData(const GPUTPCRow& row, const calink& hitIndex, const calink& value); + GPUd() void SetHitLinkDownData(const GPUTPCRow& row, const calink& hitIndex, const calink& value); /** * Return the y and z coordinate(s) of the given hit(s). 
*/ - MEM_TEMPLATE() - GPUd() cahit HitDataY(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const; - MEM_TEMPLATE() - GPUd() cahit HitDataZ(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const; - MEM_TEMPLATE() - GPUd() cahit2 HitData(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const; + GPUd() cahit HitDataY(const GPUTPCRow& row, const uint32_t& hitIndex) const; + GPUd() cahit HitDataZ(const GPUTPCRow& row, const uint32_t& hitIndex) const; + GPUd() cahit2 HitData(const GPUTPCRow& row, const uint32_t& hitIndex) const; /** * For a given bin index, content tells how many hits there are in the preceding bins. This maps @@ -101,36 +87,31 @@ class GPUTPCSliceData * * \param binIndexes in the range 0 to row.Grid.N + row.Grid.Ny + 3. */ - MEM_TEMPLATE() - GPUd() calink FirstHitInBin(const MEM_TYPE(GPUTPCRow) & row, calink binIndex) const; + GPUd() calink FirstHitInBin(const GPUTPCRow& row, calink binIndex) const; /** * If the given weight is higher than what is currently stored replace with the new weight. 
*/ - MEM_TEMPLATE() - GPUd() void MaximizeHitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex, uint32_t weight); - MEM_TEMPLATE() - GPUd() void SetHitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex, uint32_t weight); + GPUd() void MaximizeHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight); + GPUd() void SetHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight); /** * Return the maximal weight the given hit got from one tracklet */ - MEM_TEMPLATE() - GPUd() int32_t HitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex) const; + GPUd() int32_t HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const; /** * Returns the index in the original GPUTPCClusterData object of the given hit */ - MEM_TEMPLATE() - GPUhd() int32_t ClusterDataIndex(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex) const; + GPUhd() int32_t ClusterDataIndex(const GPUTPCRow& row, uint32_t hitIndex) const; GPUd() GPUglobalref() const int32_t* ClusterDataIndex() const { return mClusterDataIndex; } GPUd() GPUglobalref() int32_t* ClusterDataIndex() { return mClusterDataIndex; } /** * Return the row object for the given row index. 
*/ - GPUhdi() GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & Row(int32_t rowIndex) const { return mRows[rowIndex]; } - GPUhdi() GPUglobalref() MEM_GLOBAL(GPUTPCRow) * Rows() const { return mRows; } + GPUhdi() GPUglobalref() const GPUTPCRow& Row(int32_t rowIndex) const { return mRows[rowIndex]; } + GPUhdi() GPUglobalref() GPUTPCRow* Rows() const { return mRows; } GPUhdi() GPUglobalref() GPUAtomic(uint32_t) * HitWeights() { return (mHitWeights); } @@ -142,12 +123,12 @@ class GPUTPCSliceData private: #ifndef GPUCA_GPUCODE - GPUTPCSliceData& operator=(const GPUTPCSliceData&) CON_DELETE; // ROOT 5 tries to use this if it is not private - GPUTPCSliceData(const GPUTPCSliceData&) CON_DELETE; // + GPUTPCSliceData& operator=(const GPUTPCSliceData&) = delete; // ROOT 5 tries to use this if it is not private + GPUTPCSliceData(const GPUTPCSliceData&) = delete; // #endif - GPUd() void CreateGrid(GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mem, MEM_GLOBAL(GPUTPCRow) * GPUrestrict() row, float yMin, float yMax, float zMin, float zMax); - GPUd() void SetRowGridEmpty(MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row); - GPUd() static void GetMaxNBins(GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mem, MEM_GLOBAL(GPUTPCRow) * GPUrestrict() row, int32_t& maxY, int32_t& maxZ); + GPUd() void CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax); + GPUd() void SetRowGridEmpty(GPUTPCRow& GPUrestrict() row); + GPUd() static void GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ); GPUd() uint32_t GetGridSize(uint32_t nHits, uint32_t nRows); friend class GPUTPCNeighboursFinder; @@ -159,7 +140,7 @@ class GPUTPCSliceData GPUglobalref() const void* mGPUTextureBase; // pointer to start of GPU texture - GPUglobalref() MEM_GLOBAL(GPUTPCRow) * mRows; // The row objects needed for most accessor functions + GPUglobalref() GPUTPCRow* mRows; // The row 
objects needed for most accessor functions GPUglobalref() calink* mLinkUpData; // hit index in the row above which is linked to the given (global) hit index GPUglobalref() calink* mLinkDownData; // hit index in the row below which is linked to the given (global) hit index @@ -175,65 +156,41 @@ class GPUTPCSliceData GPUglobalref() const GPUTPCClusterData* mClusterData; }; -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() calink MEM_LG(GPUTPCSliceData)::HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex) const { return mLinkUpData[row.mHitNumberOffset + hitIndex]; } +GPUdi() calink GPUTPCSliceData::HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkUpData[row.mHitNumberOffset + hitIndex]; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() calink MEM_LG(GPUTPCSliceData)::HitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex) const { return mLinkDownData[row.mHitNumberOffset + hitIndex]; } +GPUdi() calink GPUTPCSliceData::HitLinkDownData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkDownData[row.mHitNumberOffset + hitIndex]; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() void MEM_LG(GPUTPCSliceData)::SetHitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex, const calink& value) +GPUdi() void GPUTPCSliceData::SetHitLinkUpData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) { mLinkUpData[row.mHitNumberOffset + hitIndex] = value; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() void MEM_LG(GPUTPCSliceData)::SetHitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex, const calink& value) +GPUdi() void GPUTPCSliceData::SetHitLinkDownData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) { mLinkDownData[row.mHitNumberOffset + hitIndex] = value; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() cahit MEM_LG(GPUTPCSliceData)::HitDataY(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].x; } 
+GPUdi() cahit GPUTPCSliceData::HitDataY(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].x; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() cahit MEM_LG(GPUTPCSliceData)::HitDataZ(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].y; } +GPUdi() cahit GPUTPCSliceData::HitDataZ(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].y; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() cahit2 MEM_LG(GPUTPCSliceData)::HitData(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex]; } +GPUdi() cahit2 GPUTPCSliceData::HitData(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex]; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() calink MEM_LG(GPUTPCSliceData)::FirstHitInBin(const MEM_TYPE(GPUTPCRow) & row, calink binIndex) const { return mFirstHitInBin[row.mFirstHitInBinOffset + binIndex]; } +GPUdi() calink GPUTPCSliceData::FirstHitInBin(const GPUTPCRow& row, calink binIndex) const { return mFirstHitInBin[row.mFirstHitInBinOffset + binIndex]; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUhdi() int32_t MEM_LG(GPUTPCSliceData)::ClusterDataIndex(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex) const { return mClusterDataIndex[row.mHitNumberOffset + hitIndex]; } +GPUhdi() int32_t GPUTPCSliceData::ClusterDataIndex(const GPUTPCRow& row, uint32_t hitIndex) const { return mClusterDataIndex[row.mHitNumberOffset + hitIndex]; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() void MEM_LG(GPUTPCSliceData)::MaximizeHitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex, uint32_t weight) +GPUdi() void GPUTPCSliceData::MaximizeHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) { CAMath::AtomicMax(&mHitWeights[row.mHitNumberOffset + hitIndex], weight); } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() void 
MEM_LG(GPUTPCSliceData)::SetHitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex, uint32_t weight) +GPUdi() void GPUTPCSliceData::SetHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) { mHitWeights[row.mHitNumberOffset + hitIndex] = weight; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() int32_t MEM_LG(GPUTPCSliceData)::HitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex) const { return mHitWeights[row.mHitNumberOffset + hitIndex]; } +GPUdi() int32_t GPUTPCSliceData::HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const { return mHitWeights[row.mHitNumberOffset + hitIndex]; } } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h index 3ab5b0a331f31..6d322601789b6 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h @@ -44,7 +44,6 @@ class GPUTPCSliceOutput } GPUhd() uint32_t NLocalTracks() const { return mNLocalTracks; } GPUhd() uint32_t NTrackClusters() const { return mNTrackClusters; } -#if !defined(__OPENCL1__) GPUhd() const GPUTPCTrack* GetFirstTrack() const { return (const GPUTPCTrack*)((const char*)this + sizeof(*this)); @@ -53,7 +52,6 @@ class GPUTPCSliceOutput { return (GPUTPCTrack*)((char*)this + sizeof(*this)); } -#endif GPUhd() size_t Size() const { return (mMemorySize); @@ -67,10 +65,10 @@ class GPUTPCSliceOutput GPUhd() void SetNTrackClusters(uint32_t v) { mNTrackClusters = v; } private: - GPUTPCSliceOutput() CON_DELETE; // NOLINT: Must be private or ROOT tries to use them! - ~GPUTPCSliceOutput() CON_DELETE; // NOLINT - GPUTPCSliceOutput(const GPUTPCSliceOutput&) CON_DELETE; // NOLINT - GPUTPCSliceOutput& operator=(const GPUTPCSliceOutput&) CON_DELETE; // NOLINT + GPUTPCSliceOutput() = delete; // NOLINT: Must be private or ROOT tries to use them! 
+ ~GPUTPCSliceOutput() = delete; // NOLINT + GPUTPCSliceOutput(const GPUTPCSliceOutput&) = delete; // NOLINT + GPUTPCSliceOutput& operator=(const GPUTPCSliceOutput&) = delete; // NOLINT GPUhd() void SetMemorySize(size_t val) { mMemorySize = val; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx index e9bbcdf91ca6c..2b097ab8f1835 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx @@ -19,7 +19,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { // find start hits for tracklets if (iThread == 0) { @@ -32,8 +32,8 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr } } GPUbarrier(); - GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row = tracker.mData.mRows[s.mIRow]; - GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() rowUp = tracker.mData.mRows[s.mIRow + 2]; + GPUglobalref() const GPUTPCRow& GPUrestrict() row = tracker.mData.mRows[s.mIRow]; + GPUglobalref() const GPUTPCRow& GPUrestrict() rowUp = tracker.mData.mRows[s.mIRow + 2]; for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) { int64_t lHitNumberOffset = row.mHitNumberOffset; uint32_t linkUpData = tracker.mData.mLinkUpData[lHitNumberOffset + ih]; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h index f0adf3985a613..b2b9bfb355fa1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h +++ 
b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h @@ -24,7 +24,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; /** @@ -34,22 +33,20 @@ class GPUTPCTracker; class GPUTPCStartHitsFinder : public GPUKernelTemplate { public: - MEM_CLASS_PRE() struct GPUSharedMemory { int32_t mIRow; // row index int32_t mNHits; // n hits in the row GPUAtomic(uint32_t) mNRowStartHits; // start hits found in the row }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx index 4275306999531..84ad70b58b964 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx @@ -21,7 +21,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCStartHitsSorter::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCStartHitsSorter::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& 
GPUrestrict() tracker) { // Sorts the Start Hits by Row Index if (iThread == 0) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h index d5f9cc41e2a1a..838fcf7e7d7e1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h @@ -24,7 +24,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; /** @@ -34,22 +33,20 @@ class GPUTPCTracker; class GPUTPCStartHitsSorter : public GPUKernelTemplate { public: - MEM_CLASS_PRE() struct GPUSharedMemory { int32_t mStartRow; // start row index int32_t mNRows; // number of rows to process int32_t mStartOffset; // start offset for hits sorted by this block }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h index 759f4e0f954bd..9553435fc49ab 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h @@ -30,7 +30,6 @@ namespace gpu * The class is dedicated for internal use by the GPUTPCTracker algorithm. 
* The track parameters at both ends are stored separately in the GPUTPCEndPoint class */ -MEM_CLASS_PRE() class GPUTPCTrack { public: @@ -38,20 +37,19 @@ class GPUTPCTrack GPUTPCTrack() : mFirstHitID(0), mNHits(0), mLocalTrackId(-1), mParam() { } - ~GPUTPCTrack() CON_DEFAULT; + ~GPUTPCTrack() = default; #endif //! GPUCA_GPUCODE GPUhd() int32_t NHits() const { return mNHits; } GPUhd() int32_t LocalTrackId() const { return mLocalTrackId; } GPUhd() int32_t FirstHitID() const { return mFirstHitID; } - GPUhd() MakeType(const MEM_LG(GPUTPCBaseTrackParam) &) Param() const { return mParam; } + GPUhd() const GPUTPCBaseTrackParam& Param() const { return mParam; } GPUhd() void SetNHits(int32_t v) { mNHits = v; } GPUhd() void SetLocalTrackId(int32_t v) { mLocalTrackId = v; } GPUhd() void SetFirstHitID(int32_t v) { mFirstHitID = v; } - MEM_TEMPLATE() - GPUhd() void SetParam(const MEM_TYPE(GPUTPCBaseTrackParam) & v) { mParam = v; } + GPUhd() void SetParam(const GPUTPCBaseTrackParam& v) { mParam = v; } // Only if used as replacement for SliceOutTrack GPUhd() static int32_t GetSize(int32_t nClust) { return sizeof(GPUTPCTrack) + nClust * sizeof(GPUTPCSliceOutCluster); } @@ -65,8 +63,7 @@ class GPUTPCTrack int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array int32_t mNHits; // number of track cells int32_t mLocalTrackId; // Id of local track this global track belongs to, index of this track itself if it is a local track - MEM_LG(GPUTPCBaseTrackParam) - mParam; // track parameters + GPUTPCBaseTrackParam mParam; // track parameters private: }; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h index d9f332beabd7d..972c62ffe7e20 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h @@ -41,7 +41,7 @@ class GPUTPCTrackLinearisation GPUTPCTrackLinearisation() : mSinPhi(0), mCosPhi(1), mDzDs(0), mQPt(0) {} 
GPUTPCTrackLinearisation(float SinPhi1, float CosPhi1, float DzDs1, float QPt1) : mSinPhi(SinPhi1), mCosPhi(CosPhi1), mDzDs(DzDs1), mQPt(QPt1) {} - GPUd() MEM_CLASS_PRE2() GPUTPCTrackLinearisation(const MEM_LG2(GPUTPCTrackParam) & t); + GPUd() GPUTPCTrackLinearisation(const GPUTPCTrackParam& t); GPUd() void Set(float SinPhi1, float CosPhi1, float DzDs1, float QPt1); @@ -67,8 +67,7 @@ class GPUTPCTrackLinearisation float mQPt; // QPt }; -MEM_CLASS_PRE2() -GPUdi() GPUTPCTrackLinearisation::GPUTPCTrackLinearisation(const MEM_LG2(GPUTPCTrackParam) & GPUrestrict() t) : mSinPhi(t.SinPhi()), mCosPhi(0), mDzDs(t.DzDs()), mQPt(t.QPt()) +GPUdi() GPUTPCTrackLinearisation::GPUTPCTrackLinearisation(const GPUTPCTrackParam& GPUrestrict() t) : mSinPhi(t.SinPhi()), mCosPhi(0), mDzDs(t.DzDs()), mQPt(t.QPt()) { if (mSinPhi > GPUCA_MAX_SIN_PHI) { mSinPhi = GPUCA_MAX_SIN_PHI; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx index 18245c48ab578..5c1c99c4d75b2 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx @@ -28,8 +28,7 @@ using namespace GPUCA_NAMESPACE::gpu; // Yc = Y + CAMath::Cos(Phi)/Kappa; // -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::GetDist2(const MEM_LG(GPUTPCTrackParam) & GPUrestrict() t) const +GPUd() float GPUTPCTrackParam::GetDist2(const GPUTPCTrackParam& GPUrestrict() t) const { // get squared distance between tracks @@ -39,8 +38,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::GetDist2(const MEM_LG(GPUTPCTrackParam) & return dx * dx + dy * dy + dz * dz; } -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::GetDistXZ2(const MEM_LG(GPUTPCTrackParam) & GPUrestrict() t) const +GPUd() float GPUTPCTrackParam::GetDistXZ2(const GPUTPCTrackParam& GPUrestrict() t) const { // get squared distance between tracks in X&Z @@ -49,8 +47,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::GetDistXZ2(const MEM_LG(GPUTPCTrackParam) return dx * dx + 
dz * dz; } -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::GetS(float x, float y, float Bz) const +GPUd() float GPUTPCTrackParam::GetS(float x, float y, float Bz) const { //* Get XY path length to the given point @@ -66,8 +63,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::GetS(float x, float y, float Bz) const return dS; } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCTrackParam)::GetDCAPoint(float x, float y, float z, float& GPUrestrict() xp, float& GPUrestrict() yp, float& GPUrestrict() zp, float Bz) const +GPUd() void GPUTPCTrackParam::GetDCAPoint(float x, float y, float z, float& GPUrestrict() xp, float& GPUrestrict() yp, float& GPUrestrict() zp, float Bz) const { //* Get the track point closest to the (x,y,z) @@ -97,8 +93,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::GetDCAPoint(float x, float y, float z, flo //* Transport routines //* -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToX(float x, GPUTPCTrackLinearisation& GPUrestrict() t0, float Bz, float maxSinPhi, float* GPUrestrict() DL) +GPUd() bool GPUTPCTrackParam::TransportToX(float x, GPUTPCTrackLinearisation& GPUrestrict() t0, float Bz, float maxSinPhi, float* GPUrestrict() DL) { //* Transport the track parameters to X=x, using linearization at t0, and the field value Bz //* maxSinPhi is the max. 
allowed value for |t0.SinPhi()| @@ -218,8 +213,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToX(float x, GPUTPCTrackLinearisa return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToX(float x, float sinPhi0, float cosPhi0, float Bz, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::TransportToX(float x, float sinPhi0, float cosPhi0, float Bz, float maxSinPhi) { //* Transport the track parameters to X=x, using linearization at phi0 with 0 curvature, //* and the field value Bz @@ -299,16 +293,14 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToX(float x, float sinPhi0, float return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToX(float x, float Bz, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::TransportToX(float x, float Bz, float maxSinPhi) { //* Transport the track parameters to X=x GPUTPCTrackLinearisation t0(*this); return TransportToX(x, t0, Bz, maxSinPhi); } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, GPUTPCTrackLinearisation& GPUrestrict() t0, GPUTPCTrackFitParam& GPUrestrict() par, float Bz, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::TransportToXWithMaterial(float x, GPUTPCTrackLinearisation& GPUrestrict() t0, GPUTPCTrackFitParam& GPUrestrict() par, float Bz, float maxSinPhi) { //* Transport the track parameters to X=x taking into account material budget @@ -326,8 +318,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, GPUTPCTr return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, GPUTPCTrackFitParam& GPUrestrict() par, float Bz, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::TransportToXWithMaterial(float x, GPUTPCTrackFitParam& GPUrestrict() par, float Bz, float maxSinPhi) { //* Transport the track parameters to X=x taking into account material budget @@ -335,8 +326,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, GPUTPCTr return 
TransportToXWithMaterial(x, t0, par, Bz, maxSinPhi); } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, float Bz, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::TransportToXWithMaterial(float x, float Bz, float maxSinPhi) { //* Transport the track parameters to X=x taking into account material budget @@ -348,8 +338,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, float Bz //* //* Multiple scattering and energy losses //* -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochGeant(float bg2, float kp0, float kp1, float kp2, float kp3, float kp4) +GPUd() float GPUTPCTrackParam::BetheBlochGeant(float bg2, float kp0, float kp1, float kp2, float kp3, float kp4) { // // This is the parameterization of the Bethe-Bloch formula inspired by Geant. @@ -388,8 +377,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochGeant(float bg2, float kp0, flo return mK * mZA * (1 + bg2) / bg2 * (0.5f * CAMath::Log(2 * me * bg2 * maxT / (mI * mI)) - bg2 / (1 + bg2) - d2); } -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochSolid(float bg) +GPUd() float GPUTPCTrackParam::BetheBlochSolid(float bg) { //------------------------------------------------------------------ // This is an approximation of the Bethe-Bloch formula, @@ -401,8 +389,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochSolid(float bg) return BetheBlochGeant(bg); } -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochGas(float bg) +GPUd() float GPUTPCTrackParam::BetheBlochGas(float bg) { //------------------------------------------------------------------ // This is an approximation of the Bethe-Bloch formula, @@ -420,8 +407,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochGas(float bg) return BetheBlochGeant(bg, rho, x0, x1, mI, mZA); } -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::ApproximateBetheBloch(float beta2) +GPUd() float GPUTPCTrackParam::ApproximateBetheBloch(float beta2) { 
//------------------------------------------------------------------ // This is an approximation of the Bethe-Bloch formula with @@ -438,8 +424,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::ApproximateBetheBloch(float beta2) return 0.153e-3f / beta2 * (CAMath::Log(5940 * beta2 / (1 - beta2)) - beta2); } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCTrackParam)::CalculateFitParameters(GPUTPCTrackFitParam& par, float mass) +GPUd() void GPUTPCTrackParam::CalculateFitParameters(GPUTPCTrackFitParam& par, float mass) { //*! @@ -473,8 +458,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::CalculateFitParameters(GPUTPCTrackFitParam par.k44 = GetPar(3) * GetPar(3) * k2; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::CorrectForMeanMaterial(float xOverX0, float xTimesRho, const GPUTPCTrackFitParam& par) +GPUd() bool GPUTPCTrackParam::CorrectForMeanMaterial(float xOverX0, float xTimesRho, const GPUTPCTrackFitParam& par) { //------------------------------------------------------------------ // This function corrects the track parameters for the crossed material. 
@@ -523,8 +507,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::CorrectForMeanMaterial(float xOverX0, floa //* //* Rotation //* -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::Rotate(float alpha, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::Rotate(float alpha, float maxSinPhi) { //* Rotate the coordinate system in XY on the angle alpha @@ -581,8 +564,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::Rotate(float alpha, float maxSinPhi) return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::Rotate(float alpha, GPUTPCTrackLinearisation& t0, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::Rotate(float alpha, GPUTPCTrackLinearisation& t0, float maxSinPhi) { //* Rotate the coordinate system in XY on the angle alpha @@ -628,8 +610,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::Rotate(float alpha, GPUTPCTrackLinearisati return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::Filter(float y, float z, float err2Y, float err2Z, float maxSinPhi, bool paramOnly) +GPUd() bool GPUTPCTrackParam::Filter(float y, float z, float err2Y, float err2Z, float maxSinPhi, bool paramOnly) { //* Add the y,z measurement with the Kalman filter @@ -690,8 +671,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::Filter(float y, float z, float err2Y, floa return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::CheckNumericalQuality() const +GPUd() bool GPUTPCTrackParam::CheckNumericalQuality() const { //* Check that the track parameters and covariance matrix are reasonable @@ -727,8 +707,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::CheckNumericalQuality() const return ok; } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCTrackParam)::ConstrainZ(float& z, int32_t sector, float& z0, float& lastZ) +GPUd() void GPUTPCTrackParam::ConstrainZ(float& z, int32_t sector, float& z0, float& lastZ) { if (sector < GPUCA_NSLICES / 2) { if (z < 0) { @@ -763,8 +742,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::ConstrainZ(float& z, int32_t sector, float } } -MEM_CLASS_PRE() -GPUd() void 
MEM_LG(GPUTPCTrackParam)::ShiftZ(float z1, float z2, float x1, float x2, float bz, float defaultZOffsetOverR) +GPUd() void GPUTPCTrackParam::ShiftZ(float z1, float z2, float x1, float x2, float bz, float defaultZOffsetOverR) { const float r1 = CAMath::Max(0.0001f, CAMath::Abs(mParam.mP[4] * bz)); @@ -826,8 +804,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::ShiftZ(float z1, float z2, float x1, float #include #endif -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCTrackParam)::Print() const +GPUd() void GPUTPCTrackParam::Print() const { //* print parameters @@ -837,8 +814,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::Print() const #endif } -MEM_CLASS_PRE() -GPUd() int32_t MEM_LG(GPUTPCTrackParam)::GetPropagatedYZ(float bz, float x, float& projY, float& projZ) const +GPUd() int32_t GPUTPCTrackParam::GetPropagatedYZ(float bz, float x, float& projY, float& projZ) const { float k = mParam.mP[4] * bz; float dx = x - mParam.mX; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h index ffc28af6f4e32..792cba4f519e1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h @@ -32,7 +32,6 @@ class GPUTPCTrackLinearisation; * which is used by the GPUTPCTracker slice tracker. 
* */ -MEM_CLASS_PRE() class GPUTPCTrackParam { public: @@ -40,8 +39,8 @@ class GPUTPCTrackParam float bethe, e, theta2, EP2, sigmadE2, k22, k33, k43, k44; // parameters }; - GPUd() MakeType(const MEM_LG(GPUTPCBaseTrackParam) &) GetParam() const { return mParam; } - GPUd() void SetParam(const MEM_LG(GPUTPCBaseTrackParam) & v) { mParam = v; } + GPUd() const GPUTPCBaseTrackParam& GetParam() const { return mParam; } + GPUd() void SetParam(const GPUTPCBaseTrackParam& v) { mParam = v; } GPUd() void InitParam(); GPUd() float X() const { return mParam.X(); } @@ -74,7 +73,7 @@ class GPUTPCTrackParam GPUd() float GetKappa(float Bz) const { return mParam.GetKappa(Bz); } GPUd() float GetCosPhi() const { return mSignCosPhi * CAMath::Sqrt(1 - SinPhi() * SinPhi()); } - GPUhd() MakeType(const float*) Par() const { return mParam.Par(); } + GPUhd() const float* Par() const { return mParam.Par(); } GPUhd() const float* Cov() const { return mParam.Cov(); } GPUd() const float* GetPar() const { return mParam.GetPar(); } @@ -145,8 +144,7 @@ class GPUTPCTrackParam #ifndef GPUCA_GPUCODE private: #endif //! GPUCA_GPUCODE - MEM_LG(GPUTPCBaseTrackParam) - mParam; // Track Parameters + GPUTPCBaseTrackParam mParam; // Track Parameters private: // WARNING, Track Param Data is copied in the GPU Tracklet Constructor element by element instead of using copy constructor!!! 
@@ -157,8 +155,7 @@ class GPUTPCTrackParam int32_t mNDF; // the Number of Degrees of Freedom }; -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUTPCTrackParam)::InitParam() +GPUdi() void GPUTPCTrackParam::InitParam() { // Initialize Tracklet Parameters using default values SetSinPhi(0); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx index 84bdc52ab6f46..4970ff90a934c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx @@ -22,9 +22,7 @@ #include "GPUO2DataTypes.h" #include "GPUTPCTrackParam.h" #include "GPUParam.inc" -#if !defined(__OPENCL1__) #include "GPUTPCConvertImpl.h" -#endif #if !defined(GPUCA_GPUCODE) #include diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h index da8d3d1fb28d4..bd1ca018186e4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h @@ -33,31 +33,23 @@ namespace gpu { class GPUTPCSliceOutput; struct GPUTPCClusterData; -MEM_CLASS_PRE() struct GPUParam; -MEM_CLASS_PRE() class GPUTPCTrack; -MEM_CLASS_PRE() class GPUTPCTrackParam; -MEM_CLASS_PRE() class GPUTPCRow; -MEM_CLASS_PRE() class GPUTPCTracker : public GPUProcessor { public: #ifndef GPUCA_GPUCODE_DEVICE GPUTPCTracker(); ~GPUTPCTracker(); - GPUTPCTracker(const GPUTPCTracker&) CON_DELETE; - GPUTPCTracker& operator=(const GPUTPCTracker&) CON_DELETE; + GPUTPCTracker(const GPUTPCTracker&) = delete; + GPUTPCTracker& operator=(const GPUTPCTracker&) = delete; - MEM_CLASS_PRE2() void SetSlice(int32_t iSlice); - MEM_CLASS_PRE2() void InitializeProcessor(); - MEM_CLASS_PRE2() - void InitializeRows(const MEM_CONSTANT(GPUParam) * param) { mData.InitializeRows(*param); } + void InitializeRows(const GPUParam* param) { mData.InitializeRows(*param); } int32_t CheckEmptySlice(); void WriteOutputPrepare(); @@ -77,7 +69,6 @@ class GPUTPCTracker : public GPUProcessor GPUAtomic(uint32_t) 
nextStartHit; // Next Tracklet to process }; - MEM_CLASS_PRE2() struct StructGPUParametersConst { GPUglobalref() char* gpumem; // Base pointer to GPU memory (Needed for OpenCL for verification) }; @@ -94,28 +85,24 @@ class GPUTPCTracker : public GPUProcessor StructGPUParameters gpuParameters; // GPU parameters }; -#if !defined(__OPENCL1__) GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const { return mData.ClusterData(); } - GPUhdi() MakeType(const MEM_LG(GPUTPCRow) &) Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); } + GPUhdi() const GPUTPCRow& Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); } GPUhdi() GPUglobalref() GPUTPCSliceOutput* Output() const { return mOutput; } -#endif GPUhdni() GPUglobalref() commonMemoryStruct* CommonMemory() const { return (mCommonMem); } - MEM_CLASS_PRE2() - GPUdi() static void GetErrors2Seeding(const MEM_CONSTANT(GPUParam) & param, char sector, int32_t iRow, const MEM_LG2(GPUTPCTrackParam) & t, float time, float& ErrY2, float& ErrZ2) + GPUdi() static void GetErrors2Seeding(const GPUParam& param, char sector, int32_t iRow, const GPUTPCTrackParam& t, float time, float& ErrY2, float& ErrZ2) { // param.GetClusterErrors2(sector, iRow, param.GetContinuousTracking() != 0. ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, 0.f, 0.f, ErrY2, ErrZ2); param.GetClusterErrorsSeeding2(sector, iRow, param.par.continuousTracking != 0.f ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, ErrY2, ErrZ2); } - MEM_CLASS_PRE2() - GPUdi() void GetErrors2Seeding(int32_t iRow, const MEM_LG2(GPUTPCTrackParam) & t, float time, float& ErrY2, float& ErrZ2) const + GPUdi() void GetErrors2Seeding(int32_t iRow, const GPUTPCTrackParam& t, float time, float& ErrY2, float& ErrZ2) const { // Param().GetClusterErrors2(mISlice, iRow, Param().GetContinuousTracking() != 0. ? 
125.f : t.Z(), t.SinPhi(), t.DzDs(), time, 0.f, 0.f, ErrY2, ErrZ2); Param().GetClusterErrorsSeeding2(mISlice, iRow, Param().par.continuousTracking != 0.f ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, ErrY2, ErrZ2); @@ -153,13 +140,13 @@ class GPUTPCTracker : public GPUProcessor GPUhd() int32_t ISlice() const { return mISlice; } - GPUhd() GPUconstantref() const MEM_LG(GPUTPCSliceData) & Data() const { return mData; } - GPUhdi() GPUconstantref() MEM_LG(GPUTPCSliceData) & Data() + GPUhd() GPUconstantref() const GPUTPCSliceData& Data() const { return mData; } + GPUhdi() GPUconstantref() GPUTPCSliceData& Data() { return mData; } - GPUhd() GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & Row(int32_t rowIndex) const { return mData.Row(rowIndex); } + GPUhd() GPUglobalref() const GPUTPCRow& Row(int32_t rowIndex) const { return mData.Row(rowIndex); } GPUhd() uint32_t NHitsTotal() const { return mData.NumberOfHits(); } GPUhd() uint32_t NMaxTracklets() const { return mNMaxTracklets; } @@ -169,36 +156,23 @@ class GPUTPCTracker : public GPUProcessor GPUhd() uint32_t NMaxStartHits() const { return mNMaxStartHits; } GPUhd() uint32_t NMaxRowStartHits() const { return mNMaxRowStartHits; } - MEM_TEMPLATE() - GPUd() void SetHitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex, calink v) { mData.SetHitLinkUpData(row, hitIndex, v); } - MEM_TEMPLATE() - GPUd() void SetHitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex, calink v) { mData.SetHitLinkDownData(row, hitIndex, v); } - MEM_TEMPLATE() - GPUd() calink HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitLinkUpData(row, hitIndex); } - MEM_TEMPLATE() - GPUd() calink HitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitLinkDownData(row, hitIndex); } - - MEM_TEMPLATE() - GPUd() GPUglobalref() const cahit2* HitData(const MEM_TYPE(GPUTPCRow) & row) const { return mData.HitData(row); } - MEM_TEMPLATE() - GPUd() GPUglobalref() const calink* 
HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row) const { return mData.HitLinkUpData(row); } - MEM_TEMPLATE() - GPUd() GPUglobalref() const calink* HitLinkDownData(const MEM_TYPE(GPUTPCRow) & row) const { return mData.HitLinkDownData(row); } - MEM_TEMPLATE() - GPUd() GPUglobalref() const calink* FirstHitInBin(const MEM_TYPE(GPUTPCRow) & row) const { return mData.FirstHitInBin(row); } - - MEM_TEMPLATE() - GPUd() int32_t FirstHitInBin(const MEM_TYPE(GPUTPCRow) & row, int32_t binIndex) const { return mData.FirstHitInBin(row, binIndex); } - - MEM_TEMPLATE() - GPUd() cahit HitDataY(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitDataY(row, hitIndex); } - MEM_TEMPLATE() - GPUd() cahit HitDataZ(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitDataZ(row, hitIndex); } - MEM_TEMPLATE() - GPUd() cahit2 HitData(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitData(row, hitIndex); } - - MEM_TEMPLATE() - GPUhd() int32_t HitInputID(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.ClusterDataIndex(row, hitIndex); } + GPUd() void SetHitLinkUpData(const GPUTPCRow& row, int32_t hitIndex, calink v) { mData.SetHitLinkUpData(row, hitIndex, v); } + GPUd() void SetHitLinkDownData(const GPUTPCRow& row, int32_t hitIndex, calink v) { mData.SetHitLinkDownData(row, hitIndex, v); } + GPUd() calink HitLinkUpData(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitLinkUpData(row, hitIndex); } + GPUd() calink HitLinkDownData(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitLinkDownData(row, hitIndex); } + + GPUd() GPUglobalref() const cahit2* HitData(const GPUTPCRow& row) const { return mData.HitData(row); } + GPUd() GPUglobalref() const calink* HitLinkUpData(const GPUTPCRow& row) const { return mData.HitLinkUpData(row); } + GPUd() GPUglobalref() const calink* HitLinkDownData(const GPUTPCRow& row) const { return mData.HitLinkDownData(row); } + GPUd() GPUglobalref() const 
calink* FirstHitInBin(const GPUTPCRow& row) const { return mData.FirstHitInBin(row); } + + GPUd() int32_t FirstHitInBin(const GPUTPCRow& row, int32_t binIndex) const { return mData.FirstHitInBin(row, binIndex); } + + GPUd() cahit HitDataY(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitDataY(row, hitIndex); } + GPUd() cahit HitDataZ(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitDataZ(row, hitIndex); } + GPUd() cahit2 HitData(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitData(row, hitIndex); } + + GPUhd() int32_t HitInputID(const GPUTPCRow& row, int32_t hitIndex) const { return mData.ClusterDataIndex(row, hitIndex); } /** * The hit weight is used to determine whether a hit belongs to a certain tracklet or another one @@ -218,12 +192,9 @@ class GPUTPCTracker : public GPUProcessor return ((int32_t)weight); // return( (NHits << 16) + num); } - MEM_TEMPLATE() - GPUd() void MaximizeHitWeight(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex, int32_t weight) { mData.MaximizeHitWeight(row, hitIndex, weight); } - MEM_TEMPLATE() - GPUd() void SetHitWeight(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex, int32_t weight) { mData.SetHitWeight(row, hitIndex, weight); } - MEM_TEMPLATE() - GPUd() int32_t HitWeight(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitWeight(row, hitIndex); } + GPUd() void MaximizeHitWeight(const GPUTPCRow& row, int32_t hitIndex, int32_t weight) { mData.MaximizeHitWeight(row, hitIndex, weight); } + GPUd() void SetHitWeight(const GPUTPCRow& row, int32_t hitIndex, int32_t weight) { mData.SetHitWeight(row, hitIndex, weight); } + GPUd() int32_t HitWeight(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitWeight(row, hitIndex); } GPUhd() GPUglobalref() GPUAtomic(uint32_t) * NTracklets() const { return &mCommonMem->nTracklets; } GPUhd() GPUglobalref() GPUAtomic(uint32_t) * NRowHits() const { return &mCommonMem->nRowHits; } @@ -233,24 +204,23 @@ class GPUTPCTracker : 
public GPUProcessor GPUhd() GPUglobalref() const GPUTPCHitId* TrackletStartHits() const { return mTrackletStartHits; } GPUhd() GPUglobalref() GPUTPCHitId* TrackletStartHits() { return mTrackletStartHits; } GPUhd() GPUglobalref() GPUTPCHitId* TrackletTmpStartHits() const { return mTrackletTmpStartHits; } - MEM_CLASS_PRE2() - GPUhd() GPUglobalref() const MEM_LG2(GPUTPCTracklet) & Tracklet(int32_t i) const { return mTracklets[i]; } - GPUhd() GPUglobalref() MEM_GLOBAL(GPUTPCTracklet) * Tracklets() const { return mTracklets; } + GPUhd() GPUglobalref() const GPUTPCTracklet& Tracklet(int32_t i) const { return mTracklets[i]; } + GPUhd() GPUglobalref() GPUTPCTracklet* Tracklets() const { return mTracklets; } GPUhd() GPUglobalref() calink* TrackletRowHits() const { return mTrackletRowHits; } GPUhd() GPUglobalref() GPUAtomic(uint32_t) * NTracks() const { return &mCommonMem->nTracks; } - GPUhd() GPUglobalref() MEM_GLOBAL(GPUTPCTrack) * Tracks() const { return mTracks; } + GPUhd() GPUglobalref() GPUTPCTrack* Tracks() const { return mTracks; } GPUhd() GPUglobalref() GPUAtomic(uint32_t) * NTrackHits() const { return &mCommonMem->nTrackHits; } GPUhd() GPUglobalref() GPUTPCHitId* TrackHits() const { return mTrackHits; } - GPUhd() GPUglobalref() MEM_GLOBAL(GPUTPCRow) * SliceDataRows() const { return (mData.Rows()); } + GPUhd() GPUglobalref() GPUTPCRow* SliceDataRows() const { return (mData.Rows()); } GPUhd() GPUglobalref() int32_t* RowStartHitCountOffset() const { return (mRowStartHitCountOffset); } GPUhd() GPUglobalref() StructGPUParameters* GPUParameters() const { return (&mCommonMem->gpuParameters); } - GPUhd() MakeType(MEM_LG(StructGPUParametersConst) *) GPUParametersConst() + GPUhd() StructGPUParametersConst* GPUParametersConst() { return (&mGPUParametersConst); } - GPUhd() MakeType(MEM_LG(const StructGPUParametersConst) *) GetGPUParametersConst() const { return (&mGPUParametersConst); } + GPUhd() const StructGPUParametersConst* GetGPUParametersConst() const { return 
(&mGPUParametersConst); } GPUhd() void SetGPUTextureBase(GPUglobalref() const void* val) { mData.SetGPUTextureBase(val); } struct trackSortData { @@ -272,10 +242,7 @@ class GPUTPCTracker : public GPUProcessor int32_t mISlice; // Number of slice - /** A pointer to the ClusterData object that the SliceData was created from. This can be used to - * merge clusters from inside the SliceTracker code and recreate the SliceData. */ - MEM_LG(GPUTPCSliceData) - mData; // The SliceData object. It is used to encapsulate the storage in memory from the access + GPUTPCSliceData mData; // The SliceData object. It is used to encapsulate the storage in memory from the access uint32_t mNMaxStartHits; uint32_t mNMaxRowStartHits; @@ -297,15 +264,14 @@ class GPUTPCTracker : public GPUProcessor GPUglobalref() GPUTPCHitId* mTrackletTmpStartHits; // Unsorted start hits GPUglobalref() char* mGPUTrackletTemp; // Temp Memory for GPU Tracklet Constructor - MEM_LG(StructGPUParametersConst) - mGPUParametersConst; // Parameters for GPU if this is a GPU tracker + StructGPUParametersConst mGPUParametersConst; // Parameters for GPU if this is a GPU tracker // event GPUglobalref() commonMemoryStruct* mCommonMem; // common event memory GPUglobalref() GPUTPCHitId* mTrackletStartHits; // start hits for the tracklets - GPUglobalref() MEM_GLOBAL(GPUTPCTracklet) * mTracklets; // tracklets + GPUglobalref() GPUTPCTracklet* mTracklets; // tracklets GPUglobalref() calink* mTrackletRowHits; // Hits for each Tracklet in each row - GPUglobalref() MEM_GLOBAL(GPUTPCTrack) * mTracks; // reconstructed tracks + GPUglobalref() GPUTPCTrack* mTracks; // reconstructed tracks GPUglobalref() GPUTPCHitId* mTrackHits; // array of track hit numbers // output diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h index 08ec8d8bf54e7..9190cdb94aa5f 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h @@ -28,7 +28,6 @@ 
namespace gpu * The class describes the reconstructed TPC track candidate. * The class is dedicated for internal use by the GPUTPCTracker algorithm. */ -MEM_CLASS_PRE() class GPUTPCTracklet { public: @@ -40,20 +39,18 @@ class GPUTPCTracklet GPUhd() int32_t LastRow() const { return mLastRow; } GPUhd() int32_t HitWeight() const { return mHitWeight; } GPUhd() uint32_t FirstHit() const { return mFirstHit; } - GPUhd() MakeType(const MEM_LG(GPUTPCBaseTrackParam) &) Param() const { return mParam; } + GPUhd() const GPUTPCBaseTrackParam& Param() const { return mParam; } GPUhd() void SetFirstRow(int32_t v) { mFirstRow = v; } GPUhd() void SetLastRow(int32_t v) { mLastRow = v; } GPUhd() void SetFirstHit(uint32_t v) { mFirstHit = v; } - MEM_CLASS_PRE2() - GPUhd() void SetParam(const MEM_LG2(GPUTPCBaseTrackParam) & v) { mParam = reinterpret_cast(v); } + GPUhd() void SetParam(const GPUTPCBaseTrackParam& v) { mParam = reinterpret_cast(v); } GPUhd() void SetHitWeight(const int32_t w) { mHitWeight = w; } private: int32_t mFirstRow; // first TPC row // TODO: We can use smaller data format here! 
int32_t mLastRow; // last TPC row - MEM_LG(GPUTPCBaseTrackParam) - mParam; // tracklet parameters + GPUTPCBaseTrackParam mParam; // tracklet parameters int32_t mHitWeight; // Hit Weight of Tracklet uint32_t mFirstHit; // first hit in row hit array }; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx index 05e75232297a3..e7735b4b2580c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx @@ -21,27 +21,23 @@ #include "GPUTPCTracker.h" #include "GPUTPCTracklet.h" #include "GPUTPCTrackletConstructor.h" -#if !defined(__OPENCL1__) #include "GPUTPCGlobalTracking.h" #include "CorrectionMapsHelper.h" #ifdef GPUCA_HAVE_O2HEADERS #include "CalibdEdxContainer.h" #endif // GPUCA_HAVE_O2HEADERS -#endif // OPENCL1 #include "GPUParam.inc" #include "GPUCommonMath.h" using namespace GPUCA_NAMESPACE::gpu; -MEM_CLASS_PRE2() -GPUdii() void GPUTPCTrackletConstructor::InitTracklet(MEM_LG2(GPUTPCTrackParam) & GPUrestrict() tParam) +GPUdii() void GPUTPCTrackletConstructor::InitTracklet(GPUTPCTrackParam& GPUrestrict() tParam) { // Initialize Tracklet Parameters using default values tParam.InitParam(); } -MEM_CLASS_PRE2() -GPUd() bool GPUTPCTrackletConstructor::CheckCov(MEM_LG2(GPUTPCTrackParam) & GPUrestrict() tParam) +GPUd() bool GPUTPCTrackletConstructor::CheckCov(GPUTPCTrackParam& GPUrestrict() tParam) { bool ok = 1; const float* c = tParam.Cov(); @@ -58,8 +54,7 @@ GPUd() bool GPUTPCTrackletConstructor::CheckCov(MEM_LG2(GPUTPCTrackParam) & GPUr return (ok); } -MEM_CLASS_PRE23() -GPUd() void GPUTPCTrackletConstructor::StoreTracklet(int32_t /*nBlocks*/, int32_t /*nThreads*/, int32_t /*iBlock*/, int32_t /*iThread*/, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, GPUTPCThreadMemory& GPUrestrict() r, GPUconstantref() MEM_LG2(GPUTPCTracker) & GPUrestrict() tracker, MEM_LG3(GPUTPCTrackParam) & GPUrestrict() tParam, calink* rowHits) 
+GPUd() void GPUTPCTrackletConstructor::StoreTracklet(int32_t /*nBlocks*/, int32_t /*nThreads*/, int32_t /*iBlock*/, int32_t /*iThread*/, GPUsharedref() GPUSharedMemory& s, GPUTPCThreadMemory& GPUrestrict() r, GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUTPCTrackParam& GPUrestrict() tParam, calink* rowHits) { // reconstruction of tracklets, tracklet store step const uint32_t nHits = r.mLastRow + 1 - r.mFirstRow; @@ -85,7 +80,7 @@ GPUd() void GPUTPCTrackletConstructor::StoreTracklet(int32_t /*nBlocks*/, int32_ return; } - GPUglobalref() MEM_GLOBAL(GPUTPCTracklet) & GPUrestrict() tracklet = tracker.Tracklets()[itrout]; + GPUglobalref() GPUTPCTracklet& GPUrestrict() tracklet = tracker.Tracklets()[itrout]; CADEBUG(printf(" Storing tracklet: %d rows\n", nHits)); @@ -109,8 +104,8 @@ GPUd() void GPUTPCTrackletConstructor::StoreTracklet(int32_t /*nBlocks*/, int32_ } } -MEM_CLASS_PRE2_TEMPLATE(class T) -GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, int32_t /*nThreads*/, int32_t /*iBlock*/, int32_t /*iThread*/, GPUsharedref() T& s, GPUTPCThreadMemory& GPUrestrict() r, GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() tracker, MEM_LG2(GPUTPCTrackParam) & GPUrestrict() tParam, int32_t iRow, calink& rowHit, calink* rowHits) +template +GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, int32_t /*nThreads*/, int32_t /*iBlock*/, int32_t /*iThread*/, GPUsharedref() T& s, GPUTPCThreadMemory& GPUrestrict() r, GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUTPCTrackParam& GPUrestrict() tParam, int32_t iRow, calink& rowHit, calink* rowHits) { // reconstruction of tracklets, tracklets update step CA_MAKE_SHARED_REF(GPUTPCRow, row, tracker.Row(iRow), s.mRows[iRow]); @@ -140,18 +135,14 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, float z = z0 + hh.y * stepZ; if (iRow != r.mStartRow || !tracker.Param().par.continuousTracking) { tParam.ConstrainZ(z, 
tracker.ISlice(), z0, r.mLastZ); -#if !defined(__OPENCL1__) tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISlice(), iRow, x, y, z); -#endif } if (iRow == r.mStartRow) { if (tracker.Param().par.continuousTracking) { float refZ = ((z > 0) ? tracker.Param().rec.tpc.defaultZOffsetOverR : -tracker.Param().rec.tpc.defaultZOffsetOverR) * x; -#if !defined(__OPENCL1__) float zTmp = refZ; tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISlice(), iRow, x, y, zTmp); z += zTmp - refZ; // Add zCorrection (=zTmp - refZ) to z, such that zOffset is set such, that transformed (z - zOffset) becomes refZ -#endif tParam.SetZOffset(z - refZ); tParam.SetZ(refZ); r.mLastZ = refZ; @@ -266,7 +257,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, r.mNMissed++; float x = row.X(); -#if !defined(__OPENCL1__) { float tmpY, tmpZ; if (!tParam.GetPropagatedYZ(tracker.Param().bzCLight, x, tmpY, tmpZ)) { @@ -277,7 +267,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, tParam.ConstrainZ(tmpZ, tracker.ISlice(), z0, r.mLastZ); tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(tracker.ISlice(), iRow, tmpY, tmpZ, x); } -#endif CADEBUG(printf("%14s: SEA TRACK ROW %3d X %8.3f -", "", iRow, tParam.X()); for (int32_t i = 0; i < 5; i++) { printf(" %8.3f", tParam.Par()[i]); } printf(" -"); for (int32_t i = 0; i < 15; i++) { printf(" %8.3f", tParam.Cov()[i]); } printf("\n")); if (!tParam.TransportToX(x, tParam.SinPhi(), tParam.GetCosPhi(), tracker.Param().bzCLight, GPUCA_MAX_SIN_PHI_LOW)) { @@ -299,9 +288,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, GPUglobalref() const cahit2* hits = tracker.HitData(row); GPUglobalref() const calink* firsthit = tracker.FirstHitInBin(row); #endif //! 
GPUCA_TEXTURE_FETCH_CONSTRUCTOR -#if !defined(__OPENCL1__) tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(tracker.ISlice(), iRow, yUncorrected, zUncorrected, yUncorrected, zUncorrected); -#endif if (tracker.Param().rec.tpc.rejectEdgeClustersInSeeding && tracker.Param().rejectEdgeClusterByY(yUncorrected, iRow, CAMath::Sqrt(tParam.Err2Y()))) { rowHit = CALINK_INVAL; @@ -310,13 +297,13 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, calink best = CALINK_INVAL; float err2Y, err2Z; - tracker.GetErrors2Seeding(iRow, *((MEM_LG2(GPUTPCTrackParam)*)&tParam), -1.f, err2Y, err2Z); // TODO: Use correct time + tracker.GetErrors2Seeding(iRow, *((GPUTPCTrackParam*)&tParam), -1.f, err2Y, err2Z); // TODO: Use correct time if (r.mNHits >= 10) { const float sErr2 = tracker.Param().GetSystematicClusterErrorIFC2(x, tParam.GetY(), tParam.GetZ(), tracker.ISlice() >= 18); err2Y += sErr2; err2Z += sErr2; } - if (CAMath::Abs(yUncorrected) < x * MEM_GLOBAL(GPUTPCRow)::getTPCMaxY1X()) { // search for the closest hit + if (CAMath::Abs(yUncorrected) < x * GPUTPCRow::getTPCMaxY1X()) { // search for the closest hit const float kFactor = tracker.Param().rec.tpc.hitPickUpFactor * tracker.Param().rec.tpc.hitPickUpFactor * 7.0f * 7.0f; const float maxWindow2 = tracker.Param().rec.tpc.hitSearchArea2; const float sy2 = CAMath::Min(maxWindow2, kFactor * (tParam.Err2Y() + err2Y)); @@ -391,7 +378,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } } while (false); (void)found; -#if defined(GPUCA_HAVE_O2HEADERS) && !defined(__OPENCL1__) +#if defined(GPUCA_HAVE_O2HEADERS) if (!found && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer) { uint32_t pad = CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISlice(), iRow, yUncorrected)); if (pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISlice(), 
iRow, pad)) { @@ -403,8 +390,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } while (0); } if (r.mNHits == 8 && r.mNMissed == 0 && rowHit != CALINK_INVAL && rowHit != CALINK_DEAD_CHANNEL && rowHits && tracker.Param().par.continuousTracking && rowHits[r.mFirstRow] != CALINK_INVAL && rowHits[r.mFirstRow] != CALINK_DEAD_CHANNEL && rowHits[r.mLastRow] != CALINK_INVAL && rowHits[r.mLastRow] != CALINK_DEAD_CHANNEL) { - const GPUglobalref() MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row1 = tracker.Row(r.mFirstRow); - const GPUglobalref() MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row2 = tracker.Row(r.mLastRow); + const GPUglobalref() GPUTPCRow& GPUrestrict() row1 = tracker.Row(r.mFirstRow); + const GPUglobalref() GPUTPCRow& GPUrestrict() row2 = tracker.Row(r.mLastRow); GPUglobalref() const cahit2* hits1 = tracker.HitData(row1); GPUglobalref() const cahit2* hits2 = tracker.HitData(row2); const cahit2 hh1 = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits1, rowHits[r.mFirstRow]); @@ -418,11 +405,10 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } } -GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() tracker, GPUsharedref() GPUTPCTrackletConstructor::MEM_LOCAL(GPUSharedMemory) & s, GPUTPCThreadMemory& GPUrestrict() r) +GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUsharedref() GPUTPCTrackletConstructor::GPUSharedMemory& s, GPUTPCThreadMemory& GPUrestrict() r) { int32_t iRow = 0, iRowEnd = GPUCA_ROW_COUNT; - MEM_PLAIN(GPUTPCTrackParam) - tParam; + GPUTPCTrackParam tParam; calink rowHits[GPUCA_ROW_COUNT]; if (r.mGo) { GPUTPCHitId id = tracker.TrackletStartHits()[r.mISH]; @@ -461,7 +447,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() MEM_GLO iRow = r.mEndRow; iRowEnd = -1; float x = tracker.Row(r.mEndRow).X(); -#if !defined(__OPENCL1__) { float tmpY, tmpZ; if 
(tParam.GetPropagatedYZ(tracker.Param().bzCLight, x, tmpY, tmpZ)) { @@ -476,7 +461,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() MEM_GLO continue; } } -#endif if ((r.mGo = (tParam.TransportToX(x, tracker.Param().bzCLight, GPUCA_MAX_SIN_PHI) && tParam.Filter(r.mLastY, r.mLastZ, tParam.Err2Y() * 0.5f, tParam.Err2Z() * 0.5f, GPUCA_MAX_SIN_PHI_LOW, true)))) { CADEBUG(printf("%14s: SEA BACK ROW %3d X %8.3f -", "", iRow, tParam.X()); for (int32_t i = 0; i < 5; i++) { printf(" %8.3f", tParam.Par()[i]); } printf(" -"); for (int32_t i = 0; i < 15; i++) { printf(" %8.3f", tParam.Cov()[i]); } printf("\n")); float err2Y, err2Z; @@ -496,12 +480,12 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() MEM_GLO } template <> -GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker) { if (get_local_id(0) == 0) { sMem.mNStartHits = *tracker.NStartHits(); } - CA_SHARED_CACHE(&sMem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(MEM_PLAIN(GPUTPCRow))); + CA_SHARED_CACHE(&sMem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); GPUbarrier(); GPUTPCThreadMemory rMem; @@ -512,9 +496,9 @@ GPUdii() void GPUTPCTrackletConstructor::Thread -GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem, processorType& GPUrestrict() tracker0) +GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker0) { - GPUconstantref() 
MEM_GLOBAL(GPUTPCTracker) * GPUrestrict() pTracker = &tracker0; + GPUconstantref() GPUTPCTracker* GPUrestrict() pTracker = &tracker0; #ifdef GPUCA_GPUCODE int32_t mySlice = get_group_id(0) % GPUCA_NSLICES; int32_t currentSlice = -1; @@ -524,7 +508,7 @@ GPUdii() void GPUTPCTrackletConstructor::Thread // FIXME: GPUgeneric() needed to make the clang spirv output link correctly -GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() tracker, GPUsharedref() GPUTPCGlobalTracking::GPUSharedMemory& sMem, MEM_LG(GPUTPCTrackParam) & GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) +GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUsharedref() GPUTPCGlobalTracking::GPUSharedMemory& sMem, GPUTPCTrackParam& GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) { GPUTPCThreadMemory rMem; rMem.mISH = iTracklet; @@ -602,4 +585,3 @@ GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTrackin } return (rMem.mNHits); } -#endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h index effee4fa757b8..b1ef74b9896c1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h @@ -28,7 +28,6 @@ namespace gpu * @class GPUTPCTrackletConstructor * */ -MEM_CLASS_PRE() class GPUTPCTracker; class GPUTPCTrackletConstructor @@ -69,9 +68,8 @@ class GPUTPCTrackletConstructor float mLastZ; // Z of the last fitted cluster }; - MEM_CLASS_PRE() struct GPUSharedMemory { - CA_SHARED_STORAGE(MEM_LG(GPUTPCRow) mRows[GPUCA_ROW_COUNT]); // rows + CA_SHARED_STORAGE(GPUTPCRow mRows[GPUCA_ROW_COUNT]); // rows int32_t mNextStartHitFirst; // First start hit to be processed by CUDA block during next 
iteration int32_t mNextStartHitCount; // Number of start hits to be processed by CUDA block during next iteration int32_t mNextStartHitFirstRun; // First run for dynamic scheduler? @@ -82,38 +80,32 @@ class GPUTPCTrackletConstructor #endif // GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE }; - MEM_CLASS_PRE2() - GPUd() static void InitTracklet(MEM_LG2(GPUTPCTrackParam) & tParam); + GPUd() static void InitTracklet(GPUTPCTrackParam& tParam); - MEM_CLASS_PRE2_TEMPLATE(class T) - GPUd() static void UpdateTracklet(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() T& s, GPUTPCThreadMemory& r, GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, MEM_LG2(GPUTPCTrackParam) & tParam, int32_t iRow, calink& rowHit, calink* rowHits); + template + GPUd() static void UpdateTracklet(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() T& s, GPUTPCThreadMemory& r, GPUconstantref() GPUTPCTracker& tracker, GPUTPCTrackParam& tParam, int32_t iRow, calink& rowHit, calink* rowHits); - MEM_CLASS_PRE23() - GPUd() static void StoreTracklet(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, GPUTPCThreadMemory& r, GPUconstantref() MEM_LG2(GPUTPCTracker) & tracker, MEM_LG3(GPUTPCTrackParam) & tParam, calink* rowHits); + GPUd() static void StoreTracklet(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, GPUTPCThreadMemory& r, GPUconstantref() GPUTPCTracker& tracker, GPUTPCTrackParam& tParam, calink* rowHits); - MEM_CLASS_PRE2() - GPUd() static bool CheckCov(MEM_LG2(GPUTPCTrackParam) & tParam); + GPUd() static bool CheckCov(GPUTPCTrackParam& tParam); - GPUd() static void DoTracklet(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, GPUsharedref() GPUTPCTrackletConstructor::MEM_LOCAL(GPUSharedMemory) & sMem, GPUTPCThreadMemory& rMem); + GPUd() static void DoTracklet(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() 
GPUTPCTrackletConstructor::GPUSharedMemory& sMem, GPUTPCThreadMemory& rMem); #ifdef GPUCA_GPUCODE - GPUd() static int32_t FetchTracklet(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem); + GPUd() static int32_t FetchTracklet(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& sMem); #endif // GPUCA_GPUCODE -#if !defined(__OPENCL1__) template - GPUd() static int32_t GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); -#endif + GPUd() static int32_t GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx index d3da504ab4ec0..b8cbbae06e8b0 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx @@ -22,7 
+22,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { // select best tracklets and kill clones @@ -39,7 +39,7 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread for (int32_t itr = s.mItr0 + iThread; itr < s.mNTracklets; itr += s.mNThreadsTotal) { GPUbarrierWarp(); - GPUglobalref() MEM_GLOBAL(GPUTPCTracklet) & GPUrestrict() tracklet = tracker.Tracklets()[itr]; + GPUglobalref() GPUTPCTracklet& GPUrestrict() tracklet = tracker.Tracklets()[itr]; int32_t firstRow = tracklet.FirstRow(); int32_t lastRow = tracklet.LastRow(); @@ -62,7 +62,7 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread gap++; } if (ih != CALINK_INVAL && ih != CALINK_DEAD_CHANNEL) { - GPUglobalref() const MEM_GLOBAL(GPUTPCRow)& row = tracker.Row(irow); + GPUglobalref() const GPUTPCRow& row = tracker.Row(irow); bool own = (tracker.HitWeight(row, ih) <= w); bool sharedOK = nShared <= (nHits < sharingMinNorm ? maxShared : nHits * maxSharedFrac); if (own || sharedOK) { // SG!!! 
diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h index bae1cbe2bb876..af13b30022e6f 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h @@ -24,7 +24,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; /** @@ -34,7 +33,6 @@ class GPUTPCTracker; class GPUTPCTrackletSelector : public GPUKernelTemplate { public: - MEM_CLASS_PRE() struct GPUSharedMemory { int32_t mItr0; // index of the first track in the block int32_t mNThreadsTotal; // total n threads @@ -45,15 +43,14 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate #endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 09069ba1d104d..8bef787b85e8e 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -265,8 +265,8 @@ int32_t ReadConfiguration(int argc, char** argv) configStandalone.gpuType = "CUDA"; } else if 
(GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::HIP, configStandalone.proc.debugLevel >= 2)) { configStandalone.gpuType = "HIP"; - } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::OCL2, configStandalone.proc.debugLevel >= 2)) { - configStandalone.gpuType = "OCL2"; + } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::OCL, configStandalone.proc.debugLevel >= 2)) { + configStandalone.gpuType = "OCL"; } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::OCL, configStandalone.proc.debugLevel >= 2)) { configStandalone.gpuType = "OCL"; } else { diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index fd5f812facf06..1857d77c11b1a 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -145,7 +145,6 @@ include_directories(${GPU_DIR}/Common ${GPUTRACKING_DIR}/Definitions ${GPUTRACKING_DIR}/DataTypes ${GPUTRACKING_DIR}/Base - ${GPUTRACKING_DIR}/Base/opencl-common ${GPUTRACKING_DIR}/dEdx ${GPUTRACKING_DIR}/TPCConvert ${GPUTRACKING_DIR}/Global diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index 29f908c538af7..d8ba50bfee804 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -14,8 +14,7 @@ set(ENABLE_CUDA AUTO) set(ENABLE_HIP AUTO) -set(ENABLE_OPENCL1 AUTO) -set(ENABLE_OPENCL2 AUTO) +set(ENABLE_OPENCL AUTO) set(CONFIG_OPENMP 1) set(GPUCA_CONFIG_VC 1) set(GPUCA_CONFIG_FMT 1) diff --git a/GPU/GPUTracking/Standalone/cmake/makefile b/GPU/GPUTracking/Standalone/cmake/makefile index 87de4d66bb8c8..f2c38b0deee8e 100644 --- a/GPU/GPUTracking/Standalone/cmake/makefile +++ b/GPU/GPUTracking/Standalone/cmake/makefile @@ -4,4 +4,4 @@ all: clean: +$(MAKE) -C build clean - rm -f ca *.so + rm -f ca *.so *.pcm *.rootmap diff --git 
a/GPU/GPUTracking/Standalone/tools/testCL.sh b/GPU/GPUTracking/Standalone/tools/testCL.sh deleted file mode 100755 index 0257830d8942d..0000000000000 --- a/GPU/GPUTracking/Standalone/tools/testCL.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -COMPILER=clang++ -LLVM_SPIRV=llvm-spirv - -echo "Testing using clang `which clang++`, spirv `which llvm-spirv`" - -#COMPILER=/usr/lib/llvm/roc-2.1.0/bin/clang++ -#COMPILER=/usr/lib/llvm/9/bin/clang++ - -#COMPILER=/home/qon/alice/llvm-project/build/bin/clang++ -#LLVM_SPIRV=/home/qon/alice/llvm-project/build/bin/llvm-spirv - -O2_DIR=${HOME}/alice/O2 -GPU_DIR=${HOME}/alice/O2/GPU/GPUTracking - -INCLUDES="-I${GPU_DIR}/. -I${GPU_DIR}/DataTypes -I${GPU_DIR}/Definitions -I${GPU_DIR}/Base -I${GPU_DIR}/SliceTracker -I${O2_DIR}/GPU/Common -I${GPU_DIR}/Merger -I${GPU_DIR}/Refit -I${GPU_DIR}/TRDTracking -I${GPU_DIR}/ITS -I${GPU_DIR}/dEdx \ - -I${GPU_DIR}/TPCConvert -I${O2_DIR}/GPU/TPCFastTransformation -I${GPU_DIR}/DataCompression -I${GPU_DIR}/TPCClusterFinder -I${GPU_DIR}/Global -I ${O2_DIR}/GPU/Utils \ - -I${O2_DIR}/DataFormats/Detectors/TPC/include -I${O2_DIR}/Detectors/Base/include -I${O2_DIR}/Detectors/Base/src -I${O2_DIR}/Common/MathUtils/include -I${O2_DIR}/DataFormats/Headers/include \ - -I${O2_DIR}/Detectors/TRD/base/include -I${O2_DIR}/Detectors/TRD/base/src -I${O2_DIR}/Detectors/ITSMFT/ITS/tracking/include -I${O2_DIR}/Detectors/ITSMFT/ITS/tracking/cuda/include -I${O2_DIR}/Common/Constants/include \ - -I${O2_DIR}/DataFormats/common/include -I${O2_DIR}/DataFormats/Detectors/Common/include -I${O2_DIR}/DataFormats/Detectors/TRD/include -I${O2_DIR}/DataFormats/Reconstruction/include -I${O2_DIR}/DataFormats/Reconstruction/src \ - -I${O2_DIR}/Detectors/Raw/include" -DEFINES="-DGPUCA_STANDALONE -DNDEBUG -D__OPENCLCPP__ -DGPUCA_HAVE_O2HEADERS -DGPUCA_TPC_GEOMETRY_O2" -FLAGS="-Xclang -fdenormal-fp-math-f32=ieee -cl-mad-enable -cl-no-signed-zeros -ferror-limit=1000 -Dcl_clang_storage_class_specifiers" - -echo Test1 - Preprocess -echo 
$COMPILER -cl-std=clc++ -x cl $INCLUDES $DEFINES -Dcl_clang_storage_class_specifiers -cl-no-stdinc -E ${GPU_DIR}/Base/opencl-common/GPUReconstructionOCL.cl > test.cl - $COMPILER -cl-std=clc++ -x cl $INCLUDES $DEFINES -Dcl_clang_storage_class_specifiers -cl-no-stdinc -E ${GPU_DIR}/Base/opencl-common/GPUReconstructionOCL.cl > test.cl -if [ $? != 0 ]; then exit 1; fi -echo Test 1A - Compile Preprocessed -echo $COMPILER -cl-std=clc++ -x cl -emit-llvm --target=spir64-unknown-unknown $FLAGS -c test.cl -o test.bc - $COMPILER -cl-std=clc++ -x cl -emit-llvm --target=spir64-unknown-unknown $FLAGS -c test.cl -o test.bc - -echo -echo Test2 - SPIR-V -echo $COMPILER -O0 -cl-std=clc++ -x cl -emit-llvm --target=spir64-unknown-unknown $FLAGS $INCLUDES $DEFINES -c ${GPU_DIR}/Base/opencl-common/GPUReconstructionOCL.cl -o test.bc - $COMPILER -O0 -cl-std=clc++ -x cl -emit-llvm --target=spir64-unknown-unknown $FLAGS $INCLUDES $DEFINES -c ${GPU_DIR}/Base/opencl-common/GPUReconstructionOCL.cl -o test.bc -if [ $? != 0 ]; then exit 1; fi -echo $LLVM_SPIRV test.bc -o test.spirv - $LLVM_SPIRV test.bc -o test.spirv -if [ $? != 0 ]; then exit 1; fi - -echo -echo Test3 - amdgcn -echo $COMPILER -O3 -cl-std=clc++ -x cl --target=amdgcn-amd-amdhsa -mcpu=gfx906 $FLAGS $INCLUDES $DEFINES -c ${GPU_DIR}/Base/opencl-common/GPUReconstructionOCL.cl -o test.o - $COMPILER -O3 -cl-std=clc++ -x cl --target=amdgcn-amd-amdhsa -mcpu=gfx906 $FLAGS $INCLUDES $DEFINES -c ${GPU_DIR}/Base/opencl-common/GPUReconstructionOCL.cl -o test.o -if [ $? != 0 ]; then exit 1; fi - -echo -echo Test4 - Clang OCL -echo clang-ocl -O3 -cl-std=clc++ -mcpu=gfx906 $FLAGS $INCLUDES $DEFINES -o test-clang-ocl.o ${GPU_DIR}/Base/opencl-common/GPUReconstructionOCL.cl - clang-ocl -O3 -cl-std=clc++ -mcpu=gfx906 $FLAGS $INCLUDES $DEFINES -o test-clang-ocl.o ${GPU_DIR}/Base/opencl-common/GPUReconstructionOCL.cl -rm -f test-clang-ocl.o.* -if [ $? 
!= 0 ]; then exit 1; fi diff --git a/GPU/GPUTracking/TPCClusterFinder/CfFragment.h b/GPU/GPUTracking/TPCClusterFinder/CfFragment.h index ae95bfdc61358..48e3cbf6200fe 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfFragment.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfFragment.h @@ -41,7 +41,7 @@ struct CfFragment { tpccf::TPCTime totalSliceLength = 0; tpccf::TPCFragmentTime maxSubSliceLength = 0; - GPUdDefault() CfFragment() CON_DEFAULT; + GPUdDefault() CfFragment() = default; GPUd() CfFragment(tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen) : CfFragment(0, false, 0, totalSliceLen, maxSubSliceLen) {} diff --git a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h index f5ca9dbedd5ac..f852212d180f8 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h +++ b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h @@ -28,7 +28,7 @@ struct ChargePos { tpccf::GlobalPad gpad; tpccf::TPCFragmentTime timePadded; - GPUdDefault() ChargePos() CON_DEFAULT; + GPUdDefault() ChargePos() = default; constexpr GPUhdi() ChargePos(tpccf::Row row, tpccf::Pad pad, tpccf::TPCFragmentTime t) : gpad(tpcGlobalPadIdx(row, pad)), timePadded(t + GPUCF_PADDING_TIME) diff --git a/GPU/GPUTracking/TPCClusterFinder/PackedCharge.h b/GPU/GPUTracking/TPCClusterFinder/PackedCharge.h index 644e2074d92ca..9d13f431adc2b 100644 --- a/GPU/GPUTracking/TPCClusterFinder/PackedCharge.h +++ b/GPU/GPUTracking/TPCClusterFinder/PackedCharge.h @@ -38,7 +38,7 @@ class PackedCharge IsSplitMask = 1 << (ChargeBits + 1), }; - GPUdDefault() PackedCharge() CON_DEFAULT; + GPUdDefault() PackedCharge() = default; GPUdi() explicit PackedCharge(tpccf::Charge q) : PackedCharge(q, false, false) {} GPUdi() PackedCharge(tpccf::Charge q, bool peak3x3, bool wasSplit) { diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index 8436f584e59f5..f0ae1bde58334 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ 
b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -51,7 +51,7 @@ class trackInterface : public AliExternalTrackParam public: trackInterface() : AliExternalTrackParam(){}; trackInterface(const trackInterface& param) : AliExternalTrackParam(param){}; - trackInterface(const AliExternalTrackParam& param) CON_DELETE; + trackInterface(const AliExternalTrackParam& param) = delete; trackInterface(const AliHLTExternalTrackParam& param) : AliExternalTrackParam() { float paramTmp[5] = {param.fY, param.fZ, param.fSinPhi, param.fTgl, param.fq1Pt}; @@ -102,8 +102,8 @@ class propagatorInterface : public AliTrackerBase public: typedef void propagatorParam; propagatorInterface(const propagatorParam* = nullptr) : AliTrackerBase(), mParam(nullptr){}; - propagatorInterface(const propagatorInterface&) CON_DELETE; - propagatorInterface& operator=(const propagatorInterface&) CON_DELETE; + propagatorInterface(const propagatorInterface&) = delete; + propagatorInterface& operator=(const propagatorInterface&) = delete; bool propagateToX(float x, float maxSnp, float maxStep) { return PropagateTrackToBxByBz(mParam, x, 0.13957f, maxStep, false, maxSnp); } int32_t getPropagatedYZ(float x, float& projY, float& projZ) @@ -207,8 +207,8 @@ template <> class trackInterface : public GPUTPCGMTrackParam { public: - GPUdDefault() trackInterface() CON_DEFAULT; - GPUd() trackInterface(const GPUTPCGMTrackParam& param) CON_DELETE; + GPUdDefault() trackInterface() = default; + GPUd() trackInterface(const GPUTPCGMTrackParam& param) = delete; GPUd() trackInterface(const GPUTPCGMMergedTrack& trk) : GPUTPCGMTrackParam(trk.GetParam()), mAlpha(trk.GetAlpha()) {} GPUd() trackInterface(const gputpcgmmergertypes::GPUTPCOuterParam& param) : GPUTPCGMTrackParam(), mAlpha(param.alpha) { @@ -220,10 +220,8 @@ class trackInterface : public GPUTPCGMTrackParam SetCov(i, param.C[i]); } }; -#ifdef GPUCA_NOCOMPAT GPUdDefault() trackInterface(const trackInterface& param) = default; GPUdDefault() trackInterface& operator=(const 
trackInterface& param) = default; -#endif #ifdef GPUCA_ALIROOT_LIB trackInterface(const AliHLTExternalTrackParam& param) : GPUTPCGMTrackParam(), mAlpha(param.fAlpha) { @@ -319,8 +317,8 @@ class propagatorInterface : public GPUTPCGMPropagator this->SetFitInProjections(0); this->SelectFieldRegion(GPUTPCGMPropagator::TRD); }; - propagatorInterface(const propagatorInterface&) CON_DELETE; - propagatorInterface& operator=(const propagatorInterface&) CON_DELETE; + propagatorInterface(const propagatorInterface&) = delete; + propagatorInterface& operator=(const propagatorInterface&) = delete; GPUd() void setTrack(trackInterface* trk) { SetTrack(trk, trk->getAlpha()); diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h b/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h index 1af4812e5b23f..23e26d8354343 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h @@ -60,9 +60,7 @@ class GPUTRDSpacePoint : public o2::trd::CalibratedTracklet { }; -#ifdef GPUCA_NOCOMPAT static_assert(sizeof(GPUTRDSpacePoint) == sizeof(o2::trd::CalibratedTracklet), "Incorrect memory layout"); -#endif } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index 3d387d3694fe5..96a5547ad28de 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -54,8 +54,8 @@ class GPUTRDTracker_t : public GPUProcessor public: #ifndef GPUCA_GPUCODE GPUTRDTracker_t(); - GPUTRDTracker_t(const GPUTRDTracker_t& tracker) CON_DELETE; - GPUTRDTracker_t& operator=(const GPUTRDTracker_t& tracker) CON_DELETE; + GPUTRDTracker_t(const GPUTRDTracker_t& tracker) = delete; + GPUTRDTracker_t& operator=(const GPUTRDTracker_t& tracker) = delete; ~GPUTRDTracker_t(); void SetMaxData(const GPUTrackingInOutPointers& io); @@ -194,7 +194,7 @@ class GPUTRDTracker_t : public GPUProcessor float mAngleToDyC; // parameterization for 
conversion track angle -> tracklet deflection /// ---- end error parametrization ---- bool mDebugOutput; // store debug output - static CONSTEXPR const float sRadialOffset GPUCA_CPP11_INIT(= -0.1f); // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker + static CONSTEXPR const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker float mMaxEta; // TPC tracks with higher eta are ignored float mRoadZ; // in z, a constant search road is used float mZCorrCoefNRC; // tracklet z-position depends linearly on track dip angle diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h index 2b6c058323704..542700b7fe355 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h @@ -34,9 +34,9 @@ class GPUTRDTrackletWord public: GPUd() GPUTRDTrackletWord(uint32_t trackletWord = 0); GPUd() GPUTRDTrackletWord(uint32_t trackletWord, int32_t hcid); - GPUdDefault() GPUTRDTrackletWord(const GPUTRDTrackletWord& rhs) CON_DEFAULT; - GPUdDefault() GPUTRDTrackletWord& operator=(const GPUTRDTrackletWord& rhs) CON_DEFAULT; - GPUdDefault() ~GPUTRDTrackletWord() CON_DEFAULT; + GPUdDefault() GPUTRDTrackletWord(const GPUTRDTrackletWord& rhs) = default; + GPUdDefault() GPUTRDTrackletWord& operator=(const GPUTRDTrackletWord& rhs) = default; + GPUdDefault() ~GPUTRDTrackletWord() = default; #ifndef GPUCA_GPUCODE_DEVICE GPUTRDTrackletWord(const AliTRDtrackletWord& rhs); GPUTRDTrackletWord(const AliTRDtrackletMCM& rhs); @@ -88,9 +88,9 @@ class GPUTRDTrackletWord : private o2::trd::Tracklet64 { public: GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord){}; - GPUdDefault() GPUTRDTrackletWord(const GPUTRDTrackletWord& rhs) CON_DEFAULT; - GPUdDefault() GPUTRDTrackletWord& operator=(const 
GPUTRDTrackletWord& rhs) CON_DEFAULT; - GPUdDefault() ~GPUTRDTrackletWord() CON_DEFAULT; + GPUdDefault() GPUTRDTrackletWord(const GPUTRDTrackletWord& rhs) = default; + GPUdDefault() GPUTRDTrackletWord& operator=(const GPUTRDTrackletWord& rhs) = default; + GPUdDefault() ~GPUTRDTrackletWord() = default; // ----- Override operators < and > to enable tracklet sorting by HCId ----- GPUd() bool operator<(const GPUTRDTrackletWord& t) const { return (getHCID() < t.getHCID()); } @@ -106,9 +106,7 @@ class GPUTRDTrackletWord : private o2::trd::Tracklet64 // IMPORTANT: Do not add members, this class must keep the same memory layout as o2::trd::Tracklet64 }; -#ifdef GPUCA_NOCOMPAT static_assert(sizeof(GPUTRDTrackletWord) == sizeof(o2::trd::Tracklet64), "Incorrect memory layout"); -#endif } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 42fd6b3d2402a..1667ad867a9e7 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -52,8 +52,8 @@ function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) endif() set(TMP_PRE "") set(TMP_POST "") - if(NOT kernel_bounds MATCHES "_OCL1") - set(TMP_PRE "#ifdef GPUCA_KRNL_NOOCL1\n") + if(NOT kernel_bounds MATCHES "_ALIR") + set(TMP_PRE "#ifdef GPUCA_KRNL_NOALIROOT\n") set(TMP_POST "#endif\n") endif() set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_type}), (${OPT1}), (${OPT2}), (${OPT3}))\n") diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 516d1fced0a20..8c042d51514c4 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -20,7 +20,7 @@ #include "GPUCommonMath.h" #include "GPUParam.h" #include "GPUdEdxInfo.h" -#if defined(GPUCA_HAVE_O2HEADERS) && !defined(GPUCA_OPENCL1) +#if defined(GPUCA_HAVE_O2HEADERS) #include "DataFormatsTPC/Defs.h" #include "CalibdEdxContainer.h" #include 
"GPUDebugStreamer.h" @@ -30,7 +30,7 @@ namespace GPUCA_NAMESPACE { namespace gpu { -#if !defined(GPUCA_HAVE_O2HEADERS) || defined(GPUCA_OPENCL1) +#if !defined(GPUCA_HAVE_O2HEADERS) class GPUdEdx { @@ -212,7 +212,7 @@ GPUdi() void GPUdEdx::fillSubThreshold(int32_t padRow, const GPUParam& GPUrestri mNSubThresh++; } -#endif // !GPUCA_HAVE_O2HEADERS || GPUCA_OPENCL1 +#endif // !GPUCA_HAVE_O2HEADERS } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index f028c6990f267..bfa738201b637 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -33,16 +33,16 @@ o2_gpu_kernel_file_list(MATLUT) o2_gpu_kernel_file_list(TPCMERGER) endif() -o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSlice" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB_OCL1 both) -o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO_OCL1 "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO_OCL1 "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUTPCGlobalTrackingCopyNumbers" "GPUTPCGlobalTracking TPCTRACKER" NO_OCL1 single int32_t n) +o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, 
singleSlice" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB_ALIR both) +o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO_ALIR "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO_ALIR "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUTPCGlobalTrackingCopyNumbers" "GPUTPCGlobalTracking TPCTRACKER" NO_ALIR single int32_t n) o2_gpu_add_kernel("GPUTPCGlobalTracking" "= TPCTRACKER TPCTRACKLETCONS" LB single) o2_gpu_add_kernel("GPUTPCCreateSliceData" "= TPCTRACKER TPCSLICEDATA" LB single) o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER" NO single) diff --git a/GPU/GPUTracking/utils/bitfield.h b/GPU/GPUTracking/utils/bitfield.h index 05e90c9208542..9730f6c6c234f 100644 --- a/GPU/GPUTracking/utils/bitfield.h +++ b/GPU/GPUTracking/utils/bitfield.h @@ -15,7 +15,7 @@ #ifndef Q_BITFIELD_H #define Q_BITFIELD_H -#if !defined(GPUCA_NOCOMPAT_ALLOPENCL) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) +#if !defined(GPUCA_GPUCODE_DEVICE) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) #include #endif @@ -92,7 +92,7 @@ class bitfield return retVal; } -#if defined(GPUCA_NOCOMPAT_ALLOPENCL) && !defined(GPUCA_GPUCODE_DEVICE) +#if !defined(GPUCA_GPUCODE_DEVICE) static_assert(std::is_integral::value, "Storage type non integral"); static_assert(sizeof(S) >= sizeof(T), "Storage type has insufficient capacity"); #endif diff --git a/GPU/GPUTracking/utils/qconfig.h b/GPU/GPUTracking/utils/qconfig.h index d19ad370acc7d..79a9bd757b531 100644 --- a/GPU/GPUTracking/utils/qconfig.h +++ b/GPU/GPUTracking/utils/qconfig.h @@ -321,7 +321,7 @@ enum qConfigRetVal { qcrOK = 0, #endif #define AddOptionSet(name, type, value, optname, optnameshort, help, ...) 
#define AddSubConfig(name, instance) name instance; -#if !defined(QCONFIG_GENRTC) && defined(GPUCA_NOCOMPAT) && !defined(GPUCA_GPUCODE_DEVICE) +#if !defined(QCONFIG_GENRTC) && !defined(GPUCA_GPUCODE_DEVICE) #define BeginConfig(name, instance) \ struct name { \ bool operator==(const name&) const = default; diff --git a/GPU/TPCFastTransformation/ChebyshevFit1D.h b/GPU/TPCFastTransformation/ChebyshevFit1D.h index 1378563b4d8f3..f1726ce063f64 100644 --- a/GPU/TPCFastTransformation/ChebyshevFit1D.h +++ b/GPU/TPCFastTransformation/ChebyshevFit1D.h @@ -41,7 +41,7 @@ class ChebyshevFit1D reset(order, xMin, xMax); } - ~ChebyshevFit1D() CON_DEFAULT; + ~ChebyshevFit1D() = default; void reset(int32_t order, double xMin, double xMax); diff --git a/GPU/TPCFastTransformation/CorrectionMapsHelper.h b/GPU/TPCFastTransformation/CorrectionMapsHelper.h index 7a35077f04aef..100f871f2ec63 100644 --- a/GPU/TPCFastTransformation/CorrectionMapsHelper.h +++ b/GPU/TPCFastTransformation/CorrectionMapsHelper.h @@ -124,7 +124,7 @@ class CorrectionMapsHelper void setUpdatedMapMShape() { mUpdatedFlags |= UpdateFlags::MapMShapeBit; } void setUpdatedLumi() { mUpdatedFlags |= UpdateFlags::LumiBit; } -#if !defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_NOCOMPAT) +#if !defined(GPUCA_GPUCODE_DEVICE) void setCorrMap(std::unique_ptr&& m); void setCorrMapRef(std::unique_ptr&& m); void setCorrMapMShape(std::unique_ptr&& m); diff --git a/GPU/TPCFastTransformation/MultivariatePolynomial.h b/GPU/TPCFastTransformation/MultivariatePolynomial.h index 77deff08782d5..8da69b6978134 100644 --- a/GPU/TPCFastTransformation/MultivariatePolynomial.h +++ b/GPU/TPCFastTransformation/MultivariatePolynomial.h @@ -62,11 +62,11 @@ class MultivariatePolynomial : public FlatObject, public MultivariatePolynomialH } #else /// default constructor - MultivariatePolynomial() CON_DEFAULT; + MultivariatePolynomial() = default; #endif /// default destructor - ~MultivariatePolynomial() CON_DEFAULT; + ~MultivariatePolynomial() = default; 
/// Copy constructor MultivariatePolynomial(const MultivariatePolynomial& obj) { this->cloneFromObject(obj, nullptr); } diff --git a/GPU/TPCFastTransformation/MultivariatePolynomialHelper.h b/GPU/TPCFastTransformation/MultivariatePolynomialHelper.h index e4518efea37c6..69c2052f83138 100644 --- a/GPU/TPCFastTransformation/MultivariatePolynomialHelper.h +++ b/GPU/TPCFastTransformation/MultivariatePolynomialHelper.h @@ -163,10 +163,10 @@ class MultivariatePolynomialHelper<0, 0, false> : public MultivariatePolynomialP MultivariatePolynomialHelper(const uint32_t nDim, const uint32_t degree, const bool interactionOnly) : mDim{nDim}, mDegree{degree}, mInteractionOnly{interactionOnly} { assert(mDegree <= FMaxdegree); }; /// default constructor - MultivariatePolynomialHelper() CON_DEFAULT; + MultivariatePolynomialHelper() = default; /// Destructor - ~MultivariatePolynomialHelper() CON_DEFAULT; + ~MultivariatePolynomialHelper() = default; /// printing the formula of the polynomial void print() const; diff --git a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h index 9498645b76220..596c915950948 100644 --- a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h +++ b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h @@ -91,10 +91,10 @@ class NDPiecewisePolynomials : public FlatObject }; #endif // !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) /// default constructor - NDPiecewisePolynomials() CON_DEFAULT; + NDPiecewisePolynomials() = default; /// default destructor - ~NDPiecewisePolynomials() CON_DEFAULT; + ~NDPiecewisePolynomials() = default; /// Copy constructor NDPiecewisePolynomials(const NDPiecewisePolynomials& obj) { cloneFromObject(obj, nullptr); } diff --git a/GPU/TPCFastTransformation/Spline.h b/GPU/TPCFastTransformation/Spline.h index 51d9970028f22..9b514c984785d 100644 --- a/GPU/TPCFastTransformation/Spline.h +++ b/GPU/TPCFastTransformation/Spline.h @@ -95,8 +95,8 @@ class Spline } #else /// Disable 
constructors for the GPU implementation - Spline() CON_DELETE; - Spline(const Spline&) CON_DELETE; + Spline() = delete; + Spline(const Spline&) = delete; #endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) diff --git a/GPU/TPCFastTransformation/Spline1D.h b/GPU/TPCFastTransformation/Spline1D.h index c977e0bbaee35..62c6f82ad166a 100644 --- a/GPU/TPCFastTransformation/Spline1D.h +++ b/GPU/TPCFastTransformation/Spline1D.h @@ -161,8 +161,8 @@ class Spline1D } #else /// Disable constructors for the GPU implementation - Spline1D() CON_DELETE; - Spline1D(const Spline1D&) CON_DELETE; + Spline1D() = delete; + Spline1D(const Spline1D&) = delete; #endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) diff --git a/GPU/TPCFastTransformation/Spline1DHelper.h b/GPU/TPCFastTransformation/Spline1DHelper.h index e8388d68a6e05..31a100d28c319 100644 --- a/GPU/TPCFastTransformation/Spline1DHelper.h +++ b/GPU/TPCFastTransformation/Spline1DHelper.h @@ -41,13 +41,13 @@ class Spline1DHelper Spline1DHelper(); /// Copy constructor: disabled - Spline1DHelper(const Spline1DHelper&) CON_DEFAULT; + Spline1DHelper(const Spline1DHelper&) = default; /// Assignment operator: disabled - Spline1DHelper& operator=(const Spline1DHelper&) CON_DEFAULT; + Spline1DHelper& operator=(const Spline1DHelper&) = default; /// Destructor - ~Spline1DHelper() CON_DEFAULT; + ~Spline1DHelper() = default; /// _______________ Main functionality ________________________ diff --git a/GPU/TPCFastTransformation/Spline1DHelperOld.h b/GPU/TPCFastTransformation/Spline1DHelperOld.h index eaf2f185aa23c..013b4974c8c60 100644 --- a/GPU/TPCFastTransformation/Spline1DHelperOld.h +++ b/GPU/TPCFastTransformation/Spline1DHelperOld.h @@ -56,13 +56,13 @@ class Spline1DHelperOld Spline1DHelperOld(); /// Copy constructor: disabled - Spline1DHelperOld(const Spline1DHelperOld&) CON_DEFAULT; + Spline1DHelperOld(const Spline1DHelperOld&) = default; /// 
Assignment operator: disabled - Spline1DHelperOld& operator=(const Spline1DHelperOld&) CON_DEFAULT; + Spline1DHelperOld& operator=(const Spline1DHelperOld&) = default; /// Destructor - ~Spline1DHelperOld() CON_DEFAULT; + ~Spline1DHelperOld() = default; /// _______________ Main functionality ________________________ diff --git a/GPU/TPCFastTransformation/Spline1DSpec.h b/GPU/TPCFastTransformation/Spline1DSpec.h index f8af1980d81ae..65223d551e2ee 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.h +++ b/GPU/TPCFastTransformation/Spline1DSpec.h @@ -66,13 +66,13 @@ class Spline1DContainer : public FlatObject /// _____________ C++ constructors / destructors __________________________ /// Default constructor, required by the Root IO - Spline1DContainer() CON_DEFAULT; + Spline1DContainer() = default; /// Disable all other constructors - Spline1DContainer(const Spline1DContainer&) CON_DELETE; + Spline1DContainer(const Spline1DContainer&) = delete; /// Destructor - ~Spline1DContainer() CON_DEFAULT; + ~Spline1DContainer() = default; /// _______________ Construction interface ________________________ diff --git a/GPU/TPCFastTransformation/Spline2D.h b/GPU/TPCFastTransformation/Spline2D.h index cbbd91c1d2b8d..ccd68b029c0f1 100644 --- a/GPU/TPCFastTransformation/Spline2D.h +++ b/GPU/TPCFastTransformation/Spline2D.h @@ -22,7 +22,7 @@ #include "FlatObject.h" #include "GPUCommonDef.h" -#if !defined(__CINT__) && !defined(__ROOTCINT__) && !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L #include #include #endif @@ -92,8 +92,8 @@ class Spline2D } #else /// Disable constructors for the GPU implementation - Spline2D() CON_DELETE; - Spline2D(const Spline2D&) CON_DELETE; + Spline2D() = delete; + Spline2D(const Spline2D&) = delete; #endif #if !defined(GPUCA_GPUCODE) && 
!defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) diff --git a/GPU/TPCFastTransformation/Spline2DHelper.h b/GPU/TPCFastTransformation/Spline2DHelper.h index dc509dc33ea57..19630adacd581 100644 --- a/GPU/TPCFastTransformation/Spline2DHelper.h +++ b/GPU/TPCFastTransformation/Spline2DHelper.h @@ -46,13 +46,13 @@ class Spline2DHelper Spline2DHelper(); /// Copy constructor: disabled - Spline2DHelper(const Spline2DHelper&) CON_DELETE; + Spline2DHelper(const Spline2DHelper&) = delete; /// Assignment operator: disabled - Spline2DHelper& operator=(const Spline2DHelper&) CON_DELETE; + Spline2DHelper& operator=(const Spline2DHelper&) = delete; /// Destructor - ~Spline2DHelper() CON_DEFAULT; + ~Spline2DHelper() = default; /// _______________ Main functionality ________________________ diff --git a/GPU/TPCFastTransformation/Spline2DSpec.h b/GPU/TPCFastTransformation/Spline2DSpec.h index b0f70752d81cf..ab8c3cb39754d 100644 --- a/GPU/TPCFastTransformation/Spline2DSpec.h +++ b/GPU/TPCFastTransformation/Spline2DSpec.h @@ -22,7 +22,7 @@ #include "GPUCommonDef.h" #include "SplineUtil.h" -#if !defined(__CINT__) && !defined(__ROOTCINT__) && !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L #include #include #endif @@ -58,13 +58,13 @@ class Spline2DContainer : public FlatObject /// _____________ C++ constructors / destructors __________________________ /// Default constructor - Spline2DContainer() CON_DEFAULT; + Spline2DContainer() = default; /// Disable all other constructors - Spline2DContainer(const Spline2DContainer&) CON_DELETE; + Spline2DContainer(const Spline2DContainer&) = delete; /// Destructor - ~Spline2DContainer() CON_DEFAULT; + ~Spline2DContainer() = default; /// _______________ Construction interface ________________________ diff --git 
a/GPU/TPCFastTransformation/SplineHelper.h b/GPU/TPCFastTransformation/SplineHelper.h index 06b1cd9412c0f..ab558f82eaa17 100644 --- a/GPU/TPCFastTransformation/SplineHelper.h +++ b/GPU/TPCFastTransformation/SplineHelper.h @@ -46,13 +46,13 @@ class SplineHelper SplineHelper(); /// Copy constructor: disabled - SplineHelper(const SplineHelper&) CON_DELETE; + SplineHelper(const SplineHelper&) = delete; /// Assignment operator: disabled - SplineHelper& operator=(const SplineHelper&) CON_DELETE; + SplineHelper& operator=(const SplineHelper&) = delete; /// Destructor - ~SplineHelper() CON_DEFAULT; + ~SplineHelper() = default; /// _______________ Main functionality ________________________ diff --git a/GPU/TPCFastTransformation/SplineSpec.h b/GPU/TPCFastTransformation/SplineSpec.h index f9c14b6374337..d4e64b8dad1f6 100644 --- a/GPU/TPCFastTransformation/SplineSpec.h +++ b/GPU/TPCFastTransformation/SplineSpec.h @@ -22,7 +22,7 @@ #include "GPUCommonDef.h" #include "SplineUtil.h" -#if !defined(__CINT__) && !defined(__ROOTCINT__) && !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L #include #include #endif @@ -58,13 +58,13 @@ class SplineContainer : public FlatObject /// _____________ C++ constructors / destructors __________________________ /// Default constructor - SplineContainer() CON_DEFAULT; + SplineContainer() = default; /// Disable all other constructors - SplineContainer(const SplineContainer&) CON_DELETE; + SplineContainer(const SplineContainer&) = delete; /// Destructor - ~SplineContainer() CON_DEFAULT; + ~SplineContainer() = default; /// _______________ Construction interface ________________________ diff --git a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h index 8fec1be5c459a..70ca6c99ed853 
100644 --- a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h +++ b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h @@ -88,10 +88,10 @@ class TPCFastSpaceChargeCorrection : public FlatObject TPCFastSpaceChargeCorrection(); /// Copy constructor: disabled to avoid ambiguity. Use cloneTo[In/Ex]ternalBuffer() instead - TPCFastSpaceChargeCorrection(const TPCFastSpaceChargeCorrection&) CON_DELETE; + TPCFastSpaceChargeCorrection(const TPCFastSpaceChargeCorrection&) = delete; /// Assignment operator: disabled to avoid ambiguity. Use cloneTo[In/Ex]ternalBuffer() instead - TPCFastSpaceChargeCorrection& operator=(const TPCFastSpaceChargeCorrection&) CON_DELETE; + TPCFastSpaceChargeCorrection& operator=(const TPCFastSpaceChargeCorrection&) = delete; /// Destructor ~TPCFastSpaceChargeCorrection(); diff --git a/GPU/TPCFastTransformation/TPCFastTransform.h b/GPU/TPCFastTransformation/TPCFastTransform.h index 80c8a04f849c6..936a19d3f30fa 100644 --- a/GPU/TPCFastTransformation/TPCFastTransform.h +++ b/GPU/TPCFastTransformation/TPCFastTransform.h @@ -50,7 +50,7 @@ struct TPCSlowSpaceChargeCorrection { o2::tpc::SpaceCharge* mCorr{nullptr}; ///< reference space charge corrections #else - ~TPCSlowSpaceChargeCorrection() CON_DEFAULT; + ~TPCSlowSpaceChargeCorrection() = default; /// setting dummy corrections for GPU GPUd() void getCorrections(const float gx, const float gy, const float gz, const int32_t slice, float& gdxC, float& gdyC, float& gdzC) const @@ -104,10 +104,10 @@ class TPCFastTransform : public FlatObject TPCFastTransform(); /// Copy constructor: disabled to avoid ambiguity. Use cloneFromObject() instead - TPCFastTransform(const TPCFastTransform&) CON_DELETE; + TPCFastTransform(const TPCFastTransform&) = delete; /// Assignment operator: disabled to avoid ambiguity. 
Use cloneFromObject() instead - TPCFastTransform& operator=(const TPCFastTransform&) CON_DELETE; + TPCFastTransform& operator=(const TPCFastTransform&) = delete; inline void destroy() { @@ -122,7 +122,7 @@ class TPCFastTransform : public FlatObject delete mCorrectionSlow; } #else - ~TPCFastTransform() CON_DEFAULT; + ~TPCFastTransform() = default; #endif /// _____________ FlatObject functionality, see FlatObject class for description ____________ diff --git a/GPU/TPCFastTransformation/TPCFastTransformGeo.h b/GPU/TPCFastTransformation/TPCFastTransformGeo.h index ec1915dc4288d..a24dcbf1e80c2 100644 --- a/GPU/TPCFastTransformation/TPCFastTransformGeo.h +++ b/GPU/TPCFastTransformation/TPCFastTransformGeo.h @@ -65,13 +65,13 @@ class TPCFastTransformGeo TPCFastTransformGeo(); /// Copy constructor: disabled to avoid ambiguity. Use cloneFromObject() instead - TPCFastTransformGeo(const TPCFastTransformGeo&) CON_DEFAULT; + TPCFastTransformGeo(const TPCFastTransformGeo&) = default; /// Assignment operator: disabled to avoid ambiguity. 
Use cloneFromObject() instead - TPCFastTransformGeo& operator=(const TPCFastTransformGeo&) CON_DEFAULT; + TPCFastTransformGeo& operator=(const TPCFastTransformGeo&) = default; /// Destructor - ~TPCFastTransformGeo() CON_DEFAULT; + ~TPCFastTransformGeo() = default; /// _____________ FlatObject functionality, see FlatObject class for description ____________ diff --git a/GPU/TPCFastTransformation/TPCFastTransformManager.h b/GPU/TPCFastTransformation/TPCFastTransformManager.h index d932c41ca8c18..14a85f1030bd8 100644 --- a/GPU/TPCFastTransformation/TPCFastTransformManager.h +++ b/GPU/TPCFastTransformation/TPCFastTransformManager.h @@ -43,13 +43,13 @@ class TPCFastTransformManager TPCFastTransformManager(); /// Copy constructor: disabled - TPCFastTransformManager(const TPCFastTransformManager&) CON_DELETE; + TPCFastTransformManager(const TPCFastTransformManager&) = delete; /// Assignment operator: disabled - TPCFastTransformManager& operator=(const TPCFastTransformManager&) CON_DELETE; + TPCFastTransformManager& operator=(const TPCFastTransformManager&) = delete; /// Destructor - ~TPCFastTransformManager() CON_DEFAULT; + ~TPCFastTransformManager() = default; /// _______________ Main functionality ________________________ diff --git a/GPU/TPCFastTransformation/TPCFastTransformQA.h b/GPU/TPCFastTransformation/TPCFastTransformQA.h index f27f9be04efb9..6030ceedc5c94 100644 --- a/GPU/TPCFastTransformation/TPCFastTransformQA.h +++ b/GPU/TPCFastTransformation/TPCFastTransformQA.h @@ -46,13 +46,13 @@ class TPCFastTransformQA TPCFastTransformQA(); /// Copy constructor: disabled - TPCFastTransformQA(const TPCFastTransformQA&) CON_DELETE; + TPCFastTransformQA(const TPCFastTransformQA&) = delete; /// Assignment operator: disabled - TPCFastTransformQA& operator=(const TPCFastTransformQA&) CON_DELETE; + TPCFastTransformQA& operator=(const TPCFastTransformQA&) = delete; /// Destructor - ~TPCFastTransformQA() CON_DEFAULT; + ~TPCFastTransformQA() = default; /// _______________ Main 
functionality ________________________ diff --git a/GPU/TPCFastTransformation/TPCFastTransformationLinkDef_AliRoot.h b/GPU/TPCFastTransformation/TPCFastTransformationLinkDef_AliRoot.h index 8fc2d6bfb88d7..acdd2d701bb86 100644 --- a/GPU/TPCFastTransformation/TPCFastTransformationLinkDef_AliRoot.h +++ b/GPU/TPCFastTransformation/TPCFastTransformationLinkDef_AliRoot.h @@ -12,7 +12,7 @@ /// \file TPCFastTransformationLinkDef_AliRoot.h /// \author Sergey Gorbunov -#if defined(__CINT__) || defined(__CLING__) +#if defined(__CLING__) #pragma link off all globals; #pragma link off all classes; diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline1D.h b/GPU/TPCFastTransformation/devtools/IrregularSpline1D.h index e15aa4b701002..62229c2afe087 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline1D.h +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline1D.h @@ -133,13 +133,13 @@ class IrregularSpline1D : public FlatObject IrregularSpline1D(); /// Copy constructor: disabled to avoid ambiguity. Use cloneFromObject instead - IrregularSpline1D(const IrregularSpline1D&) CON_DELETE; + IrregularSpline1D(const IrregularSpline1D&) = delete; /// Assignment operator: disabled to avoid ambiguity. 
Use cloneFromObject instead - IrregularSpline1D& operator=(const IrregularSpline1D&) CON_DELETE; + IrregularSpline1D& operator=(const IrregularSpline1D&) = delete; /// Destructor - ~IrregularSpline1D() CON_DEFAULT; + ~IrregularSpline1D() = default; /// _____________ FlatObject functionality, see FlatObject class for description ____________ diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h index 9135a991c8fef..99550cc12219f 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h @@ -21,7 +21,7 @@ #include "FlatObject.h" #include "GPUCommonDef.h" -#if !defined(__CINT__) && !defined(__ROOTCINT__) && !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L #include #include #endif @@ -70,13 +70,13 @@ class IrregularSpline2D3D : public FlatObject IrregularSpline2D3D(); /// Copy constructor: disabled to avoid ambiguity. Use cloneFromObject() instead - IrregularSpline2D3D(const IrregularSpline2D3D&) CON_DELETE; + IrregularSpline2D3D(const IrregularSpline2D3D&) = delete; /// Assignment operator: disabled to avoid ambiguity. Use cloneFromObject() instead - IrregularSpline2D3D& operator=(const IrregularSpline2D3D&) CON_DELETE; + IrregularSpline2D3D& operator=(const IrregularSpline2D3D&) = delete; /// Destructor - ~IrregularSpline2D3D() CON_DEFAULT; + ~IrregularSpline2D3D() = default; /// _____________ FlatObject functionality, see FlatObject class for description ____________ @@ -331,7 +331,7 @@ GPUdi() void IrregularSpline2D3D::getSplineVec(const float* correctedData, float // Same as getSpline, but using vectorized calculation. 
// \param correctedData should be at least 128-bit aligned -#if !defined(__CINT__) && !defined(__ROOTCINT__) && !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L const IrregularSpline1D& gridU = getGridU(); const IrregularSpline1D& gridV = getGridV(); int32_t nu = gridU.getNumberOfKnots(); diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.h b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.h index 12696710e0a5b..4b3ba8f787a79 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.h +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.h @@ -57,7 +57,7 @@ class IrregularSpline2D3DCalibrator IrregularSpline2D3DCalibrator(); /// Destructor - ~IrregularSpline2D3DCalibrator() CON_DEFAULT; + ~IrregularSpline2D3DCalibrator() = default; /// set size of the raster grid void setRasterSize(int32_t nKnotsU, int32_t nKnotsV); diff --git a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h index aa5acbe411dcd..b97903f7d5aac 100644 --- a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h +++ b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h @@ -37,10 +37,10 @@ class RegularSpline1D /// _____________ Constructors / destructors __________________________ /// Default constructor - RegularSpline1D() CON_DEFAULT; + RegularSpline1D() = default; /// Destructor - ~RegularSpline1D() CON_DEFAULT; + ~RegularSpline1D() = default; /// Constructor. 
Number of knots will be set to at least 5 void construct(int32_t numberOfKnots); diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h index 41128cf508fc2..4e14b69583d74 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h @@ -23,7 +23,7 @@ #include "RegularSpline1D.h" #include "FlatObject.h" -#if !defined(__CINT__) && !defined(__ROOTCINT__) && !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) #include #include #endif @@ -58,13 +58,13 @@ class SemiregularSpline2D3D : public FlatObject SemiregularSpline2D3D(); /// Copy constructor: disabled to avoid ambiguity. Use cloneFromObject() instead - SemiregularSpline2D3D(const SemiregularSpline2D3D&) CON_DELETE; + SemiregularSpline2D3D(const SemiregularSpline2D3D&) = delete; /// Assignment operator: disabled to avoid ambiguity. Use cloneFromObject() instead - SemiregularSpline2D3D& operator=(const SemiregularSpline2D3D&) CON_DELETE; + SemiregularSpline2D3D& operator=(const SemiregularSpline2D3D&) = delete; /// Destructor - ~SemiregularSpline2D3D() CON_DEFAULT; + ~SemiregularSpline2D3D() = default; /// _____________ FlatObject functionality, see FlatObject class for description ____________ @@ -400,7 +400,7 @@ inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, floa // Same as getSpline, but using vectorized calculation. // \param correctedData should be at least 128-bit aligned -#if !defined(__CINT__) && !defined(__ROOTCINT__) && !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) //&& !defined(__CLING__) /* Idea: There are 16 knots important for (u, v). 
diff --git a/GPU/Utils/FlatObject.h b/GPU/Utils/FlatObject.h index d9b3ca8370813..99fcdab8a6cae 100644 --- a/GPU/Utils/FlatObject.h +++ b/GPU/Utils/FlatObject.h @@ -179,12 +179,12 @@ class FlatObject /// Default constructor / destructor #ifndef GPUCA_GPUCODE - FlatObject() CON_DEFAULT; // No object derrived from FlatObject should be created on the GPU + FlatObject() = default; // No object derrived from FlatObject should be created on the GPU ~FlatObject(); - FlatObject(const FlatObject&) CON_DELETE; - FlatObject& operator=(const FlatObject&) CON_DELETE; + FlatObject(const FlatObject&) = delete; + FlatObject& operator=(const FlatObject&) = delete; #else - FlatObject() CON_DELETE; + FlatObject() = delete; #endif protected: diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index c9420de2b704b..c5d53d6359ada 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -12,18 +12,14 @@ if(NOT DEFINED ENABLE_CUDA) set(ENABLE_CUDA "AUTO") endif() -if(NOT DEFINED ENABLE_OPENCL1) - set(ENABLE_OPENCL1 "AUTO") -endif() -if(NOT DEFINED ENABLE_OPENCL2) - set(ENABLE_OPENCL2 "AUTO") +if(NOT DEFINED ENABLE_OPENCL) + set(ENABLE_OPENCL "AUTO") endif() if(NOT DEFINED ENABLE_HIP) set(ENABLE_HIP "AUTO") endif() string(TOUPPER "${ENABLE_CUDA}" ENABLE_CUDA) -string(TOUPPER "${ENABLE_OPENCL1}" ENABLE_OPENCL1) -string(TOUPPER "${ENABLE_OPENCL2}" ENABLE_OPENCL2) +string(TOUPPER "${ENABLE_OPENCL}" ENABLE_OPENCL) string(TOUPPER "${ENABLE_HIP}" ENABLE_HIP) if(NOT DEFINED CMAKE_BUILD_TYPE_UPPER) string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UPPER) @@ -151,35 +147,17 @@ if(ENABLE_CUDA) endif() # Detect and enable OpenCL 1.2 from AMD -if(ENABLE_OPENCL1 OR ENABLE_OPENCL2) +if(ENABLE_OPENCL) find_package(OpenCL) - if((ENABLE_OPENCL1 AND NOT ENABLE_OPENCL1 STREQUAL "AUTO") - OR (ENABLE_OPENCL2 AND NOT ENABLE_OPENCL2 STREQUAL "AUTO")) + if(ENABLE_OPENCL AND NOT ENABLE_OPENCL STREQUAL "AUTO") set_package_properties(OpenCL PROPERTIES TYPE REQUIRED) 
else() set_package_properties(OpenCL PROPERTIES TYPE OPTIONAL) endif() endif() -if(ENABLE_OPENCL1) - if(NOT AMDAPPSDKROOT) - set(AMDAPPSDKROOT "$ENV{AMDAPPSDKROOT}") - endif() - - if(OpenCL_FOUND - AND OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 1.2 - AND AMDAPPSDKROOT - AND EXISTS "${AMDAPPSDKROOT}") - set(OPENCL1_ENABLED ON) - message(STATUS "Found AMD OpenCL 1.2") - elseif(NOT ENABLE_OPENCL1 STREQUAL "AUTO") - message(FATAL_ERROR "AMD OpenCL 1.2 not available") - else() - set(OPENCL1_ENABLED OFF) - endif() -endif() # Detect and enable OpenCL 2.x -if(ENABLE_OPENCL2) +if(ENABLE_OPENCL) find_package(OpenCL) find_package(LLVM) if(LLVM_FOUND) @@ -198,23 +176,23 @@ if(ENABLE_OPENCL2) AND LLVM_FOUND AND NOT LLVM_CLANG STREQUAL "LLVM_CLANG-NOTFOUND" AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 13.0) - set(OPENCL2_COMPATIBLE_CLANG_FOUND ON) + set(OPENCL_COMPATIBLE_CLANG_FOUND ON) endif() if(OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 2.2 AND NOT LLVM_SPIRV STREQUAL "LLVM_SPIRV-NOTFOUND" - AND OPENCL2_COMPATIBLE_CLANG_FOUND) - set(OPENCL2_ENABLED_SPIRV ON) + AND OPENCL_COMPATIBLE_CLANG_FOUND) + set(OPENCL_ENABLED_SPIRV ON) message(STATUS "Using CLANG ${LLVM_CLANG} and ${LLVM_SPIRV} for SPIR-V compilation") endif () - if(OPENCL2_COMPATIBLE_CLANG_FOUND AND + if(OPENCL_COMPATIBLE_CLANG_FOUND AND (OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 2.2 - OR OPENCL2_ENABLED_SPIRV)) - set(OPENCL2_ENABLED ON) - message(STATUS "Found OpenCL 2 (${OpenCL_VERSION_STRING} SPIR-V ${OPENCL2_ENABLED_SPIRV} with CLANG ${LLVM_PACKAGE_VERSION})") - elseif(NOT ENABLE_OPENCL2 STREQUAL "AUTO") + OR OPENCL_ENABLED_SPIRV)) + set(OPENCL_ENABLED ON) + message(STATUS "Found OpenCL 2 (${OpenCL_VERSION_STRING} SPIR-V ${OPENCL_ENABLED_SPIRV} with CLANG ${LLVM_PACKAGE_VERSION})") + elseif(NOT ENABLE_OPENCL STREQUAL "AUTO") message(FATAL_ERROR "OpenCL 2.x not available") else() - set(OPENCL2_ENABLED OFF) + set(OPENCL_ENABLED OFF) endif() endif()