diff --git a/source/lib/src/NNPInter.cc b/source/lib/src/NNPInter.cc index 58042019c7..f4f39945ff 100644 --- a/source/lib/src/NNPInter.cc +++ b/source/lib/src/NNPInter.cc @@ -3,7 +3,7 @@ #include "SimulationRegion.h" #include -#define MAGIC_NUMBER 256 +#define MAGIC_NUMBER 1024 #ifdef USE_CUDA_TOOLKIT #include "cuda_runtime.h" diff --git a/source/op/cuda/descrpt_se_a.cu b/source/op/cuda/descrpt_se_a.cu index 39434306f2..4b309522fa 100644 --- a/source/op/cuda/descrpt_se_a.cu +++ b/source/op/cuda/descrpt_se_a.cu @@ -18,7 +18,7 @@ limitations under the License. #include #include -#define MAGIC_NUMBER 256 +#define MAGIC_NUMBER 1024 #ifdef HIGH_PREC typedef double VALUETYPE; @@ -326,7 +326,7 @@ void DescrptSeALauncher(const VALUETYPE* coord, i_idx ); const int ITEMS_PER_THREAD = 4; - const int BLOCK_THREADS = 64; + const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD; // BlockSortKernel<<>> ( BlockSortKernel <<>> (key, key + nloc * MAGIC_NUMBER); diff --git a/source/op/descrpt_se_a_gpu.cc b/source/op/descrpt_se_a_gpu.cc index 93c83016fb..70dd9c7751 100644 --- a/source/op/descrpt_se_a_gpu.cc +++ b/source/op/descrpt_se_a_gpu.cc @@ -7,7 +7,6 @@ #include "tensorflow/core/framework/shape_inference.h" using namespace tensorflow; // NOLINT(build/namespaces) -#define MAGIC_NUMBER 256 #ifdef HIGH_PREC typedef double VALUETYPE ; @@ -159,7 +158,8 @@ class DescrptSeAOp : public OpKernel { OP_REQUIRES (context, (ntypes == int(sel_a.size())), errors::InvalidArgument ("number of types should match the length of sel array")); OP_REQUIRES (context, (ntypes == int(sel_r.size())), errors::InvalidArgument ("number of types should match the length of sel array")); - + OP_REQUIRES (context, (nnei <= 1024), errors::InvalidArgument ("Assert failed, max neighbor size of atom(nnei) " + std::to_string(nnei) + " is larger than 1024!, which currently is not supported by deepmd-kit.")); + // Create output tensors TensorShape descrpt_shape ; descrpt_shape.AddDim (nsamples); @@ -201,7 +201,6 @@ class DescrptSeAOp : public OpKernel { cudaErrcheck(cudaMemcpy(&(array_longlong), 20 + mesh_tensor.flat().data(), sizeof(unsigned long long *), cudaMemcpyDeviceToHost)); cudaErrcheck(cudaMemcpy(&(array_double), 24 + mesh_tensor.flat().data(), sizeof(compute_t *), cudaMemcpyDeviceToHost)); - // cudaErrcheck(cudaMemcpy(jlist, host_jlist, sizeof(int) * nloc * MAGIC_NUMBER, cudaMemcpyHostToDevice)); // Launch computation for (int II = 0; II < nsamples; II++) { DescrptSeALauncher(coord_tensor.matrix().data() + II * (nall * 3), // related to the kk argument diff --git a/source/train/CMakeLists.txt b/source/train/CMakeLists.txt index 8be6b6c819..1875d2097c 100644 --- a/source/train/CMakeLists.txt +++ b/source/train/CMakeLists.txt @@ -2,7 +2,7 @@ configure_file("RunOptions.py.in" "${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py" @ONLY) -file(GLOB LIB_PY main.py common.py env.py compat.py calculator.py Network.py Deep*.py Data.py DataSystem.py Model*.py Descrpt*.py Fitting.py Loss.py LearningRate.py Trainer.py TabInter.py EwaldRecp.py DataModifier.py ${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py) +file(GLOB LIB_PY main.py common.py env.py compat.py calculator.py Network.py Deep*.py Data.py DataSystem.py Model*.py Descrpt*.py Fitting.py Loss.py LearningRate.py Trainer.py TabInter.py EwaldRecp.py DataModifier.py ${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py transform.py) file(GLOB CLS_PY Local.py Slurm.py)