Add copy_coord tests for a box that spans several cells (TestCopyCoordMoreCell)
and remove the ngcell and boxt locals from compute_int_data in coord.cu.

Note: the indentation of unchanged context lines and the position of the blank
context line in the coord.cu hunk were reconstructed. If the patch does not
apply cleanly, retry with `git apply --ignore-whitespace`.

diff --git a/source/lib/src/cuda/coord.cu b/source/lib/src/cuda/coord.cu
index 4f7714ca50..317a8d42c9 100644
--- a/source/lib/src/cuda/coord.cu
+++ b/source/lib/src/cuda/coord.cu
@@ -286,8 +286,6 @@ void compute_int_data(
     const int * nat_end=cell_info+3;
     const int * ext_stt=cell_info+6;
     const int * ext_end=cell_info+9;
-    const int * ngcell=cell_info+12;
-    const FPTYPE * boxt = region.boxt;
     const FPTYPE * rec_boxt = region.rec_boxt;
 
     const int nblock_loc=(nloc+TPB-1)/TPB;
diff --git a/source/lib/tests/test_coord.cc b/source/lib/tests/test_coord.cc
index e3b0552b1d..8dee80745e 100644
--- a/source/lib/tests/test_coord.cc
+++ b/source/lib/tests/test_coord.cc
@@ -473,3 +473,306 @@ TEST_F(TestCopyCoord, gpu_lessmem)
   //           << nall << std::endl;
 }
 #endif //GOOGLE_CUDA
+
+// Fixture for copy_coord tests on a cubic box of edge 20 with cut-off rc = 4.
+// The _expected_* members hold the reference output: the nloc input atoms
+// first, then their periodic copies (coordinates shifted by the box edge).
+class TestCopyCoordMoreCell : public ::testing::Test
+{
+protected:
+  std::vector<double> posi = {
+    0.041, 0.072, 0.100,
+    4.053, 0.041, 0.068,
+    1.130, 0.014, 0.109,
+    0.018, 1.134, 0.139,
+    5.120, 0.087, 0.002,
+    4.067, 1.141, 0.055
+  };
+  std::vector<int> atype = {0, 0, 1, 1, 1, 1};
+  std::vector<double> _expected_posi_cpy = {
+    // the six local atoms, in input order
+    0.041, 0.072, 0.1, 4.053, 0.041, 0.068, 1.13, 0.014, 0.109,
+    0.018, 1.134, 0.139, 5.12, 0.087, 0.002, 4.067, 1.141, 0.055,
+    // copies shifted by the box edge along y and/or z (one row per source atom)
+    0.018, 1.134, 20.139, 0.018, 21.134, 0.139, 0.018, 21.134, 20.139,
+    0.041, 0.072, 20.1, 0.041, 20.072, 0.1, 0.041, 20.072, 20.1,
+    1.13, 0.014, 20.109, 1.13, 20.014, 0.109, 1.13, 20.014, 20.109,
+    4.053, 0.041, 20.068, 4.053, 20.041, 0.068, 4.053, 20.041, 20.068,
+    4.067, 1.141, 20.055, 4.067, 21.141, 0.055, 4.067, 21.141, 20.055,
+    5.12, 0.087, 20.002, 5.12, 20.087, 0.002, 5.12, 20.087, 20.002,
+    // copies shifted along x, alone or combined with y and/or z
+    20.018, 1.134, 0.139, 20.018, 1.134, 20.139, 20.018, 21.134, 0.139, 20.018, 21.134, 20.139,
+    20.041, 0.072, 0.1, 20.041, 0.072, 20.1, 20.041, 20.072, 0.1, 20.041, 20.072, 20.1,
+    21.13, 0.014, 0.109, 21.13, 0.014, 20.109, 21.13, 20.014, 0.109, 21.13, 20.014, 20.109,
+    24.053, 0.041, 0.068, 24.053, 0.041, 20.068, 24.053, 20.041, 0.068, 24.053, 20.041, 20.068,
+    24.067, 1.141, 0.055, 24.067, 1.141, 20.055, 24.067, 21.141, 0.055, 24.067, 21.141, 20.055,
+    25.12, 0.087, 0.002, 25.12, 0.087, 20.002, 25.12, 20.087, 0.002, 25.12, 20.087, 20.002,
+  };
+  std::vector<double> expected_posi_cpy;
+  std::vector<int> _expected_atype_cpy = {
+    0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+  };
+  std::vector<int> expected_atype_cpy;
+  std::vector<int> _expected_mapping = {
+    0, 1, 2, 3, 4, 5, 3, 3, 3, 0, 0, 0, 2, 2, 2, 1, 1, 1, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 5, 5, 5, 5, 4, 4, 4, 4,
+  };
+  std::vector<int> expected_mapping;
+  int ntypes = 2;
+  int nloc, expected_nall;
+  double rc = 4;
+  std::vector<double> boxt = {20., 0., 0., 0., 20., 0., 0., 0., 20.};
+  std::vector<int> ncell, ngcell;
+
+  void SetUp() override {
+    nloc = posi.size() / 3;
+    expected_nall = _expected_posi_cpy.size() / 3;
+    EXPECT_EQ(expected_nall, _expected_atype_cpy.size());
+    EXPECT_EQ(expected_nall, _expected_mapping.size());
+    // sort the atoms between nloc and nall, to remove the uncertainty of the ordering
+    sort_atoms(
+      expected_posi_cpy,
+      expected_atype_cpy,
+      expected_mapping,
+      _expected_posi_cpy,
+      _expected_atype_cpy,
+      _expected_mapping,
+      nloc,
+      expected_nall);
+  }
+};
+
+// With ample output capacity copy_coord_cpu must return 0 and reproduce the
+// reference coordinates, types and mapping.
+TEST_F(TestCopyCoordMoreCell, cpu)
+{
+  int mem_size = 1000;
+  std::vector<double> out_c(mem_size * 3);
+  std::vector<int> out_t(mem_size);
+  std::vector<int> mapping(mem_size);
+  int nall;
+  deepmd::Region<double> region;
+  init_region_cpu(region, &boxt[0]);
+
+  int ret = copy_coord_cpu(
+      &out_c[0],
+      &out_t[0],
+      &mapping[0],
+      &nall,
+      &posi[0],
+      &atype[0],
+      nloc,
+      mem_size,
+      rc,
+      region);
+  EXPECT_EQ(ret, 0);
+  EXPECT_EQ(nall, expected_nall);
+  // std::cout << "---------------------"
+  //           << nloc << " "
+  //           << nall << std::endl;
+
+  out_c.resize(nall*3);
+  out_t.resize(nall);
+  mapping.resize(nall);
+
+  // sort the output with the same call SetUp() used for the reference
+  std::vector<double> out_c_1(mem_size * 3);
+  std::vector<int> out_t_1(mem_size);
+  std::vector<int> mapping_1(mem_size);
+  sort_atoms(out_c_1, out_t_1, mapping_1, out_c, out_t, mapping, nloc, nall);
+  for(int ii = 0; ii < expected_nall; ++ii){
+    for(int dd = 0; dd < 3; ++dd){
+      EXPECT_LT(fabs(out_c_1[ii*3+dd] - expected_posi_cpy[ii*3+dd]), 1e-12);
+    }
+    EXPECT_EQ(out_t_1[ii], expected_atype_cpy[ii]);
+    EXPECT_EQ(mapping_1[ii], expected_mapping[ii]);
+  }
+}
+
+// mem_size (40) is smaller than the 48 atoms of the reference, so
+// copy_coord_cpu must report failure by returning 1.
+TEST_F(TestCopyCoordMoreCell, cpu_lessmem)
+{
+  int mem_size = 40;
+  std::vector<double> out_c(mem_size * 3);
+  std::vector<int> out_t(mem_size);
+  std::vector<int> mapping(mem_size);
+  int nall;
+  deepmd::Region<double> region;
+  init_region_cpu(region, &boxt[0]);
+
+  int ret = copy_coord_cpu(
+      &out_c[0],
+      &out_t[0],
+      &mapping[0],
+      &nall,
+      &posi[0],
+      &atype[0],
+      nloc,
+      mem_size,
+      rc,
+      region);
+  EXPECT_EQ(ret, 1);
+  // EXPECT_EQ(nall, expected_nall);
+  // std::cout << "---------------------"
+  //           << nloc << " "
+  //           << nall << std::endl;
+}
+
+#if GOOGLE_CUDA
+// GPU counterpart of the cpu test: inputs, box and cell info are uploaded,
+// copy_coord_gpu is run, and the downloaded result must match the reference.
+TEST_F(TestCopyCoordMoreCell, gpu)
+{
+  int mem_size = 1000;
+  std::vector<double> out_c(mem_size * 3);
+  std::vector<int> out_t(mem_size);
+  std::vector<int> mapping(mem_size);
+  int nall;
+  std::vector<int> cell_info;
+  cell_info.resize(23);
+  deepmd::Region<double> region;
+  deepmd::Region<double> region_dev;
+  // remember the pointers held by region_dev so they can be restored below
+  double * new_boxt = region_dev.boxt;
+  double * new_rec_boxt = region_dev.rec_boxt;
+  init_region_cpu(region, &boxt[0]);
+  deepmd::compute_cell_info(&cell_info[0], rc, region);
+  std::vector<double> box_info;
+  box_info.resize(18);
+  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
+  const int loc_cellnum=cell_info[21];
+  const int total_cellnum=cell_info[22];
+  int * cell_info_dev=nullptr;
+  double * box_info_dev=nullptr;
+  double * out_c_dev=nullptr, * in_c_dev=nullptr;
+  int * out_t_dev=nullptr, * in_t_dev=nullptr, * mapping_dev=nullptr, * int_data_dev=nullptr;
+  deepmd::malloc_device_memory_sync(cell_info_dev, cell_info);
+  deepmd::malloc_device_memory_sync(box_info_dev, box_info);
+  deepmd::malloc_device_memory_sync(in_c_dev, posi);
+  deepmd::malloc_device_memory_sync(in_t_dev, atype);
+  deepmd::malloc_device_memory(out_c_dev, mem_size * 3);
+  deepmd::malloc_device_memory(out_t_dev, mem_size);
+  deepmd::malloc_device_memory(mapping_dev, mem_size);
+  deepmd::malloc_device_memory(int_data_dev, nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
+  // point region_dev at the uploaded box data (boxt followed by rec_boxt)
+  region_dev.boxt = box_info_dev;
+  region_dev.rec_boxt = box_info_dev + 9;
+  int ret = deepmd::copy_coord_gpu(
+      out_c_dev,
+      out_t_dev,
+      mapping_dev,
+      &nall,
+      int_data_dev,
+      in_c_dev,
+      in_t_dev,
+      nloc,
+      mem_size,
+      loc_cellnum,
+      total_cellnum,
+      cell_info_dev,
+      region_dev);
+  // put back the pointers region_dev held originally
+  region_dev.boxt = new_boxt;
+  region_dev.rec_boxt = new_rec_boxt;
+  deepmd::memcpy_device_to_host(out_c_dev, out_c);
+  deepmd::memcpy_device_to_host(out_t_dev, out_t);
+  deepmd::memcpy_device_to_host(mapping_dev, mapping);
+  deepmd::delete_device_memory(cell_info_dev);
+  deepmd::delete_device_memory(box_info_dev);
+  deepmd::delete_device_memory(in_c_dev);
+  deepmd::delete_device_memory(in_t_dev);
+  deepmd::delete_device_memory(out_c_dev);
+  deepmd::delete_device_memory(out_t_dev);
+  deepmd::delete_device_memory(mapping_dev);
+  deepmd::delete_device_memory(int_data_dev);
+  EXPECT_EQ(ret, 0);
+  EXPECT_EQ(nall, expected_nall);
+  out_c.resize(nall*3);
+  out_t.resize(nall);
+  mapping.resize(nall);
+
+  std::vector<double> out_c_1(mem_size * 3);
+  std::vector<int> out_t_1(mem_size);
+  std::vector<int> mapping_1(mem_size);
+  sort_atoms(out_c_1, out_t_1, mapping_1, out_c, out_t, mapping, nloc, nall);
+  for(int ii = 0; ii < expected_nall; ++ii){
+    for(int dd = 0; dd < 3; ++dd){
+      EXPECT_LT(fabs(out_c_1[ii*3+dd] - expected_posi_cpy[ii*3+dd]), 1e-12);
+    }
+    EXPECT_EQ(out_t_1[ii], expected_atype_cpy[ii]);
+    EXPECT_EQ(mapping_1[ii], expected_mapping[ii]);
+  }
+}
+
+// GPU counterpart of cpu_lessmem: with room for only 40 atoms copy_coord_gpu
+// must return 1.
+TEST_F(TestCopyCoordMoreCell, gpu_lessmem)
+{
+  int mem_size = 40;
+  std::vector<double> out_c(mem_size * 3);
+  std::vector<int> out_t(mem_size);
+  std::vector<int> mapping(mem_size);
+  int nall;
+  std::vector<int> cell_info;
+  cell_info.resize(23);
+  deepmd::Region<double> region;
+  deepmd::Region<double> region_dev;
+  double * new_boxt = region_dev.boxt;
+  double * new_rec_boxt = region_dev.rec_boxt;
+  init_region_cpu(region, &boxt[0]);
+  deepmd::compute_cell_info(&cell_info[0], rc, region);
+  std::vector<double> box_info;
+  box_info.resize(18);
+  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
+  const int loc_cellnum=cell_info[21];
+  const int total_cellnum=cell_info[22];
+  int * cell_info_dev=nullptr;
+  double * box_info_dev=nullptr;
+  double * out_c_dev=nullptr, * in_c_dev=nullptr;
+  int * out_t_dev=nullptr, * in_t_dev=nullptr, * mapping_dev=nullptr, * int_data_dev=nullptr;
+  deepmd::malloc_device_memory_sync(cell_info_dev, cell_info);
+  deepmd::malloc_device_memory_sync(box_info_dev, box_info);
+  deepmd::malloc_device_memory_sync(in_c_dev, posi);
+  deepmd::malloc_device_memory_sync(in_t_dev, atype);
+  deepmd::malloc_device_memory(out_c_dev, mem_size * 3);
+  deepmd::malloc_device_memory(out_t_dev, mem_size);
+  deepmd::malloc_device_memory(mapping_dev, mem_size);
+  deepmd::malloc_device_memory(int_data_dev, nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
+  region_dev.boxt = box_info_dev;
+  region_dev.rec_boxt = box_info_dev + 9;
+  int ret = deepmd::copy_coord_gpu(
+      out_c_dev,
+      out_t_dev,
+      mapping_dev,
+      &nall,
+      int_data_dev,
+      in_c_dev,
+      in_t_dev,
+      nloc,
+      mem_size,
+      loc_cellnum,
+      total_cellnum,
+      cell_info_dev,
+      region_dev);
+  region_dev.boxt = new_boxt;
+  region_dev.rec_boxt = new_rec_boxt;
+  deepmd::memcpy_device_to_host(out_c_dev, out_c);
+  deepmd::memcpy_device_to_host(out_t_dev, out_t);
+  deepmd::memcpy_device_to_host(mapping_dev, mapping);
+  deepmd::delete_device_memory(cell_info_dev);
+  deepmd::delete_device_memory(box_info_dev);
+  deepmd::delete_device_memory(in_c_dev);
+  deepmd::delete_device_memory(in_t_dev);
+  deepmd::delete_device_memory(out_c_dev);
+  deepmd::delete_device_memory(out_t_dev);
+  deepmd::delete_device_memory(mapping_dev);
+  deepmd::delete_device_memory(int_data_dev);
+  EXPECT_EQ(ret, 1);
+  // EXPECT_EQ(nall, expected_nall);
+  // std::cout << "---------------------"
+  //           << nloc << " "
+  //           << nall << std::endl;
+}
+#endif //GOOGLE_CUDA