diff --git a/source/lib/tests/test_coord.cc b/source/lib/tests/test_coord.cc index 581301b6a7..af320ca3f7 100644 --- a/source/lib/tests/test_coord.cc +++ b/source/lib/tests/test_coord.cc @@ -59,7 +59,7 @@ TEST_F(TestNormCoord, cpu_case2) { } } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestNormCoord, gpu_case0) { deepmd::Region region; deepmd::Region region_dev; @@ -144,94 +144,7 @@ TEST_F(TestNormCoord, gpu_case2) { } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestNormCoord, gpu_case0) { - deepmd::Region region; - deepmd::Region region_dev; - double* new_boxt = region_dev.boxt; - double* new_rec_boxt = region_dev.rec_boxt; - init_region_cpu(region, &boxt[0]); - std::vector box_info; - box_info.resize(18); - memcpy(&box_info[0], &boxt[0], sizeof(double) * 9); - memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9); - double* box_info_dev = NULL; - double* out_c_dev = NULL; - std::vector out_c(r0); - deepmd::malloc_device_memory_sync(box_info_dev, box_info); - deepmd::malloc_device_memory_sync(out_c_dev, out_c); - region_dev.boxt = box_info_dev; - region_dev.rec_boxt = box_info_dev + 9; - deepmd::normalize_coord_gpu(out_c_dev, natoms, region_dev); - region_dev.boxt = new_boxt; - region_dev.rec_boxt = new_rec_boxt; - deepmd::memcpy_device_to_host(out_c_dev, out_c); - deepmd::delete_device_memory(box_info_dev); - deepmd::delete_device_memory(out_c_dev); - for (int ii = 0; ii < posi.size(); ++ii) { - EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12); - } -} - -TEST_F(TestNormCoord, gpu_case1) { - deepmd::Region region; - deepmd::Region region_dev; - double* new_boxt = region_dev.boxt; - double* new_rec_boxt = region_dev.rec_boxt; - init_region_cpu(region, &boxt[0]); - std::vector box_info; - box_info.resize(18); - memcpy(&box_info[0], &boxt[0], sizeof(double) * 9); - memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9); - double* box_info_dev = NULL; - double* out_c_dev = NULL; - std::vector out_c(r1); - deepmd::malloc_device_memory_sync(box_info_dev, box_info); - deepmd::malloc_device_memory_sync(out_c_dev, out_c); - region_dev.boxt = box_info_dev; - region_dev.rec_boxt = box_info_dev + 9; - deepmd::normalize_coord_gpu(out_c_dev, natoms, region_dev); - region_dev.boxt = new_boxt; - region_dev.rec_boxt = new_rec_boxt; - deepmd::memcpy_device_to_host(out_c_dev, out_c); - deepmd::delete_device_memory(box_info_dev); - deepmd::delete_device_memory(out_c_dev); - for (int ii = 0; ii < posi.size(); ++ii) { - EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12); - } -} - -TEST_F(TestNormCoord, gpu_case2) { - deepmd::Region region; - deepmd::Region region_dev; - double* new_boxt = region_dev.boxt; - double* new_rec_boxt = region_dev.rec_boxt; - init_region_cpu(region, &boxt[0]); - std::vector box_info; - box_info.resize(18); - memcpy(&box_info[0], &boxt[0], sizeof(double) * 9); - memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9); - double* box_info_dev = NULL; - double* out_c_dev = NULL; - std::vector out_c(r2); - deepmd::malloc_device_memory_sync(box_info_dev, box_info); - deepmd::malloc_device_memory_sync(out_c_dev, out_c); - region_dev.boxt = box_info_dev; - region_dev.rec_boxt = box_info_dev + 9; - deepmd::normalize_coord_gpu(out_c_dev, natoms, region_dev); - region_dev.boxt = new_boxt; - region_dev.rec_boxt = new_rec_boxt; - deepmd::memcpy_device_to_host(out_c_dev, out_c); - deepmd::delete_device_memory(box_info_dev); - deepmd::delete_device_memory(out_c_dev); - for (int ii = 0; ii < posi.size(); ++ii) { - EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12); - } -} - -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM typedef std::pair, std::vector> atom; @@ -375,7 +288,7 @@ TEST_F(TestCopyCoord, cpu_lessmem) { // << nall << std::endl; } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestCopyCoord, gpu) { int mem_size = 1000; std::vector out_c(mem_size * 3); @@ -514,145 +427,6 @@ TEST_F(TestCopyCoord, gpu_lessmem) { } #endif // GOOGLE_CUDA -#if TENSORFLOW_USE_ROCM -TEST_F(TestCopyCoord, gpu) { - int mem_size = 1000; - std::vector out_c(mem_size * 3); - std::vector out_t(mem_size); - std::vector mapping(mem_size); - int nall; - std::vector cell_info; - cell_info.resize(23); - deepmd::Region region; - deepmd::Region region_dev; - double* new_boxt = region_dev.boxt; - double* new_rec_boxt = region_dev.rec_boxt; - init_region_cpu(region, &boxt[0]); - deepmd::compute_cell_info(&cell_info[0], rc, region); - std::vector box_info; - box_info.resize(18); - memcpy(&box_info[0], &boxt[0], sizeof(double) * 9); - memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9); - const int loc_cellnum = cell_info[21]; - const int total_cellnum = cell_info[22]; - int* cell_info_dev = NULL; - double* box_info_dev = NULL; - double *out_c_dev = NULL, *in_c_dev = NULL; - int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL, - *int_data_dev = NULL; - deepmd::malloc_device_memory_sync(cell_info_dev, cell_info); - deepmd::malloc_device_memory_sync(box_info_dev, box_info); - deepmd::malloc_device_memory_sync(in_c_dev, posi); - deepmd::malloc_device_memory_sync(in_t_dev, atype); - deepmd::malloc_device_memory(out_c_dev, mem_size * 3); - deepmd::malloc_device_memory(out_t_dev, mem_size); - deepmd::malloc_device_memory(mapping_dev, mem_size); - deepmd::malloc_device_memory( - int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 + - total_cellnum * 3 + loc_cellnum + 1 + total_cellnum + - 1 + nloc); - region_dev.boxt = box_info_dev; - region_dev.rec_boxt = box_info_dev + 9; - int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall, - int_data_dev, in_c_dev, in_t_dev, nloc, - mem_size, loc_cellnum, total_cellnum, - cell_info_dev, region_dev); - region_dev.boxt = new_boxt; - region_dev.rec_boxt = new_rec_boxt; - deepmd::memcpy_device_to_host(out_c_dev, out_c); - deepmd::memcpy_device_to_host(out_t_dev, out_t); - deepmd::memcpy_device_to_host(mapping_dev, mapping); - deepmd::delete_device_memory(cell_info_dev); - deepmd::delete_device_memory(box_info_dev); - deepmd::delete_device_memory(in_c_dev); - deepmd::delete_device_memory(in_t_dev); - deepmd::delete_device_memory(out_c_dev); - deepmd::delete_device_memory(out_t_dev); - deepmd::delete_device_memory(mapping_dev); - deepmd::delete_device_memory(int_data_dev); - EXPECT_EQ(ret, 0); - EXPECT_EQ(nall, expected_nall); - out_c.resize(nall * 3); - out_t.resize(nall); - mapping.resize(nall); - - std::vector out_c_1(mem_size * 3); - std::vector out_t_1(mem_size); - std::vector mapping_1(mem_size); - sort_atoms(out_c_1, out_t_1, mapping_1, out_c, out_t, mapping, nloc, nall); - for (int ii = 0; ii < expected_nall; ++ii) { - for (int dd = 0; dd < 3; ++dd) { - EXPECT_LT(fabs(out_c_1[ii * 3 + dd] - expected_posi_cpy[ii * 3 + dd]), - 1e-12); - } - EXPECT_EQ(out_t_1[ii], expected_atype_cpy[ii]); - EXPECT_EQ(mapping_1[ii], expected_mapping[ii]); - } -} - -TEST_F(TestCopyCoord, gpu_lessmem) { - int mem_size = 40; - std::vector out_c(mem_size * 3); - std::vector out_t(mem_size); - std::vector mapping(mem_size); - int nall; - std::vector cell_info; - cell_info.resize(23); - deepmd::Region region; - deepmd::Region region_dev; - double* new_boxt = region_dev.boxt; - double* new_rec_boxt = region_dev.rec_boxt; - init_region_cpu(region, &boxt[0]); - deepmd::compute_cell_info(&cell_info[0], rc, region); - std::vector box_info; - box_info.resize(18); - memcpy(&box_info[0], &boxt[0], sizeof(double) * 9); - memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9); - const int loc_cellnum = cell_info[21]; - const int total_cellnum = cell_info[22]; - int* cell_info_dev = NULL; - double* box_info_dev = NULL; - double *out_c_dev = NULL, *in_c_dev = NULL; - int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL, - *int_data_dev = NULL; - deepmd::malloc_device_memory_sync(cell_info_dev, cell_info); - deepmd::malloc_device_memory_sync(box_info_dev, box_info); - deepmd::malloc_device_memory_sync(in_c_dev, posi); - deepmd::malloc_device_memory_sync(in_t_dev, atype); - deepmd::malloc_device_memory(out_c_dev, mem_size * 3); - deepmd::malloc_device_memory(out_t_dev, mem_size); - deepmd::malloc_device_memory(mapping_dev, mem_size); - deepmd::malloc_device_memory( - int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 + - total_cellnum * 3 + loc_cellnum + 1 + total_cellnum + - 1 + nloc); - region_dev.boxt = box_info_dev; - region_dev.rec_boxt = box_info_dev + 9; - int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall, - int_data_dev, in_c_dev, in_t_dev, nloc, - mem_size, loc_cellnum, total_cellnum, - cell_info_dev, region_dev); - region_dev.boxt = new_boxt; - region_dev.rec_boxt = new_rec_boxt; - deepmd::memcpy_device_to_host(out_c_dev, out_c); - deepmd::memcpy_device_to_host(out_t_dev, out_t); - deepmd::memcpy_device_to_host(mapping_dev, mapping); - deepmd::delete_device_memory(cell_info_dev); - deepmd::delete_device_memory(box_info_dev); - deepmd::delete_device_memory(in_c_dev); - deepmd::delete_device_memory(in_t_dev); - deepmd::delete_device_memory(out_c_dev); - deepmd::delete_device_memory(out_t_dev); - deepmd::delete_device_memory(mapping_dev); - deepmd::delete_device_memory(int_data_dev); - EXPECT_EQ(ret, 1); - // EXPECT_EQ(nall, expected_nall); - // std::cout << "---------------------" - // << nloc << " " - // << nall << std::endl; -} -#endif // TENSORFLOW_USE_ROCM - class TestCopyCoordMoreCell : public ::testing::Test { protected: std::vector posi = {0.041, 0.072, 0.100, 4.053, 0.041, 0.068, @@ -760,146 +534,7 @@ TEST_F(TestCopyCoordMoreCell, cpu_lessmem) { // << nall << std::endl; } -#if GOOGLE_CUDA -TEST_F(TestCopyCoordMoreCell, gpu) { - int mem_size = 1000; - std::vector out_c(mem_size * 3); - std::vector out_t(mem_size); - std::vector mapping(mem_size); - int nall; - std::vector cell_info; - cell_info.resize(23); - deepmd::Region region; - deepmd::Region region_dev; - double* new_boxt = region_dev.boxt; - double* new_rec_boxt = region_dev.rec_boxt; - init_region_cpu(region, &boxt[0]); - deepmd::compute_cell_info(&cell_info[0], rc, region); - std::vector box_info; - box_info.resize(18); - memcpy(&box_info[0], &boxt[0], sizeof(double) * 9); - memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9); - const int loc_cellnum = cell_info[21]; - const int total_cellnum = cell_info[22]; - int* cell_info_dev = NULL; - double* box_info_dev = NULL; - double *out_c_dev = NULL, *in_c_dev = NULL; - int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL, - *int_data_dev = NULL; - deepmd::malloc_device_memory_sync(cell_info_dev, cell_info); - deepmd::malloc_device_memory_sync(box_info_dev, box_info); - deepmd::malloc_device_memory_sync(in_c_dev, posi); - deepmd::malloc_device_memory_sync(in_t_dev, atype); - deepmd::malloc_device_memory(out_c_dev, mem_size * 3); - deepmd::malloc_device_memory(out_t_dev, mem_size); - deepmd::malloc_device_memory(mapping_dev, mem_size); - deepmd::malloc_device_memory( - int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 + - total_cellnum * 3 + loc_cellnum + 1 + total_cellnum + - 1 + nloc); - region_dev.boxt = box_info_dev; - region_dev.rec_boxt = box_info_dev + 9; - int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall, - int_data_dev, in_c_dev, in_t_dev, nloc, - mem_size, loc_cellnum, total_cellnum, - cell_info_dev, region_dev); - region_dev.boxt = new_boxt; - region_dev.rec_boxt = new_rec_boxt; - deepmd::memcpy_device_to_host(out_c_dev, out_c); - deepmd::memcpy_device_to_host(out_t_dev, out_t); - deepmd::memcpy_device_to_host(mapping_dev, mapping); - deepmd::delete_device_memory(cell_info_dev); - deepmd::delete_device_memory(box_info_dev); - deepmd::delete_device_memory(in_c_dev); - deepmd::delete_device_memory(in_t_dev); - deepmd::delete_device_memory(out_c_dev); - deepmd::delete_device_memory(out_t_dev); - deepmd::delete_device_memory(mapping_dev); - deepmd::delete_device_memory(int_data_dev); - EXPECT_EQ(ret, 0); - EXPECT_EQ(nall, expected_nall); - out_c.resize(nall * 3); - out_t.resize(nall); - mapping.resize(nall); - - std::vector out_c_1(mem_size * 3); - std::vector out_t_1(mem_size); - std::vector mapping_1(mem_size); - sort_atoms(out_c_1, out_t_1, mapping_1, out_c, out_t, mapping, nloc, nall); - for (int ii = 0; ii < expected_nall; ++ii) { - for (int dd = 0; dd < 3; ++dd) { - EXPECT_LT(fabs(out_c_1[ii * 3 + dd] - expected_posi_cpy[ii * 3 + dd]), - 1e-12); - } - EXPECT_EQ(out_t_1[ii], expected_atype_cpy[ii]); - EXPECT_EQ(mapping_1[ii], expected_mapping[ii]); - } -} - -TEST_F(TestCopyCoordMoreCell, gpu_lessmem) { - int mem_size = 40; - std::vector out_c(mem_size * 3); - std::vector out_t(mem_size); - std::vector mapping(mem_size); - int nall; - std::vector cell_info; - cell_info.resize(23); - deepmd::Region region; - deepmd::Region region_dev; - double* new_boxt = region_dev.boxt; - double* new_rec_boxt = region_dev.rec_boxt; - init_region_cpu(region, &boxt[0]); - deepmd::compute_cell_info(&cell_info[0], rc, region); - std::vector box_info; - box_info.resize(18); - memcpy(&box_info[0], &boxt[0], sizeof(double) * 9); - memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9); - const int loc_cellnum = cell_info[21]; - const int total_cellnum = cell_info[22]; - int* cell_info_dev = NULL; - double* box_info_dev = NULL; - double *out_c_dev = NULL, *in_c_dev = NULL; - int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL, - *int_data_dev = NULL; - deepmd::malloc_device_memory_sync(cell_info_dev, cell_info); - deepmd::malloc_device_memory_sync(box_info_dev, box_info); - deepmd::malloc_device_memory_sync(in_c_dev, posi); - deepmd::malloc_device_memory_sync(in_t_dev, atype); - deepmd::malloc_device_memory(out_c_dev, mem_size * 3); - deepmd::malloc_device_memory(out_t_dev, mem_size); - deepmd::malloc_device_memory(mapping_dev, mem_size); - deepmd::malloc_device_memory( - int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 + - total_cellnum * 3 + loc_cellnum + 1 + total_cellnum + - 1 + nloc); - region_dev.boxt = box_info_dev; - region_dev.rec_boxt = box_info_dev + 9; - int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall, - int_data_dev, in_c_dev, in_t_dev, nloc, - mem_size, loc_cellnum, total_cellnum, - cell_info_dev, region_dev); - region_dev.boxt = new_boxt; - region_dev.rec_boxt = new_rec_boxt; - deepmd::memcpy_device_to_host(out_c_dev, out_c); - deepmd::memcpy_device_to_host(out_t_dev, out_t); - deepmd::memcpy_device_to_host(mapping_dev, mapping); - deepmd::delete_device_memory(cell_info_dev); - deepmd::delete_device_memory(box_info_dev); - deepmd::delete_device_memory(in_c_dev); - deepmd::delete_device_memory(in_t_dev); - deepmd::delete_device_memory(out_c_dev); - deepmd::delete_device_memory(out_t_dev); - deepmd::delete_device_memory(mapping_dev); - deepmd::delete_device_memory(int_data_dev); - EXPECT_EQ(ret, 1); - // EXPECT_EQ(nall, expected_nall); - // std::cout << "---------------------" - // << nloc << " " - // << nall << std::endl; -} -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestCopyCoordMoreCell, gpu) { int mem_size = 1000; std::vector out_c(mem_size * 3); @@ -1036,4 +671,4 @@ TEST_F(TestCopyCoordMoreCell, gpu_lessmem) { // << nloc << " " // << nall << std::endl; } -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_env_mat_a.cc b/source/lib/tests/test_env_mat_a.cc index 639f99414d..89756c9fc5 100644 --- a/source/lib/tests/test_env_mat_a.cc +++ b/source/lib/tests/test_env_mat_a.cc @@ -590,7 +590,7 @@ TEST_F(TestEnvMatA, prod_cpu_equal_cpu) { // } } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestEnvMatA, prod_gpu) { EXPECT_EQ(nlist_r_cpy.size(), nloc); int tot_nnei = 0; @@ -782,198 +782,4 @@ TEST_F(TestEnvMatA, prod_gpu_equal_cpu) { } } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestEnvMatA, prod_gpu) { - EXPECT_EQ(nlist_r_cpy.size(), nloc); - int tot_nnei = 0; - int max_nbor_size = 0; - for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) { - tot_nnei += nlist_a_cpy[ii].size(); - if (nlist_a_cpy[ii].size() > max_nbor_size) { - max_nbor_size = nlist_a_cpy[ii].size(); - } - } - assert(max_nbor_size <= GPU_MAX_NBOR_SIZE); - if (max_nbor_size <= 1024) { - max_nbor_size = 1024; - } else if (max_nbor_size <= 2048) { - max_nbor_size = 2048; - } else { - max_nbor_size = 4096; - } - std::vector ilist(nloc), numneigh(nloc); - std::vector firstneigh(nloc); - deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), - gpu_inlist; - convert_nlist(inlist, nlist_a_cpy); - std::vector em(nloc * ndescrpt, 0.0), - em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0); - std::vector nlist(nloc * nnei, 0); - std::vector avg(ntypes * ndescrpt, 0); - std::vector std(ntypes * ndescrpt, 1); - - double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL; - double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL; - int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL, - *memory_dev = NULL; - uint_64 *array_longlong_dev = NULL; - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv); - deepmd::malloc_device_memory_sync(rij_dev, rij); - deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy); - deepmd::malloc_device_memory_sync(avg_dev, avg); - deepmd::malloc_device_memory_sync(std_dev, std); - deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy); - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory(array_int_dev, - sec_a.size() + nloc * sec_a.size() + nloc); - deepmd::malloc_device_memory(array_longlong_dev, - nloc * GPU_MAX_NBOR_SIZE * 2); - deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size); - deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, - max_nbor_size); - - deepmd::prod_env_mat_a_gpu(em_dev, em_deriv_dev, rij_dev, nlist_dev, - posi_cpy_dev, atype_cpy_dev, gpu_inlist, - array_int_dev, array_longlong_dev, max_nbor_size, - avg_dev, std_dev, nloc, nall, rc, rc_smth, sec_a); - deepmd::memcpy_device_to_host(em_dev, em); - deepmd::delete_device_memory(em_dev); - deepmd::delete_device_memory(em_deriv_dev); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(posi_cpy_dev); - deepmd::delete_device_memory(atype_cpy_dev); - deepmd::delete_device_memory(array_int_dev); - deepmd::delete_device_memory(array_longlong_dev); - deepmd::delete_device_memory(avg_dev); - deepmd::delete_device_memory(std_dev); - deepmd::delete_device_memory(memory_dev); - deepmd::free_nlist_gpu_device(gpu_inlist); - - for (int ii = 0; ii < nloc; ++ii) { - for (int jj = 0; jj < nnei; ++jj) { - for (int dd = 0; dd < 4; ++dd) { - EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] - - expected_env[ii * nnei * 4 + jj * 4 + dd]), - 1e-5); - } - } - } -} - -TEST_F(TestEnvMatA, prod_gpu_equal_cpu) { - EXPECT_EQ(nlist_r_cpy.size(), nloc); - int tot_nnei = 0; - int max_nbor_size = 0; - for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) { - tot_nnei += nlist_a_cpy[ii].size(); - if (nlist_a_cpy[ii].size() > max_nbor_size) { - max_nbor_size = nlist_a_cpy[ii].size(); - } - } - assert(max_nbor_size <= GPU_MAX_NBOR_SIZE); - if (max_nbor_size <= 1024) { - max_nbor_size = 1024; - } else if (max_nbor_size <= 2048) { - max_nbor_size = 2048; - } else { - max_nbor_size = 4096; - } - std::vector ilist(nloc), numneigh(nloc); - std::vector firstneigh(nloc); - deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), - gpu_inlist; - convert_nlist(inlist, nlist_a_cpy); - std::vector em(nloc * ndescrpt, 0.0), - em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0); - std::vector nlist(nloc * nnei, 0); - std::vector avg(ntypes * ndescrpt, 0); - std::vector std(ntypes * ndescrpt, 1); - - double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL; - double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL; - int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL, - *memory_dev = NULL; - uint_64 *array_longlong_dev = NULL; - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv); - deepmd::malloc_device_memory_sync(rij_dev, rij); - deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy); - deepmd::malloc_device_memory_sync(avg_dev, avg); - deepmd::malloc_device_memory_sync(std_dev, std); - - deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy); - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory(array_int_dev, - sec_a.size() + nloc * sec_a.size() + nloc); - deepmd::malloc_device_memory(array_longlong_dev, - nloc * GPU_MAX_NBOR_SIZE * 2); - deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size); - deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, - max_nbor_size); - - deepmd::prod_env_mat_a_gpu(em_dev, em_deriv_dev, rij_dev, nlist_dev, - posi_cpy_dev, atype_cpy_dev, gpu_inlist, - array_int_dev, array_longlong_dev, max_nbor_size, - avg_dev, std_dev, nloc, nall, rc, rc_smth, sec_a); - deepmd::memcpy_device_to_host(em_dev, em); - deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv); - deepmd::memcpy_device_to_host(rij_dev, rij); - deepmd::memcpy_device_to_host(nlist_dev, nlist); - deepmd::delete_device_memory(em_dev); - deepmd::delete_device_memory(em_deriv_dev); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(posi_cpy_dev); - deepmd::delete_device_memory(atype_cpy_dev); - deepmd::delete_device_memory(array_int_dev); - deepmd::delete_device_memory(array_longlong_dev); - deepmd::delete_device_memory(avg_dev); - deepmd::delete_device_memory(std_dev); - deepmd::delete_device_memory(memory_dev); - deepmd::free_nlist_gpu_device(gpu_inlist); - - std::vector fmt_nlist_a_1, fmt_nlist_r_1; - std::vector env_1, env_deriv_1, rij_a_1; - for (int ii = 0; ii < nloc; ++ii) { - int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, - ii, nlist_a_cpy[ii], rc, sec_a); - EXPECT_EQ(ret_1, -1); - deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, - atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, - rc); - EXPECT_EQ(env_1.size(), nnei * 4); - EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3); - EXPECT_EQ(rij_a_1.size(), nnei * 3); - EXPECT_EQ(fmt_nlist_a_1.size(), nnei); - EXPECT_EQ(env_1.size() * nloc, em.size()); - EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size()); - EXPECT_EQ(rij_a_1.size() * nloc, rij.size()); - EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size()); - for (unsigned jj = 0; jj < env_1.size(); ++jj) { - EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10); - } - for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) { - EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]), - 1e-10); - } - for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) { - EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10); - } - for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) { - EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]); - } - } - - for (int ii = 0; ii < nloc; ++ii) { - for (int jj = 0; jj < nnei; ++jj) { - for (int dd = 0; dd < 4; ++dd) { - EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] - - expected_env[ii * nnei * 4 + jj * 4 + dd]), - 1e-5); - } - } - } -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_env_mat_a_mix.cc b/source/lib/tests/test_env_mat_a_mix.cc index f415317929..909088d1e3 100644 --- a/source/lib/tests/test_env_mat_a_mix.cc +++ b/source/lib/tests/test_env_mat_a_mix.cc @@ -628,7 +628,7 @@ TEST_F(TestEnvMatAMix, prod_cpu_equal_cpu) { // } } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestEnvMatAMix, prod_gpu) { EXPECT_EQ(nlist_r_cpy.size(), nloc); int tot_nnei = 0; @@ -844,222 +844,4 @@ TEST_F(TestEnvMatAMix, prod_gpu_equal_cpu) { } } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestEnvMatAMix, prod_gpu) { - EXPECT_EQ(nlist_r_cpy.size(), nloc); - int tot_nnei = 0; - int max_nbor_size = 0; - for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) { - tot_nnei += nlist_a_cpy[ii].size(); - if (nlist_a_cpy[ii].size() > max_nbor_size) { - max_nbor_size = nlist_a_cpy[ii].size(); - } - } - assert(max_nbor_size <= GPU_MAX_NBOR_SIZE); - if (max_nbor_size <= 1024) { - max_nbor_size = 1024; - } else if (max_nbor_size <= 2048) { - max_nbor_size = 2048; - } else { - max_nbor_size = 4096; - } - std::vector ilist(nloc), numneigh(nloc); - std::vector firstneigh(nloc); - deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), - gpu_inlist; - convert_nlist(inlist, nlist_a_cpy); - std::vector em(nloc * ndescrpt, 0.0), - em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0); - std::vector nlist(nloc * nnei, 0); - std::vector ntype(nloc * nnei, 0); - bool *nmask = new bool[nloc * nnei]; - memset(nmask, 0, sizeof(bool) * nloc * nnei); - std::vector avg(ntypes * ndescrpt, 0); - std::vector std(ntypes * ndescrpt, 1); - - double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL, - *nmask_dev = NULL; - double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL; - int *f_atype_cpy_dev = NULL, *atype_dev = NULL, *nlist_dev = NULL, - *ntype_dev = NULL, *mapping_dev = NULL, *array_int_dev = NULL, - *memory_dev = NULL; - uint_64 *array_longlong_dev = NULL; - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv); - deepmd::malloc_device_memory_sync(rij_dev, rij); - deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy); - deepmd::malloc_device_memory_sync(avg_dev, avg); - deepmd::malloc_device_memory_sync(std_dev, std); - deepmd::malloc_device_memory_sync(f_atype_cpy_dev, f_atype_cpy); - deepmd::malloc_device_memory_sync(atype_dev, atype); - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory_sync(ntype_dev, ntype); - deepmd::malloc_device_memory_sync(mapping_dev, mapping); - deepmd::malloc_device_memory_sync(nmask_dev, nmask, nloc * nnei); - deepmd::malloc_device_memory(array_int_dev, - sec_a.size() + nloc * sec_a.size() + nloc); - deepmd::malloc_device_memory(array_longlong_dev, - nloc * GPU_MAX_NBOR_SIZE * 2); - deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size); - deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, - max_nbor_size); - - deepmd::prod_env_mat_a_gpu( - em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_dev, - gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev, - std_dev, nloc, nall, rc, rc_smth, sec_a, f_atype_cpy_dev); - - deepmd::use_nei_info_gpu(nlist_dev, ntype_dev, nmask_dev, atype_dev, - mapping_dev, nloc, nnei, ntypes, true); - deepmd::memcpy_device_to_host(em_dev, em); - deepmd::memcpy_device_to_host(ntype_dev, ntype); - deepmd::memcpy_device_to_host(nmask_dev, nmask, nloc * nnei); - deepmd::delete_device_memory(em_dev); - deepmd::delete_device_memory(em_deriv_dev); - deepmd::delete_device_memory(rij_dev); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(ntype_dev); - deepmd::delete_device_memory(nmask_dev); - deepmd::delete_device_memory(posi_cpy_dev); - deepmd::delete_device_memory(f_atype_cpy_dev); - deepmd::delete_device_memory(atype_dev); - deepmd::delete_device_memory(mapping_dev); - deepmd::delete_device_memory(array_int_dev); - deepmd::delete_device_memory(array_longlong_dev); - deepmd::delete_device_memory(avg_dev); - deepmd::delete_device_memory(std_dev); - deepmd::delete_device_memory(memory_dev); - deepmd::free_nlist_gpu_device(gpu_inlist); - - for (int ii = 0; ii < nloc; ++ii) { - for (int jj = 0; jj < nnei; ++jj) { - for (int dd = 0; dd < 4; ++dd) { - EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] - - expected_env[ii * nnei * 4 + jj * 4 + dd]), - 1e-5); - } - EXPECT_EQ(ntype[ii * nnei + jj], expected_ntype[ii * nnei + jj]); - EXPECT_EQ(nmask[ii * nnei + jj], expected_nmask[ii * nnei + jj]); - } - } - delete[] nmask; -} - -TEST_F(TestEnvMatAMix, prod_gpu_equal_cpu) { - EXPECT_EQ(nlist_r_cpy.size(), nloc); - int tot_nnei = 0; - int max_nbor_size = 0; - for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) { - tot_nnei += nlist_a_cpy[ii].size(); - if (nlist_a_cpy[ii].size() > max_nbor_size) { - max_nbor_size = nlist_a_cpy[ii].size(); - } - } - assert(max_nbor_size <= GPU_MAX_NBOR_SIZE); - if (max_nbor_size <= 1024) { - max_nbor_size = 1024; - } else if (max_nbor_size <= 2048) { - max_nbor_size = 2048; - } else { - max_nbor_size = 4096; - } - std::vector ilist(nloc), numneigh(nloc); - std::vector firstneigh(nloc); - deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), - gpu_inlist; - convert_nlist(inlist, nlist_a_cpy); - std::vector em(nloc * ndescrpt, 0.0), - em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0); - std::vector nlist(nloc * nnei, 0); - std::vector avg(ntypes * ndescrpt, 0); - std::vector std(ntypes * ndescrpt, 1); - - double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL; - double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL; - int *f_atype_cpy_dev = NULL, *atype_dev = NULL, *nlist_dev = NULL, - *array_int_dev = NULL, *memory_dev = NULL; - uint_64 *array_longlong_dev = NULL; - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv); - deepmd::malloc_device_memory_sync(rij_dev, rij); - deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy); - deepmd::malloc_device_memory_sync(avg_dev, avg); - deepmd::malloc_device_memory_sync(std_dev, std); - - deepmd::malloc_device_memory_sync(f_atype_cpy_dev, f_atype_cpy); - deepmd::malloc_device_memory_sync(atype_dev, atype); - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory(array_int_dev, - sec_a.size() + nloc * sec_a.size() + nloc); - deepmd::malloc_device_memory(array_longlong_dev, - nloc * GPU_MAX_NBOR_SIZE * 2); - deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size); - deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, - max_nbor_size); - - deepmd::prod_env_mat_a_gpu( - em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_dev, - gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev, - std_dev, nloc, nall, rc, rc_smth, sec_a, f_atype_cpy_dev); - deepmd::memcpy_device_to_host(em_dev, em); - deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv); - deepmd::memcpy_device_to_host(rij_dev, rij); - deepmd::memcpy_device_to_host(nlist_dev, nlist); - deepmd::delete_device_memory(em_dev); - deepmd::delete_device_memory(em_deriv_dev); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(posi_cpy_dev); - deepmd::delete_device_memory(f_atype_cpy_dev); - deepmd::delete_device_memory(atype_dev); - deepmd::delete_device_memory(array_int_dev); - deepmd::delete_device_memory(array_longlong_dev); - deepmd::delete_device_memory(avg_dev); - deepmd::delete_device_memory(std_dev); - deepmd::delete_device_memory(memory_dev); - deepmd::free_nlist_gpu_device(gpu_inlist); - - std::vector fmt_nlist_a_1, fmt_nlist_r_1; - std::vector env_1, env_deriv_1, rij_a_1; - for (int ii = 0; ii < nloc; ++ii) { - int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, f_atype_cpy, - ii, nlist_a_cpy[ii], rc, sec_a); - EXPECT_EQ(ret_1, -1); - deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, - f_atype_cpy, ii, fmt_nlist_a_1, sec_a, - rc_smth, rc); - EXPECT_EQ(env_1.size(), nnei * 4); - EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3); - EXPECT_EQ(rij_a_1.size(), nnei * 3); - EXPECT_EQ(fmt_nlist_a_1.size(), nnei); - EXPECT_EQ(env_1.size() * nloc, em.size()); - EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size()); - EXPECT_EQ(rij_a_1.size() * nloc, rij.size()); - EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size()); - for (unsigned jj = 0; jj < env_1.size(); ++jj) { - EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10); - } - for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) { - EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]), - 1e-10); - } - for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) { - EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10); - } - for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) { - EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]); - } - } - - for (int ii = 0; ii < nloc; ++ii) { - for (int jj = 0; jj < nnei; ++jj) { - for (int dd = 0; dd < 4; ++dd) { - EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] - - expected_env[ii * nnei * 4 + jj * 4 + dd]), - 1e-5); - } - } - } -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_env_mat_r.cc b/source/lib/tests/test_env_mat_r.cc index f20a8cbbc3..3024e651d9 100644 --- a/source/lib/tests/test_env_mat_r.cc +++ b/source/lib/tests/test_env_mat_r.cc @@ -358,7 +358,7 @@ TEST_F(TestEnvMatR, prod_cpu_equal_cpu) { } } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestEnvMatR, prod_gpu) { EXPECT_EQ(nlist_r_cpy.size(), nloc); int tot_nnei = 0; @@ -541,189 +541,4 @@ TEST_F(TestEnvMatR, prod_gpu_equal_cpu) { } } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestEnvMatR, prod_gpu) { - EXPECT_EQ(nlist_r_cpy.size(), nloc); - int tot_nnei = 0; - int max_nbor_size = 0; - for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) { - tot_nnei += nlist_a_cpy[ii].size(); - if (nlist_a_cpy[ii].size() > max_nbor_size) { - max_nbor_size = nlist_a_cpy[ii].size(); - } - } - assert(max_nbor_size <= GPU_MAX_NBOR_SIZE); - if (max_nbor_size <= 1024) { - max_nbor_size = 1024; - } else if (max_nbor_size <= 2048) { - max_nbor_size = 2048; - } else { - max_nbor_size = 4096; - } - std::vector ilist(nloc), numneigh(nloc); - std::vector firstneigh(nloc); - deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), - gpu_inlist; - convert_nlist(inlist, nlist_a_cpy); - std::vector em(nloc * ndescrpt, 0.0), - em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0); - std::vector nlist(nloc * nnei, 0); - std::vector avg(ntypes * ndescrpt, 0); - std::vector std(ntypes * ndescrpt, 1); - - double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL; - double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL; - int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL, - *memory_dev = NULL; - uint_64 *array_longlong_dev = NULL; - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv); - deepmd::malloc_device_memory_sync(rij_dev, rij); - deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy); - deepmd::malloc_device_memory_sync(avg_dev, avg); - deepmd::malloc_device_memory_sync(std_dev, std); - - deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy); - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory(array_int_dev, - sec_a.size() + nloc * sec_a.size() + nloc); - deepmd::malloc_device_memory(array_longlong_dev, - nloc * GPU_MAX_NBOR_SIZE * 2); - deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size); - deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, - max_nbor_size); - - deepmd::prod_env_mat_r_gpu(em_dev, em_deriv_dev, rij_dev, nlist_dev, - posi_cpy_dev, atype_cpy_dev, gpu_inlist, - array_int_dev, array_longlong_dev, max_nbor_size, - avg_dev, std_dev, nloc, nall, rc, rc_smth, sec_a); - deepmd::memcpy_device_to_host(em_dev, em); - deepmd::delete_device_memory(em_dev); - deepmd::delete_device_memory(em_deriv_dev); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(posi_cpy_dev); - deepmd::delete_device_memory(atype_cpy_dev); - deepmd::delete_device_memory(array_int_dev); - deepmd::delete_device_memory(array_longlong_dev); - deepmd::delete_device_memory(avg_dev); - deepmd::delete_device_memory(std_dev); - deepmd::delete_device_memory(memory_dev); - deepmd::free_nlist_gpu_device(gpu_inlist); - - for (int ii = 0; ii < nloc; ++ii) { - for (int jj = 0; jj < nnei; ++jj) { - for (int dd = 0; dd < 1; ++dd) { - EXPECT_LT(fabs(em[ii * nnei * 1 + jj * 1 + dd] - - expected_env[ii * nnei * 1 + jj * 1 + dd]), - 1e-5); - } - } - } -} - -TEST_F(TestEnvMatR, prod_gpu_equal_cpu) { - EXPECT_EQ(nlist_r_cpy.size(), nloc); - int tot_nnei = 0; - int max_nbor_size = 0; - for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) { - tot_nnei += nlist_a_cpy[ii].size(); - if (nlist_a_cpy[ii].size() > max_nbor_size) { - max_nbor_size = nlist_a_cpy[ii].size(); - } - } - assert(max_nbor_size <= GPU_MAX_NBOR_SIZE); - if (max_nbor_size <= 1024) { - max_nbor_size = 1024; - } else if (max_nbor_size <= 2048) { - max_nbor_size = 2048; - } else { - max_nbor_size = 4096; - } - std::vector ilist(nloc), numneigh(nloc); - std::vector firstneigh(nloc); - deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), - gpu_inlist; - convert_nlist(inlist, nlist_a_cpy); - std::vector em(nloc * ndescrpt, 0.0), - em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0); - std::vector nlist(nloc * nnei, 0); - std::vector avg(ntypes * ndescrpt, 0); - std::vector std(ntypes * ndescrpt, 1); - - double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL; - double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL; - int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL, - *memory_dev = NULL; - uint_64 *array_longlong_dev = NULL; - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv); - deepmd::malloc_device_memory_sync(rij_dev, rij); - deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy); - deepmd::malloc_device_memory_sync(avg_dev, avg); - deepmd::malloc_device_memory_sync(std_dev, std); - - deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy); - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory(array_int_dev, - sec_a.size() + nloc * sec_a.size() + nloc); - deepmd::malloc_device_memory(array_longlong_dev, - nloc * GPU_MAX_NBOR_SIZE * 2); - deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size); - deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, - max_nbor_size); - - deepmd::prod_env_mat_r_gpu(em_dev, em_deriv_dev, rij_dev, nlist_dev, - posi_cpy_dev, atype_cpy_dev, gpu_inlist, - array_int_dev, array_longlong_dev, max_nbor_size, - avg_dev, std_dev, nloc, nall, rc, rc_smth, sec_a); - deepmd::memcpy_device_to_host(em_dev, em); - deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv); - deepmd::memcpy_device_to_host(rij_dev, rij); - deepmd::memcpy_device_to_host(nlist_dev, nlist); - deepmd::delete_device_memory(em_dev); - deepmd::delete_device_memory(em_deriv_dev); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(posi_cpy_dev); - deepmd::delete_device_memory(atype_cpy_dev); - deepmd::delete_device_memory(array_int_dev); - deepmd::delete_device_memory(array_longlong_dev); - deepmd::delete_device_memory(avg_dev); - deepmd::delete_device_memory(std_dev); - deepmd::delete_device_memory(memory_dev); - deepmd::free_nlist_gpu_device(gpu_inlist); - - std::vector fmt_nlist_a_1, fmt_nlist_r_1; - std::vector env_1, env_deriv_1, rij_a_1; - for (int ii = 0; ii < nloc; ++ii) { - int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, - ii, nlist_a_cpy[ii], rc, sec_a); - EXPECT_EQ(ret_1, -1); - deepmd::env_mat_r_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, - atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, - rc); - EXPECT_EQ(env_1.size(), nnei * 1); - EXPECT_EQ(env_deriv_1.size(), nnei * 1 * 3); - EXPECT_EQ(rij_a_1.size(), nnei * 3); - EXPECT_EQ(fmt_nlist_a_1.size(), nnei); - EXPECT_EQ(env_1.size() * nloc, em.size()); - EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size()); - EXPECT_EQ(rij_a_1.size() * nloc, rij.size()); - EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size()); - for (unsigned jj = 0; jj < env_1.size(); ++jj) { - EXPECT_LT(fabs(em[ii * nnei * 1 + jj] - env_1[jj]), 1e-10); - } - for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) { - EXPECT_LT(fabs(em_deriv[ii * nnei * 1 * 3 + jj] - env_deriv_1[jj]), - 1e-10); - } - for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) { - EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10); - } - for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) { - EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]); - } - } -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_fmt_nlist.cc b/source/lib/tests/test_fmt_nlist.cc index 1d995f8fce..bc79c92ea6 100644 --- a/source/lib/tests/test_fmt_nlist.cc +++ b/source/lib/tests/test_fmt_nlist.cc @@ -313,7 +313,7 @@ TEST_F(TestFormatNlistShortSel, cpu) { } } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestFormatNlist, gpu) { std::vector> nlist_a_0, nlist_r_0; build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, @@ -501,163 +501,4 @@ TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu) { // deepmd::delete_device_memory(out_index_dev); // deepmd::delete_device_memory(key_dev); // } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestFormatNlist, gpu) { - std::vector> nlist_a_0, nlist_r_0; - build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, - ext_stt, ext_end, region, ncell); - // make a input nlist - int inum = nlist_a_0.size(); - std::vector ilist(inum); - std::vector numneigh(inum); - std::vector firstneigh(inum); - deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]), - gpu_inlist; - convert_nlist(in_nlist, nlist_a_0); - // allocate the mem for the result - std::vector nlist(inum * sec_a.back()); - EXPECT_EQ(nlist.size(), expect_nlist_cpy.size()); - - double* posi_cpy_dev = NULL; - int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL, - *memory_dev = NULL; - uint_64* array_longlong_dev = NULL; - for (int ii = 0; ii < inum; ii++) { - max_nbor_size = - max_nbor_size >= numneigh[ii] ? max_nbor_size : numneigh[ii]; - } - assert(max_nbor_size <= GPU_MAX_NBOR_SIZE); - if (max_nbor_size <= 1024) { - max_nbor_size = 1024; - } else if (max_nbor_size <= 2048) { - max_nbor_size = 2048; - } else { - max_nbor_size = 4096; - } - deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy); - deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy); - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory(array_int_dev, - sec_a.size() + nloc * sec_a.size() + nloc); - deepmd::malloc_device_memory(array_longlong_dev, - nloc * GPU_MAX_NBOR_SIZE * 2); - deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size); - deepmd::convert_nlist_gpu_device(gpu_inlist, in_nlist, memory_dev, - max_nbor_size); - // format nlist - format_nbor_list_gpu(nlist_dev, posi_cpy_dev, atype_cpy_dev, gpu_inlist, - array_int_dev, array_longlong_dev, max_nbor_size, nloc, - nall, rc, sec_a); - deepmd::memcpy_device_to_host(nlist_dev, nlist); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(posi_cpy_dev); - deepmd::delete_device_memory(atype_cpy_dev); - deepmd::delete_device_memory(array_int_dev); - deepmd::delete_device_memory(array_longlong_dev); - deepmd::delete_device_memory(memory_dev); - deepmd::free_nlist_gpu_device(gpu_inlist); - // validate - for (int ii = 0; ii < nlist.size(); ++ii) { - EXPECT_EQ(nlist[ii], expect_nlist_cpy[ii]); - } -} - -TEST_F(TestFormatNlistShortSel, gpu) { - std::vector> nlist_a_0, nlist_r_0; - build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, - ext_stt, ext_end, region, ncell); - // make a input nlist - int inum = nlist_a_0.size(); - std::vector ilist(inum); - std::vector numneigh(inum); - std::vector firstneigh(inum); - deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]), - gpu_inlist; - convert_nlist(in_nlist, nlist_a_0); - // mem - std::vector nlist(inum * sec_a.back()); - EXPECT_EQ(nlist.size(), expect_nlist_cpy.size()); - // format nlist - double* posi_cpy_dev = NULL; - int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL, - *memory_dev = NULL; - uint_64* array_longlong_dev = NULL; - for (int ii = 0; ii < inum; ii++) { - max_nbor_size = - max_nbor_size >= numneigh[ii] ? max_nbor_size : numneigh[ii]; - } - assert(max_nbor_size <= GPU_MAX_NBOR_SIZE); - if (max_nbor_size <= 1024) { - max_nbor_size = 1024; - } else if (max_nbor_size <= 2048) { - max_nbor_size = 2048; - } else { - max_nbor_size = 4096; - } - deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy); - deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy); - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory(array_int_dev, - sec_a.size() + nloc * sec_a.size() + nloc); - deepmd::malloc_device_memory(array_longlong_dev, - nloc * GPU_MAX_NBOR_SIZE * 2); - deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size); - deepmd::convert_nlist_gpu_device(gpu_inlist, in_nlist, memory_dev, - max_nbor_size); - // format nlist - format_nbor_list_gpu(nlist_dev, posi_cpy_dev, atype_cpy_dev, gpu_inlist, - array_int_dev, array_longlong_dev, max_nbor_size, nloc, - nall, rc, sec_a); - deepmd::memcpy_device_to_host(nlist_dev, nlist); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(posi_cpy_dev); - deepmd::delete_device_memory(atype_cpy_dev); - deepmd::delete_device_memory(array_int_dev); - deepmd::delete_device_memory(array_longlong_dev); - deepmd::delete_device_memory(memory_dev); - deepmd::free_nlist_gpu_device(gpu_inlist); - // validate - for (int ii = 0; ii < nlist.size(); ++ii) { - EXPECT_EQ(nlist[ii], expect_nlist_cpy[ii]); - } -} - -TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu) { - int *valid_type_dev = NULL, *valid_index_dev = NULL, *out_type_dev = NULL, - *out_index_dev = NULL; - double* valid_dist_dev = NULL; - uint_64* key_dev = NULL; - std::vector out_type(size_of_array, 0); - std::vector out_index(size_of_array, 0); - std::vector key(size_of_array, 0); - deepmd::malloc_device_memory_sync(valid_type_dev, valid_type); - deepmd::malloc_device_memory_sync(valid_dist_dev, valid_dist); - deepmd::malloc_device_memory_sync(valid_index_dev, valid_index); - deepmd::malloc_device_memory_sync(out_type_dev, out_type); - deepmd::malloc_device_memory_sync(out_index_dev, out_index); - deepmd::malloc_device_memory_sync(key_dev, key); - - deepmd::test_encoding_decoding_nbor_info_gpu( - key_dev, out_type_dev, out_index_dev, valid_type_dev, valid_dist_dev, - valid_index_dev, size_of_array); - - deepmd::memcpy_device_to_host(key_dev, key); - deepmd::memcpy_device_to_host(out_type_dev, out_type); - deepmd::memcpy_device_to_host(out_index_dev, out_index); - deepmd::delete_device_memory(valid_type_dev); - deepmd::delete_device_memory(valid_dist_dev); - deepmd::delete_device_memory(valid_index_dev); - deepmd::delete_device_memory(out_type_dev); - deepmd::delete_device_memory(out_index_dev); - deepmd::delete_device_memory(key_dev); - // validate - for (int ii = 0; ii < size_of_array; ii++) { - EXPECT_EQ(key[ii], expect_key[ii]); - EXPECT_EQ(out_type[ii], expect_type[ii]); - EXPECT_EQ(out_index[ii], expect_index[ii]); - } -} - -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_gelu.cc b/source/lib/tests/test_gelu.cc index e680567b9c..322625f1ac 100644 --- a/source/lib/tests/test_gelu.cc +++ b/source/lib/tests/test_gelu.cc @@ -145,7 +145,7 @@ TEST_F(TestGelu, gelu_grad_grad_cpu) { } } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestGelu, gelu_gpu) { std::vector gelu(nloc, 0.0); @@ -210,71 +210,4 @@ TEST_F(TestGelu, gelu_grad_grad_gpu) { EXPECT_LT(fabs(gelu_grad_grad[jj] - expected_gelu_grad_grad[jj]), 1e-5); } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestGelu, gelu_gpu) { - std::vector gelu(nloc, 0.0); - - double *gelu_dev = NULL, *xx_dev = NULL; - deepmd::malloc_device_memory_sync(gelu_dev, gelu); - deepmd::malloc_device_memory_sync(xx_dev, xx); - deepmd::gelu_gpu(gelu_dev, xx_dev, nloc); - deepmd::memcpy_device_to_host(gelu_dev, gelu); - deepmd::delete_device_memory(gelu_dev); - deepmd::delete_device_memory(xx_dev); - - EXPECT_EQ(gelu.size(), nloc); - EXPECT_EQ(gelu.size(), expected_gelu.size()); - for (int jj = 0; jj < gelu.size(); ++jj) { - EXPECT_LT(fabs(gelu[jj] - expected_gelu[jj]), 1e-5); - } -} - -TEST_F(TestGelu, gelu_grad_gpu) { - std::vector dy(100, 1.0); - std::vector gelu_grad(nloc, 0.0); - - double *gelu_grad_dev = NULL, *xx_dev = NULL, *dy_dev = NULL; - deepmd::malloc_device_memory_sync(gelu_grad_dev, gelu_grad); - deepmd::malloc_device_memory_sync(xx_dev, xx); - deepmd::malloc_device_memory_sync(dy_dev, dy); - deepmd::gelu_grad_gpu(gelu_grad_dev, xx_dev, dy_dev, nloc); - deepmd::memcpy_device_to_host(gelu_grad_dev, gelu_grad); - deepmd::delete_device_memory(gelu_grad_dev); - deepmd::delete_device_memory(xx_dev); - deepmd::delete_device_memory(dy_dev); - - EXPECT_EQ(gelu_grad.size(), nloc); - EXPECT_EQ(gelu_grad.size(), expected_gelu_grad.size()); - for (int jj = 0; jj < gelu_grad.size(); ++jj) { - EXPECT_LT(fabs(gelu_grad[jj] - expected_gelu_grad[jj]), 1e-5); - } -} - -TEST_F(TestGelu, gelu_grad_grad_gpu) { - std::vector dy(100, 1.0); - std::vector dy_2(100, 1.0); - std::vector gelu_grad_grad(nloc, 0.0); - - double *gelu_grad_grad_dev = NULL, *xx_dev = NULL, *dy_dev = NULL, - *dy_2_dev = NULL; - deepmd::malloc_device_memory_sync(gelu_grad_grad_dev, gelu_grad_grad); - deepmd::malloc_device_memory_sync(xx_dev, xx); - deepmd::malloc_device_memory_sync(dy_dev, dy); - deepmd::malloc_device_memory_sync(dy_2_dev, dy_2); - deepmd::gelu_grad_grad_gpu(gelu_grad_grad_dev, xx_dev, dy_dev, - dy_2_dev, nloc); - deepmd::memcpy_device_to_host(gelu_grad_grad_dev, gelu_grad_grad); - deepmd::delete_device_memory(gelu_grad_grad_dev); - deepmd::delete_device_memory(xx_dev); - deepmd::delete_device_memory(dy_dev); - deepmd::delete_device_memory(dy_2_dev); - - EXPECT_EQ(gelu_grad_grad.size(), nloc); - EXPECT_EQ(gelu_grad_grad.size(), expected_gelu_grad_grad.size()); - for (int jj = 0; jj < gelu_grad_grad.size(); ++jj) { - EXPECT_LT(fabs(gelu_grad_grad[jj] - expected_gelu_grad_grad[jj]), 1e-5); - } -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_neighbor_list.cc b/source/lib/tests/test_neighbor_list.cc index 985f69b3f4..b59fbd4691 100644 --- a/source/lib/tests/test_neighbor_list.cc +++ b/source/lib/tests/test_neighbor_list.cc @@ -135,7 +135,7 @@ TEST_F(TestNeighborList, cpu_lessmem) { delete[] firstneigh; } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestNeighborList, gpu) { int mem_size = 48; @@ -229,100 +229,4 @@ TEST_F(TestNeighborList, gpu_lessmem) { deepmd::delete_device_memory(c_cpy_dev); } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestNeighborList, gpu) { - int mem_size = 48; - - int *nlist_data_dev = NULL, *jlist_dev = NULL, *ilist_dev = NULL, - *numneigh_dev = NULL; - int** firstneigh_dev = NULL; - std::vector temp_firstneigh(nloc); - double* c_cpy_dev = NULL; - - deepmd::malloc_device_memory(nlist_data_dev, 2 * nloc * mem_size); - deepmd::malloc_device_memory(jlist_dev, nloc * mem_size); - deepmd::malloc_device_memory(ilist_dev, nloc); - deepmd::malloc_device_memory(numneigh_dev, nloc); - for (int ii = 0; ii < nloc; ++ii) { - temp_firstneigh[ii] = jlist_dev + ii * mem_size; - } - deepmd::malloc_device_memory_sync(firstneigh_dev, temp_firstneigh); - deepmd::malloc_device_memory_sync(c_cpy_dev, posi_cpy); - deepmd::InputNlist nlist_dev(nloc, ilist_dev, numneigh_dev, firstneigh_dev); - - int max_list_size; - int ret = deepmd::build_nlist_gpu(nlist_dev, &max_list_size, nlist_data_dev, - c_cpy_dev, nloc, nall, mem_size, rc); - - EXPECT_EQ(ret, 0); - int* ilist = new int[nloc]; - int* numneigh = new int[nloc]; - int** firstneigh = new int*[nloc]; - int* jlist = new int[nloc * mem_size]; - deepmd::memcpy_device_to_host(jlist_dev, jlist, nloc * mem_size); - deepmd::memcpy_device_to_host(ilist_dev, ilist, nloc); - deepmd::memcpy_device_to_host(numneigh_dev, numneigh, nloc); - for (int ii = 0; ii < nloc; ++ii) { - firstneigh[ii] = jlist + ii * mem_size; - } - - deepmd::InputNlist nlist(nlist_dev.inum, ilist, numneigh, firstneigh); - EXPECT_EQ(nlist.inum, nloc); - EXPECT_EQ(max_list_size, 5); - for (int ii = 0; ii < nloc; ++ii) { - EXPECT_EQ(nlist.ilist[ii], ii); - EXPECT_EQ(nlist.numneigh[ii], expect_nlist_cpy[ii].size()); - std::sort(nlist.firstneigh[ii], nlist.firstneigh[ii] + nlist.numneigh[ii]); - for (int jj = 0; jj < nlist.numneigh[ii]; ++jj) { - EXPECT_EQ(nlist.firstneigh[ii][jj], expect_nlist_cpy[ii][jj]); - } - } - - delete[] ilist; - delete[] numneigh; - delete[] jlist; - delete[] firstneigh; - deepmd::delete_device_memory(nlist_data_dev); - deepmd::delete_device_memory(jlist_dev); - deepmd::delete_device_memory(ilist_dev); - deepmd::delete_device_memory(numneigh_dev); - deepmd::delete_device_memory(firstneigh_dev); - deepmd::delete_device_memory(c_cpy_dev); -} - -TEST_F(TestNeighborList, gpu_lessmem) { - int mem_size = 47; - - int *nlist_data_dev = NULL, *jlist_dev = NULL, *ilist_dev = NULL, - *numneigh_dev = NULL; - int** firstneigh_dev = NULL; - std::vector temp_firstneigh(nloc); - double* c_cpy_dev = NULL; - - deepmd::malloc_device_memory(nlist_data_dev, 2 * nloc * mem_size); - deepmd::malloc_device_memory(jlist_dev, nloc * mem_size); - deepmd::malloc_device_memory(ilist_dev, nloc); - deepmd::malloc_device_memory(numneigh_dev, nloc); - for (int ii = 0; ii < nloc; ++ii) { - temp_firstneigh[ii] = jlist_dev + ii * mem_size; - } - deepmd::malloc_device_memory_sync(firstneigh_dev, temp_firstneigh); - deepmd::malloc_device_memory_sync(c_cpy_dev, posi_cpy); - deepmd::InputNlist nlist_dev(nloc, ilist_dev, numneigh_dev, firstneigh_dev); - - int max_list_size; - int ret = deepmd::build_nlist_gpu(nlist_dev, &max_list_size, nlist_data_dev, - c_cpy_dev, nloc, nall, mem_size, rc); - - EXPECT_EQ(ret, 1); - deepmd::delete_device_memory(nlist_data_dev); - deepmd::delete_device_memory(jlist_dev); - deepmd::delete_device_memory(ilist_dev); - deepmd::delete_device_memory(numneigh_dev); - deepmd::delete_device_memory(firstneigh_dev); - deepmd::delete_device_memory(c_cpy_dev); -} - -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_prod_force_a.cc b/source/lib/tests/test_prod_force_a.cc index b51c97e421..2031f086b4 100644 --- a/source/lib/tests/test_prod_force_a.cc +++ b/source/lib/tests/test_prod_force_a.cc @@ -133,7 +133,7 @@ TEST_F(TestProdForceA, cpu) { // printf("\n"); } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestProdForceA, gpu) { std::vector force(nframes * nall * 3, 0.0); int n_a_sel = nnei; @@ -161,34 +161,4 @@ TEST_F(TestProdForceA, gpu) { EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5); } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestProdForceA, gpu) { - std::vector force(nframes * nall * 3, 0.0); - int n_a_sel = nnei; - - int* nlist_dev = NULL; - double *force_dev = NULL, *net_deriv_dev = NULL, *env_deriv_dev = NULL; - - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory_sync(force_dev, force); - deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv); - deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv); - - deepmd::prod_force_a_gpu(force_dev, net_deriv_dev, env_deriv_dev, - nlist_dev, nloc, nall, nnei, nframes); - - deepmd::memcpy_device_to_host(force_dev, force); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(force_dev); - deepmd::delete_device_memory(net_deriv_dev); - deepmd::delete_device_memory(env_deriv_dev); - - EXPECT_EQ(force.size(), nframes * nall * 3); - EXPECT_EQ(force.size(), expected_force.size()); - for (int jj = 0; jj < force.size(); ++jj) { - EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5); - } -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_prod_force_grad_a.cc b/source/lib/tests/test_prod_force_grad_a.cc index 4694c4ac3b..abb04eaa01 100644 --- a/source/lib/tests/test_prod_force_grad_a.cc +++ b/source/lib/tests/test_prod_force_grad_a.cc @@ -143,7 +143,7 @@ TEST_F(TestProdForceGradA, cpu) { // printf("\n"); } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestProdForceGradA, gpu) { std::vector grad_net(nframes * nloc * ndescrpt); int* nlist_dev = NULL; @@ -171,34 +171,4 @@ TEST_F(TestProdForceGradA, gpu) { // } // printf("\n"); } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestProdForceGradA, gpu) { - std::vector grad_net(nframes * nloc * ndescrpt); - int* nlist_dev = NULL; - double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL; - - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory_sync(grad_dev, grad); - deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv); - deepmd::malloc_device_memory(grad_net_dev, nframes * nloc * ndescrpt); - deepmd::prod_force_grad_a_gpu(grad_net_dev, grad_dev, env_deriv_dev, - nlist_dev, nloc, nnei, nframes); - deepmd::memcpy_device_to_host(grad_net_dev, grad_net); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(grad_dev); - deepmd::delete_device_memory(env_deriv_dev); - deepmd::delete_device_memory(grad_net_dev); - - EXPECT_EQ(grad_net.size(), nframes * nloc * ndescrpt); - EXPECT_EQ(grad_net.size(), expected_grad_net.size()); - for (int jj = 0; jj < grad_net.size(); ++jj) { - EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5); - } - // for (int jj = 0; jj < nloc * ndescrpt; ++jj){ - // printf("%8.5f, ", grad_net[jj]); - // } - // printf("\n"); -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_prod_force_grad_r.cc b/source/lib/tests/test_prod_force_grad_r.cc index 31f8b64982..c8a27077c3 100644 --- a/source/lib/tests/test_prod_force_grad_r.cc +++ b/source/lib/tests/test_prod_force_grad_r.cc @@ -117,7 +117,7 @@ TEST_F(TestProdForceGradR, cpu) { // printf("\n"); } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestProdForceGradR, gpu) { std::vector grad_net(nframes * nloc * ndescrpt); int* nlist_dev = NULL; @@ -145,34 +145,4 @@ TEST_F(TestProdForceGradR, gpu) { // } // printf("\n"); } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestProdForceGradR, gpu) { - std::vector grad_net(nframes * nloc * ndescrpt); - int* nlist_dev = NULL; - double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL; - - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory_sync(grad_dev, grad); - deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv); - deepmd::malloc_device_memory(grad_net_dev, nframes * nloc * ndescrpt); - deepmd::prod_force_grad_r_gpu(grad_net_dev, grad_dev, env_deriv_dev, - nlist_dev, nloc, nnei, nframes); - deepmd::memcpy_device_to_host(grad_net_dev, grad_net); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(grad_dev); - deepmd::delete_device_memory(env_deriv_dev); - deepmd::delete_device_memory(grad_net_dev); - - EXPECT_EQ(grad_net.size(), nframes * nloc * ndescrpt); - EXPECT_EQ(grad_net.size(), expected_grad_net.size()); - for (int jj = 0; jj < grad_net.size(); ++jj) { - EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5); - } - // for (int jj = 0; jj < nloc * ndescrpt; ++jj){ - // printf("%8.5f, ", grad_net[jj]); - // } - // printf("\n"); -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_prod_force_r.cc b/source/lib/tests/test_prod_force_r.cc index 7f46aa3244..ff3245742d 100644 --- a/source/lib/tests/test_prod_force_r.cc +++ b/source/lib/tests/test_prod_force_r.cc @@ -130,7 +130,7 @@ TEST_F(TestProdForceR, cpu) { // printf("\n"); } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestProdForceR, gpu) { std::vector force(nframes * nall * 3, 0.0); int n_a_sel = nnei; @@ -158,34 +158,4 @@ TEST_F(TestProdForceR, gpu) { EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5); } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestProdForceR, gpu) { - std::vector force(nframes * nall * 3, 0.0); - int n_a_sel = nnei; - - int* nlist_dev = NULL; - double *force_dev = NULL, *net_deriv_dev = NULL, *env_deriv_dev = NULL; - - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory_sync(force_dev, force); - deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv); - deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv); - - deepmd::prod_force_r_gpu(force_dev, net_deriv_dev, env_deriv_dev, - nlist_dev, nloc, nall, nnei, nframes); - - deepmd::memcpy_device_to_host(force_dev, force); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(force_dev); - deepmd::delete_device_memory(net_deriv_dev); - deepmd::delete_device_memory(env_deriv_dev); - - EXPECT_EQ(force.size(), nframes * nall * 3); - EXPECT_EQ(force.size(), expected_force.size()); - for (int jj = 0; jj < force.size(); ++jj) { - EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5); - } -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_prod_virial_a.cc b/source/lib/tests/test_prod_virial_a.cc index 054a152869..b2f2a11989 100644 --- a/source/lib/tests/test_prod_virial_a.cc +++ b/source/lib/tests/test_prod_virial_a.cc @@ -178,7 +178,7 @@ TEST_F(TestProdVirialA, cpu) { // printf("\n"); } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestProdVirialA, gpu) { std::vector virial(9, 0.0); std::vector atom_virial(nall * 9, 0.0); @@ -225,53 +225,4 @@ TEST_F(TestProdVirialA, gpu) { EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5); } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestProdVirialA, gpu) { - std::vector virial(9, 0.0); - std::vector atom_virial(nall * 9, 0.0); - int n_a_sel = nnei; - - int* nlist_dev = NULL; - double *virial_dev = NULL, *atom_virial_dev = NULL, *net_deriv_dev = NULL, - *env_deriv_dev = NULL, *rij_dev = NULL; - - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory_sync(virial_dev, virial); - deepmd::malloc_device_memory_sync(atom_virial_dev, atom_virial); - deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv); - deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv); - deepmd::malloc_device_memory_sync(rij_dev, rij); - - deepmd::prod_virial_a_gpu(virial_dev, atom_virial_dev, net_deriv_dev, - env_deriv_dev, rij_dev, nlist_dev, nloc, - nall, nnei); - - deepmd::memcpy_device_to_host(virial_dev, virial); - deepmd::memcpy_device_to_host(atom_virial_dev, atom_virial); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(virial_dev); - deepmd::delete_device_memory(atom_virial_dev); - deepmd::delete_device_memory(net_deriv_dev); - deepmd::delete_device_memory(env_deriv_dev); - deepmd::delete_device_memory(rij_dev); - // virial are not calculated in gpu currently; - // for (int ii = 0; ii < 9; ii++) { - // virial[ii] = 0; - // } - // for (int ii = 0; ii < nall * 9; ii++) { - // virial[ii % 9] += atom_virial[ii]; - // } - EXPECT_EQ(virial.size(), 9); - EXPECT_EQ(virial.size(), expected_virial.size()); - EXPECT_EQ(atom_virial.size(), nall * 9); - EXPECT_EQ(atom_virial.size(), expected_atom_virial.size()); - for (int jj = 0; jj < virial.size(); ++jj) { - EXPECT_LT(fabs(virial[jj] - expected_virial[jj]), 1e-5); - } - for (int jj = 0; jj < atom_virial.size(); ++jj) { - EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5); - } -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_prod_virial_grad_a.cc b/source/lib/tests/test_prod_virial_grad_a.cc index 98a08ce5c3..09af51d6ed 100644 --- a/source/lib/tests/test_prod_virial_grad_a.cc +++ b/source/lib/tests/test_prod_virial_grad_a.cc @@ -137,7 +137,7 @@ TEST_F(TestProdVirialGradA, cpu) { // printf("\n"); } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestProdVirialGradA, gpu) { std::vector grad_net(nloc * ndescrpt); int n_a_sel = nnei; @@ -169,38 +169,4 @@ TEST_F(TestProdVirialGradA, gpu) { // } // printf("\n"); } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestProdVirialGradA, gpu) { - std::vector grad_net(nloc * ndescrpt); - int n_a_sel = nnei; - int* nlist_dev = NULL; - double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL, - *rij_dev = NULL; - - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory_sync(grad_dev, grad); - deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv); - deepmd::malloc_device_memory_sync(rij_dev, rij); - deepmd::malloc_device_memory(grad_net_dev, nloc * ndescrpt); - deepmd::prod_virial_grad_a_gpu(grad_net_dev, grad_dev, env_deriv_dev, - rij_dev, nlist_dev, nloc, nnei); - deepmd::memcpy_device_to_host(grad_net_dev, grad_net); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(grad_dev); - deepmd::delete_device_memory(env_deriv_dev); - deepmd::delete_device_memory(rij_dev); - deepmd::delete_device_memory(grad_net_dev); - - EXPECT_EQ(grad_net.size(), nloc * ndescrpt); - EXPECT_EQ(grad_net.size(), expected_grad_net.size()); - for (int jj = 0; jj < grad_net.size(); ++jj) { - EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5); - } - // for (int jj = 0; jj < nloc * ndescrpt; ++jj){ - // printf("%8.5f, ", grad_net[jj]); - // } - // printf("\n"); -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_prod_virial_grad_r.cc b/source/lib/tests/test_prod_virial_grad_r.cc index a0c7dad0db..93a7291176 100644 --- a/source/lib/tests/test_prod_virial_grad_r.cc +++ b/source/lib/tests/test_prod_virial_grad_r.cc @@ -111,7 +111,7 @@ TEST_F(TestProdVirialGradR, cpu) { // printf("\n"); } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestProdVirialGradR, gpu) { std::vector grad_net(nloc * ndescrpt); int n_a_sel = nnei; @@ -143,38 +143,4 @@ TEST_F(TestProdVirialGradR, gpu) { // } // printf("\n"); } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestProdVirialGradR, gpu) { - std::vector grad_net(nloc * ndescrpt); - int n_a_sel = nnei; - int* nlist_dev = NULL; - double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL, - *rij_dev = NULL; - - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory_sync(grad_dev, grad); - deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv); - deepmd::malloc_device_memory_sync(rij_dev, rij); - deepmd::malloc_device_memory(grad_net_dev, nloc * ndescrpt); - deepmd::prod_virial_grad_r_gpu(grad_net_dev, grad_dev, env_deriv_dev, - rij_dev, nlist_dev, nloc, nnei); - deepmd::memcpy_device_to_host(grad_net_dev, grad_net); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(grad_dev); - deepmd::delete_device_memory(env_deriv_dev); - deepmd::delete_device_memory(rij_dev); - deepmd::delete_device_memory(grad_net_dev); - - EXPECT_EQ(grad_net.size(), nloc * ndescrpt); - EXPECT_EQ(grad_net.size(), expected_grad_net.size()); - for (int jj = 0; jj < grad_net.size(); ++jj) { - EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5); - } - // for (int jj = 0; jj < nloc * ndescrpt; ++jj){ - // printf("%8.5f, ", grad_net[jj]); - // } - // printf("\n"); -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_prod_virial_r.cc b/source/lib/tests/test_prod_virial_r.cc index f1077b6dbc..aed4abc512 100644 --- a/source/lib/tests/test_prod_virial_r.cc +++ b/source/lib/tests/test_prod_virial_r.cc @@ -178,7 +178,7 @@ TEST_F(TestProdVirialR, cpu) { // printf("\n"); } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestProdVirialR, gpu) { std::vector virial(9, 0.0); std::vector atom_virial(nall * 9, 0.0); @@ -225,53 +225,4 @@ TEST_F(TestProdVirialR, gpu) { EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5); } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestProdVirialR, gpu) { - std::vector virial(9, 0.0); - std::vector atom_virial(nall * 9, 0.0); - int n_a_sel = nnei; - - int* nlist_dev = NULL; - double *virial_dev = NULL, *atom_virial_dev = NULL, *net_deriv_dev = NULL, - *env_deriv_dev = NULL, *rij_dev = NULL; - - deepmd::malloc_device_memory_sync(nlist_dev, nlist); - deepmd::malloc_device_memory_sync(virial_dev, virial); - deepmd::malloc_device_memory_sync(atom_virial_dev, atom_virial); - deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv); - deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv); - deepmd::malloc_device_memory_sync(rij_dev, rij); - - deepmd::prod_virial_r_gpu(virial_dev, atom_virial_dev, net_deriv_dev, - env_deriv_dev, rij_dev, nlist_dev, nloc, - nall, nnei); - - deepmd::memcpy_device_to_host(virial_dev, virial); - deepmd::memcpy_device_to_host(atom_virial_dev, atom_virial); - deepmd::delete_device_memory(nlist_dev); - deepmd::delete_device_memory(virial_dev); - deepmd::delete_device_memory(atom_virial_dev); - deepmd::delete_device_memory(net_deriv_dev); - deepmd::delete_device_memory(env_deriv_dev); - deepmd::delete_device_memory(rij_dev); - // virial are not calculated in gpu currently; - // for (int ii = 0; ii < 9; ii++) { - // virial[ii] = 0; - // } - // for (int ii = 0; ii < nall * 9; ii++) { - // virial[ii % 9] += atom_virial[ii]; - // } - EXPECT_EQ(virial.size(), 9); - EXPECT_EQ(virial.size(), expected_virial.size()); - EXPECT_EQ(atom_virial.size(), nall * 9); - EXPECT_EQ(atom_virial.size(), expected_atom_virial.size()); - for (int jj = 0; jj < virial.size(); ++jj) { - EXPECT_LT(fabs(virial[jj] - expected_virial[jj]), 1e-5); - } - for (int jj = 0; jj < atom_virial.size(); ++jj) { - EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5); - } -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_simulation_region.cc b/source/lib/tests/test_simulation_region.cc index 6f1db46bb0..98da9ec350 100644 --- a/source/lib/tests/test_simulation_region.cc +++ b/source/lib/tests/test_simulation_region.cc @@ -73,7 +73,7 @@ TEST_F(TestRegion, cpu) { EXPECT_LT(fabs(ri2[ii] - ref_ri[ii]), 1e-10); } } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestRegion, gpu) { // check rec_box deepmd::Region region; @@ -144,7 +144,7 @@ TEST_F(TestRegion, gpu) { region_dev.boxt = new_boxt; region_dev.rec_boxt = new_rec_boxt; } -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM // double square_root (const double xx) // { @@ -156,76 +156,3 @@ TEST_F(TestRegion, gpu) { // EXPECT_EQ (25.4, square_root (645.16)); // EXPECT_EQ (50.332, square_root (2533.310224)); // } - -#if TENSORFLOW_USE_ROCM -TEST_F(TestRegion, gpu) { - // check rec_box - deepmd::Region region; - deepmd::Region region_dev; - double* new_boxt = region_dev.boxt; - double* new_rec_boxt = region_dev.rec_boxt; - double *boxt_dev = NULL, *rec_boxt_dev = NULL; - double *ref_rp_dev = NULL, *ref_ri_dev = NULL; - init_region_cpu(region, &ref_boxt[0]); - for (int ii = 0; ii < 9; ++ii) { - EXPECT_LT(fabs(region.rec_boxt[ii] - ref_rec_boxt[ii]), 1e-10); - } - deepmd::malloc_device_memory_sync(boxt_dev, region.boxt, 9); - deepmd::malloc_device_memory_sync(rec_boxt_dev, region.rec_boxt, 9); - deepmd::malloc_device_memory_sync(ref_rp_dev, ref_rp); - deepmd::malloc_device_memory_sync(ref_ri_dev, ref_ri); - region_dev.boxt = boxt_dev; - region_dev.rec_boxt = rec_boxt_dev; - // check volume - double vol[1]; - double* vol_dev = NULL; - deepmd::malloc_device_memory(vol_dev, 1); - deepmd::volume_gpu(vol_dev, region_dev); - deepmd::memcpy_device_to_host(vol_dev, vol, 1); - EXPECT_LT(fabs(vol[0] - expected_vol), 1e-10); - // check conversion between phys and inter coords. - double ri[3]; - double* ri_dev = NULL; - deepmd::malloc_device_memory(ri_dev, 3); - deepmd::convert_to_inter_gpu(ri_dev, region_dev, ref_rp_dev); - deepmd::memcpy_device_to_host(ri_dev, ri, 3); - for (int ii = 0; ii < 3; ++ii) { - EXPECT_LT(fabs(ri[ii] - ref_ri[ii]), 1e-10); - } - double rp2[3]; - double* rp2_dev = NULL; - deepmd::malloc_device_memory(rp2_dev, 3); - deepmd::convert_to_phys_gpu(rp2_dev, region_dev, ri_dev); - deepmd::memcpy_device_to_host(rp2_dev, rp2, 3); - for (int ii = 0; ii < 3; ++ii) { - EXPECT_LT(fabs(rp2[ii] - ref_rp[ii]), 1e-10); - } - double rp[3]; - double* rp_dev = NULL; - deepmd::malloc_device_memory(rp_dev, 3); - deepmd::convert_to_phys_gpu(rp_dev, region_dev, ref_ri_dev); - deepmd::memcpy_device_to_host(rp_dev, rp, 3); - for (int ii = 0; ii < 3; ++ii) { - EXPECT_LT(fabs(rp[ii] - ref_rp[ii]), 1e-10); - } - double ri2[3]; - double* ri2_dev = NULL; - deepmd::malloc_device_memory(ri2_dev, 3); - deepmd::convert_to_inter_gpu(ri2_dev, region_dev, rp_dev); - deepmd::memcpy_device_to_host(ri2_dev, ri2, 3); - for (int ii = 0; ii < 3; ++ii) { - EXPECT_LT(fabs(ri2[ii] - ref_ri[ii]), 1e-10); - } - deepmd::delete_device_memory(boxt_dev); - deepmd::delete_device_memory(rec_boxt_dev); - deepmd::delete_device_memory(vol_dev); - deepmd::delete_device_memory(ref_rp_dev); - deepmd::delete_device_memory(ref_ri_dev); - deepmd::delete_device_memory(ri_dev); - deepmd::delete_device_memory(rp2_dev); - deepmd::delete_device_memory(rp_dev); - deepmd::delete_device_memory(ri2_dev); - region_dev.boxt = new_boxt; - region_dev.rec_boxt = new_rec_boxt; -} -#endif // TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_tabulate_se_a.cc b/source/lib/tests/test_tabulate_se_a.cc index 6f76f9c2ee..fc0fd04980 100644 --- a/source/lib/tests/test_tabulate_se_a.cc +++ b/source/lib/tests/test_tabulate_se_a.cc @@ -755,7 +755,7 @@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_cpu) { } } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestTabulateSeA, tabulate_fusion_se_a_gpu) { std::vector xyz_scatter(nloc * nnei * last_layer_size, 0.0); @@ -852,103 +852,4 @@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu) { deepmd::delete_device_memory(dy_dev); deepmd::delete_device_memory(two_embed_dev); } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestTabulateSeA, tabulate_fusion_se_a_gpu) { - std::vector xyz_scatter(nloc * nnei * last_layer_size, 0.0); - - double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_x_dev = NULL, - *em_dev = NULL; - deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter); - deepmd::malloc_device_memory_sync(table_dev, table); - deepmd::malloc_device_memory_sync(em_x_dev, em_x); - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::tabulate_fusion_se_a_gpu(xyz_scatter_dev, table_dev, &info[0], - em_x_dev, em_dev, nullptr, nloc, - nnei, last_layer_size); - deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter); - - EXPECT_EQ(xyz_scatter.size(), nloc * nnei * last_layer_size); - EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size()); - for (int jj = 0; jj < xyz_scatter.size(); ++jj) { - EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5); - } - - double *two_embed_dev = nullptr; - deepmd::malloc_device_memory_sync(two_embed_dev, two_embed); - deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter); - deepmd::tabulate_fusion_se_a_gpu(xyz_scatter_dev, table_dev, &info[0], - em_x_dev, em_dev, two_embed_dev, - nloc, nnei, last_layer_size); - deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter); - - EXPECT_EQ(xyz_scatter.size(), nloc * nnei * last_layer_size); - EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size()); - for (int jj = 0; jj < xyz_scatter.size(); ++jj) { - EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter_with_two_embed[jj]), - 1e-5); - } - - deepmd::delete_device_memory(xyz_scatter_dev); - deepmd::delete_device_memory(table_dev); - deepmd::delete_device_memory(em_x_dev); - deepmd::delete_device_memory(em_dev); - deepmd::delete_device_memory(two_embed_dev); -} - -TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu) { - std::vector dy_dem_x(em_x.size(), 0.0); - std::vector dy_dem(em.size(), 0.0); - std::vector dy(nloc * nnei * last_layer_size, 1.0); - - double *dy_dem_x_dev = NULL, *dy_dem_dev = NULL, *table_dev = NULL, - *em_x_dev = NULL, *em_dev = NULL, *dy_dev = NULL; - deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x); - deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem); - deepmd::malloc_device_memory_sync(table_dev, table); - deepmd::malloc_device_memory_sync(em_x_dev, em_x); - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::malloc_device_memory_sync(dy_dev, dy); - deepmd::tabulate_fusion_se_a_grad_gpu( - dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, nullptr, - dy_dev, nloc, nnei, last_layer_size); - deepmd::memcpy_device_to_host(dy_dem_x_dev, dy_dem_x); - deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem); - - EXPECT_EQ(dy_dem_x.size(), nloc * nnei); - EXPECT_EQ(dy_dem.size(), nloc * nnei * 4); - EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size()); - EXPECT_EQ(dy_dem.size(), expected_dy_dem.size()); - for (int jj = 0; jj < dy_dem_x.size(); ++jj) { - EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]), 1e-5); - } - for (int jj = 0; jj < dy_dem.size(); ++jj) { - EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5); - } - - double *two_embed_dev = nullptr; - deepmd::malloc_device_memory_sync(two_embed_dev, two_embed); - deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x); - deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem); - deepmd::tabulate_fusion_se_a_grad_gpu( - dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, - two_embed_dev, dy_dev, nloc, nnei, last_layer_size); - deepmd::memcpy_device_to_host(dy_dem_x_dev, dy_dem_x); - deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem); - for (int jj = 0; jj < dy_dem_x.size(); ++jj) { - EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x_with_two_embed[jj]), 1e-5); - } - for (int jj = 0; jj < dy_dem.size(); ++jj) { - EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem_with_two_embed[jj]), 1e-5); - } - - deepmd::delete_device_memory(dy_dem_x_dev); - deepmd::delete_device_memory(dy_dem_dev); - deepmd::delete_device_memory(table_dev); - deepmd::delete_device_memory(em_x_dev); - deepmd::delete_device_memory(em_dev); - deepmd::delete_device_memory(dy_dev); - deepmd::delete_device_memory(two_embed_dev); -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_tabulate_se_r.cc b/source/lib/tests/test_tabulate_se_r.cc index 5097451aab..8ac7e13c96 100644 --- a/source/lib/tests/test_tabulate_se_r.cc +++ b/source/lib/tests/test_tabulate_se_r.cc @@ -606,7 +606,7 @@ TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_cpu) { } } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestTabulateSeR, tabulate_fusion_se_r_gpu) { std::vector xyz_scatter(nloc * nnei * last_layer_size, 0.0); @@ -653,53 +653,4 @@ TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_gpu) { EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5); } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestTabulateSeR, tabulate_fusion_se_r_gpu) { - std::vector xyz_scatter(nloc * nnei * last_layer_size, 0.0); - - double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_dev = NULL; - deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter); - deepmd::malloc_device_memory_sync(table_dev, table); - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::tabulate_fusion_se_r_gpu(xyz_scatter_dev, table_dev, &info[0], - em_dev, nloc, nnei, last_layer_size); - deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter); - deepmd::delete_device_memory(xyz_scatter_dev); - deepmd::delete_device_memory(table_dev); - deepmd::delete_device_memory(em_dev); - - EXPECT_EQ(xyz_scatter.size(), nloc * nnei * last_layer_size); - EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size()); - for (int jj = 0; jj < xyz_scatter.size(); ++jj) { - EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5); - } -} - -TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_gpu) { - std::vector dy_dem(em.size(), 0.0); - std::vector dy(nloc * nnei * last_layer_size, 1.0); - - double *dy_dem_dev = NULL, *table_dev = NULL, *em_dev = NULL, *dy_dev = NULL; - deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem); - deepmd::malloc_device_memory_sync(table_dev, table); - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::malloc_device_memory_sync(dy_dev, dy); - deepmd::tabulate_fusion_se_r_grad_gpu(dy_dem_dev, table_dev, &info[0], - em_dev, dy_dev, nloc, nnei, - last_layer_size); - deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem); - deepmd::delete_device_memory(dy_dem_dev); - deepmd::delete_device_memory(table_dev); - deepmd::delete_device_memory(em_dev); - deepmd::delete_device_memory(dy_dev); - - EXPECT_EQ(dy_dem.size(), nloc * nnei); - EXPECT_EQ(dy_dem.size(), expected_dy_dem.size()); - - for (int jj = 0; jj < dy_dem.size(); ++jj) { - EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5); - } -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/lib/tests/test_tabulate_se_t.cc b/source/lib/tests/test_tabulate_se_t.cc index ffb1b41220..be82a07ba6 100644 --- a/source/lib/tests/test_tabulate_se_t.cc +++ b/source/lib/tests/test_tabulate_se_t.cc @@ -5260,7 +5260,7 @@ TEST_F(TestTabulateSeT, tabulate_fusion_se_t_grad_cpu) { } } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST_F(TestTabulateSeT, tabulate_fusion_se_t_gpu) { std::vector xyz_scatter(nloc * last_layer_size, 0.0); double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_x_dev = NULL, @@ -5322,66 +5322,4 @@ TEST_F(TestTabulateSeT, tabulate_fusion_se_a_grad_gpu) { EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5); } } -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -TEST_F(TestTabulateSeT, tabulate_fusion_se_t_gpu) { - std::vector xyz_scatter(nloc * last_layer_size, 0.0); - double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_x_dev = NULL, - *em_dev = NULL; - deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter); - deepmd::malloc_device_memory_sync(table_dev, table); - deepmd::malloc_device_memory_sync(em_x_dev, em_x); - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::tabulate_fusion_se_t_gpu(xyz_scatter_dev, table_dev, &info[0], - em_x_dev, em_dev, nloc, nnei_i, - nnei_j, last_layer_size); - deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter); - deepmd::delete_device_memory(xyz_scatter_dev); - deepmd::delete_device_memory(table_dev); - deepmd::delete_device_memory(em_x_dev); - deepmd::delete_device_memory(em_dev); - - EXPECT_EQ(xyz_scatter.size(), nloc * last_layer_size); - EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size()); - for (int jj = 0; jj < xyz_scatter.size() / 100; ++jj) { - EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5); - } -} - -TEST_F(TestTabulateSeT, tabulate_fusion_se_t_grad_gpu) { - std::vector dy_dem_x(em_x.size(), 0.0); - std::vector dy_dem(em.size(), 0.0); - - double *dy_dem_x_dev = NULL, *dy_dem_dev = NULL, *table_dev = NULL, - *em_x_dev = NULL, *em_dev = NULL, *dy_dev = NULL; - deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x); - deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem); - deepmd::malloc_device_memory_sync(table_dev, table); - deepmd::malloc_device_memory_sync(em_x_dev, em_x); - deepmd::malloc_device_memory_sync(em_dev, em); - deepmd::malloc_device_memory_sync(dy_dev, dy); - deepmd::tabulate_fusion_se_t_grad_gpu( - dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, dy_dev, - nloc, nnei_i, nnei_j, last_layer_size); - deepmd::memcpy_device_to_host(dy_dem_x_dev, dy_dem_x); - deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem); - deepmd::delete_device_memory(dy_dem_x_dev); - deepmd::delete_device_memory(dy_dem_dev); - deepmd::delete_device_memory(table_dev); - deepmd::delete_device_memory(em_x_dev); - deepmd::delete_device_memory(em_dev); - deepmd::delete_device_memory(dy_dev); - - EXPECT_EQ(dy_dem_x.size(), nloc * nnei_i * nnei_j); - EXPECT_EQ(dy_dem.size(), nloc * nnei_i * nnei_j); - EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size()); - EXPECT_EQ(dy_dem.size(), expected_dy_dem.size()); - for (int jj = 0; jj < dy_dem_x.size(); ++jj) { - EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]), 1e-5); - } - for (int jj = 0; jj < dy_dem.size(); ++jj) { - EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5); - } -} -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM