Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions source/lib/src/cuda/prod_env_mat.cu
Original file line number Diff line number Diff line change
Expand Up @@ -539,9 +539,9 @@ void prod_env_mat_a_gpu_cuda(
{
const int nnei = sec.back();
const int ndescrpt = nnei * 4;
DPErrcheck(cudaMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(cudaMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(cudaMemset(rij, 0., sizeof(FPTYPE) * nloc * nnei * 3));
DPErrcheck(cudaMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(cudaMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(cudaMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));

format_nbor_list_gpu_cuda(
nlist,
Expand Down Expand Up @@ -578,9 +578,9 @@ void prod_env_mat_r_gpu_cuda(
{
const int nnei = sec.back();
const int ndescrpt = nnei * 1;
DPErrcheck(cudaMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(cudaMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(cudaMemset(rij, 0., sizeof(FPTYPE) * nloc * nnei * 3));
DPErrcheck(cudaMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(cudaMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(cudaMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));

format_nbor_list_gpu_cuda(
nlist,
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/cuda/prod_force.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void prod_force_a_gpu_cuda(
const int ndescrpt = nnei * 4;
DPErrcheck(cudaMemset(
force,
0.0, sizeof(FPTYPE) * nall * 3));
0, sizeof(FPTYPE) * nall * 3));

force_deriv_wrt_center_atom<FPTYPE, TPB> <<<nloc, TPB>>>(
force,
Expand Down Expand Up @@ -141,7 +141,7 @@ void prod_force_r_gpu_cuda(
const int ndescrpt = nnei * 1;
DPErrcheck(cudaMemset(
force,
0.0, sizeof(FPTYPE) * nall * 3));
0, sizeof(FPTYPE) * nall * 3));

force_deriv_wrt_center_atom<FPTYPE, TPB> <<<nloc, TPB>>>(
force,
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/cuda/prod_force_grad.cu
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ void prod_force_grad_a_gpu_cuda(
const int ndescrpt = nnei * 4;
DPErrcheck(cudaMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int nblock = (ndescrpt + TPB - 1) / TPB;
dim3 block_grid(nloc, nblock);
dim3 thread_grid(TPB, 1);
Expand Down Expand Up @@ -122,7 +122,7 @@ void prod_force_grad_r_gpu_cuda(
const int ndescrpt = nnei * 1;
DPErrcheck(cudaMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int nblock = (ndescrpt + TPB - 1) / TPB;
dim3 block_grid(nloc, nblock);
dim3 thread_grid(TPB, 1);
Expand Down
8 changes: 4 additions & 4 deletions source/lib/src/cuda/prod_virial.cu
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ void prod_virial_a_gpu_cuda(
{
DPErrcheck(cudaMemset(
virial,
0.0, sizeof(FPTYPE) * 9));
0, sizeof(FPTYPE) * 9));
DPErrcheck(cudaMemset(
atom_virial,
0.0, sizeof(FPTYPE) * 9 * nall));
0, sizeof(FPTYPE) * 9 * nall));

const int LEN = 16;
int nblock = (nnei + LEN - 1) / LEN;
Expand Down Expand Up @@ -153,10 +153,10 @@ void prod_virial_r_gpu_cuda(
{
DPErrcheck(cudaMemset(
virial,
0.0, sizeof(FPTYPE) * 9));
0, sizeof(FPTYPE) * 9));
DPErrcheck(cudaMemset(
atom_virial,
0.0, sizeof(FPTYPE) * 9 * nall));
0, sizeof(FPTYPE) * 9 * nall));

const int LEN = 16;
int nblock = (nnei + LEN - 1) / LEN;
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/cuda/prod_virial_grad.cu
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void prod_virial_grad_a_gpu_cuda(
const int ndescrpt = nnei * 4;
DPErrcheck(cudaMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int LEN = 128;
const int nblock = (nloc + LEN -1) / LEN;
dim3 block_grid(nblock, nnei);
Expand All @@ -125,7 +125,7 @@ void prod_virial_grad_r_gpu_cuda(
const int ndescrpt = nnei;
DPErrcheck(cudaMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int LEN = 128;
const int nblock = (nloc + LEN -1) / LEN;
dim3 block_grid(nblock, nnei);
Expand Down
16 changes: 8 additions & 8 deletions source/lib/src/cuda/tabulate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -648,10 +648,10 @@ void tabulate_fusion_se_a_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dy_dem_x,
0.0, sizeof(FPTYPE) * nloc * nnei));
0, sizeof(FPTYPE) * nloc * nnei));
DPErrcheck(cudaMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei * 4));
0, sizeof(FPTYPE) * nloc * nnei * 4));

tabulate_fusion_se_a_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size>>>(
dy_dem_x, dy_dem,
Expand All @@ -676,7 +676,7 @@ void tabulate_fusion_se_a_grad_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
tabulate_fusion_se_a_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size>>>(
dz_dy,
table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
Expand Down Expand Up @@ -721,10 +721,10 @@ void tabulate_fusion_se_t_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dy_dem_x,
0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
DPErrcheck(cudaMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));

tabulate_fusion_se_t_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, KK * WARP_SIZE, sizeof(FPTYPE) * last_layer_size>>>(
dy_dem_x, dy_dem,
Expand All @@ -750,7 +750,7 @@ void tabulate_fusion_se_t_grad_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * last_layer_size));
0, sizeof(FPTYPE) * nloc * last_layer_size));

tabulate_fusion_se_t_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, last_layer_size>>>(
dz_dy,
Expand Down Expand Up @@ -791,7 +791,7 @@ void tabulate_fusion_se_r_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei));
0, sizeof(FPTYPE) * nloc * nnei));

tabulate_fusion_se_r_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size>>>(
dy_dem,
Expand All @@ -814,7 +814,7 @@ void tabulate_fusion_se_r_grad_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
tabulate_fusion_se_r_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size>>>(
dz_dy,
table, em, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
Expand Down
2 changes: 1 addition & 1 deletion source/lib/src/prod_force.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ prod_force_a_cpu(
{
const int ndescrpt = 4 * nnei;

memset(force, 0.0, sizeof(FPTYPE) * nall * 3);
memset(force, 0, sizeof(FPTYPE) * nall * 3);
// compute force of a frame
#pragma omp parallel
for (int i_idx = 0; i_idx < nloc; ++i_idx) {
Expand Down
12 changes: 6 additions & 6 deletions source/lib/src/rocm/prod_env_mat.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -537,9 +537,9 @@ void prod_env_mat_a_gpu_rocm(
{
const int nnei = sec.back();
const int ndescrpt = nnei * 4;
DPErrcheck(hipMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(hipMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(hipMemset(rij, 0.0, sizeof(FPTYPE) * nloc * nnei * 3));
DPErrcheck(hipMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(hipMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(hipMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));

format_nbor_list_gpu_rocm(
nlist,
Expand Down Expand Up @@ -576,9 +576,9 @@ void prod_env_mat_r_gpu_rocm(
{
const int nnei = sec.back();
const int ndescrpt = nnei * 1;
DPErrcheck(hipMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(hipMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(hipMemset(rij, 0.0, sizeof(FPTYPE) * nloc * nnei * 3));
DPErrcheck(hipMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(hipMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(hipMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));

format_nbor_list_gpu_rocm(
nlist,
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/rocm/prod_force.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ namespace deepmd {
const int ndescrpt = nnei * 4;
DPErrcheck(hipMemset(
force,
0.0, sizeof(FPTYPE) * nall * 3));
0, sizeof(FPTYPE) * nall * 3));

hipLaunchKernelGGL(HIP_KERNEL_NAME(force_deriv_wrt_center_atom<FPTYPE, TPB>), nloc, TPB, 0, 0,
force,
Expand Down Expand Up @@ -141,7 +141,7 @@ namespace deepmd {
const int ndescrpt = nnei * 1;
DPErrcheck(hipMemset(
force,
0.0, sizeof(FPTYPE) * nall * 3));
0, sizeof(FPTYPE) * nall * 3));

hipLaunchKernelGGL(HIP_KERNEL_NAME(force_deriv_wrt_center_atom<FPTYPE, TPB>), nloc, TPB, 0, 0,
force,
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/rocm/prod_force_grad.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ void prod_force_grad_a_gpu_rocm(
const int ndescrpt = nnei * 4;
DPErrcheck(hipMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int nblock = (ndescrpt + TPB - 1) / TPB;
dim3 block_grid(nloc, nblock);
dim3 thread_grid(TPB, 1);
Expand Down Expand Up @@ -121,7 +121,7 @@ void prod_force_grad_r_gpu_rocm(
const int ndescrpt = nnei * 1;
DPErrcheck(hipMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int nblock = (ndescrpt + TPB - 1) / TPB;
dim3 block_grid(nloc, nblock);
dim3 thread_grid(TPB, 1);
Expand Down
8 changes: 4 additions & 4 deletions source/lib/src/rocm/prod_virial.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,10 @@ void prod_virial_a_gpu_rocm(
{
DPErrcheck(hipMemset(
virial,
0.0, sizeof(FPTYPE) * 9));
0, sizeof(FPTYPE) * 9));
DPErrcheck(hipMemset(
atom_virial,
0.0, sizeof(FPTYPE) * 9 * nall));
0, sizeof(FPTYPE) * 9 * nall));

const int LEN = 16;
int nblock = (nnei + LEN -1) / LEN;
Expand Down Expand Up @@ -150,10 +150,10 @@ void prod_virial_r_gpu_rocm(
{
DPErrcheck(hipMemset(
virial,
0.0, sizeof(FPTYPE) * 9));
0, sizeof(FPTYPE) * 9));
DPErrcheck(hipMemset(
atom_virial,
0.0, sizeof(FPTYPE) * 9 * nall));
0, sizeof(FPTYPE) * 9 * nall));

const int LEN = 16;
int nblock = (nnei + LEN -1) / LEN;
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/rocm/prod_virial_grad.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void prod_virial_grad_a_gpu_rocm(
const int ndescrpt = nnei * 4;
DPErrcheck(hipMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int LEN = 128;
const int nblock = (nloc + LEN -1) / LEN;
dim3 block_grid(nblock, nnei);
Expand All @@ -125,7 +125,7 @@ void prod_virial_grad_r_gpu_rocm(
const int ndescrpt = nnei;
DPErrcheck(hipMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int LEN = 128;
const int nblock = (nloc + LEN -1) / LEN;
dim3 block_grid(nblock, nnei);
Expand Down
16 changes: 8 additions & 8 deletions source/lib/src/rocm/tabulate.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -637,10 +637,10 @@ void tabulate_fusion_se_a_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dy_dem_x,
0.0, sizeof(FPTYPE) * nloc * nnei));
0, sizeof(FPTYPE) * nloc * nnei));
DPErrcheck(hipMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei * 4));
0, sizeof(FPTYPE) * nloc * nnei * 4));

hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_a_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0,
dy_dem_x, dy_dem,
Expand All @@ -665,7 +665,7 @@ void tabulate_fusion_se_a_grad_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_a_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0,
dz_dy,
table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
Expand Down Expand Up @@ -710,10 +710,10 @@ void tabulate_fusion_se_t_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dy_dem_x,
0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
DPErrcheck(hipMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));

hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_t_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * last_layer_size, 0,
dy_dem_x, dy_dem,
Expand All @@ -739,7 +739,7 @@ void tabulate_fusion_se_t_grad_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * last_layer_size));
0, sizeof(FPTYPE) * nloc * last_layer_size));
hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_t_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, last_layer_size, 0, 0,
dz_dy,
table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
Expand Down Expand Up @@ -779,7 +779,7 @@ void tabulate_fusion_se_r_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei));
0, sizeof(FPTYPE) * nloc * nnei));

hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_r_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0,
dy_dem,
Expand All @@ -802,7 +802,7 @@ void tabulate_fusion_se_r_grad_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_r_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0,
dz_dy,
table, em, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
Expand Down
Loading