diff --git a/source/op/descrpt_se_a_multi_device.cc b/source/op/descrpt_se_a_multi_device.cc index 40f2c92eb0..af18909884 100644 --- a/source/op/descrpt_se_a_multi_device.cc +++ b/source/op/descrpt_se_a_multi_device.cc @@ -261,7 +261,7 @@ class DescrptSeAOp : public OpKernel { cudaErrcheck(cudaMemcpy(jlist, jlist_host, sizeof(int) * mesh_host[3], cudaMemcpyHostToDevice)); max_nbor_size = 0; - for(int ii = 0; ii < mesh_host[2]; ii++) { + for(int ii = 0; ii < mesh_host[2] - 1; ii++) { max_nbor_size = (jrange_host[ii + 1] - jrange_host[ii]) > max_nbor_size ? (jrange_host[ii + 1] - jrange_host[ii]) : max_nbor_size; } assert(max_nbor_size <= GPU_MAX_NBOR_SIZE); diff --git a/source/op/descrpt_se_r_multi_device.cc b/source/op/descrpt_se_r_multi_device.cc index 81d2603c79..a1f50fbb95 100644 --- a/source/op/descrpt_se_r_multi_device.cc +++ b/source/op/descrpt_se_r_multi_device.cc @@ -250,7 +250,7 @@ class DescrptSeROp : public OpKernel { cudaErrcheck(cudaMemcpy(jlist, jlist_host, sizeof(int) * mesh_host[3], cudaMemcpyHostToDevice)); max_nbor_size = 0; - for(int ii = 0; ii < mesh_host[2]; ii++) { + for(int ii = 0; ii < mesh_host[2] - 1; ii++) { max_nbor_size = (jrange_host[ii + 1] - jrange_host[ii]) > max_nbor_size ? (jrange_host[ii + 1] - jrange_host[ii]) : max_nbor_size; } assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);