Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions source/lib/include/NNPInter.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,8 @@ class NNPInter
compute_t *array_double;
InternalNeighborList nlist;
NNPAtomMap<VALUETYPE> nnpmap;
unsigned long long *array_longlong;
int *ilist, *jrange, *jlist, *array_int;
int *ilist, *jrange, *jlist;
int ilist_size, jrange_size, jlist_size;
int arr_int_size, arr_ll_size, arr_dou_size;

// function used for neighbor list copy
vector<int> get_sel_a() const;
Expand Down Expand Up @@ -191,13 +189,10 @@ class NNPInterModelDevi
vector<vector<int> > sec;
InternalNeighborList nlist;
NNPAtomMap<VALUETYPE> nnpmap;
unsigned long long *array_longlong;
int max_sec_size = 0, max_sec_back = 0;
int *ilist, *jrange, *jlist, *array_int;
int ilist_size, jrange_size, jlist_size, arr_int_size, arr_ll_size, arr_dou_size;
int *ilist, *jrange, *jlist;
int ilist_size, jrange_size, jlist_size;

// function used for nborlist copy
void get_max_sec();
vector<vector<int> > get_sel() const;
void cum_sum(const std::vector<std::vector<int32> > n_sel);
#ifdef USE_CUDA_TOOLKIT
Expand Down
3 changes: 0 additions & 3 deletions source/lib/include/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,6 @@ session_input_tensors (vector<std::pair<string, Tensor>>& input_tensors,
const int * ilist,
const int * jrange,
const int * jlist,
int * array_int,
unsigned long long * array_longlong,
double * array_double,
const vector<VALUETYPE> & fparam_,
const vector<VALUETYPE> & aparam_,
const NNPAtomMap<VALUETYPE> & nnpmap,
Expand Down
114 changes: 10 additions & 104 deletions source/lib/src/NNPInter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include "SimulationRegion.h"
#include <stdexcept>

#define MAGIC_NUMBER 1024

#ifdef USE_CUDA_TOOLKIT
#include "cuda_runtime.h"
Expand All @@ -14,7 +13,7 @@
#define cudaErrcheck(res) { cudaAssert((res), __FILE__, __LINE__); }
inline void cudaAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
if (code != cudaSuccess)
{
fprintf(stderr,"cuda assert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
Expand Down Expand Up @@ -273,34 +272,19 @@ NNPInter::~NNPInter() {
cudaErrcheck(cudaFree(ilist));
cudaErrcheck(cudaFree(jrange));
cudaErrcheck(cudaFree(jlist));
cudaErrcheck(cudaFree(array_int));
cudaErrcheck(cudaFree(array_longlong));
cudaErrcheck(cudaFree(array_double));
}
#endif
}

#ifdef USE_CUDA_TOOLKIT
void NNPInter::update_nbor(const InternalNeighborList & nlist, const int nloc) {
if (!init_nbor) {
sec_a = cum_sum(get_sel_a());
cudaErrcheck(cudaMalloc((void**)&ilist, sizeof(int) * nlist.ilist.size()));
cudaErrcheck(cudaMalloc((void**)&jrange, sizeof(int) * nlist.jrange.size()));
cudaErrcheck(cudaMalloc((void**)&jlist, sizeof(int) * nlist.jlist.size()));
cudaErrcheck(cudaMalloc((void**)&array_int, sizeof(int) * (sec_a.size() + nloc * sec_a.size() + nloc)));
cudaErrcheck(cudaMalloc((void**)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
#ifdef HIGH_PREC
cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
#else
cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
#endif
ilist_size = nlist.ilist.size();
jrange_size = nlist.jrange.size();
jlist_size = nlist.jlist.size();
arr_int_size = sec_a.size() + nloc * sec_a.size() + nloc;
arr_ll_size = nloc * MAGIC_NUMBER * 2;
arr_dou_size = nloc * sec_a.back() * 3;
init_nbor = true;
}
if (ilist_size < nlist.ilist.size()) {
cudaErrcheck(cudaFree(ilist));
Expand All @@ -317,25 +301,7 @@ void NNPInter::update_nbor(const InternalNeighborList & nlist, const int nloc) {
cudaErrcheck(cudaMalloc((void**)&jlist, sizeof(int) * nlist.jlist.size()));
jlist_size = nlist.jlist.size();
}
if (arr_int_size < sec_a.size() + nloc * sec_a.size() + nloc) {
cudaErrcheck(cudaFree(array_int));
cudaErrcheck(cudaMalloc((void**)&array_int, sizeof(int) * (sec_a.size() + nloc * sec_a.size() + nloc)));
arr_int_size = sec_a.size() + nloc * sec_a.size() + nloc;
}
if (arr_ll_size < nloc * MAGIC_NUMBER * 2) {
cudaErrcheck(cudaFree(array_longlong));
cudaErrcheck(cudaMalloc((void**)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
arr_ll_size = nloc * MAGIC_NUMBER * 2;
}
if (arr_dou_size < nloc * sec_a.back() * 3) {
cudaErrcheck(cudaFree(array_double));
#ifdef HIGH_PREC
cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
#else
cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * sec_a.back() * 3));
#endif
arr_dou_size = nloc * sec_a.back() * 3;
}

cudaErrcheck(cudaMemcpy(ilist, &nlist.ilist[0], sizeof(int) * nlist.ilist.size(), cudaMemcpyHostToDevice));
cudaErrcheck(cudaMemcpy(jrange, &nlist.jrange[0], sizeof(int) * nlist.jrange.size(), cudaMemcpyHostToDevice));
cudaErrcheck(cudaMemcpy(jlist, &nlist.jlist[0], sizeof(int) * nlist.jlist.size(), cudaMemcpyHostToDevice));
Expand Down Expand Up @@ -378,14 +344,10 @@ init (const string & model, const int & gpu_rank)
if (dfparam < 0) dfparam = 0;
if (daparam < 0) daparam = 0;
inited = true;

init_nbor = false;
array_int = NULL;
array_double = NULL;
array_longlong = NULL;
ilist = NULL; jrange = NULL; jlist = NULL;
ilist_size = 0; jrange_size = 0; jlist_size = 0;
arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
}
#else
void
Expand Down Expand Up @@ -415,12 +377,8 @@ init (const string & model, const int & gpu_rank)
inited = true;

init_nbor = false;
array_int = NULL;
array_double = NULL;
array_longlong = NULL;
ilist = NULL; jrange = NULL; jlist = NULL;
ilist_size = 0; jrange_size = 0; jlist_size = 0;
arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
}
#endif

Expand Down Expand Up @@ -602,7 +560,7 @@ compute_inner (ENERGYTYPE & dener,
}

#ifdef USE_CUDA_TOOLKIT
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
#else
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
#endif
Expand Down Expand Up @@ -669,7 +627,7 @@ compute (ENERGYTYPE & dener,
}

#ifdef USE_CUDA_TOOLKIT
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
#else
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
#endif
Expand Down Expand Up @@ -710,9 +668,6 @@ NNPInterModelDevi::~NNPInterModelDevi() {
cudaErrcheck(cudaFree(ilist));
cudaErrcheck(cudaFree(jrange));
cudaErrcheck(cudaFree(jlist));
cudaErrcheck(cudaFree(array_int));
cudaErrcheck(cudaFree(array_longlong));
cudaErrcheck(cudaFree(array_double));
}
#endif
}
Expand Down Expand Up @@ -761,14 +716,10 @@ init (const vector<string> & models, const int & gpu_rank)
// cell_size = rcut;
// ntypes = get_ntypes();
inited = true;

init_nbor = false;
array_int = NULL;
array_double = NULL;
array_longlong = NULL;
ilist = NULL; jrange = NULL; jlist = NULL;
ilist_size = 0; jrange_size = 0; jlist_size = 0;
arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
}
#else
void
Expand Down Expand Up @@ -798,14 +749,10 @@ init (const vector<string> & models, const int & gpu_rank)
// cell_size = rcut;
// ntypes = get_ntypes();
inited = true;

init_nbor = false;
array_int = NULL;
array_double = NULL;
array_longlong = NULL;
ilist = NULL; jrange = NULL; jlist = NULL;
ilist_size = 0; jrange_size = 0; jlist_size = 0;
arr_int_size = 0; arr_ll_size = 0; arr_dou_size = 0;
}
#endif

Expand Down Expand Up @@ -873,41 +820,18 @@ cum_sum (const std::vector<std::vector<int32> > n_sel)
}
}

void
NNPInterModelDevi::
get_max_sec()
{
for (int ii = 0; ii < numb_models; ii++) {
this->max_sec_size = max_sec_size < sec[ii].size() ? sec[ii].size() : max_sec_size;
this->max_sec_back = max_sec_back < sec[ii].back() ? sec[ii].back() : max_sec_back;
}
}

#ifdef USE_CUDA_TOOLKIT
void
NNPInterModelDevi::
update_nbor(const InternalNeighborList & nlist, const int nloc)
{
if (!init_nbor) {
cum_sum(get_sel());
get_max_sec();
cudaErrcheck(cudaMalloc((void**)&ilist, sizeof(int) * nlist.ilist.size()));
cudaErrcheck(cudaMalloc((void**)&jrange, sizeof(int) * nlist.jrange.size()));
cudaErrcheck(cudaMalloc((void**)&jlist, sizeof(int) * nlist.jlist.size()));
cudaErrcheck(cudaMalloc((void**)&array_int, sizeof(int) * (max_sec_size + nloc * max_sec_size + nloc)));
cudaErrcheck(cudaMalloc((void**)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
#ifdef HIGH_PREC
cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
#else
cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
#endif
ilist_size = nlist.ilist.size();
jrange_size = nlist.jrange.size();
jlist_size = nlist.jlist.size();
arr_int_size = max_sec_size + nloc * max_sec_size + nloc;
arr_ll_size = nloc * MAGIC_NUMBER * 2;
arr_dou_size = nloc * max_sec_back * 3;
init_nbor = true;
}
if (ilist_size < nlist.ilist.size()) {
cudaErrcheck(cudaFree(ilist));
Expand All @@ -924,25 +848,7 @@ update_nbor(const InternalNeighborList & nlist, const int nloc)
cudaErrcheck(cudaMalloc((void**)&jlist, sizeof(int) * nlist.jlist.size()));
jlist_size = nlist.jlist.size();
}
if (arr_int_size < max_sec_size + nloc * max_sec_size + nloc) {
cudaErrcheck(cudaFree(array_int));
cudaErrcheck(cudaMalloc((void**)&array_int, sizeof(int) * (max_sec_size + nloc * max_sec_size + nloc)));
arr_int_size = max_sec_size + nloc * max_sec_size + nloc;
}
if (arr_ll_size < nloc * MAGIC_NUMBER * 2) {
cudaErrcheck(cudaFree(array_longlong));
cudaErrcheck(cudaMalloc((void**)&array_longlong, sizeof(unsigned long long) * nloc * MAGIC_NUMBER * 2));
arr_ll_size = nloc * MAGIC_NUMBER * 2;
}
if (arr_dou_size < nloc * max_sec_back * 3) {
cudaErrcheck(cudaFree(array_double));
#ifdef HIGH_PREC
cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
#else
cudaErrcheck(cudaMalloc((void**)&array_double, sizeof(compute_t) * nloc * max_sec_back * 3));
#endif
arr_dou_size = nloc * max_sec_back * 3;
}

cudaErrcheck(cudaMemcpy(ilist, &nlist.ilist[0], sizeof(int) * nlist.ilist.size(), cudaMemcpyHostToDevice));
cudaErrcheck(cudaMemcpy(jrange, &nlist.jrange[0], sizeof(int) * nlist.jrange.size(), cudaMemcpyHostToDevice));
cudaErrcheck(cudaMemcpy(jlist, &nlist.jlist[0], sizeof(int) * nlist.jlist.size(), cudaMemcpyHostToDevice));
Expand Down Expand Up @@ -1044,7 +950,7 @@ compute (vector<ENERGYTYPE> & all_energy,

}
#ifdef USE_CUDA_TOOLKIT
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
#else
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
#endif
Expand Down Expand Up @@ -1094,7 +1000,7 @@ compute (vector<ENERGYTYPE> & all_energy,

}
#ifdef USE_CUDA_TOOLKIT
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, array_int, array_longlong, array_double, fparam, aparam, nnpmap, nghost);
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, ilist, jrange, jlist, fparam, aparam, nnpmap, nghost);
#else
int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
#endif
Expand Down
10 changes: 2 additions & 8 deletions source/lib/src/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -479,9 +479,6 @@ session_input_tensors (
const int * ilist,
const int * jrange,
const int * jlist,
int * array_int,
unsigned long long * array_longlong,
double * array_double,
const vector<VALUETYPE> & fparam_,
const vector<VALUETYPE> & aparam_,
const NNPAtomMap<VALUETYPE> & nnpmap,
Expand Down Expand Up @@ -511,7 +508,7 @@ session_input_tensors (
box_shape.AddDim (nframes);
box_shape.AddDim (9);
TensorShape mesh_shape;
mesh_shape.AddDim (32);
mesh_shape.AddDim (16);
TensorShape natoms_shape;
natoms_shape.AddDim (2 + ntypes);
TensorShape fparam_shape;
Expand Down Expand Up @@ -565,7 +562,7 @@ session_input_tensors (
}
}

for (int ii = 0; ii < 32; ++ii) mesh(ii) = 0;
for (int ii = 0; ii < 16; ++ii) mesh(ii) = 0;

mesh (0) = sizeof(int *) / sizeof(int);
assert (mesh(0) * sizeof(int) == sizeof(int *));
Expand All @@ -577,9 +574,6 @@ session_input_tensors (
memcpy (&mesh(4), &(ilist), sizeof(int *));
memcpy (&mesh(8), &(jrange), sizeof(int *));
memcpy (&mesh(12), &(jlist), sizeof(int *));
memcpy (&mesh(16), &(array_int), sizeof(int *));
memcpy (&mesh(20), &(array_longlong), sizeof(unsigned long long *));
memcpy (&mesh(24), &(array_double), sizeof(double *));

natoms (0) = nloc;
natoms (1) = nall;
Expand Down
Loading