diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index 2d03635e00..3d502730de 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ -112,7 +112,7 @@ def __init__ (self, self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name=name_pfx+'t_natoms') self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name=name_pfx+'t_mesh') self.stat_descrpt, descrpt_deriv, rij, nlist \ - = op_module.descrpt_se_a(self.place_holders['coord'], + = op_module.prod_env_mat_a(self.place_holders['coord'], self.place_holders['type'], self.place_holders['natoms_vec'], self.place_holders['box'], @@ -340,7 +340,7 @@ def build (self, atype = tf.reshape (atype_, [-1, natoms[1]]) self.descrpt, self.descrpt_deriv, self.rij, self.nlist \ - = op_module.descrpt_se_a (coord, + = op_module.prod_env_mat_a (coord, atype, natoms, box, diff --git a/source/lib/include/neighbor_list.h b/source/lib/include/neighbor_list.h index 44a7fc417b..58d5f150ed 100644 --- a/source/lib/include/neighbor_list.h +++ b/source/lib/include/neighbor_list.h @@ -16,6 +16,9 @@ struct InputNlist int * ilist; int * numneigh; int ** firstneigh; + InputNlist () + : inum(0), ilist(NULL), numneigh(NULL), firstneigh(NULL) + {}; InputNlist ( int inum_, int * ilist_, @@ -23,7 +26,7 @@ struct InputNlist int ** firstneigh_ ) : inum(inum_), ilist(ilist_), numneigh(numneigh_), firstneigh(firstneigh_) - {} + {}; }; void convert_nlist( @@ -31,10 +34,12 @@ void convert_nlist( std::vector > & from_nlist ); +int max_numneigh( + const InputNlist & to_nlist + ); // build neighbor list. // outputs - // nlist, max_list_size // max_list_size is the maximal size of jlist. // inputs diff --git a/source/lib/include/prod_env_mat.h b/source/lib/include/prod_env_mat.h index 2b09d07d22..ff12ba1710 100644 --- a/source/lib/include/prod_env_mat.h +++ b/source/lib/include/prod_env_mat.h @@ -1,6 +1,7 @@ #pragma once #include #include "device.h" +#include "neighbor_list.h" template void prod_env_mat_a_cpu( @@ -10,9 +11,7 @@ void prod_env_mat_a_cpu( int * nlist, const FPTYPE * coord, const int * type, - const int * ilist, - const int * jrange, - const int * jlist, + const InputNlist & inlist, const int max_nbor_size, const FPTYPE * avg, const FPTYPE * std, @@ -30,9 +29,7 @@ void prod_env_mat_r_cpu( int * nlist, const FPTYPE * coord, const int * type, - const int * ilist, - const int * jrange, - const int * jlist, + const InputNlist & inlist, const int max_nbor_size, const FPTYPE * avg, const FPTYPE * std, diff --git a/source/lib/src/coord.cc b/source/lib/src/coord.cc index 5066eac2a5..fb57b2c403 100644 --- a/source/lib/src/coord.cc +++ b/source/lib/src/coord.cc @@ -33,10 +33,11 @@ copy_coord_cpu( const FPTYPE * in_c, const int * in_t, const int & nloc, - const int & mem_nall, + const int & mem_nall_, const float & rcut, const Region & region) { + const int mem_nall = mem_nall_; std::vector coord(nloc * 3); std::vector atype(nloc); std::copy(in_c, in_c+nloc*3, coord.begin()); diff --git a/source/lib/src/neighbor_list.cc b/source/lib/src/neighbor_list.cc index 0643037e3f..6a603dc94e 100644 --- a/source/lib/src/neighbor_list.cc +++ b/source/lib/src/neighbor_list.cc @@ -757,6 +757,18 @@ convert_nlist( } } +int +max_numneigh( + const InputNlist & nlist + ) +{ + int max_num = 0; + for(int ii = 0; ii < nlist.inum; ++ii){ + if(nlist.numneigh[ii] > max_num) max_num = nlist.numneigh[ii]; + } + return max_num; +} + template int build_nlist_cpu( @@ -765,9 +777,10 @@ build_nlist_cpu( const FPTYPE * c_cpy, const int & nloc, const int & nall, - const int & mem_size, + const int & mem_size_, const float & rcut) { + const int mem_size = mem_size_; *max_list_size = 0; nlist.inum = nloc; FPTYPE rcut2 = rcut * rcut; diff --git a/source/lib/src/prod_env_mat.cc b/source/lib/src/prod_env_mat.cc index ce052b3df0..6bad611ed7 100644 --- a/source/lib/src/prod_env_mat.cc +++ b/source/lib/src/prod_env_mat.cc @@ -13,9 +13,7 @@ void prod_env_mat_a_cpu( int * nlist, const FPTYPE * coord, const int * type, - const int * ilist, - const int * jrange, - const int * jlist, + const InputNlist & inlist, const int max_nbor_size, const FPTYPE * avg, const FPTYPE * std, @@ -45,13 +43,14 @@ void prod_env_mat_a_cpu( // build nlist std::vector > d_nlist_a(nloc); + assert(nloc == inlist.inum); for (unsigned ii = 0; ii < nloc; ++ii) { - d_nlist_a.reserve (jrange[nloc] / nloc + 10); + d_nlist_a[ii].reserve(max_nbor_size); } for (unsigned ii = 0; ii < nloc; ++ii) { - int i_idx = ilist[ii]; - for (unsigned jj = jrange[ii]; jj < jrange[ii+1]; ++jj) { - int j_idx = jlist[jj]; + int i_idx = inlist.ilist[ii]; + for(unsigned jj = 0; jj < inlist.numneigh[ii]; ++jj){ + int j_idx = inlist.firstneigh[ii][jj]; d_nlist_a[i_idx].push_back (j_idx); } } @@ -96,9 +95,7 @@ void prod_env_mat_r_cpu( int * nlist, const FPTYPE * coord, const int * type, - const int * ilist, - const int * jrange, - const int * jlist, + const InputNlist & inlist, const int max_nbor_size, const FPTYPE * avg, const FPTYPE * std, @@ -128,13 +125,14 @@ void prod_env_mat_r_cpu( // build nlist std::vector > d_nlist_a(nloc); + assert(nloc == inlist.inum); for (unsigned ii = 0; ii < nloc; ++ii) { - d_nlist_a.reserve (jrange[nloc] / nloc + 10); + d_nlist_a[ii].reserve(max_nbor_size); } for (unsigned ii = 0; ii < nloc; ++ii) { - int i_idx = ilist[ii]; - for (unsigned jj = jrange[ii]; jj < jrange[ii+1]; ++jj) { - int j_idx = jlist[jj]; + int i_idx = inlist.ilist[ii]; + for(unsigned jj = 0; jj < inlist.numneigh[ii]; ++jj){ + int j_idx = inlist.firstneigh[ii][jj]; d_nlist_a[i_idx].push_back (j_idx); } } @@ -180,9 +178,7 @@ void prod_env_mat_a_cpu( int * nlist, const double * coord, const int * type, - const int * ilist, - const int * jrange, - const int * jlist, + const InputNlist & inlist, const int max_nbor_size, const double * avg, const double * std, @@ -200,9 +196,7 @@ void prod_env_mat_a_cpu( int * nlist, const float * coord, const int * type, - const int * ilist, - const int * jrange, - const int * jlist, + const InputNlist & inlist, const int max_nbor_size, const float * avg, const float * std, @@ -220,9 +214,7 @@ void prod_env_mat_r_cpu( int * nlist, const double * coord, const int * type, - const int * ilist, - const int * jrange, - const int * jlist, + const InputNlist & inlist, const int max_nbor_size, const double * avg, const double * std, @@ -240,9 +232,7 @@ void prod_env_mat_r_cpu( int * nlist, const float * coord, const int * type, - const int * ilist, - const int * jrange, - const int * jlist, + const InputNlist & inlist, const int max_nbor_size, const float * avg, const float * std, diff --git a/source/lib/tests/test_env_mat_a.cc b/source/lib/tests/test_env_mat_a.cc index 85d66ec5dd..1e630ecd7d 100644 --- a/source/lib/tests/test_env_mat_a.cc +++ b/source/lib/tests/test_env_mat_a.cc @@ -397,15 +397,11 @@ TEST_F(TestEnvMatA, prod_cpu) max_nbor_size = nlist_a_cpy[ii].size(); } } - std::vector ilist(nloc), jlist(tot_nnei), jrange(nloc+1, 0); - for (int ii = 0; ii < nloc; ++ii){ - ilist[ii] = ii; - jrange[ii+1] = jrange[ii] + nlist_a_cpy[ii].size(); - int jj, cc; - for (jj = jrange[ii], cc = 0; jj < jrange[ii+1]; ++jj, ++cc){ - jlist[jj] = nlist_a_cpy[ii][cc]; - } - } + std::vector ilist(nloc), numneigh(nloc); + std::vector firstneigh(nloc); + InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]); + convert_nlist(inlist, nlist_a_cpy); + std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3); std::vector nlist(nloc * nnei); std::vector avg(ntypes * ndescrpt, 0); @@ -417,9 +413,7 @@ TEST_F(TestEnvMatA, prod_cpu) &nlist[0], &posi_cpy[0], &atype_cpy[0], - &ilist[0], - &jrange[0], - &jlist[0], + inlist, max_nbor_size, &avg[0], &std[0], @@ -452,15 +446,10 @@ TEST_F(TestEnvMatA, prod_cpu_equal_cpu) max_nbor_size = nlist_a_cpy[ii].size(); } } - std::vector ilist(nloc), jlist(tot_nnei), jrange(nloc+1, 0); - for (int ii = 0; ii < nloc; ++ii){ - ilist[ii] = ii; - jrange[ii+1] = jrange[ii] + nlist_a_cpy[ii].size(); - int jj, cc; - for (jj = jrange[ii], cc = 0; jj < jrange[ii+1]; ++jj, ++cc){ - jlist[jj] = nlist_a_cpy[ii][cc]; - } - } + std::vector ilist(nloc), numneigh(nloc); + std::vector firstneigh(nloc); + InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]); + convert_nlist(inlist, nlist_a_cpy); std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3); std::vector nlist(nloc * nnei); std::vector avg(ntypes * ndescrpt, 0); @@ -472,9 +461,7 @@ TEST_F(TestEnvMatA, prod_cpu_equal_cpu) &nlist[0], &posi_cpy[0], &atype_cpy[0], - &ilist[0], - &jrange[0], - &jlist[0], + inlist, max_nbor_size, &avg[0], &std[0], diff --git a/source/lib/tests/test_env_mat_r.cc b/source/lib/tests/test_env_mat_r.cc index d70e5ef0fe..153c10d582 100644 --- a/source/lib/tests/test_env_mat_r.cc +++ b/source/lib/tests/test_env_mat_r.cc @@ -249,15 +249,11 @@ TEST_F(TestEnvMatR, prod_cpu) max_nbor_size = nlist_a_cpy[ii].size(); } } - std::vector ilist(nloc), jlist(tot_nnei), jrange(nloc+1, 0); - for (int ii = 0; ii < nloc; ++ii){ - ilist[ii] = ii; - jrange[ii+1] = jrange[ii] + nlist_a_cpy[ii].size(); - int jj, cc; - for (jj = jrange[ii], cc = 0; jj < jrange[ii+1]; ++jj, ++cc){ - jlist[jj] = nlist_a_cpy[ii][cc]; - } - } + std::vector ilist(nloc), numneigh(nloc); + std::vector firstneigh(nloc); + InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]); + convert_nlist(inlist, nlist_a_cpy); + std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3); std::vector nlist(nloc * nnei); std::vector avg(ntypes * ndescrpt, 0); @@ -269,9 +265,7 @@ TEST_F(TestEnvMatR, prod_cpu) &nlist[0], &posi_cpy[0], &atype_cpy[0], - &ilist[0], - &jrange[0], - &jlist[0], + inlist, max_nbor_size, &avg[0], &std[0], @@ -304,15 +298,10 @@ TEST_F(TestEnvMatR, prod_cpu_equal_cpu) max_nbor_size = nlist_a_cpy[ii].size(); } } - std::vector ilist(nloc), jlist(tot_nnei), jrange(nloc+1, 0); - for (int ii = 0; ii < nloc; ++ii){ - ilist[ii] = ii; - jrange[ii+1] = jrange[ii] + nlist_a_cpy[ii].size(); - int jj, cc; - for (jj = jrange[ii], cc = 0; jj < jrange[ii+1]; ++jj, ++cc){ - jlist[jj] = nlist_a_cpy[ii][cc]; - } - } + std::vector ilist(nloc), numneigh(nloc); + std::vector firstneigh(nloc); + InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]); + convert_nlist(inlist, nlist_a_cpy); std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3); std::vector nlist(nloc * nnei); std::vector avg(ntypes * ndescrpt, 0); @@ -324,9 +313,7 @@ TEST_F(TestEnvMatR, prod_cpu_equal_cpu) &nlist[0], &posi_cpy[0], &atype_cpy[0], - &ilist[0], - &jrange[0], - &jlist[0], + inlist, max_nbor_size, &avg[0], &std[0], diff --git a/source/lib/tests/test_pair_tab.cc b/source/lib/tests/test_pair_tab.cc index 867956e7ae..0c7cf5ea6f 100644 --- a/source/lib/tests/test_pair_tab.cc +++ b/source/lib/tests/test_pair_tab.cc @@ -160,34 +160,34 @@ TEST_F(TestPairTab, cpu) } -int make_inter_nlist( - std::vector &ilist, - std::vector &jrange, - std::vector &jlist, - const int & nloc, - const std::vector> & nlist_cpy) -{ - ilist.resize(nloc); - jrange.resize(nloc+1); - int tot_nnei = 0; - int max_nbor_size = 0; - for(int ii = 0; ii < nlist_cpy.size(); ++ii){ - tot_nnei += nlist_cpy[ii].size(); - if (nlist_cpy[ii].size() > max_nbor_size){ - max_nbor_size = nlist_cpy[ii].size(); - } - } - jlist.resize(tot_nnei); - for (int ii = 0; ii < nloc; ++ii){ - ilist[ii] = ii; - jrange[ii+1] = jrange[ii] + nlist_cpy[ii].size(); - int jj, cc; - for (jj = jrange[ii], cc = 0; jj < jrange[ii+1]; ++jj, ++cc){ - jlist[jj] = nlist_cpy[ii][cc]; - } - } - return max_nbor_size; -} +// int make_inter_nlist( +// std::vector &ilist, +// std::vector &jrange, +// std::vector &jlist, +// const int & nloc, +// const std::vector> & nlist_cpy) +// { +// ilist.resize(nloc); +// jrange.resize(nloc+1); +// int tot_nnei = 0; +// int max_nbor_size = 0; +// for(int ii = 0; ii < nlist_cpy.size(); ++ii){ +// tot_nnei += nlist_cpy[ii].size(); +// if (nlist_cpy[ii].size() > max_nbor_size){ +// max_nbor_size = nlist_cpy[ii].size(); +// } +// } +// jlist.resize(tot_nnei); +// for (int ii = 0; ii < nloc; ++ii){ +// ilist[ii] = ii; +// jrange[ii+1] = jrange[ii] + nlist_cpy[ii].size(); +// int jj, cc; +// for (jj = jrange[ii], cc = 0; jj < jrange[ii+1]; ++jj, ++cc){ +// jlist[jj] = nlist_cpy[ii][cc]; +// } +// } +// return max_nbor_size; +// } TEST_F(TestPairTab, cpu_f_num_deriv) @@ -240,18 +240,22 @@ TEST_F(TestPairTab, cpu_f_num_deriv) std::vector> nlist_cpy_0, nlist_cpy_1, t_nlist; build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell); build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell); - std::vector ilist_0, jlist_0, jrange_0; - std::vector ilist_1, jlist_1, jrange_1; - int max_nnei_0 = make_inter_nlist(ilist_0, jrange_0, jlist_0, nloc, nlist_cpy_0); - int max_nnei_1 = make_inter_nlist(ilist_1, jrange_1, jlist_1, nloc, nlist_cpy_1); + std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc), numneigh_1(nloc);; + std::vector firstneigh_0(nloc), firstneigh_1(nloc); + InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0], &firstneigh_0[0]); + InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0], &firstneigh_1[0]); + convert_nlist(inlist_0, nlist_cpy_0); + convert_nlist(inlist_1, nlist_cpy_1); + int max_nnei_0 = max_numneigh(inlist_0); + int max_nnei_1 = max_numneigh(inlist_1); EXPECT_EQ(max_nnei_0, max_nnei_1); std::vector t_em(nloc * ndescrpt), t_em_deriv(nloc * ndescrpt * 3); std::vector rij_0(nloc * nnei * 3), rij_1(nloc * nnei * 3); std::vector nlist_0(nloc * nnei), nlist_1(nloc * nnei); std::vector avg(ntypes * ndescrpt, 0); std::vector std(ntypes * ndescrpt, 1); - prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], &ilist_0[0], &jrange_0[0], &jlist_0[0], max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); - prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], &ilist_1[0], &jrange_1[0], &jlist_1[0], max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); + prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], inlist_0, max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); + prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], inlist_1, max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); std::vector energy_0(nloc), energy_1(nloc); std::vector t_force(nall * 3), t_virial(nall * 9); pair_tab_cpu( @@ -344,18 +348,22 @@ TEST_F(TestPairTab, cpu_f_num_deriv_scale) std::vector> nlist_cpy_0, nlist_cpy_1, t_nlist; build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell); build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell); - std::vector ilist_0, jlist_0, jrange_0; - std::vector ilist_1, jlist_1, jrange_1; - int max_nnei_0 = make_inter_nlist(ilist_0, jrange_0, jlist_0, nloc, nlist_cpy_0); - int max_nnei_1 = make_inter_nlist(ilist_1, jrange_1, jlist_1, nloc, nlist_cpy_1); + std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc), numneigh_1(nloc);; + std::vector firstneigh_0(nloc), firstneigh_1(nloc); + InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0], &firstneigh_0[0]); + InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0], &firstneigh_1[0]); + convert_nlist(inlist_0, nlist_cpy_0); + convert_nlist(inlist_1, nlist_cpy_1); + int max_nnei_0 = max_numneigh(inlist_0); + int max_nnei_1 = max_numneigh(inlist_1); EXPECT_EQ(max_nnei_0, max_nnei_1); std::vector t_em(nloc * ndescrpt), t_em_deriv(nloc * ndescrpt * 3); std::vector rij_0(nloc * nnei * 3), rij_1(nloc * nnei * 3); std::vector nlist_0(nloc * nnei), nlist_1(nloc * nnei); std::vector avg(ntypes * ndescrpt, 0); std::vector std(ntypes * ndescrpt, 1); - prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], &ilist_0[0], &jrange_0[0], &jlist_0[0], max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); - prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], &ilist_1[0], &jrange_1[0], &jlist_1[0], max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); + prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], inlist_0, max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); + prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], inlist_1, max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); std::vector energy_0(nloc), energy_1(nloc); std::vector t_force(nall * 3), t_virial(nall * 9); pair_tab_cpu( @@ -461,18 +469,22 @@ TEST_F(TestPairTab, cpu_v_num_deriv) std::vector> nlist_cpy_0, nlist_cpy_1, t_nlist; build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_0, ncell); build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_1, ncell); - std::vector ilist_0, jlist_0, jrange_0; - std::vector ilist_1, jlist_1, jrange_1; - int max_nnei_0 = make_inter_nlist(ilist_0, jrange_0, jlist_0, nloc, nlist_cpy_0); - int max_nnei_1 = make_inter_nlist(ilist_1, jrange_1, jlist_1, nloc, nlist_cpy_1); + std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc), numneigh_1(nloc);; + std::vector firstneigh_0(nloc), firstneigh_1(nloc); + InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0], &firstneigh_0[0]); + InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0], &firstneigh_1[0]); + convert_nlist(inlist_0, nlist_cpy_0); + convert_nlist(inlist_1, nlist_cpy_1); + int max_nnei_0 = max_numneigh(inlist_0); + int max_nnei_1 = max_numneigh(inlist_1); EXPECT_EQ(max_nnei_0, max_nnei_1); std::vector t_em(nloc * ndescrpt), t_em_deriv(nloc * ndescrpt * 3); std::vector rij_0(nloc * nnei * 3), rij_1(nloc * nnei * 3); std::vector nlist_0(nloc * nnei), nlist_1(nloc * nnei); std::vector avg(ntypes * ndescrpt, 0); std::vector std(ntypes * ndescrpt, 1); - prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], &ilist_0[0], &jrange_0[0], &jlist_0[0], max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); - prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], &ilist_1[0], &jrange_1[0], &jlist_1[0], max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); + prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], inlist_0, max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); + prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], inlist_1, max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); std::vector energy_0(nloc), energy_1(nloc); std::vector t_force(nall * 3), t_virial(nall * 9); pair_tab_cpu( @@ -589,18 +601,22 @@ TEST_F(TestPairTab, cpu_v_num_deriv_scale) std::vector> nlist_cpy_0, nlist_cpy_1, t_nlist; build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_0, ncell); build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_1, ncell); - std::vector ilist_0, jlist_0, jrange_0; - std::vector ilist_1, jlist_1, jrange_1; - int max_nnei_0 = make_inter_nlist(ilist_0, jrange_0, jlist_0, nloc, nlist_cpy_0); - int max_nnei_1 = make_inter_nlist(ilist_1, jrange_1, jlist_1, nloc, nlist_cpy_1); + std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc), numneigh_1(nloc);; + std::vector firstneigh_0(nloc), firstneigh_1(nloc); + InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0], &firstneigh_0[0]); + InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0], &firstneigh_1[0]); + convert_nlist(inlist_0, nlist_cpy_0); + convert_nlist(inlist_1, nlist_cpy_1); + int max_nnei_0 = max_numneigh(inlist_0); + int max_nnei_1 = max_numneigh(inlist_1); EXPECT_EQ(max_nnei_0, max_nnei_1); std::vector t_em(nloc * ndescrpt), t_em_deriv(nloc * ndescrpt * 3); std::vector rij_0(nloc * nnei * 3), rij_1(nloc * nnei * 3); std::vector nlist_0(nloc * nnei), nlist_1(nloc * nnei); std::vector avg(ntypes * ndescrpt, 0); std::vector std(ntypes * ndescrpt, 1); - prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], &ilist_0[0], &jrange_0[0], &jlist_0[0], max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); - prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], &ilist_1[0], &jrange_1[0], &jlist_1[0], max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); + prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], inlist_0, max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); + prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], inlist_1, max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); std::vector energy_0(nloc), energy_1(nloc); std::vector t_force(nall * 3), t_virial(nall * 9); pair_tab_cpu( @@ -717,18 +733,22 @@ TEST_F(TestPairTabTriBox, cpu_v_num_deriv) std::vector> nlist_cpy_0, nlist_cpy_1, t_nlist; build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_0, ncell); build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_1, ncell); - std::vector ilist_0, jlist_0, jrange_0; - std::vector ilist_1, jlist_1, jrange_1; - int max_nnei_0 = make_inter_nlist(ilist_0, jrange_0, jlist_0, nloc, nlist_cpy_0); - int max_nnei_1 = make_inter_nlist(ilist_1, jrange_1, jlist_1, nloc, nlist_cpy_1); + std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc), numneigh_1(nloc);; + std::vector firstneigh_0(nloc), firstneigh_1(nloc); + InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0], &firstneigh_0[0]); + InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0], &firstneigh_1[0]); + convert_nlist(inlist_0, nlist_cpy_0); + convert_nlist(inlist_1, nlist_cpy_1); + int max_nnei_0 = max_numneigh(inlist_0); + int max_nnei_1 = max_numneigh(inlist_1); EXPECT_EQ(max_nnei_0, max_nnei_1); std::vector t_em(nloc * ndescrpt), t_em_deriv(nloc * ndescrpt * 3); std::vector rij_0(nloc * nnei * 3), rij_1(nloc * nnei * 3); std::vector nlist_0(nloc * nnei), nlist_1(nloc * nnei); std::vector avg(ntypes * ndescrpt, 0); std::vector std(ntypes * ndescrpt, 1); - prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], &ilist_0[0], &jrange_0[0], &jlist_0[0], max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); - prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], &ilist_1[0], &jrange_1[0], &jlist_1[0], max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); + prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], inlist_0, max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); + prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], inlist_1, max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a); std::vector energy_0(nloc), energy_1(nloc); std::vector t_force(nall * 3), t_virial(nall * 9); pair_tab_cpu( diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt index 67ffc7f844..ced3cd0bf5 100644 --- a/source/op/CMakeLists.txt +++ b/source/op/CMakeLists.txt @@ -3,7 +3,7 @@ set(OP_LIB ${PROJECT_SOURCE_DIR}/lib/src/SimulationRegion.cpp ${PROJECT_SOURCE_DIR}/lib/src/neighbor_list.cc) set (OP_CXX_FLAG -D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI} ) -file(GLOB OP_SRC prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a.cc descrpt_se_a_ef.cc descrpt_se_a_ef.cc descrpt_se_a_ef_para.cc descrpt_se_a_ef_vert.cc descrpt_se_r.cc pair_tab.cc prod_force_se_a.cc prod_virial_se_a.cc prod_force_se_r.cc prod_virial_se_r.cc soft_min.cc soft_min_force.cc soft_min_virial.cc ewald_recp.cc gelu_multi_device.cc map_aparam.cc neighbor_stat.cc unaggregated_grad.cc tabulate_multi_device.cc) +file(GLOB OP_SRC prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a.cc descrpt_se_a_ef.cc descrpt_se_a_ef.cc descrpt_se_a_ef_para.cc descrpt_se_a_ef_vert.cc descrpt_se_r.cc pair_tab.cc prod_force_se_a.cc prod_virial_se_a.cc prod_force_se_r.cc prod_virial_se_r.cc soft_min.cc soft_min_force.cc soft_min_virial.cc ewald_recp.cc gelu_multi_device.cc map_aparam.cc neighbor_stat.cc unaggregated_grad.cc tabulate_multi_device.cc prod_env_mat_multi_device.cc) file(GLOB OP_CUDA_SRC prod_force.cc prod_virial.cc descrpt.cc prod_env_mat_multi_device.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc gelu_multi_device.cc tabulate_multi_device.cc) file(GLOB OP_GRADS_SRC prod_force_grad.cc prod_force_se_a_grad.cc prod_force_se_r_grad.cc prod_virial_grad.cc prod_virial_se_a_grad.cc prod_virial_se_r_grad.cc soft_min_force_grad.cc soft_min_virial_grad.cc ) file(GLOB OP_PY *.py) diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc index a50461a274..42310ed30b 100644 --- a/source/op/prod_env_mat_multi_device.cc +++ b/source/op/prod_env_mat_multi_device.cc @@ -1,8 +1,11 @@ #include "custom_op.h" #include "utilities.h" +#include "coord.h" +#include "region.h" +#include "neighbor_list.h" #include "prod_env_mat.h" -REGISTER_OP("DescrptSeA") +REGISTER_OP("ProdEnvMatA") .Attr("T: {float, double}") .Input("coord: T") //atomic coordinates .Input("type: int32") //atomic type @@ -22,7 +25,7 @@ REGISTER_OP("DescrptSeA") .Output("nlist: int32"); // only sel_a and rcut_r uesd. -REGISTER_OP("DescrptSeR") +REGISTER_OP("ProdEnvMatR") .Attr("T: {float, double}") .Input("coord: T") .Input("type: int32") @@ -39,10 +42,49 @@ REGISTER_OP("DescrptSeR") .Output("rij: T") .Output("nlist: int32"); + +template +static int +_norm_copy_coord_cpu( + std::vector & coord_cpy, + std::vector & type_cpy, + std::vector & mapping, + int & nall, + int & mem_cpy, + const FPTYPE * coord, + const FPTYPE * box, + const int * type, + const int &nloc, + const int &max_cpy_trial, + const float & rcut_r); + +template +static int +_build_nlist_cpu( + std::vector &ilist, + std::vector &numneigh, + std::vector &firstneigh, + std::vector> &jlist, + int & max_nnei, + int & mem_nnei, + const FPTYPE *coord, + const int & nloc, + const int & new_nall, + const int & max_nnei_trial, + const float & rcut_r); + +static void +_map_nlist_cpu( + int * nlist, + const int * idx_mapping, + const int & nloc, + const int & nnei); + + template -class DescrptSeAOp : public OpKernel { +class ProdEnvMatAOp : public OpKernel { public: - explicit DescrptSeAOp(OpKernelConstruction* context) : OpKernel(context) { + explicit ProdEnvMatAOp(OpKernelConstruction* context) : OpKernel(context) { float nloc_f, nall_f; OP_REQUIRES_OK(context, context->GetAttr("rcut_a", &rcut_a)); OP_REQUIRES_OK(context, context->GetAttr("rcut_r", &rcut_r)); @@ -60,6 +102,10 @@ class DescrptSeAOp : public OpKernel { nnei_r = sec_r.back(); nnei = nnei_a + nnei_r; max_nbor_size = 1024; + max_cpy_trial = 100; + mem_cpy = 256; + max_nnei_trial = 100; + mem_nnei = 256; } void Compute(OpKernelContext* context) override { @@ -105,6 +151,28 @@ class DescrptSeAOp : public OpKernel { OP_REQUIRES (context, (ntypes == int(sel_a.size())), errors::InvalidArgument ("number of types should match the length of sel array")); OP_REQUIRES (context, (ntypes == int(sel_r.size())), errors::InvalidArgument ("number of types should match the length of sel array")); + + int nei_mode = 0; + bool b_nlist_map = false; + if (mesh_tensor.shape().dim_size(0) == 16) { + // lammps neighbor list + nei_mode = 3; + } + else if (mesh_tensor.shape().dim_size(0) == 6) { + // manual copied pbc + assert (nloc == nall); + nei_mode = 1; + b_nlist_map = true; + } + else if (mesh_tensor.shape().dim_size(0) == 0) { + // no pbc + assert (nloc == nall); + nei_mode = -1; + } + else { + throw std::runtime_error("invalid mesh tensor"); + } + // Create output tensors TensorShape descrpt_shape ; descrpt_shape.AddDim (nsamples); @@ -141,16 +209,29 @@ class DescrptSeAOp : public OpKernel { nlist_shape, &nlist_tensor)); - FPTYPE * em = descrpt_tensor->flat().data(); - FPTYPE * em_deriv = descrpt_deriv_tensor->flat().data(); - FPTYPE * rij = rij_tensor->flat().data(); - int * nlist = nlist_tensor->flat().data(); - const FPTYPE * coord = coord_tensor.flat().data(); + FPTYPE * p_em = descrpt_tensor->flat().data(); + FPTYPE * p_em_deriv = descrpt_deriv_tensor->flat().data(); + FPTYPE * p_rij = rij_tensor->flat().data(); + int * p_nlist = nlist_tensor->flat().data(); + const FPTYPE * p_coord = coord_tensor.flat().data(); + const FPTYPE * p_box = box_tensor.flat().data(); const FPTYPE * avg = avg_tensor.flat().data(); const FPTYPE * std = std_tensor.flat().data(); - const int * type = type_tensor.flat().data(); + const int * p_type = type_tensor.flat().data(); + + // loop over samples + for(int ff = 0; ff < nsamples; ++ff){ + FPTYPE * em = p_em + ff*nloc*ndescrpt; + FPTYPE * em_deriv = p_em_deriv + ff*nloc*ndescrpt*3; + FPTYPE * rij = p_rij + ff*nloc*nnei*3; + int * nlist = p_nlist + ff*nloc*nnei; + const FPTYPE * coord = p_coord + ff*nall*3; + const FPTYPE * box = p_box + ff*9; + const int * type = p_type + ff*nall; + std::vector idx_mapping; if(device == "GPU") { + #if GOOGLE_CUDA // allocate temp memory, temp memory must not be used after this operation! Tensor int_temp; TensorShape int_shape; @@ -166,7 +247,6 @@ class DescrptSeAOp : public OpKernel { init, ilist, jrange, jlist, ilist_size, jrange_size, jlist_size, max_nbor_size, mesh_tensor.flat().data(), static_cast(mesh_tensor.NumElements())); OP_REQUIRES (context, (max_nbor_size <= GPU_MAX_NBOR_SIZE), errors::InvalidArgument ("Assert failed, max neighbor size of atom(lammps) " + std::to_string(max_nbor_size) + " is larger than " + std::to_string(GPU_MAX_NBOR_SIZE) + ", which currently is not supported by deepmd-kit.")); - #if GOOGLE_CUDA // launch the gpu(nv) compute function prod_env_mat_a_gpu_cuda( em, em_deriv, rij, nlist, @@ -174,13 +254,49 @@ class DescrptSeAOp : public OpKernel { #endif //GOOGLE_CUDA } else if (device == "CPU") { - memcpy (&ilist, 4 + mesh_tensor.flat().data(), sizeof(int *)); - memcpy (&jrange, 8 + mesh_tensor.flat().data(), sizeof(int *)); - memcpy (&jlist, 12 + mesh_tensor.flat().data(), sizeof(int *)); + int new_nall = nall; + InputNlist inlist; + inlist.inum = nloc; + // some buffers, be free after prod_env_mat_a_cpu + std::vector ilist(nloc), numneigh(nloc); + std::vector firstneigh(nloc); + std::vector> jlist(nloc); + std::vector coord_cpy; + std::vector type_cpy; + if(nei_mode != 3){ + // build nlist by myself + // normalize and copy coord + if(nei_mode == 1){ + int copy_ok = _norm_copy_coord_cpu( + coord_cpy, type_cpy, idx_mapping, new_nall, mem_cpy, + coord, box, type, nloc, max_cpy_trial, rcut_r); + OP_REQUIRES (context, copy_ok, errors::Aborted("cannot allocate mem for copied coords")); + coord = &coord_cpy[0]; + type = &type_cpy[0]; + } + // build nlist + int build_ok = _build_nlist_cpu( + ilist, numneigh, firstneigh, jlist, max_nbor_size, mem_nnei, + coord, nloc, new_nall, max_nnei_trial, rcut_r); + OP_REQUIRES (context, build_ok, errors::Aborted("cannot allocate mem for nlist")); + inlist.ilist = &ilist[0]; + inlist.numneigh = &numneigh[0]; + inlist.firstneigh = &firstneigh[0]; + } + else{ + // copy pointers to nlist data + memcpy(&inlist.ilist, 4 + mesh_tensor.flat().data(), sizeof(int *)); + memcpy(&inlist.numneigh, 8 + mesh_tensor.flat().data(), sizeof(int *)); + memcpy(&inlist.firstneigh, 12 + mesh_tensor.flat().data(), sizeof(int **)); + max_nbor_size = max_numneigh(inlist); + } // launch the cpu compute function prod_env_mat_a_cpu( - em, em_deriv, rij, nlist, - coord, type, ilist, jrange, jlist, max_nbor_size, avg, std, nloc, nall, rcut_r, rcut_r_smth, sec_a); + em, em_deriv, rij, nlist, + coord, type, inlist, max_nbor_size, avg, std, nloc, new_nall, rcut_r, rcut_r_smth, sec_a); + // do nlist mapping if coords were copied + if(b_nlist_map) _map_nlist_cpu(nlist, &idx_mapping[0], nloc, nnei); + } } } @@ -195,6 +311,8 @@ class DescrptSeAOp : public OpKernel { std::vector sec_r; int ndescrpt, ndescrpt_a, ndescrpt_r; int nnei, nnei_a, nnei_r, nloc, nall, max_nbor_size; + int mem_cpy, max_cpy_trial; + int mem_nnei, max_nnei_trial; std::string device; int * array_int = NULL; unsigned long long * array_longlong = NULL; @@ -204,9 +322,9 @@ class DescrptSeAOp : public OpKernel { }; template -class DescrptSeROp : public OpKernel { +class ProdEnvMatROp : public OpKernel { public: - explicit DescrptSeROp(OpKernelConstruction* context) : OpKernel(context) { + explicit ProdEnvMatROp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("rcut", &rcut)); OP_REQUIRES_OK(context, context->GetAttr("rcut_smth", &rcut_smth)); OP_REQUIRES_OK(context, context->GetAttr("sel", &sel)); @@ -304,6 +422,7 @@ class DescrptSeROp : public OpKernel { const int * type = type_tensor.flat().data(); if(device == "GPU") { + #if GOOGLE_CUDA // allocate temp memory, temp memory must not be used after this operation! Tensor int_temp; TensorShape int_shape; @@ -319,7 +438,6 @@ class DescrptSeROp : public OpKernel { init, ilist, jrange, jlist, ilist_size, jrange_size, jlist_size, max_nbor_size, mesh_tensor.flat().data(), static_cast(mesh_tensor.NumElements())); OP_REQUIRES (context, (max_nbor_size <= GPU_MAX_NBOR_SIZE), errors::InvalidArgument ("Assert failed, max neighbor size of atom(lammps) " + std::to_string(max_nbor_size) + " is larger than " + std::to_string(GPU_MAX_NBOR_SIZE) + ", which currently is not supported by deepmd-kit.")); - #if GOOGLE_CUDA // launch the gpu(nv) compute function prod_env_mat_r_gpu_cuda( em, em_deriv, rij, nlist, @@ -331,9 +449,9 @@ class DescrptSeROp : public OpKernel { memcpy (&jrange, 8 + mesh_tensor.flat().data(), sizeof(int *)); memcpy (&jlist, 12 + mesh_tensor.flat().data(), sizeof(int *)); // launch the cpu compute function - prod_env_mat_r_cpu( - em, em_deriv, rij, nlist, - coord, type, ilist, jrange, jlist, max_nbor_size, avg, std, nloc, nall, rcut, rcut_smth, sec); + // prod_env_mat_r_cpu( + // em, em_deriv, rij, nlist, + // coord, type, ilist, jrange, jlist, max_nbor_size, avg, std, nloc, nall, rcut, rcut_smth, sec); } } @@ -355,14 +473,106 @@ class DescrptSeROp : public OpKernel { int ilist_size = 0, jrange_size = 0, jlist_size = 0; }; +template +static int +_norm_copy_coord_cpu( + std::vector & coord_cpy, + std::vector & type_cpy, + std::vector & idx_mapping, + int & nall, + int & mem_cpy, + const FPTYPE * coord, + const FPTYPE * box, + const int * type, + const int &nloc, + const int &max_cpy_trial, + const float & rcut_r) +{ + std::vector tmp_coord(nall*3); + std::copy(coord, coord+nall*3, tmp_coord.begin()); + Region region; + init_region_cpu(region, box); + normalize_coord_cpu(&tmp_coord[0], nall, region); + int tt; + for(tt = 0; tt < max_cpy_trial; ++tt){ + coord_cpy.resize(mem_cpy*3); + type_cpy.resize(mem_cpy); + idx_mapping.resize(mem_cpy); + int ret = copy_coord_cpu( + &coord_cpy[0], &type_cpy[0], &idx_mapping[0], &nall, + &tmp_coord[0], type, nloc, mem_cpy, rcut_r, region); + if(ret == 0){ + break; + } + else{ + mem_cpy *= 2; + } + } + return (tt != max_cpy_trial); +} + +template +static int +_build_nlist_cpu( + std::vector &ilist, + std::vector &numneigh, + std::vector &firstneigh, + std::vector> &jlist, + int & max_nnei, + int & mem_nnei, + const FPTYPE *coord, + const int & nloc, + const int & new_nall, + const int & max_nnei_trial, + const float & rcut_r) +{ + int tt; + for(tt = 0; tt < max_nnei_trial; ++tt){ + for(int ii = 0; ii < nloc; ++ii){ + jlist[ii].resize(mem_nnei); + firstneigh[ii] = &jlist[ii][0]; + } + InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]); + int ret = build_nlist_cpu( + inlist, &max_nnei, + coord, nloc, new_nall, mem_nnei, rcut_r); + if(ret == 0){ + break; + } + else{ + mem_nnei *= 2; + } + } + return (tt != max_nnei_trial); +} + +static void +_map_nlist_cpu( + int * nlist, + const int * idx_mapping, + const int & nloc, + const int & nnei) +{ + for (int ii = 0; ii < nloc; ++ii){ + for (int jj = 0; jj < nnei; ++jj){ + int record = nlist[ii*nnei+jj]; + if (record >= 0) { + nlist[ii*nnei+jj] = idx_mapping[record]; + } + } + } +} + + + // Register the CPU kernels. #define REGISTER_CPU(T) \ REGISTER_KERNEL_BUILDER( \ - Name("DescrptSeA").Device(DEVICE_CPU).TypeConstraint("T"), \ - DescrptSeAOp); \ + Name("ProdEnvMatA").Device(DEVICE_CPU).TypeConstraint("T"), \ + ProdEnvMatAOp); \ REGISTER_KERNEL_BUILDER( \ - Name("DescrptSeR").Device(DEVICE_CPU).TypeConstraint("T"), \ - DescrptSeROp); + Name("ProdEnvMatR").Device(DEVICE_CPU).TypeConstraint("T"), \ + ProdEnvMatROp); REGISTER_CPU(float); REGISTER_CPU(double); @@ -370,11 +580,11 @@ REGISTER_CPU(double); #if GOOGLE_CUDA #define REGISTER_GPU(T) \ REGISTER_KERNEL_BUILDER( \ - Name("DescrptSeA").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \ - DescrptSeAOp); \ + Name("ProdEnvMatA").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \ + ProdEnvMatAOp); \ REGISTER_KERNEL_BUILDER( \ - Name("DescrptSeR").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \ - DescrptSeROp); + Name("ProdEnvMatR").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \ + ProdEnvMatROp); REGISTER_GPU(float); REGISTER_GPU(double); #endif // GOOGLE_CUDA diff --git a/source/tests/test_data_modifier.py b/source/tests/test_data_modifier.py index d6574e39c8..70999f5734 100644 --- a/source/tests/test_data_modifier.py +++ b/source/tests/test_data_modifier.py @@ -43,7 +43,7 @@ def _setUp(self): restart=None, init_model=None, log_path=None, - log_level=0, + log_level=30, mpi_log="master", try_distrib=False ) diff --git a/source/tests/test_data_modifier_shuffle.py b/source/tests/test_data_modifier_shuffle.py index 9057ba5836..39269c2ff5 100644 --- a/source/tests/test_data_modifier_shuffle.py +++ b/source/tests/test_data_modifier_shuffle.py @@ -48,7 +48,7 @@ def _setUp(self): restart=None, init_model=None, log_path=None, - log_level=0, + log_level=30, mpi_log="master", try_distrib=False ) diff --git a/source/tests/test_descrpt_smooth.py b/source/tests/test_descrpt_smooth.py index c9c796799a..6a72af7db5 100644 --- a/source/tests/test_descrpt_smooth.py +++ b/source/tests/test_descrpt_smooth.py @@ -78,7 +78,7 @@ def comp_ef (self, name, reuse = None) : descrpt, descrpt_deriv, rij, nlist \ - = op_module.descrpt_se_a (dcoord, + = op_module.prod_env_mat_a (dcoord, dtype, tnatoms, dbox, diff --git a/source/tests/test_prod_env_mat.py b/source/tests/test_prod_env_mat.py new file mode 100644 index 0000000000..347eb913ea --- /dev/null +++ b/source/tests/test_prod_env_mat.py @@ -0,0 +1,214 @@ +import os,sys +import numpy as np +import unittest + +import deepmd.op +from deepmd.env import tf +from deepmd.env import op_module +from deepmd.run_options import GLOBAL_TF_FLOAT_PRECISION +from deepmd.run_options import GLOBAL_NP_FLOAT_PRECISION +from deepmd.run_options import GLOBAL_ENER_FLOAT_PRECISION + +class TestProdEnvMat(unittest.TestCase): + def setUp(self): + self.sess = tf.Session() + self.nframes = 2 + self.dcoord = [ + 12.83, 2.56, 2.18, + 12.09, 2.87, 2.74, + 00.25, 3.32, 1.68, + 3.36, 3.00, 1.81, + 3.51, 2.51, 2.60, + 4.27, 3.22, 1.56] + self.dtype = [0, 1, 1, 0, 1, 1] + self.dbox = [13., 0., 0., 0., 13., 0., 0., 0., 13.] + self.dcoord = np.reshape(self.dcoord, [1, -1]) + self.dtype = np.reshape(self.dtype, [1, -1]) + self.dbox = np.reshape(self.dbox, [1, -1]) + self.dcoord = np.tile(self.dcoord, [self.nframes, 1]) + self.dtype = np.tile(self.dtype, [self.nframes, 1]) + self.dbox = np.tile(self.dbox, [self.nframes, 1]) + self.pbc_expected_output = [ + 0.12206, 0.12047, 0.01502, -0.01263, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 1.02167, -0.77271, 0.32370, 0.58475, 0.99745, 0.41810, 0.75655, -0.49773, 0.10564, 0.10495, -0.00143, 0.01198, 0.03103, 0.03041, 0.00452, -0.00425, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, + 1.02167, 0.77271, -0.32370, -0.58475, 0.04135, 0.04039, 0.00123, -0.00880, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.59220, 0.42028, 0.16304, -0.38405, 0.03694, 0.03680, -0.00300, -0.00117, 0.00336, 0.00327, 0.00022, -0.00074, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, + 0.99745, -0.41810, -0.75655, 0.49773, 0.19078, 0.18961, -0.01951, 0.00793, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.59220, -0.42028, -0.16304, 0.38405, 0.13499, 0.12636, -0.03140, 0.03566, 0.07054, 0.07049, -0.00175, -0.00210, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, + 0.12206, -0.12047, -0.01502, 0.01263, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 1.06176, 0.16913, -0.55250, 0.89077, 1.03163, 0.96880, 0.23422, -0.26615, 0.19078, -0.18961, 0.01951, -0.00793, 0.04135, -0.04039, -0.00123, 0.00880, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, + 1.06176, -0.16913, 0.55250, -0.89077, 0.10564, -0.10495, 0.00143, -0.01198, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.66798, 0.34516, 0.32245, -0.47232, 0.13499, -0.12636, 0.03140, -0.03566, 0.03694, -0.03680, 0.00300, 0.00117, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, + 1.03163, -0.96880, -0.23422, 0.26615, 0.03103, -0.03041, -0.00452, 0.00425, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.66798, -0.34516, -0.32245, 0.47232, 0.07054, -0.07049, 0.00175, 0.00210, 0.00336, -0.00327, -0.00022, 0.00074, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000] + self.nopbc_expected_output = [ + 0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.02167,-0.77271,0.32370,0.58475,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000, + 1.02167,0.77271,-0.32370,-0.58475,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000, + 0.19078,0.18961,-0.01951,0.00793,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.13499,0.12636,-0.03140,0.03566,0.07054,0.07049,-0.00175,-0.00210,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000, + 0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.06176,0.16913,-0.55250,0.89077,1.03163,0.96880,0.23422,-0.26615,0.19078,-0.18961,0.01951,-0.00793,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000, + 1.06176,-0.16913,0.55250,-0.89077,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.66798,0.34516,0.32245,-0.47232,0.13499,-0.12636,0.03140,-0.03566,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000, +1.03163,-0.96880,-0.23422,0.26615,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.66798,-0.34516,-0.32245,0.47232,0.07054,-0.07049,0.00175,0.00210,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000] + self.sel = [10, 10] + self.sec = np.array([0, 0, 0], dtype = int) + self.sec[1:3] = np.cumsum(self.sel) + self.rcut = 6. + self.rcut_smth = 0.8 + self.dnatoms = [6, 6, 2, 4] + self.tcoord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * 3], name='t_coord') + self.tbox = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='t_box') + self.ttype = tf.placeholder(tf.int32, [None, self.dnatoms[0]], name = "t_type") + self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms") + self.nloc = self.dnatoms[0] + self.nall = self.dnatoms[1] + self.nnei = self.sec[-1] + self.ndescrpt = 4 * self.nnei + self.ntypes = np.max(self.dtype) + 1 + davg = np.zeros ([self.ntypes, self.ndescrpt]) + dstd = np.ones ([self.ntypes, self.ndescrpt]) + self.t_avg = tf.constant(davg.astype(GLOBAL_NP_FLOAT_PRECISION)) + self.t_std = tf.constant(dstd.astype(GLOBAL_NP_FLOAT_PRECISION)) + + def test_pbc_self_built_nlist(self): + tem, tem_deriv, trij, tnlist \ + = op_module.prod_env_mat_a ( + self.tcoord, + self.ttype, + self.tnatoms, + self.tbox, + tf.constant(np.zeros(6, dtype = np.int32)), + self.t_avg, + self.t_std, + rcut_a = -1, + rcut_r = self.rcut, + rcut_r_smth = self.rcut_smth, + sel_a = self.sel, + sel_r = [0, 0]) + self.sess.run (tf.global_variables_initializer()) + dem, dem_deriv, drij, dnlist = self.sess.run( + [tem, tem_deriv, trij, tnlist], + feed_dict = { + self.tcoord: self.dcoord, + self.ttype: self.dtype, + self.tbox: self.dbox, + self.tnatoms: self.dnatoms} + ) + self.assertEqual(dem.shape, (self.nframes, self.nloc*self.ndescrpt)) + self.assertEqual(dem_deriv.shape, (self.nframes, self.nloc*self.ndescrpt*3)) + self.assertEqual(drij.shape, (self.nframes, self.nloc*self.nnei*3)) + self.assertEqual(dnlist.shape, (self.nframes, self.nloc*self.nnei)) + for ff in range(self.nframes): + for ii in range(self.ndescrpt): + self.assertAlmostEqual(dem[ff][ii], self.pbc_expected_output[ii], places=5) + + def test_pbc_self_built_nlist_deriv(self): + hh = 1e-4 + tem, tem_deriv, trij, tnlist \ + = op_module.prod_env_mat_a ( + self.tcoord, + self.ttype, + self.tnatoms, + self.tbox, + tf.constant(np.zeros(6, dtype = np.int32)), + self.t_avg, + self.t_std, + rcut_a = -1, + rcut_r = self.rcut, + rcut_r_smth = self.rcut_smth, + sel_a = self.sel, + sel_r = [0, 0]) + self.sess.run (tf.global_variables_initializer()) + self.check_deriv_numerical_deriv(hh, tem, tem_deriv, trij, tnlist) + + def test_nopbc_self_built_nlist(self): + tem, tem_deriv, trij, tnlist \ + = op_module.prod_env_mat_a ( + self.tcoord, + self.ttype, + self.tnatoms, + self.tbox, + tf.constant(np.zeros(0, dtype = np.int32)), + self.t_avg, + self.t_std, + rcut_a = -1, + rcut_r = self.rcut, + rcut_r_smth = self.rcut_smth, + sel_a = self.sel, + sel_r = [0, 0]) + self.sess.run (tf.global_variables_initializer()) + dem, dem_deriv, drij, dnlist = self.sess.run( + [tem, tem_deriv, trij, tnlist], + feed_dict = { + self.tcoord: self.dcoord, + self.ttype: self.dtype, + self.tbox: self.dbox, + self.tnatoms: self.dnatoms} + ) + self.assertEqual(dem.shape, (self.nframes, self.nloc*self.ndescrpt)) + self.assertEqual(dem_deriv.shape, (self.nframes, self.nloc*self.ndescrpt*3)) + self.assertEqual(drij.shape, (self.nframes, self.nloc*self.nnei*3)) + self.assertEqual(dnlist.shape, (self.nframes, self.nloc*self.nnei)) + for ff in range(self.nframes): + for ii in range(self.ndescrpt): + self.assertAlmostEqual(dem[ff][ii], self.nopbc_expected_output[ii], places=5) + + + def test_nopbc_self_built_nlist_deriv(self): + hh = 1e-4 + tem, tem_deriv, trij, tnlist \ + = op_module.prod_env_mat_a ( + self.tcoord, + self.ttype, + self.tnatoms, + self.tbox, + tf.constant(np.zeros(0, dtype = np.int32)), + self.t_avg, + self.t_std, + rcut_a = -1, + rcut_r = self.rcut, + rcut_r_smth = self.rcut_smth, + sel_a = self.sel, + sel_r = [0, 0]) + self.sess.run (tf.global_variables_initializer()) + self.check_deriv_numerical_deriv(hh, tem, tem_deriv, trij, tnlist) + + + def check_deriv_numerical_deriv(self, + hh, + tem, tem_deriv, trij, tnlist): + dem_, dem_deriv_, drij_, dnlist_ = self.sess.run( + [tem, tem_deriv, trij, tnlist], + feed_dict = { + self.tcoord: self.dcoord, + self.ttype: self.dtype, + self.tbox: self.dbox, + self.tnatoms: self.dnatoms} + ) + ff = 0 + dem = dem_[ff] + dem_deriv = dem_deriv_[ff] + dnlist = dnlist_[ff] + for ii in range(self.dnatoms[0]): + for jj in range(self.nnei): + j_idx = dnlist[ii*self.nnei+jj] + if j_idx < 0: + continue + for kk in range(4): + for dd in range(3): + dcoord_0 = np.copy(self.dcoord) + dcoord_1 = np.copy(self.dcoord) + dcoord_0[ff][j_idx*3+dd] -= hh + dcoord_1[ff][j_idx*3+dd] += hh + dem_0, dem_deriv_0, drij_0, dnlist_0 = self.sess.run( + [tem, tem_deriv, trij, tnlist], + feed_dict = { + self.tcoord: dcoord_0, + self.ttype: self.dtype, + self.tbox: self.dbox, + self.tnatoms: self.dnatoms} + ) + dem_1, dem_deriv_1, drij_1, dnlist_1 = self.sess.run( + [tem, tem_deriv, trij, tnlist], + feed_dict = { + self.tcoord: dcoord_1, + self.ttype: self.dtype, + self.tbox: self.dbox, + self.tnatoms: self.dnatoms} + ) + num_deriv = (dem_1[0][ii*self.nnei*4+jj*4+kk] - dem_0[0][ii*self.ndescrpt+jj*4+kk]) / (2.*hh) + ana_deriv = -dem_deriv[ii*self.nnei*4*3+jj*4*3+kk*3+dd] + self.assertAlmostEqual(num_deriv, ana_deriv, places = 5) +