def _is_head_line(line):
    """Return True when *line* of the old index belongs to the preamble.

    Link entries (anything containing ``(``) and section titles (anything
    containing ``##``) are regenerated on every run, and blank or
    whitespace-only lines carry no content, so none of them are preserved.
    """
    if "(" in line or "##" in line:
        return False
    return bool(line.strip())


def _first_headline(lines):
    """Return the first usable title found in *lines*, or None.

    Surrounding ``#`` markers and whitespace are removed.  Candidates that
    are empty or start with ``.`` or ``=`` are skipped.
    """
    for raw in lines:
        candidate = raw.strip("#").strip()
        if candidate and candidate[0] not in ".=":
            return candidate
    return None


def classify_index_TS(dirname="troubleshooting/"):
    """Rewrite ``<dirname>/index.md`` as a two-section table of contents.

    The preamble of the existing index (every line that is not a link, a
    ``##`` title or blank) is kept.  Every other file below *dirname* whose
    name does not contain ``index.md`` is then listed as a Markdown link
    titled with the first headline of that file (or with its relative path
    when no headline can be found).  Files whose name contains
    ``howtoset_`` go under "Parameters setting", all others under
    "Trouble shooting".

    Parameters
    ----------
    dirname : str
        Directory holding ``index.md`` and the pages to list.  Defaults to
        the troubleshooting documentation directory.

    Raises
    ------
    OSError
        If the index or one of the pages cannot be read or written.
    """
    index_path = os.path.join(dirname, "index.md")

    with open(index_path, "r") as old_index:
        heads = [line for line in old_index if _is_head_line(line)]

    # Collect (link, file name, path) for every page.  The path is built
    # from ``root`` so that pages in sub-directories are opened correctly,
    # and the link always uses "/" because it ends up in Markdown.
    pages = []
    for root, _dirs, files in os.walk(dirname, topdown=False):
        for name in files:
            if "index.md" in name:
                continue
            path = os.path.join(root, name)
            link = os.path.relpath(path, dirname).replace(os.sep, "/")
            pages.append((link, name, path))
    # Directory listing order is arbitrary; sort for a reproducible index.
    pages.sort()

    troubleshooting = []
    settings = []
    for link, name, path in pages:
        with open(path, "r") as page:
            headline = _first_headline(page)
        if headline is None:
            # Empty page or no title line: fall back to the link itself.
            headline = link
        entry = "- [" + headline + "](" + link + ")\n"
        if "howtoset_" in name:
            settings.append(entry)
        else:
            troubleshooting.append(entry)

    # Only touch the index once every page has been read successfully, so
    # a failure above cannot leave a truncated index behind.
    with open(index_path, "w") as new_index:
        for line in heads:
            new_index.write(line if line.endswith("\n") else line + "\n")
        new_index.write("\n")
        new_index.write("## Trouble shooting\n")
        new_index.writelines(troubleshooting)
        new_index.write("\n")
        new_index.write("## Parameters setting\n")
        new_index.writelines(settings)
This situation is equal to the linear regression._ + +### Embedding net size tuning form on Al2O3: (Fitting-net size: [240,240,240]) + +Embedding-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) +---|---|---|--- +[25,50,100] | 1.742252e-02 | 7.259383e-05 | 4.014115e-02 +[10,20,40] | 2.909990e-02 | 1.212496e-04 | 4.734667e-02 +[5,10,20] | 3.357767e-02 | 1.399070e-04 | 5.706385e-02 +[4,8,16] | 6.060367e-02 | 2.525153e-04 | 7.333304e-02 +[3,6,12] | 5.656043e-02 | 2.356685e-04 | 7.793539e-02 +[2,4,8] | 5.277023e-02 | 2.198759e-04 | 7.459995e-02 +[1,2,4] | 1.302282e-01 | 5.426174e-04 | 9.672238e-02 + + +## Cu + +### Fitting net size tuning form on Cu: (embedding-net size: [25,50,100]) + +Fitting-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) +---|---|---|--- +[240,240,240] | 4.135548e-02 | 1.615449e-04 | 8.940946e-02 +[20,20,20] | 4.323858e-02 | 1.689007e-04 | 8.955762e-02 +[10,10,10] | 4.399364e-02 | 1.718502e-04 | 8.962891e-02 +[5,5,5] | 4.468404e-02 | 1.745470e-04 | 8.970111e-02 +[4,4,4] | 4.463580e-02 | 1.743586e-04 | 8.972011e-02 +[3,3,3] | 4.493758e-02 | 1.755374e-04 | 8.971303e-02 +[2,2,2] | 4.500736e-02 | 1.758100e-04 | 8.973878e-02 +[1,1,1] | 4.542073e-02 | 1.774247e-04 | 8.964761e-02 +[] | 4.545168e-02 | 1.775456e-04 | 8.983201e-02 + +### Embedding net size tuning form on Cu: (Fitting-net size: [240,240,240]) + +Embedding-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) +---|---|---|--- +[25,50,100] | 4.135548e-02 | 1.615449e-04 | 8.940946e-02 +[20,40,80] | 4.203562e-02 | 1.642016e-04 | 8.925881e-02 +[15,30,60] | 4.146672e-02 | 1.619794e-04 | 8.936911e-02 +[10,20,40] | 4.263060e-02 | 1.665258e-04 | 8.955818e-02 +[5,10,20] | 4.994913e-02 | 1.951138e-04 | 9.007786e-02 +[4,8,16] | 1.022157e-01 | 3.992802e-04 | 9.532119e-02 +[3,9,12] | 1.362098e-01 | 5.320695e-04 | 1.073860e-01 +[2,4,8] | 7.061800e-02 | 2.758515e-04 | 9.126418e-02 +[1,2,4] && seed = 1 | 9.843161e-02 | 
3.844985e-04 | 9.348505e-02 +[1,2,4] && seed = 2 | 9.404335e-02 | 3.673568e-04 | 9.304089e-02 +[1,2,4] && seed = 3 | 1.508016e-01 | 5.890688e-04 | 1.382356e-01 +[1,2,4] && seed = 4 | 9.686949e-02 | 3.783965e-04 | 9.294820e-02 + + +## Water + +### Fitting net size tuning form on water: (embedding-net size: [25,50,100]) + +Fitting-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) +---|---|--- +[240,240,240] | 9.1589E-04 | 5.1540E-02 +[200,200,200] | 9.3221E-04 | 5.2366E-02 +[160,160,160] | 9.4274E-04 | 5.3403E-02 +[120,120,120] | 9.5407E-04 | 5.3093E-02 +[80,80,80] | 9.4605E-04 | 5.3402E-02 +[40,40,40] | 9.8533E-04 | 5.5790E-02 +[20,20,20] | 1.0057E-03 | 5.8232E-02 +[10,10,10] | 1.0466E-03 | 6.2279E-02 +[5,5,5] | 1.1154E-03 | 6.7994E-02 +[4,4,4] | 1.1289E-03 | 6.9613E-02 +[3,3,3] | 1.2368E-03 | 7.9786E-02 +[2,2,2] | 1.3558E-03 | 9.7042E-02 +[1,1,1] | 1.4633E-03 | 1.1265E-01 +[] | 1.5193E-03 | 1.2136E-01 + +### Embedding net size tuning form on water: (Fitting-net size: [240,240,240]) + +Embedding-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) +---|---|--- +[25,50,100] | 9.1589E-04 | 5.1540E-02 +[20,40,80] | 9.5080E-04 | 5.3593E-02 +[15,30,60] | 9.7996E-04 | 5.6338E-02 +[10,20,40] | 1.0353E-03 | 6.2776E-02 +[5,10,20] | 1.1254E-03 | 7.3195E-02 +[4,8,16] | 1.2495E-03 | 8.0371E-02 +[3,6,12] | 1.3604E-03 | 9.9883E-02 +[2,4,8] | 1.4358E-03 | 9.7389E-02 +[1,2,4] | 2.1765E-03 | 1.7276E-01 + + +## Mg-Al + +### Fitting net size tuning form on Mg-Al: (embedding-net size: [25,50,100]) + +Fitting-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) +---|---|--- +[240,240,240] | 3.9606e-03 | 1.6289e-02 +[200,200,200] | 3.9449e-03 | 1.6471e-02 +[160,160,160] | 4.0947e-03 | 1.6413e-02 +[120,120,120] | 3.9234e-03 | 1.6283e-02 +[80,80,80] | 3.9758e-03 | 1.6506e-02 +[40,40,40] | 3.9142e-03 | 1.6348e-02 +[20,20,20] | 4.1302e-03 | 1.7006e-02 +[10,10,10] | 4.3433e-03 | 1.7524e-02 +[5,5,5] | 5.3154e-03 | 1.9716e-02 +[4,4,4] | 5.4210e-03 | 1.9710e-02 
+[2,2,2] | 6.2667e-03 | 2.2568e-02 +[1,1,1] | 7.3676e-03 | 2.6375e-02 +[] | 7.3999e-03 | 2.6097e-02 + +### Embedding net size tuning form on Mg-Al: (Fitting-net size: [240,240,240]) + +Embedding-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) +---|---|--- +[25,50,100] | 3.9606e-03 | 1.6289e-02 +[20,40,80] | 4.0292e-03 | 1.6555e-02 +[15,30,60] | 4.1743e-03 | 1.7026e-02 +[10,20,40] | 4.8138e-03 | 1.8516e-02 +[5,10,20] | 5.6052e-03 | 2.0709e-02 +[4,8,16] | 6.1335e-03 | 2.1450e-02 +[3,6,12] | 6.6469e-03 | 2.3003e-02 +[2,4,8] | 6.8222e-03 | 2.6318e-02 +[1,2,4] | 1.0678e-02 | 3.9559e-02 diff --git a/doc/troubleshooting/howtoset_num_nodes.md b/doc/troubleshooting/howtoset_num_nodes.md new file mode 100644 index 0000000000..131732ea42 --- /dev/null +++ b/doc/troubleshooting/howtoset_num_nodes.md @@ -0,0 +1,16 @@ +# How to control the number of nodes used by a job ? + +Set the number of CPU nodes used by DP algorithms with: +```bash +mpirun -np $num_nodes dp +``` +Set the number of threads used by DP algorithms with: +```bash +export OMP_NUM_THREADS=$num_threads +``` + +Set the number of CPU nodes used by TF kernels with: +```bash +export TF_INTRA_OP_PARALLELISM_THREADS=$num_nodes +export TF_INTER_OP_PARALLELISM_THREADS=$num_nodes +``` diff --git a/doc/troubleshooting/howtoset_rcut.md b/doc/troubleshooting/howtoset_rcut.md new file mode 100644 index 0000000000..f8ba56ccaa --- /dev/null +++ b/doc/troubleshooting/howtoset_rcut.md @@ -0,0 +1,7 @@ +# Do we need to set rcut < half boxsize ? + +When seeking the neighbors of atom i under periodic boundary conditions, deepmd-kit considers all j atoms within cutoff rcut from atom i in all mirror cells. + +So there is no limitation on the setting of rcut. + +PS: The reason why some software packages require rcut < half boxsize is that they only consider the nearest mirrors from the center cell. Deepmd-kit is totally different from them. 
diff --git a/doc/troubleshooting/howtoset_sel.md b/doc/troubleshooting/howtoset_sel.md new file mode 100644 index 0000000000..915fdd3094 --- /dev/null +++ b/doc/troubleshooting/howtoset_sel.md @@ -0,0 +1,11 @@ +# How to set sel ? + +`sel` is short for "selected number of atoms in `rcut`". + +`sel_a` is a list of integers. The length of the list should be the same as the number of atom types in the system. + +`sel_a[i]` gives the selected number of type `i` neighbors within `rcut`. To ensure that the results are strictly accurate, `sel_a[i]` should be larger than the largest number of type `i` neighbors within `rcut`. + +However, the computational overhead increases with `sel_a[i]`; therefore, `sel_a[i]` should be as small as possible. + +The setting of `sel_a[i]` should balance the above two considerations. diff --git a/doc/troubleshooting/index.md b/doc/troubleshooting/index.md index 0db7b82198..1c7d642355 100644 --- a/doc/troubleshooting/index.md +++ b/doc/troubleshooting/index.md @@ -1,10 +1,16 @@ -# Troubleshooting +# FAQs In consequence of various differences of computers or systems, problems may occur. Some common circumstances are listed as follows. +In addition, some frequently asked questions about parameters setting are listed as follows. If other unexpected problems occur, you're welcome to contact us for help. 
- +## Trouble shooting - [Installation](installation.md) - [The temperature undulates violently during early stages of MD](md-energy-undulation.md) - [MD: cannot run LAMMPS after installing a new version of DeePMD-kit](md-version-compatibility.md) - [Model compatability](model-compatability.md) -- [Do we need to set rcut < half boxsize ?](rcut.md) + +## Parameters setting +- [How to tune Fitting/embedding-net size ?](howtoset_netsize.md) +- [How to control the number of nodes used by a job ?](howtoset_num_nodes.md) +- [Do we need to set rcut < half boxsize ?](howtoset_rcut.md) +- [How to set sel ?](howtoset_sel.md) diff --git a/doc/troubleshooting/rcut.md b/doc/troubleshooting/rcut.md deleted file mode 100644 index 74aafe72e1..0000000000 --- a/doc/troubleshooting/rcut.md +++ /dev/null @@ -1,7 +0,0 @@ -# Do we need to set rcut < half boxsize ? - -When seeking the neighbors of atom i under periodic boundary condition, deepmd-kit considers all j atoms within cutoff Rcut from atom i in all mirror cells. - -So there is no limitation on the setting of Rcut. - -PS: The reason why some softwares require Rcut < half boxsize is that they only consider the nearest mirrors from the center cell. Deepmd-kit is totally different from them.