diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index d223ab96fd..ed3ceda8c2 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -15,6 +15,7 @@ from deepmd.common import ( expand_sys_str, + j_loader, ) from deepmd.infer.deep_dipole import ( DeepDipole, @@ -39,9 +40,15 @@ DeepWFC, ) from deepmd.utils import random as dp_random +from deepmd.utils.compat import ( + update_deepmd_input, +) from deepmd.utils.data import ( DeepmdData, ) +from deepmd.utils.data_system import ( + process_systems, +) from deepmd.utils.weight_avg import ( weighted_average, ) @@ -59,8 +66,10 @@ def test( *, model: str, - system: str, - datafile: str, + system: Optional[str], + datafile: Optional[str], + train_json: Optional[str] = None, + valid_json: Optional[str] = None, numb_test: int, rand_seed: Optional[int], shuffle_test: bool, @@ -75,12 +84,16 @@ def test( ---------- model : str path where model is stored - system : str + system : str, optional system directory - datafile : str + datafile : str, optional the path to the list of systems to test + train_json : Optional[str] + Path to the input.json file provided via ``--train-data``. Training systems will be used for testing. + valid_json : Optional[str] + Path to the input.json file provided via ``--valid-data``. Validation systems will be used for testing. numb_test : int - munber of tests to do. 0 means all data. + number of tests to do. 0 means all data. rand_seed : Optional[int] seed for random generator shuffle_test : bool @@ -102,11 +115,41 @@ def test( if numb_test == 0: # only float has inf, but should work for min numb_test = float("inf") - if datafile is not None: + if train_json is not None: + jdata = j_loader(train_json) + jdata = update_deepmd_input(jdata) + data_params = jdata.get("training", {}).get("training_data", {}) + systems = data_params.get("systems") + if not systems: + raise RuntimeError("No training data found in input json") + root = Path(train_json).parent + if isinstance(systems, str): + systems = str((root / Path(systems)).resolve()) + else: + systems = [str((root / Path(ss)).resolve()) for ss in systems] + patterns = data_params.get("rglob_patterns", None) + all_sys = process_systems(systems, patterns=patterns) + elif valid_json is not None: + jdata = j_loader(valid_json) + jdata = update_deepmd_input(jdata) + data_params = jdata.get("training", {}).get("validation_data", {}) + systems = data_params.get("systems") + if not systems: + raise RuntimeError("No validation data found in input json") + root = Path(valid_json).parent + if isinstance(systems, str): + systems = str((root / Path(systems)).resolve()) + else: + systems = [str((root / Path(ss)).resolve()) for ss in systems] + patterns = data_params.get("rglob_patterns", None) + all_sys = process_systems(systems, patterns=patterns) + elif datafile is not None: with open(datafile) as datalist: all_sys = datalist.read().splitlines() - else: + elif system is not None: all_sys = expand_sys_str(system) + else: + raise RuntimeError("No data source specified for testing") if len(all_sys) == 0: raise RuntimeError("Did not find valid system") diff --git a/deepmd/main.py b/deepmd/main.py index 7acafd9c9a..ad77f17281 100644 --- a/deepmd/main.py +++ b/deepmd/main.py @@ -384,6 +384,24 @@ def main_parser() -> argparse.ArgumentParser: type=str, help="The path to the datafile, each line of which is a path to one data system.", ) + parser_tst_subgroup.add_argument( + "--train-data", + dest="train_json", + default=None, + type=str, + help=( + "The input json file. Training data in the file will be used for testing." + ), + ) + parser_tst_subgroup.add_argument( + "--valid-data", + dest="valid_json", + default=None, + type=str, + help=( + "The input json file. Validation data in the file will be used for testing." + ), + ) parser_tst.add_argument( "-S", "--set-prefix", diff --git a/source/tests/common/test_argument_parser.py b/source/tests/common/test_argument_parser.py index 4e39df8659..4aebb7dafc 100644 --- a/source/tests/common/test_argument_parser.py +++ b/source/tests/common/test_argument_parser.py @@ -322,6 +322,32 @@ def test_parser_test(self) -> None: self.run_test(command="test", mapping=ARGS) + def test_parser_test_train_data(self) -> None: + """Test test subparser with train-data.""" + ARGS = { + "--model": {"type": str, "value": "MODEL.PB"}, + "--train-data": { + "type": (str, type(None)), + "value": "INPUT.JSON", + "dest": "train_json", + }, + } + + self.run_test(command="test", mapping=ARGS) + + def test_parser_test_valid_data(self) -> None: + """Test test subparser with valid-data.""" + ARGS = { + "--model": {"type": str, "value": "MODEL.PB"}, + "--valid-data": { + "type": (str, type(None)), + "value": "INPUT.JSON", + "dest": "valid_json", + }, + } + + self.run_test(command="test", mapping=ARGS) + def test_parser_compress(self) -> None: """Test compress subparser.""" ARGS = { diff --git a/source/tests/pt/test_dp_test.py b/source/tests/pt/test_dp_test.py index 085bff88de..1c11541e50 100644 --- a/source/tests/pt/test_dp_test.py +++ b/source/tests/pt/test_dp_test.py @@ -37,7 +37,9 @@ class DPTest: - def test_dp_test_1_frame(self) -> None: + def _run_dp_test( + self, use_input_json: bool, numb_test: int = 0, use_train: bool = False + ) -> None: trainer = get_trainer(deepcopy(self.config)) with torch.device("cpu"): input_dict, label_dict, _ = trainer.get_data(is_train=False) @@ -51,12 +53,17 @@ def test_dp_test_1_frame(self) -> None: model = torch.jit.script(trainer.model) tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth") torch.jit.save(model, tmp_model.name) + val_sys = self.config["training"]["validation_data"]["systems"] + if isinstance(val_sys, list): + val_sys = val_sys[0] dp_test( model=tmp_model.name, - system=self.config["training"]["validation_data"]["systems"][0], + system=None if use_input_json else val_sys, datafile=None, + train_json=self.input_json if use_input_json and use_train else None, + valid_json=self.input_json if use_input_json and not use_train else None, set_prefix="set", - numb_test=0, + numb_test=numb_test, rand_seed=None, shuffle_test=False, detail_file=self.detail_file, @@ -100,6 +107,20 @@ def test_dp_test_1_frame(self) -> None: ).reshape(-1, 3), ) + def test_dp_test_1_frame(self) -> None: + self._run_dp_test(False) + + def test_dp_test_input_json(self) -> None: + self._run_dp_test(True) + + def test_dp_test_input_json_train(self) -> None: + with open(self.input_json) as f: + cfg = json.load(f) + cfg["training"]["validation_data"]["systems"] = ["non-existent"] + with open(self.input_json, "w") as f: + json.dump(cfg, f, indent=4) + self._run_dp_test(True, use_train=True) + def tearDown(self) -> None: for f in os.listdir("."): if f.startswith("model") and f.endswith(".pt"): @@ -147,6 +168,116 @@ def setUp(self) -> None: json.dump(self.config, fp, indent=4) +class TestDPTestSeARglob(unittest.TestCase): + def setUp(self) -> None: + self.detail_file = "test_dp_test_ener_rglob_detail" + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + data_file = [str(Path(__file__).parent / "water/data/single")] + self.config["training"]["training_data"]["systems"] = data_file + root_dir = str(Path(__file__).parent) + self.config["training"]["validation_data"]["systems"] = root_dir + self.config["training"]["validation_data"]["rglob_patterns"] = [ + "water/data/single" + ] + self.config["model"] = deepcopy(model_se_e2_a) + self.input_json = "test_dp_test_rglob.json" + with open(self.input_json, "w") as fp: + json.dump(self.config, fp, indent=4) + + def test_dp_test_input_json_rglob(self) -> None: + trainer = get_trainer(deepcopy(self.config)) + with torch.device("cpu"): + input_dict, _, _ = trainer.get_data(is_train=False) + input_dict.pop("spin", None) + model = torch.jit.script(trainer.model) + tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth") + torch.jit.save(model, tmp_model.name) + dp_test( + model=tmp_model.name, + system=None, + datafile=None, + valid_json=self.input_json, + set_prefix="set", + numb_test=1, + rand_seed=None, + shuffle_test=False, + detail_file=self.detail_file, + atomic=False, + ) + os.unlink(tmp_model.name) + self.assertTrue(os.path.exists(self.detail_file + ".e.out")) + + def tearDown(self) -> None: + for f in os.listdir("."): + if f.startswith("model") and f.endswith(".pt"): + os.remove(f) + if f.startswith(self.detail_file): + os.remove(f) + if f in ["lcurve.out", self.input_json]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) + + +class TestDPTestSeARglobTrain(unittest.TestCase): + def setUp(self) -> None: + self.detail_file = "test_dp_test_ener_rglob_train_detail" + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + root_dir = str(Path(__file__).parent) + self.config["training"]["training_data"]["systems"] = root_dir + self.config["training"]["training_data"]["rglob_patterns"] = [ + "water/data/single" + ] + data_file = [str(Path(__file__).parent / "water/data/single")] + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.input_json = "test_dp_test_rglob_train.json" + with open(self.input_json, "w") as fp: + json.dump(self.config, fp, indent=4) + + def test_dp_test_input_json_rglob_train(self) -> None: + trainer = get_trainer(deepcopy(self.config)) + with torch.device("cpu"): + input_dict, _, _ = trainer.get_data(is_train=False) + input_dict.pop("spin", None) + model = torch.jit.script(trainer.model) + tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth") + torch.jit.save(model, tmp_model.name) + dp_test( + model=tmp_model.name, + system=None, + datafile=None, + train_json=self.input_json, + set_prefix="set", + numb_test=1, + rand_seed=None, + shuffle_test=False, + detail_file=self.detail_file, + atomic=False, + ) + os.unlink(tmp_model.name) + self.assertTrue(os.path.exists(self.detail_file + ".e.out")) + + def tearDown(self) -> None: + for f in os.listdir("."): + if f.startswith("model") and f.endswith(".pt"): + os.remove(f) + if f.startswith(self.detail_file): + os.remove(f) + if f in ["lcurve.out", self.input_json]: + os.remove(f) + if f in ["stat_files"]: + shutil.rmtree(f) + + class TestDPTestForceWeight(DPTest, unittest.TestCase): def setUp(self) -> None: self.detail_file = "test_dp_test_force_weight_detail"