From f6318b35261ead386505ba76920633982f44a3e0 Mon Sep 17 00:00:00 2001 From: simleo Date: Wed, 8 Nov 2023 14:29:53 +0100 Subject: [PATCH 1/2] convert: add size and creation date to outputs --- src/runcrate/convert.py | 3 +++ tests/test_cwlprov_crate_builder.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py index 0d76432..6957b56 100644 --- a/src/runcrate/convert.py +++ b/src/runcrate/convert.py @@ -566,6 +566,8 @@ def add_action_params(self, crate, activity, to_wf_p, ptype="usage"): )) if len(action_p["exampleOfWork"]) == 1: action_p["exampleOfWork"] = action_p["exampleOfWork"][0] + if ptype == "generation": + action_p["dateCreated"] = rel.time.isoformat() action_params.append(action_p) return action_params @@ -606,6 +608,7 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None): source = self.manifest[hash_] action_p = crate.add_file(source, dest, properties={ "sha1": hash_, + "contentSize": str(Path(source).stat().st_size) }) self._set_alternate_name(prov_param, action_p, parent=parent) try: diff --git a/tests/test_cwlprov_crate_builder.py b/tests/test_cwlprov_crate_builder.py index 416351f..2ec6f9c 100644 --- a/tests/test_cwlprov_crate_builder.py +++ b/tests/test_cwlprov_crate_builder.py @@ -95,10 +95,13 @@ def test_revsort(data_dir, tmpdir): assert "File" in entity.type assert entity["alternateName"] == "whale.txt" assert entity["sha1"] == entity.id.rsplit("/")[-1] + assert entity["contentSize"] == "1111" wf_input_file = entity wf_output_file = wf_results[0] assert wf_output_file["alternateName"] == "output.txt" assert wf_output_file["sha1"] == wf_output_file.id.rsplit("/")[-1] + assert wf_output_file["dateCreated"] == "2018-10-25T15:46:38.058365" + assert wf_output_file["contentSize"] == "1111" assert "File" in wf_output_file.type steps = workflow["step"] assert len(steps) == 2 @@ -118,6 +121,8 @@ def test_revsort(data_dir, tmpdir): assert rev_input_file is wf_input_file rev_output_file = results[0] assert "File" in rev_output_file.type + assert rev_output_file["dateCreated"] == "2018-10-25T15:46:36.963254" + assert rev_output_file["contentSize"] == "1111" assert step["position"] == "0" assert set(_connected(step)) == set([ ("packed.cwl#main/input", "packed.cwl#revtool.cwl/input"), @@ -357,6 +362,7 @@ def test_dir_io(data_dir, tmpdir): assert "Dataset" in entity.type wf_input_dir = entity wf_output_dir = wf_results[0] + assert wf_output_dir["dateCreated"] == "2023-02-17T16:20:30.288242" assert wf_input_dir.type == wf_output_dir.type == "Dataset" assert wf_input_dir["alternateName"] == "grepucase_in" assert len(wf_input_dir["hasPart"]) == 2 @@ -395,6 +401,7 @@ def test_dir_io(data_dir, tmpdir): assert greptool_input_dir is wf_input_dir greptool_output_dir = greptool_results[0] assert "Dataset" in greptool_output_dir.type + assert greptool_output_dir["dateCreated"] == "2023-02-17T16:20:30.262141" ucasetool_action = action_map["packed.cwl#ucasetool.cwl"] ucasetool_objects = ucasetool_action["object"] ucasetool_results = ucasetool_action["result"] From c9a15fcea6df7e511d199d9f539cf4e45cc61f67 Mon Sep 17 00:00:00 2001 From: simleo Date: Wed, 8 Nov 2023 16:30:24 +0100 Subject: [PATCH 2/2] convert: add encodingFormat to files when available --- src/runcrate/convert.py | 13 +++++++++---- tests/test_cwlprov_crate_builder.py | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py index 6957b56..ff48935 100644 --- a/src/runcrate/convert.py +++ b/src/runcrate/convert.py @@ -742,9 +742,9 @@ def patch_workflow_input_collection(self, crate, wf=None): if "ComputationalWorkflow" in as_list(tool.type): self.patch_workflow_input_collection(crate, wf=tool) - def _map_input_data(self, data): + def _map_input_data(self, crate, data): if isinstance(data, list): - return [self._map_input_data(_) for _ in data] + return [self._map_input_data(crate, _) for _ in data] if isinstance(data, dict): rval = {} for k, v in data.items(): @@ -756,8 +756,13 @@ def _map_input_data(self, data): source_k = str(source) dest = self.file_map.get(source_k) rval[k] = str(dest) if dest else v + fmt = data.get("format") + if fmt: + entity = crate.get(str(dest)) + if entity: + entity["encodingFormat"] = fmt else: - rval[k] = self._map_input_data(v) + rval[k] = self._map_input_data(crate, v) return rval return data @@ -766,7 +771,7 @@ def add_inputs_file(self, crate): if path.is_file(): with open(path) as f: data = json.load(f) - data = self._map_input_data(data) + data = self._map_input_data(crate, data) source = StringIO(json.dumps(data, indent=4)) crate.add_file(source, path.name, properties={ "name": "input object document", diff --git a/tests/test_cwlprov_crate_builder.py b/tests/test_cwlprov_crate_builder.py index 2ec6f9c..0f6404d 100644 --- a/tests/test_cwlprov_crate_builder.py +++ b/tests/test_cwlprov_crate_builder.py @@ -96,6 +96,7 @@ def test_revsort(data_dir, tmpdir): assert entity["alternateName"] == "whale.txt" assert entity["sha1"] == entity.id.rsplit("/")[-1] assert entity["contentSize"] == "1111" + assert "encodingFormat" in entity wf_input_file = entity wf_output_file = wf_results[0] assert wf_output_file["alternateName"] == "output.txt"