From 8ffff3b99c879b521bcb197a96d5d4d5a84110d9 Mon Sep 17 00:00:00 2001 From: simleo Date: Thu, 23 Mar 2023 10:37:03 +0100 Subject: [PATCH 01/11] run module: initial skeleton --- src/runcrate/cli.py | 16 ++++++++++++++++ src/runcrate/run.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 src/runcrate/run.py diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index 5567a2f..83329fb 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -20,6 +20,7 @@ from . import __version__ from .convert import ProvCrateBuilder from .report import dump_crate_actions +from .run import run_crate @click.group() @@ -86,6 +87,21 @@ def report(crate): dump_crate_actions(crate) +@cli.command() +@click.argument( + "crate", + metavar="RO_CRATE", + type=click.Path(exists=True, readable=True, path_type=Path), +) +def run(crate): + """\ + Run the workflow from a Workflow Run RO-Crate. + + RO_CRATE: RO-Crate directory or zip file. + """ + run_crate(crate) + + @cli.command() def version(): """\ diff --git a/src/runcrate/run.py b/src/runcrate/run.py new file mode 100644 index 0000000..f6b456c --- /dev/null +++ b/src/runcrate/run.py @@ -0,0 +1,45 @@ +# Copyright 2023 CRS4. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""\ +Run the workflow from a Workflow Run RO-Crate. +""" + +import sys + +from rocrate.rocrate import ROCrate + +from .utils import as_list + + +CWL_ID = "https://w3id.org/workflowhub/workflow-ro-crate#cwl" + + +def check_runnable(crate): + wf = crate.mainEntity + if not wf: + raise RuntimeError("crate does not have a mainEntity") + if "ComputationalWorkflow" not in as_list(wf.type): + raise RuntimeError("mainEntity is not a ComputationalWorkflow") + lang = wf.get("programmingLanguage") + if not lang or getattr(lang, "id", None) != CWL_ID: + raise RuntimeError(f"workflow language must be {CWL_ID}") + return wf + + +def run_crate(crate): + if not isinstance(crate, ROCrate): + crate = ROCrate(crate) + wf = check_runnable(crate) + sys.stdout.write(f"workflow: {wf.id}\n") From e0df57af397a49f0a8937c412f31a23c25ce9b96 Mon Sep 17 00:00:00 2001 From: simleo Date: Thu, 23 Mar 2023 12:01:20 +0100 Subject: [PATCH 02/11] check_runnable: check associated action --- src/runcrate/run.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/runcrate/run.py b/src/runcrate/run.py index f6b456c..383ec8b 100644 --- a/src/runcrate/run.py +++ b/src/runcrate/run.py @@ -35,11 +35,15 @@ def check_runnable(crate): lang = wf.get("programmingLanguage") if not lang or getattr(lang, "id", None) != CWL_ID: raise RuntimeError(f"workflow language must be {CWL_ID}") - return wf + actions = [_ for _ in crate.get_entities() + if "CreateAction" in as_list(_.type) and _.get("instrument") is wf] + if not actions: + raise RuntimeError(f"no CreateAction associated to {wf.id}") + return wf, actions[0] def run_crate(crate): if not isinstance(crate, ROCrate): crate = ROCrate(crate) - wf = check_runnable(crate) - sys.stdout.write(f"workflow: {wf.id}\n") + wf, action = check_runnable(crate) + sys.stdout.write(f"workflow: {wf.id}; action: {action.id}\n") From 2fa25df7185e5947e51474046375333193206d9c Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 24 Mar 2023 12:26:06 +0100 Subject: [PATCH 03/11] run module: add generation of CWL parameters file --- src/runcrate/run.py | 98 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/src/runcrate/run.py b/src/runcrate/run.py index 383ec8b..490a906 100644 --- a/src/runcrate/run.py +++ b/src/runcrate/run.py @@ -16,7 +16,9 @@ Run the workflow from a Workflow Run RO-Crate. """ +import json import sys +from pathlib import Path from rocrate.rocrate import ROCrate @@ -24,6 +26,7 @@ CWL_ID = "https://w3id.org/workflowhub/workflow-ro-crate#cwl" +PARAMS_FILENAME = "params.json" def check_runnable(crate): @@ -42,8 +45,103 @@ def check_runnable(crate): return wf, actions[0] +def auto_convert(value): + if not isinstance(value, str): # should not happen + return value + if value.lower() in {"true", "false"}: + return value.lower() == "true" + try: + return int(value) + except ValueError: + try: + return float(value) + except ValueError: + return value + + +def convert_file(fp, obj): + retval = {"class": "File"} + alt_name = obj.get("alternateName") + retval["location"] = alt_name or obj.id + format_ = fp.get("encodingFormat", obj.get("encodingFormat")) + if format_: + retval["format"] = format_ + return retval + + +def convert_dataset(fp, obj): + retval = {"class": "Directory"} + alt_name = obj.get("alternateName") + retval["location"] = alt_name or obj.id + return retval + + +def convert_value(fp, value): + if fp is None: + return auto_convert(value) + type_ = fp.get("additionalType") + if type_ == "PropertyValue": + return {_["name"]: convert_value(None, _["value"]) for _ in as_list(value)} + if isinstance(value, list): + return [convert_value(fp, _) for _ in value] + if not type_ or isinstance(type_, list) or type_ == "DataType": + return auto_convert(value) + if type_ == "Text": + return value + if type_ == "Integer": + return int(value) + if type_ == "Float": + return float(value) + if type_ == "Boolean": + return value.lower() == "true" + + +def convert_obj(fp, obj): + if "Collection" in as_list(obj.type): + main_entity = obj.get("mainEntity") + if not main_entity: + parts = obj.get("hasPart") + if not parts: + raise ValueError(f"collection {obj.id} has no parts") + main_entity = parts[0] + if "File" in as_list(main_entity.type): + return convert_file(fp, main_entity) + if "Dataset" in as_list(main_entity.type): + return convert_dataset(fp, main_entity) + raise ValueError(f"{main_entity.id} should be a File or Dataset") + elif "File" in as_list(obj.type): + return convert_file(fp, obj) + elif "Dataset" in as_list(obj.type): + return convert_dataset(fp, obj) + elif "PropertyValue" not in as_list(obj.type): + raise RuntimeError(f"object {obj.id} should be a File, Dataset, Collection or PropertyValue") + value = obj.get("value") + if not value: + raise RuntimeError(f"object {obj.id} has no value") + return convert_value(fp, value) + + +def gen_params(wf, action): + params = {} + wf_inputs = set(wf.get("input", [])) + for obj in action.get("object", []): + sel = [_ for _ in as_list(obj.get("exampleOfWork")) if _ in wf_inputs] + if not sel: + continue + fp = sel[0] # there should be only one of these + name = fp.get("name", obj.get("name")) + if not name: + continue + params[name] = convert_obj(fp, obj) + return params + + def run_crate(crate): if not isinstance(crate, ROCrate): crate = ROCrate(crate) wf, action = check_runnable(crate) sys.stdout.write(f"workflow: {wf.id}; action: {action.id}\n") + params = gen_params(wf, action) + params_path = Path(PARAMS_FILENAME) # to be changed + with open(params_path, "w") as f: + json.dump(params, f, indent=4) From da4fd7a0d37a17850c2b30b50c6cfeb185504047 Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 24 Mar 2023 16:40:55 +0100 Subject: [PATCH 04/11] run module: add file renaming and wf execution --- src/runcrate/run.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/runcrate/run.py b/src/runcrate/run.py index 490a906..cdb28c0 100644 --- a/src/runcrate/run.py +++ b/src/runcrate/run.py @@ -17,7 +17,10 @@ """ import json +import shutil +import subprocess import sys +import tempfile from pathlib import Path from rocrate.rocrate import ROCrate @@ -27,6 +30,7 @@ CWL_ID = "https://w3id.org/workflowhub/workflow-ro-crate#cwl" PARAMS_FILENAME = "params.json" +EXECUTABLE = "cwltool" def check_runnable(crate): @@ -141,7 +145,24 @@ def run_crate(crate): crate = ROCrate(crate) wf, action = check_runnable(crate) sys.stdout.write(f"workflow: {wf.id}; action: {action.id}\n") + workdir = Path(tempfile.mkdtemp(prefix="runcrate_")) + sys.stdout.write(f"working dir: {workdir}\n") params = gen_params(wf, action) - params_path = Path(PARAMS_FILENAME) # to be changed + crate.write(workdir) + params_path = Path(workdir / PARAMS_FILENAME) with open(params_path, "w") as f: json.dump(params, f, indent=4) + for obj in action.get("object", []): + alt_name = obj.get("alternateName") + if alt_name: + dst_path = workdir / alt_name + if "Dataset" in as_list(obj.type): + (dst_path).mkdir(parents=True, exist_ok=True) + if "File" in as_list(obj.type): + (dst_path.parent).mkdir(parents=True, exist_ok=True) + src_path = workdir / obj.id + shutil.copy(src_path, dst_path) + wf_path = workdir / wf.id + sys.stdout.write(f"running {wf_path}\n") + args = [EXECUTABLE, wf_path, params_path] + subprocess.run(args) From 283b1151dbf66630e566cf573acb2cb167d8c1f6 Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 24 Mar 2023 16:59:03 +0100 Subject: [PATCH 05/11] run module: add collection renaming --- src/runcrate/run.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/runcrate/run.py b/src/runcrate/run.py index cdb28c0..6310a90 100644 --- a/src/runcrate/run.py +++ b/src/runcrate/run.py @@ -140,6 +140,27 @@ def gen_params(wf, action): return params +def rename_data_entities(obj, workdir): + if "Collection" in as_list(obj.type): + parts = set(as_list(obj.get("hasPart", []))) + main_entity = obj.get("mainEntity") + if main_entity: + parts.add(main_entity) # should already be in parts + for p in parts: + rename_data_entities(p, workdir) + alt_name = obj.get("alternateName") + if alt_name: + dst_path = workdir / alt_name + if "Dataset" in as_list(obj.type): + (dst_path).mkdir(parents=True, exist_ok=True) + for p in set(as_list(obj.get("hasPart", []))): + rename_data_entities(p, workdir) + if "File" in as_list(obj.type): + (dst_path.parent).mkdir(parents=True, exist_ok=True) + src_path = workdir / obj.id + shutil.copy(src_path, dst_path) + + def run_crate(crate): if not isinstance(crate, ROCrate): crate = ROCrate(crate) @@ -153,15 +174,7 @@ def run_crate(crate): with open(params_path, "w") as f: json.dump(params, f, indent=4) for obj in action.get("object", []): - alt_name = obj.get("alternateName") - if alt_name: - dst_path = workdir / alt_name - if "Dataset" in as_list(obj.type): - (dst_path).mkdir(parents=True, exist_ok=True) - if "File" in as_list(obj.type): - (dst_path.parent).mkdir(parents=True, exist_ok=True) - src_path = workdir / obj.id - shutil.copy(src_path, dst_path) + rename_data_entities(obj, workdir) wf_path = workdir / wf.id sys.stdout.write(f"running {wf_path}\n") args = [EXECUTABLE, wf_path, params_path] From 0fdbc873897bbffa4cdd0177a65d8bd3803ecd80 Mon Sep 17 00:00:00 2001 From: simleo Date: Tue, 4 Apr 2023 17:13:22 +0200 Subject: [PATCH 06/11] run module: remove working dir, add option to keep it --- src/runcrate/cli.py | 5 +++-- src/runcrate/run.py | 8 ++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index 83329fb..aac5f82 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -93,13 +93,14 @@ def report(crate): metavar="RO_CRATE", type=click.Path(exists=True, readable=True, path_type=Path), ) -def run(crate): +@click.option("--keep-wd", help="keep working directory", is_flag=True) +def run(crate, keep_wd): """\ Run the workflow from a Workflow Run RO-Crate. RO_CRATE: RO-Crate directory or zip file. """ - run_crate(crate) + run_crate(crate, keep_wd=keep_wd) @cli.command() diff --git a/src/runcrate/run.py b/src/runcrate/run.py index 6310a90..5a8f75f 100644 --- a/src/runcrate/run.py +++ b/src/runcrate/run.py @@ -161,7 +161,7 @@ def rename_data_entities(obj, workdir): shutil.copy(src_path, dst_path) -def run_crate(crate): +def run_crate(crate, keep_wd=False): if not isinstance(crate, ROCrate): crate = ROCrate(crate) wf, action = check_runnable(crate) @@ -178,4 +178,8 @@ def run_crate(crate): wf_path = workdir / wf.id sys.stdout.write(f"running {wf_path}\n") args = [EXECUTABLE, wf_path, params_path] - subprocess.run(args) + try: + subprocess.check_call(args) + finally: + if not keep_wd: + shutil.rmtree(workdir) From 839747ff1065964131ea10a8d0566d66476792d0 Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 7 Apr 2023 12:35:00 +0200 Subject: [PATCH 07/11] update test data --- .../ro-crate-metadata.json | 107 ++++++++++++------ 1 file changed, 74 insertions(+), 33 deletions(-) diff --git a/tests/data/revsort-provenance-crate-minimal/ro-crate-metadata.json b/tests/data/revsort-provenance-crate-minimal/ro-crate-metadata.json index 8f45fbb..05f4b7b 100644 --- a/tests/data/revsort-provenance-crate-minimal/ro-crate-metadata.json +++ b/tests/data/revsort-provenance-crate-minimal/ro-crate-metadata.json @@ -1,10 +1,33 @@ { - "@context": "https://w3id.org/ro/crate/1.1/context", + "@context": [ + "https://w3id.org/ro/crate/1.1/context", + { + "ParameterConnection": "https://w3id.org/ro/terms/workflow-run#ParameterConnection", + "connection": "https://w3id.org/ro/terms/workflow-run#connection", + "sha1": "https://w3id.org/ro/terms/workflow-run#sha1", + "sourceParameter": "https://w3id.org/ro/terms/workflow-run#sourceParameter", + "targetParameter": "https://w3id.org/ro/terms/workflow-run#targetParameter" + } + ], "@graph": [ { "@id": "./", "@type": "Dataset", - "datePublished": "2022-10-07T10:01:24+00:00", + "conformsTo": [ + { + "@id": "https://w3id.org/ro/wfrun/process/0.1" + }, + { + "@id": "https://w3id.org/ro/wfrun/workflow/0.1" + }, + { + "@id": "https://w3id.org/ro/wfrun/provenance/0.1" + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + } + ], + "datePublished": "2023-03-23T14:39:57+00:00", "hasPart": [ { "@id": "packed.cwl" @@ -19,12 +42,13 @@ "@id": "97fe1b50b4582cebc7d853796ebd62e3e163aa3f" } ], + "license": "Apache-2.0", "mainEntity": { "@id": "packed.cwl" }, "mentions": [ { - "@id": "#7aeba0c9-78f6-4fb7-85d9-fcbe18fce057" + "@id": "#654421a2-66b7-47c0-889a-4047fd22aace" } ] }, @@ -43,13 +67,36 @@ } ] }, + { + "@id": "https://w3id.org/ro/wfrun/process/0.1", + "@type": "CreativeWork", + "name": "Process Run Crate", + "version": "0.1" + }, + { + "@id": "https://w3id.org/ro/wfrun/workflow/0.1", + "@type": "CreativeWork", + "name": "Workflow Run Crate", + "version": "0.1" + }, + { + "@id": "https://w3id.org/ro/wfrun/provenance/0.1", + "@type": "CreativeWork", + "name": "Provenance Run Crate", + "version": "0.1" + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0", + "@type": "CreativeWork", + "name": "Workflow RO-Crate", + "version": "1.0" + }, { "@id": "packed.cwl", "@type": [ "File", "SoftwareSourceCode", - "ComputationalWorkflow", - "HowTo" + "ComputationalWorkflow" ], "hasPart": [ { @@ -94,28 +141,22 @@ "@id": "packed.cwl#main/input", "@type": "FormalParameter", "additionalType": "File", - "connectedTo": { - "@id": "packed.cwl#revtool.cwl/input" - }, "defaultValue": "file:///home/stain/src/cwltool/tests/wf/hello.txt", "encodingFormat": "https://www.iana.org/assignments/media-types/text/plain", - "name": "main/input" + "name": "input" }, { "@id": "packed.cwl#main/reverse_sort", "@type": "FormalParameter", "additionalType": "Boolean", - "connectedTo": { - "@id": "packed.cwl#sorttool.cwl/reverse" - }, "defaultValue": "True", - "name": "main/reverse_sort" + "name": "reverse_sort" }, { "@id": "packed.cwl#main/output", "@type": "FormalParameter", "additionalType": "File", - "name": "main/output" + "name": "output" }, { "@id": "packed.cwl#revtool.cwl", @@ -137,16 +178,13 @@ "@id": "packed.cwl#revtool.cwl/input", "@type": "FormalParameter", "additionalType": "File", - "name": "revtool.cwl/input" + "name": "input" }, { "@id": "packed.cwl#revtool.cwl/output", "@type": "FormalParameter", "additionalType": "File", - "connectedTo": { - "@id": "packed.cwl#sorttool.cwl/input" - }, - "name": "revtool.cwl/output" + "name": "output" }, { "@id": "packed.cwl#sorttool.cwl", @@ -171,25 +209,22 @@ "@id": "packed.cwl#sorttool.cwl/reverse", "@type": "FormalParameter", "additionalType": "Boolean", - "name": "sorttool.cwl/reverse" + "name": "reverse" }, { "@id": "packed.cwl#sorttool.cwl/input", "@type": "FormalParameter", "additionalType": "File", - "name": "sorttool.cwl/input" + "name": "input" }, { "@id": "packed.cwl#sorttool.cwl/output", "@type": "FormalParameter", "additionalType": "File", - "connectedTo": { - "@id": "packed.cwl#main/output" - }, - "name": "sorttool.cwl/output" + "name": "output" }, { - "@id": "#7aeba0c9-78f6-4fb7-85d9-fcbe18fce057", + "@id": "#654421a2-66b7-47c0-889a-4047fd22aace", "@type": "CreateAction", "endTime": "2018-10-25T15:46:43.020168", "instrument": { @@ -214,6 +249,7 @@ { "@id": "327fc7aedf4f6b69a42a7c8b808dc5a7aff61376", "@type": "File", + "alternateName": "whale.txt", "exampleOfWork": [ { "@id": "packed.cwl#main/input" @@ -221,7 +257,8 @@ { "@id": "packed.cwl#revtool.cwl/input" } - ] + ], + "sha1": "327fc7aedf4f6b69a42a7c8b808dc5a7aff61376" }, { "@id": "#pv-main/reverse_sort", @@ -229,12 +266,13 @@ "exampleOfWork": { "@id": "packed.cwl#main/reverse_sort" }, - "name": "main/reverse_sort", + "name": "reverse_sort", "value": "True" }, { "@id": "b9214658cc453331b62c2282b772a5c063dbd284", "@type": "File", + "alternateName": "output.txt", "exampleOfWork": [ { "@id": "packed.cwl#main/output" @@ -242,10 +280,11 @@ { "@id": "packed.cwl#sorttool.cwl/output" } - ] + ], + "sha1": "b9214658cc453331b62c2282b772a5c063dbd284" }, { - "@id": "#a439c61f-2378-49fb-a7e5-7258248daaeb", + "@id": "#1b0a99b0-bff6-486f-b9d9-50e89f9f8cc0", "@type": "CreateAction", "endTime": "2018-10-25T15:46:36.967359", "instrument": { @@ -267,6 +306,7 @@ { "@id": "97fe1b50b4582cebc7d853796ebd62e3e163aa3f", "@type": "File", + "alternateName": "output.txt", "exampleOfWork": [ { "@id": "packed.cwl#revtool.cwl/output" @@ -274,10 +314,11 @@ { "@id": "packed.cwl#sorttool.cwl/input" } - ] + ], + "sha1": "97fe1b50b4582cebc7d853796ebd62e3e163aa3f" }, { - "@id": "#4377b674-1c08-4afe-b3a6-df827c03b1c4", + "@id": "#4d406f10-e4a8-4767-8b91-fc0631825b3a", "@type": "CreateAction", "endTime": "2018-10-25T15:46:38.069110", "instrument": { @@ -305,7 +346,7 @@ "exampleOfWork": { "@id": "packed.cwl#sorttool.cwl/reverse" }, - "name": "main/sorted/reverse", + "name": "reverse", "value": "True" } ] From 3fc95d389ecf05ac3329a83cf062b1bad2289c20 Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 7 Apr 2023 14:55:26 +0200 Subject: [PATCH 08/11] add test for runcrate run --- tests/test_cli.py | 15 +++++++++++++++ tox.ini | 1 + 2 files changed, 16 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 6419b6b..73188bd 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from pathlib import Path + from click.testing import CliRunner from rocrate.rocrate import ROCrate @@ -107,3 +109,16 @@ def test_cli_version(): result = runner.invoke(cli, ["version"]) assert result.exit_code == 0 assert result.stdout.strip() == __version__ + + +def test_cli_run_provenance_minimal(data_dir, tmpdir, monkeypatch): + crate_dir = data_dir / "revsort-provenance-crate-minimal" + runner = CliRunner() + args = ["run", str(crate_dir)] + monkeypatch.chdir(str(tmpdir)) + result = runner.invoke(cli, args) + assert result.exit_code == 0 + out_path = Path("output.txt") + assert out_path.is_file() + crate_out_path = crate_dir / "b9214658cc453331b62c2282b772a5c063dbd284" + assert out_path.read_text() == crate_out_path.read_text() diff --git a/tox.ini b/tox.ini index cfc2be2..ddeef00 100644 --- a/tox.ini +++ b/tox.ini @@ -11,6 +11,7 @@ skip_install = true deps = -r{toxinidir}/requirements.txt coverage + cwltool pytest pytest-cov hypothesis From 44e95ef6d1ba7fbb87abbe338f251698caeaf47f Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 7 Apr 2023 16:27:16 +0200 Subject: [PATCH 09/11] support for running streamflow crates --- src/runcrate/cli.py | 9 +++++++-- src/runcrate/run.py | 24 +++++++++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index aac5f82..9ab28a5 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -94,13 +94,18 @@ def report(crate): type=click.Path(exists=True, readable=True, path_type=Path), ) @click.option("--keep-wd", help="keep working directory", is_flag=True) -def run(crate, keep_wd): +@click.option( + "--dry-run", + help="do not actually run the workflow (implies --keep-wd)", + is_flag=True +) +def run(crate, keep_wd, dry_run): """\ Run the workflow from a Workflow Run RO-Crate. RO_CRATE: RO-Crate directory or zip file. """ - run_crate(crate, keep_wd=keep_wd) + run_crate(crate, keep_wd=keep_wd, dry_run=dry_run) @cli.command() diff --git a/src/runcrate/run.py b/src/runcrate/run.py index 5a8f75f..d3e6c39 100644 --- a/src/runcrate/run.py +++ b/src/runcrate/run.py @@ -31,6 +31,7 @@ CWL_ID = "https://w3id.org/workflowhub/workflow-ro-crate#cwl" PARAMS_FILENAME = "params.json" EXECUTABLE = "cwltool" +STREAMFLOW_FILE = "streamflow.yml" def check_runnable(crate): @@ -161,7 +162,17 @@ def rename_data_entities(obj, workdir): shutil.copy(src_path, dst_path) -def run_crate(crate, keep_wd=False): +def find_streamflow_file(crate): + for e in crate.get_entities(): + if "File" not in as_list(e.type): + continue + candidates = e.id, e.get("alternateName", "") + for c in candidates: + if c.lower().rsplit("/", 1)[-1] == STREAMFLOW_FILE: + return e.id + + +def run_crate(crate, keep_wd=False, dry_run=False): if not isinstance(crate, ROCrate): crate = ROCrate(crate) wf, action = check_runnable(crate) @@ -175,9 +186,16 @@ def run_crate(crate, keep_wd=False): json.dump(params, f, indent=4) for obj in action.get("object", []): rename_data_entities(obj, workdir) + if dry_run: + return wf_path = workdir / wf.id - sys.stdout.write(f"running {wf_path}\n") - args = [EXECUTABLE, wf_path, params_path] + streamflow_relpath = find_streamflow_file(crate) + if streamflow_relpath: + streamflow_file = workdir / streamflow_relpath + args = ["cwl-runner", "--streamflow-file", streamflow_file, wf_path, params_path] + else: + args = [EXECUTABLE, wf_path, params_path] + sys.stdout.write(f"running {args}\n") try: subprocess.check_call(args) finally: From 4e359c3e7265b312c70351ace2a01c5f9da6ad9d Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 7 Apr 2023 17:08:24 +0200 Subject: [PATCH 10/11] lighter test for runcrate run --- .../4bd8e7e358488e833bf32cf5028695292cecb05b | 1 + tests/data/type-zoo-run-1-crate/index.html | 1 + tests/data/type-zoo-run-1-crate/packed.cwl | 125 ++ .../ro-crate-metadata.json | 432 ++++++ .../ro-crate-preview.html | 1175 +++++++++++++++++ tests/test_cli.py | 6 +- 6 files changed, 1737 insertions(+), 3 deletions(-) create mode 100644 tests/data/type-zoo-run-1-crate/4bd8e7e358488e833bf32cf5028695292cecb05b create mode 120000 tests/data/type-zoo-run-1-crate/index.html create mode 100644 tests/data/type-zoo-run-1-crate/packed.cwl create mode 100644 tests/data/type-zoo-run-1-crate/ro-crate-metadata.json create mode 100644 tests/data/type-zoo-run-1-crate/ro-crate-preview.html diff --git a/tests/data/type-zoo-run-1-crate/4bd8e7e358488e833bf32cf5028695292cecb05b b/tests/data/type-zoo-run-1-crate/4bd8e7e358488e833bf32cf5028695292cecb05b new file mode 100644 index 0000000..4be05b7 --- /dev/null +++ b/tests/data/type-zoo-run-1-crate/4bd8e7e358488e833bf32cf5028695292cecb05b @@ -0,0 +1 @@ +--in-any tar --in-bool --in-double 3.142 --in-enum B --in-float 3.14 --in-int 42 --in-long 420 --in-multi 9.99 --in-record-A Tom --in-record-B Jerry --in-str spam foo bar diff --git a/tests/data/type-zoo-run-1-crate/index.html b/tests/data/type-zoo-run-1-crate/index.html new file mode 120000 index 0000000..a33a179 --- /dev/null +++ b/tests/data/type-zoo-run-1-crate/index.html @@ -0,0 +1 @@ +ro-crate-preview.html \ No newline at end of file diff --git a/tests/data/type-zoo-run-1-crate/packed.cwl b/tests/data/type-zoo-run-1-crate/packed.cwl new file mode 100644 index 0000000..2536d64 --- /dev/null +++ b/tests/data/type-zoo-run-1-crate/packed.cwl @@ -0,0 +1,125 @@ +{ + "class": "CommandLineTool", + "baseCommand": "echo", + "stdout": "output.txt", + "inputs": [ + { + "type": "Any", + "inputBinding": { + "prefix": "--in-any" + }, + "id": "#main/in_any" + }, + { + "type": { + "type": "array", + "items": "string" + }, + "inputBinding": { + "position": 1 + }, + "id": "#main/in_array" + }, + { + "type": "boolean", + "inputBinding": { + "prefix": "--in-bool" + }, + "id": "#main/in_bool" + }, + { + "type": "double", + "inputBinding": { + "prefix": "--in-double" + }, + "id": "#main/in_double" + }, + { + "type": { + "type": "enum", + "symbols": [ + "#main/in_enum/A", + "#main/in_enum/B" + ] + }, + "inputBinding": { + "prefix": "--in-enum" + }, + "id": "#main/in_enum" + }, + { + "type": "float", + "inputBinding": { + "prefix": "--in-float" + }, + "id": "#main/in_float" + }, + { + "type": "int", + "inputBinding": { + "prefix": "--in-int" + }, + "id": "#main/in_int" + }, + { + "type": "long", + "inputBinding": { + "prefix": "--in-long" + }, + "id": "#main/in_long" + }, + { + "type": [ + "int", + "float", + "null" + ], + "default": 9.99, + "inputBinding": { + "prefix": "--in-multi" + }, + "id": "#main/in_multi" + }, + { + "type": { + "type": "record", + "name": "#main/in_record/in_record", + "fields": [ + { + "type": "string", + "inputBinding": { + "prefix": "--in-record-A" + }, + "name": "#main/in_record/in_record/in_record_A" + }, + { + "type": "string", + "inputBinding": { + "prefix": "--in-record-B" + }, + "name": "#main/in_record/in_record/in_record_B" + } + ] + }, + "id": "#main/in_record" + }, + { + "type": "string", + "inputBinding": { + "prefix": "--in-str" + }, + "id": "#main/in_str" + } + ], + "id": "#main", + "outputs": [ + { + "type": "File", + "id": "#main/cl_dump", + "outputBinding": { + "glob": "output.txt" + } + } + ], + "cwlVersion": "v1.0" +} \ No newline at end of file diff --git a/tests/data/type-zoo-run-1-crate/ro-crate-metadata.json b/tests/data/type-zoo-run-1-crate/ro-crate-metadata.json new file mode 100644 index 0000000..70448b1 --- /dev/null +++ b/tests/data/type-zoo-run-1-crate/ro-crate-metadata.json @@ -0,0 +1,432 @@ +{ + "@context": [ + "https://w3id.org/ro/crate/1.1/context", + { + "ParameterConnection": "https://w3id.org/ro/terms/workflow-run#ParameterConnection", + "connection": "https://w3id.org/ro/terms/workflow-run#connection", + "sha1": "https://w3id.org/ro/terms/workflow-run#sha1", + "sourceParameter": "https://w3id.org/ro/terms/workflow-run#sourceParameter", + "targetParameter": "https://w3id.org/ro/terms/workflow-run#targetParameter" + } + ], + "@graph": [ + { + "@id": "./", + "@type": "Dataset", + "conformsTo": [ + { + "@id": "https://w3id.org/ro/wfrun/process/0.1" + }, + { + "@id": "https://w3id.org/ro/wfrun/workflow/0.1" + }, + { + "@id": "https://w3id.org/ro/wfrun/provenance/0.1" + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + } + ], + "datePublished": "2023-03-24T07:53:37+00:00", + "hasPart": [ + { + "@id": "packed.cwl" + }, + { + "@id": "4bd8e7e358488e833bf32cf5028695292cecb05b" + } + ], + "license": "Apache-2.0", + "mainEntity": { + "@id": "packed.cwl" + }, + "mentions": [ + { + "@id": "#f4a43df6-8216-4b72-abf2-8beab8ca9894" + } + ] + }, + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate/1.1" + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + } + ] + }, + { + "@id": "https://w3id.org/ro/wfrun/process/0.1", + "@type": "CreativeWork", + "name": "Process Run Crate", + "version": "0.1" + }, + { + "@id": "https://w3id.org/ro/wfrun/workflow/0.1", + "@type": "CreativeWork", + "name": "Workflow Run Crate", + "version": "0.1" + }, + { + "@id": "https://w3id.org/ro/wfrun/provenance/0.1", + "@type": "CreativeWork", + "name": "Provenance Run Crate", + "version": "0.1" + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0", + "@type": "CreativeWork", + "name": "Workflow RO-Crate", + "version": "1.0" + }, + { + "@id": "packed.cwl", + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow", + "HowTo" + ], + "input": [ + { + "@id": "packed.cwl#main/in_any" + }, + { + "@id": "packed.cwl#main/in_array" + }, + { + "@id": "packed.cwl#main/in_bool" + }, + { + "@id": "packed.cwl#main/in_double" + }, + { + "@id": "packed.cwl#main/in_enum" + }, + { + "@id": "packed.cwl#main/in_float" + }, + { + "@id": "packed.cwl#main/in_int" + }, + { + "@id": "packed.cwl#main/in_long" + }, + { + "@id": "packed.cwl#main/in_multi" + }, + { + "@id": "packed.cwl#main/in_record" + }, + { + "@id": "packed.cwl#main/in_str" + } + ], + "name": "packed.cwl", + "output": [ + { + "@id": "packed.cwl#main/cl_dump" + } + ], + "programmingLanguage": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate#cwl" + } + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate#cwl", + "@type": "ComputerLanguage", + "alternateName": "CWL", + "identifier": { + "@id": "https://w3id.org/cwl/v1.0/" + }, + "name": "Common Workflow Language", + "url": { + "@id": "https://www.commonwl.org/" + }, + "version": "v1.0" + }, + { + "@id": "packed.cwl#main/in_any", + "@type": "FormalParameter", + "additionalType": "DataType", + "name": "in_any" + }, + { + "@id": "packed.cwl#main/in_array", + "@type": "FormalParameter", + "additionalType": "Text", + "multipleValues": "True", + "name": "in_array" + }, + { + "@id": "packed.cwl#main/in_bool", + "@type": "FormalParameter", + "additionalType": "Boolean", + "name": "in_bool" + }, + { + "@id": "packed.cwl#main/in_double", + "@type": "FormalParameter", + "additionalType": "Float", + "name": "in_double" + }, + { + "@id": "packed.cwl#main/in_enum", + "@type": "FormalParameter", + "additionalType": "Text", + "name": "in_enum", + "valuePattern": "A|B" + }, + { + "@id": "packed.cwl#main/in_float", + "@type": "FormalParameter", + "additionalType": "Float", + "name": "in_float" + }, + { + "@id": "packed.cwl#main/in_int", + "@type": "FormalParameter", + "additionalType": "Integer", + "name": "in_int" + }, + { + "@id": "packed.cwl#main/in_long", + "@type": "FormalParameter", + "additionalType": "Integer", + "name": "in_long" + }, + { + "@id": "packed.cwl#main/in_multi", + "@type": "FormalParameter", + "additionalType": [ + "Float", + "Integer" + ], + "defaultValue": "9.99", + "name": "in_multi", + "valueRequired": "False" + }, + { + "@id": "packed.cwl#main/in_record", + "@type": "FormalParameter", + "additionalType": "PropertyValue", + "multipleValues": "True", + "name": "in_record" + }, + { + "@id": "packed.cwl#main/in_str", + "@type": "FormalParameter", + "additionalType": "Text", + "name": "in_str" + }, + { + "@id": "packed.cwl#main/cl_dump", + "@type": "FormalParameter", + "additionalType": "File", + "name": "cl_dump" + }, + { + "@id": "#011ba6cb-9622-4cec-bb95-da634b2ffc5e", + "@type": "SoftwareApplication", + "name": "cwltool 3.1.20220224085855" + }, + { + "@id": "#74481571-11f4-493c-8edf-3eb9bd5994e0", + "@type": "OrganizeAction", + "instrument": { + "@id": "#011ba6cb-9622-4cec-bb95-da634b2ffc5e" + }, + "name": "Run of cwltool 3.1.20220224085855", + "result": { + "@id": "#f4a43df6-8216-4b72-abf2-8beab8ca9894" + }, + "startTime": "2022-04-22T12:27:13.313333" + }, + { + "@id": "#f4a43df6-8216-4b72-abf2-8beab8ca9894", + "@type": "CreateAction", + "endTime": "2022-04-22T12:27:13.328912", + "instrument": { + "@id": "packed.cwl" + }, + "name": "Run of workflow/packed.cwl#main", + "object": [ + { + "@id": "#pv-main/in_array" + }, + { + "@id": "#pv-main/in_any" + }, + { + "@id": "#pv-main/in_str" + }, + { + "@id": "#pv-main/in_bool" + }, + { + "@id": "#pv-main/in_int" + }, + { + "@id": "#pv-main/in_long" + }, + { + "@id": "#pv-main/in_float" + }, + { + "@id": "#pv-main/in_double" + }, + { + "@id": "#pv-main/in_enum" + }, + { + "@id": "#pv-main/in_record" + }, + { + "@id": "#pv-main/in_multi" + } + ], + "result": [ + { + "@id": "4bd8e7e358488e833bf32cf5028695292cecb05b" + } + ], + "startTime": "2022-04-22T12:27:13.313422" + }, + { + "@id": "#pv-main/in_array", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_array" + }, + "name": "in_array", + "value": [ + "foo", + "bar" + ] + }, + { + "@id": "#pv-main/in_any", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_any" + }, + "name": "in_any", + "value": "tar" + }, + { + "@id": "#pv-main/in_str", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_str" + }, + "name": "in_str", + "value": "spam" + }, + { + "@id": "#pv-main/in_bool", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_bool" + }, + "name": "in_bool", + "value": "True" + }, + { + "@id": "#pv-main/in_int", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_int" + }, + "name": "in_int", + "value": "42" + }, + { + "@id": "#pv-main/in_long", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_long" + }, + "name": "in_long", + "value": "420" + }, + { + "@id": "#pv-main/in_float", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_float" + }, + "name": "in_float", + "value": "3.14" + }, + { + "@id": "#pv-main/in_double", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_double" + }, + "name": "in_double", + "value": "3.142" + }, + { + "@id": "#pv-main/in_enum", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_enum" + }, + "name": "in_enum", + "value": "B" + }, + { + "@id": "#pv-main/in_record/in_record_B", + "@type": "PropertyValue", + "name": "in_record_B", + "value": "Jerry" + }, + { + "@id": "#pv-main/in_record/in_record_A", + "@type": "PropertyValue", + "name": "in_record_A", + "value": "Tom" + }, + { + "@id": "#pv-main/in_record", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_record" + }, + "name": "in_record", + "value": [ + { + "@id": "#pv-main/in_record/in_record_B" + }, + { + "@id": "#pv-main/in_record/in_record_A" + } + ] + }, + { + "@id": "#pv-main/in_multi", + "@type": "PropertyValue", + "exampleOfWork": { + "@id": "packed.cwl#main/in_multi" + }, + "name": "in_multi", + "value": "9.99" + }, + { + "@id": "4bd8e7e358488e833bf32cf5028695292cecb05b", + "@type": "File", + "alternateName": "output.txt", + "exampleOfWork": { + "@id": "packed.cwl#main/cl_dump" + }, + "sha1": "4bd8e7e358488e833bf32cf5028695292cecb05b" + } + ] +} \ No newline at end of file diff --git a/tests/data/type-zoo-run-1-crate/ro-crate-preview.html b/tests/data/type-zoo-run-1-crate/ro-crate-preview.html new file mode 100644 index 0000000..22df055 --- /dev/null +++ b/tests/data/type-zoo-run-1-crate/ro-crate-preview.html @@ -0,0 +1,1175 @@ + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+

./

+ + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+


+
+

Go to: Process Run Crate

+ + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+


+
+

Go to: Workflow Run Crate

+ + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+


+
+

Go to: Provenance Run Crate

+ + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+


+
+

Go to: Workflow RO-Crate

+ + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+


+
+

⬇️ Download: packed.cwl

+ + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+


+
+

⬇️ Download: 4bd8e7e358488e833bf32cf5028695292cecb05b

+ + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+


+
+

Run of workflow/packed.cwl#main

+ + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+


+
+ + + + + + + diff --git a/tests/test_cli.py b/tests/test_cli.py index 73188bd..0c59129 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -111,8 +111,8 @@ def test_cli_version(): assert result.stdout.strip() == __version__ -def test_cli_run_provenance_minimal(data_dir, tmpdir, monkeypatch): - crate_dir = data_dir / "revsort-provenance-crate-minimal" +def test_cli_run(data_dir, tmpdir, monkeypatch): + crate_dir = data_dir / "type-zoo-run-1-crate" runner = CliRunner() args = ["run", str(crate_dir)] monkeypatch.chdir(str(tmpdir)) @@ -120,5 +120,5 @@ def test_cli_run_provenance_minimal(data_dir, tmpdir, monkeypatch): assert result.exit_code == 0 out_path = Path("output.txt") assert out_path.is_file() - crate_out_path = crate_dir / "b9214658cc453331b62c2282b772a5c063dbd284" + crate_out_path = crate_dir / "4bd8e7e358488e833bf32cf5028695292cecb05b" assert out_path.read_text() == crate_out_path.read_text() From 4a6663b119f22d8ab9ec1b4801c2e31aa32629f9 Mon Sep 17 00:00:00 2001 From: simleo Date: Wed, 12 Apr 2023 12:36:49 +0200 Subject: [PATCH 11/11] run module: allow to specify workflow runner executable --- src/runcrate/cli.py | 5 +++-- src/runcrate/run.py | 12 ++++++++---- tox.ini | 1 + 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py index 9ab28a5..e8010d2 100644 --- a/src/runcrate/cli.py +++ b/src/runcrate/cli.py @@ -93,19 +93,20 @@ def report(crate): metavar="RO_CRATE", type=click.Path(exists=True, readable=True, path_type=Path), ) +@click.option("--executable", help="workflow runner executable") @click.option("--keep-wd", help="keep working directory", is_flag=True) @click.option( "--dry-run", help="do not actually run the workflow (implies --keep-wd)", is_flag=True ) -def run(crate, keep_wd, dry_run): +def run(crate, executable, keep_wd, dry_run): """\ Run the workflow from a Workflow Run RO-Crate. RO_CRATE: RO-Crate directory or zip file. """ - run_crate(crate, keep_wd=keep_wd, dry_run=dry_run) + run_crate(crate, executable=executable, keep_wd=keep_wd, dry_run=dry_run) @cli.command() diff --git a/src/runcrate/run.py b/src/runcrate/run.py index d3e6c39..7c6ad13 100644 --- a/src/runcrate/run.py +++ b/src/runcrate/run.py @@ -14,6 +14,8 @@ """\ Run the workflow from a Workflow Run RO-Crate. + +Only CWL is supported for now. """ import json @@ -30,7 +32,7 @@ CWL_ID = "https://w3id.org/workflowhub/workflow-ro-crate#cwl" PARAMS_FILENAME = "params.json" -EXECUTABLE = "cwltool" +EXECUTABLE = "cwl-runner" STREAMFLOW_FILE = "streamflow.yml" @@ -172,7 +174,9 @@ def find_streamflow_file(crate): return e.id -def run_crate(crate, keep_wd=False, dry_run=False): +def run_crate(crate, executable=None, keep_wd=False, dry_run=False): + if executable is None: + executable = EXECUTABLE if not isinstance(crate, ROCrate): crate = ROCrate(crate) wf, action = check_runnable(crate) @@ -192,9 +196,9 @@ def run_crate(crate, keep_wd=False, dry_run=False): streamflow_relpath = find_streamflow_file(crate) if streamflow_relpath: streamflow_file = workdir / streamflow_relpath - args = ["cwl-runner", "--streamflow-file", streamflow_file, wf_path, params_path] + args = [executable, "--streamflow-file", streamflow_file, wf_path, params_path] else: - args = [EXECUTABLE, wf_path, params_path] + args = [executable, wf_path, params_path] sys.stdout.write(f"running {args}\n") try: subprocess.check_call(args) diff --git a/tox.ini b/tox.ini index ddeef00..7729758 100644 --- a/tox.ini +++ b/tox.ini @@ -12,6 +12,7 @@ deps = -r{toxinidir}/requirements.txt coverage cwltool + cwlref-runner pytest pytest-cov hypothesis