Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[![Python package](https://github.com/ResearchObject/ro-crate-py/workflows/Python%20package/badge.svg)](https://github.com/ResearchObject/ro-crate-py/actions?query=workflow%3A%22Python+package%22) [![Upload Python Package](https://github.com/ResearchObject/ro-crate-py/workflows/Upload%20Python%20Package/badge.svg)](https://github.com/ResearchObject/ro-crate-py/actions?query=workflow%3A%22Upload+Python+Package%22) [![PyPI version](https://badge.fury.io/py/rocrate.svg)](https://pypi.org/project/rocrate/) [![DOI](https://zenodo.org/badge/216605684.svg)](https://zenodo.org/badge/latestdoi/216605684)

ro-crate-py is a Python library to create and consume [Research Object Crates](https://w3id.org/ro/crate). It currently supports the [RO-Crate 1.1](https://w3id.org/ro/crate/1.1) specification.
ro-crate-py is a Python library to create and consume [Research Object Crates](https://w3id.org/ro/crate). It supports the current [RO-Crate 1.2](https://w3id.org/ro/crate/1.2) specification as well as the older [RO-Crate 1.1](https://w3id.org/ro/crate/1.1) and [RO-Crate 1.0](https://w3id.org/ro/crate/1.0).

## Installation

Expand Down Expand Up @@ -225,6 +225,25 @@ Note that entities can have multiple types, e.g.:
"@type" = ["File", "SoftwareSourceCode"]
```

#### Selecting the RO-Crate specification version

By default, a newly created RO-Crate conforms to the [RO-Crate 1.2](https://w3id.org/ro/crate/1.2) specification, but 1.0 and 1.1 are still supported:

```pycon
>>> from rocrate.rocrate import ROCrate
>>> crate = ROCrate()
>>> crate.version
'1.2'
>>> crate = ROCrate(version="1.0")
>>> crate.version
'1.0'
>>> crate.metadata.id
'ro-crate-metadata.jsonld'
```

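When consuming an RO-Crate (see below), the `version` parameter is ignored, and the RO-Crate version is read from the `conformsTo` property of the metadata descriptor instead; if no RO-Crate profile is found there, the default version (1.2) is assumed.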


### Consuming an RO-Crate

An existing RO-Crate package can be loaded from a directory or zip file:
Expand Down
6 changes: 3 additions & 3 deletions rocrate/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import json
import warnings

from .model import Metadata, LegacyMetadata
from .model.metadata import BASENAME, LEGACY_BASENAME


def read_metadata(metadata_path):
Expand Down Expand Up @@ -85,13 +85,13 @@ def find_root_entity_id(entities):
is more than one, we just return an arbitrary pair.

"""
descriptor = entities.get(Metadata.BASENAME, entities.get(LegacyMetadata.BASENAME))
descriptor = entities.get(BASENAME, entities.get(LEGACY_BASENAME))
if descriptor:
return _check_descriptor(descriptor, entities)
candidates = []
for id_, e in entities.items():
basename = id_.rsplit("/", 1)[-1]
if basename == Metadata.BASENAME or basename == LegacyMetadata.BASENAME:
if basename == BASENAME or basename == LEGACY_BASENAME:
try:
candidates.append(_check_descriptor(e, entities))
except ValueError:
Expand Down
3 changes: 1 addition & 2 deletions rocrate/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from .entity import Entity
from .file import File
from .file_or_dir import FileOrDir
from .metadata import Metadata, LegacyMetadata
from .metadata import Metadata
from .person import Person
from .root_dataset import RootDataset
from .softwareapplication import SoftwareApplication
Expand All @@ -58,7 +58,6 @@
"Entity",
"File",
"FileOrDir",
"LegacyMetadata",
"Metadata",
"Person",
"Preview",
Expand Down
36 changes: 13 additions & 23 deletions rocrate/model/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,25 @@
from .dataset import Dataset


SUPPORTED_VERSIONS = {"1.0", "1.1", "1.2"}
DEFAULT_VERSION = "1.2"
BASENAME = "ro-crate-metadata.json"
LEGACY_BASENAME = "ro-crate-metadata.jsonld"

WORKFLOW_PROFILE = "https://w3id.org/workflowhub/workflow-ro-crate/1.0"


class Metadata(File):
"""\
RO-Crate metadata file.
"""
BASENAME = "ro-crate-metadata.json"
PROFILE = "https://w3id.org/ro/crate/1.1"

def __init__(self, crate, source=None, dest_path=None, properties=None):
def __init__(self, crate, source=None, dest_path=None, properties=None, version=DEFAULT_VERSION):
if version not in SUPPORTED_VERSIONS:
raise ValueError(f"version {version!r} not supported")
self.version = version
self.profile = f"https://w3id.org/ro/crate/{self.version}"
if source is None and dest_path is None:
dest_path = self.BASENAME
dest_path = LEGACY_BASENAME if version == "1.0" else BASENAME
super().__init__(
crate,
source=source,
Expand All @@ -58,7 +64,7 @@ def _empty(self):
# default properties of the metadata entry
val = {"@id": self.id,
"@type": "CreativeWork",
"conformsTo": {"@id": self.PROFILE},
"conformsTo": {"@id": self.profile},
"about": {"@id": "./"}}
return val

Expand All @@ -68,7 +74,7 @@ def generate(self):
graph = []
for entity in self.crate.get_entities():
graph.append(entity.properties())
context = [f'{self.PROFILE}/context']
context = [f'{self.profile}/context']
context.extend(self.extra_contexts)
if self.extra_terms:
context.append(self.extra_terms)
Expand All @@ -92,12 +98,6 @@ def root(self) -> Dataset:
return self.crate.root_dataset


class LegacyMetadata(Metadata):
    """\
    Metadata file variant that overrides the base class constants with the
    ``.jsonld`` file name and the RO-Crate 1.0 profile URI.
    """

    # File name used for the metadata descriptor by this variant.
    BASENAME = "ro-crate-metadata.jsonld"
    # Profile URI used instead of the base class PROFILE value.
    PROFILE = "https://w3id.org/ro/crate/1.0"


# https://github.com/ResearchObject/ro-terms/tree/master/test
TESTING_EXTRA_TERMS = {
"TestSuite": "https://w3id.org/ro/terms/test#TestSuite",
Expand All @@ -114,13 +114,3 @@ class LegacyMetadata(Metadata):
"definition": "https://w3id.org/ro/terms/test#definition",
"engineVersion": "https://w3id.org/ro/terms/test#engineVersion"
}


def metadata_class(descriptor_id):
    """\
    Pick the metadata class matching a descriptor id.

    Only the part of ``descriptor_id`` after its last "/" is compared with
    the ``BASENAME`` of each known class.

    :param descriptor_id: the ``@id`` of the metadata descriptor
    :return: ``Metadata`` or ``LegacyMetadata``
    :raises ValueError: if the trailing segment matches neither basename
    """
    _, _, tail = descriptor_id.rpartition("/")
    # Same precedence as before: the current class is checked first.
    for candidate in (Metadata, LegacyMetadata):
        if tail == candidate.BASENAME:
            return candidate
    raise ValueError(f"Invalid metadata descriptor ID: {descriptor_id!r}")
43 changes: 27 additions & 16 deletions rocrate/rocrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
Entity,
File,
FileOrDir,
LegacyMetadata,
Metadata,
Preview,
RootDataset,
Expand All @@ -54,7 +53,7 @@
TestSuite,
WorkflowDescription,
)
from .model.metadata import WORKFLOW_PROFILE, TESTING_EXTRA_TERMS, metadata_class
from .model.metadata import WORKFLOW_PROFILE, TESTING_EXTRA_TERMS, DEFAULT_VERSION, BASENAME, LEGACY_BASENAME
from .model.computationalworkflow import galaxy_to_abstract_cwl
from .model.computerlanguage import get_lang
from .model.testservice import get_service
Expand All @@ -76,9 +75,17 @@ def pick_type(json_entity, type_map, fallback=None):
return fallback


def get_version(metadata_properties):
    """\
    Extract the RO-Crate version from the metadata descriptor properties.

    Each normalized ``conformsTo`` value is split at its last "/"; for the
    first value whose leading part starts with ``https://w3id.org/ro/crate``,
    the trailing part is returned as the version string.

    :param metadata_properties: properties of the metadata descriptor entity
    :return: the version string, or ``None`` if no value matches
    """
    for uri in get_norm_value(metadata_properties, "conformsTo"):
        # rpartition always yields three parts, so a value without any "/"
        # gives an empty base_uri and is skipped, rather than raising
        # ValueError on tuple unpacking as rsplit("/", 1) would.
        base_uri, _, version = uri.rpartition("/")
        if base_uri.startswith("https://w3id.org/ro/crate"):
            return version
    return None


class ROCrate():

def __init__(self, source=None, gen_preview=False, init=False, exclude=None):
def __init__(self, source=None, gen_preview=False, init=False, exclude=None, version=DEFAULT_VERSION):
self.mode = None
self.source = source
self.exclude = exclude
Expand All @@ -92,7 +99,7 @@ def __init__(self, source=None, gen_preview=False, init=False, exclude=None):
self.add(Preview(self))
if not source:
self.mode = Mode.CREATE
self.add(RootDataset(self), Metadata(self))
self.add(RootDataset(self), Metadata(self, version=version))
elif init:
self.mode = Mode.INIT
if isinstance(source, dict):
Expand All @@ -104,19 +111,19 @@ def __init__(self, source=None, gen_preview=False, init=False, exclude=None):
# in the zip case, self.source is the extracted dir
self.source = source

def __init_from_tree(self, top_dir, gen_preview=False):
def __init_from_tree(self, top_dir, gen_preview=False, version=DEFAULT_VERSION):
top_dir = Path(top_dir)
if not top_dir.is_dir():
raise NotADirectoryError(errno.ENOTDIR, f"'{top_dir}': not a directory")
self.add(RootDataset(self), Metadata(self))
self.add(RootDataset(self), Metadata(self, version=version))
for root, dirs, files in walk(top_dir, exclude=self.exclude):
root = Path(root)
for name in dirs:
source = root / name
self.add_dataset(source, source.relative_to(top_dir))
for name in files:
source = root / name
if source == top_dir / Metadata.BASENAME or source == top_dir / LegacyMetadata.BASENAME:
if source == top_dir / BASENAME or source == top_dir / LEGACY_BASENAME:
continue
if source != top_dir / Preview.BASENAME:
self.add_file(source, source.relative_to(top_dir))
Expand All @@ -136,11 +143,11 @@ def __read(self, source, gen_preview=False):
with zipfile.ZipFile(source, "r") as zf:
zf.extractall(zip_path)
source = Path(zip_path)
metadata_path = source / Metadata.BASENAME
metadata_path = source / BASENAME
if not metadata_path.is_file():
metadata_path = source / LegacyMetadata.BASENAME
metadata_path = source / LEGACY_BASENAME
if not metadata_path.is_file():
raise ValueError(f"Not a valid RO-Crate: missing {Metadata.BASENAME}")
raise ValueError(f"Not a valid RO-Crate: missing {BASENAME}")
_, entities = read_metadata(metadata_path)
self.__read_data_entities(entities, source, gen_preview)
self.__read_contextual_entities(entities)
Expand All @@ -154,9 +161,9 @@ def __read_data_entities(self, entities, source, gen_preview):
assert root_id == root_entity.pop('@id')
parts = as_list(root_entity.pop('hasPart', []))
self.add(RootDataset(self, root_id, properties=root_entity))
MetadataClass = metadata_class(metadata_id)
metadata_properties = entities.pop(metadata_id)
self.add(MetadataClass(self, metadata_id, properties=metadata_properties))
version = get_version(metadata_properties) or DEFAULT_VERSION
self.add(Metadata(self, metadata_id, properties=metadata_properties, version=version))

preview_entity = entities.pop(Preview.BASENAME, None)
if preview_entity and not gen_preview:
Expand Down Expand Up @@ -198,18 +205,18 @@ def __read_contextual_entities(self, entities):
@property
def default_entities(self):
return [e for e in self.__entity_map.values()
if isinstance(e, (RootDataset, Metadata, LegacyMetadata, Preview))]
if isinstance(e, (RootDataset, Metadata, Preview))]

@property
def data_entities(self):
return [e for e in self.__entity_map.values()
if not isinstance(e, (RootDataset, Metadata, LegacyMetadata, Preview))
if not isinstance(e, (RootDataset, Metadata, Preview))
and hasattr(e, "write")]

@property
def contextual_entities(self):
return [e for e in self.__entity_map.values()
if not isinstance(e, (RootDataset, Metadata, LegacyMetadata, Preview))
if not isinstance(e, (RootDataset, Metadata, Preview))
and not hasattr(e, "write")]

@property
Expand Down Expand Up @@ -300,6 +307,10 @@ def mainEntity(self):
def mainEntity(self, value):
self.root_dataset['mainEntity'] = value

@property
def version(self):
    """Return the ``version`` attribute of this crate's metadata entity."""
    return self.metadata.version

@property
def test_dir(self):
rval = self.dereference("test")
Expand Down Expand Up @@ -417,7 +428,7 @@ def add(self, *entities):
key = e.canonical_id()
if isinstance(e, RootDataset):
self.root_dataset = e
elif isinstance(e, (Metadata, LegacyMetadata)):
elif isinstance(e, Metadata):
self.metadata = e
elif isinstance(e, Preview):
self.preview = e
Expand Down
19 changes: 10 additions & 9 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,22 @@
# limitations under the License.

import json
import pathlib
import shutil
from pathlib import Path

import pytest
from rocrate.utils import get_norm_value


THIS_DIR = pathlib.Path(__file__).absolute().parent
THIS_DIR = Path(__file__).absolute().parent
TEST_DATA_NAME = 'test-data'
BASE_URL = 'https://w3id.org/ro/crate'
VERSION = '1.1'
DEFAULT_VERSION = '1.2'
LEGACY_VERSION = '1.0'


class Helpers:

PROFILE = f"{BASE_URL}/{VERSION}"
LEGACY_PROFILE = f"{BASE_URL}/{LEGACY_VERSION}"
WORKFLOW_PROFILE = "https://w3id.org/workflowhub/workflow-ro-crate/1.0"
METADATA_FILE_NAME = 'ro-crate-metadata.json'
LEGACY_METADATA_FILE_NAME = 'ro-crate-metadata.jsonld'
Expand All @@ -49,20 +47,23 @@ class Helpers:

@classmethod
def read_json_entities(cls, crate_base_path):
metadata_path = pathlib.Path(crate_base_path) / cls.METADATA_FILE_NAME
crate_base_path = Path(crate_base_path)
metadata_path = crate_base_path / cls.METADATA_FILE_NAME
if not metadata_path.is_file():
metadata_path = crate_base_path / cls.LEGACY_METADATA_FILE_NAME
with open(metadata_path, "rt") as f:
json_data = json.load(f)
return {_["@id"]: _ for _ in json_data["@graph"]}

@classmethod
def check_crate(cls, json_entities, root_id="./", data_entity_ids=None):
def check_crate(cls, json_entities, root_id="./", data_entity_ids=None, version=DEFAULT_VERSION):
assert root_id in json_entities
root = json_entities[root_id]
assert root["@type"] == "Dataset"
assert cls.METADATA_FILE_NAME in json_entities
metadata = json_entities[cls.METADATA_FILE_NAME]
assert metadata["@type"] == "CreativeWork"
assert cls.PROFILE in get_norm_value(metadata, "conformsTo")
assert f"{BASE_URL}/{version}" in get_norm_value(metadata, "conformsTo")
assert metadata["about"] == {"@id": root_id}
if data_entity_ids:
data_entity_ids = set(data_entity_ids)
Expand Down Expand Up @@ -91,7 +92,7 @@ def helpers():
# pytest's default tmpdir returns a py.path object
@pytest.fixture
def tmpdir(tmpdir):
return pathlib.Path(tmpdir)
return Path(tmpdir)


@pytest.fixture
Expand Down
3 changes: 3 additions & 0 deletions test/test-data/crate-1.0/data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
name,number
foo,1
bar,2
30 changes: 30 additions & 0 deletions test/test-data/crate-1.0/ro-crate-metadata.jsonld
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"@context": "https://w3id.org/ro/crate/1.0/context",
"@graph": [
{
"@id": "ro-crate-metadata.jsonld",
"@type": "CreativeWork",
"about": {"@id": "./"},
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.0"}
},
{
"@id": "./",
"@type": "Dataset",
"name": "Example crate",
"description": "An example RO-Crate",
"datePublished": "2025-10-17",
"license": {"@id": "http://spdx.org/licenses/CC0-1.0"},
"hasPart": [{"@id": "data.csv"}]
},
{
"@id": "data.csv",
"@type": "File",
"name": "CSV data"
},
{
"@id": "http://spdx.org/licenses/CC0-1.0",
"@type": "CreativeWork",
"name": "CC0-1.0"
}
]
}
3 changes: 3 additions & 0 deletions test/test-data/crate-1.1/data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
name,number
foo,1
bar,2
Loading