From 21854b16df789d1ac257fe6373203aacc3edb6ba Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Thu, 2 Oct 2025 17:38:41 +0100 Subject: [PATCH 1/2] Configure ODK for the use of SSSOM mapping sets. Add a SSSOM section to the ODK configuration file and re-generate the Makefile. --- .gitignore | 1 + src/mappings/README.md | 2 ++ src/mappings/common.sssom.tsv | 8 ++++++ src/ontology/Makefile | 48 ++++++++++++++++++++++++++++++++--- src/ontology/fbcv-odk.yaml | 16 ++++++++++++ 5 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 src/mappings/README.md create mode 100644 src/mappings/common.sssom.tsv diff --git a/.gitignore b/.gitignore index e47ad3c..f4004be 100644 --- a/.gitignore +++ b/.gitignore @@ -67,6 +67,7 @@ src/patterns/pattern_owl_seed.txt src/patterns/all_pattern_terms.txt # End of ODK-managed rules +src/mappings/fbcv.sssom.tsv src/ontology/oort src/ontology/oort/* src/ontology/oort_test diff --git a/src/mappings/README.md b/src/mappings/README.md new file mode 100644 index 0000000..d42f80e --- /dev/null +++ b/src/mappings/README.md @@ -0,0 +1,2 @@ +# Directory for managing SSSOM mappings files + diff --git a/src/mappings/common.sssom.tsv b/src/mappings/common.sssom.tsv new file mode 100644 index 0000000..86cc3e0 --- /dev/null +++ b/src/mappings/common.sssom.tsv @@ -0,0 +1,8 @@ +#curie_map: +# ORCID: https://orcid.org/ +#mapping_set_id: http://purl.obolibrary.org/obo/fbcv/fbcv.sssom.tsv +#mapping_set_description: Mappings between the FlyBase Controlled Vocabulary (FBcv) and foreign ontologies and vocabularies. +#creator_id: +# - ORCID:0000-0002-6095-8718 +#license: https://creativecommons.org/licenses/by/4.0/ +subject_id predicate_id object_id mapping_justification object_source diff --git a/src/ontology/Makefile b/src/ontology/Makefile index 83638d0..26d4011 100644 --- a/src/ontology/Makefile +++ b/src/ontology/Makefile @@ -10,7 +10,7 @@ # More information: https://github.com/INCATools/ontology-development-kit/ # Fingerprint of the configuration file when this Makefile was last generated -CONFIG_HASH= e8f37b5150af876d5eb7dc695969a933dbcb0c2fae0c59f7be440e67efbcc410 +CONFIG_HASH= 615898f6bc894bac8c75d184cd2460494f7ca7ec3abb6bbed8243ed1b05dc1e3 # ---------------------------------------- @@ -61,6 +61,12 @@ OTHER_SRC = $(COMPONENTSDIR)/dpo-simple.owl ONTOLOGYTERMS = $(TMPDIR)/ontologyterms.txt EDIT_PREPROCESSED = $(TMPDIR)/$(ONT)-preprocess.owl +MAPPINGDIR= ../mappings +MAPPING_TESTER= sssom validate +SSSOMPY= sssom +MAPPINGS= agr agr-vocabs common fbcv +MAPPING_RELEASE_FILES= $(foreach n,$(MAPPINGS), $(MAPPINGDIR)/$(n).sssom.tsv) + FORMATS = $(sort owl obo json owl) FORMATS_INCL_TSV = $(sort $(FORMATS) tsv) @@ -218,8 +224,9 @@ all_subsets: $(SUBSET_FILES) # ---------------------------------------- -MAPPINGS = +MAPPINGS = agr agr-vocabs common fbcv +RELEASED_MAPPINGS = fbcv MAPPING_FILES = $(foreach p, $(MAPPINGS), $(MAPPINGDIR)/$(p).sssom.tsv) RELEASED_MAPPING_FILES = $(foreach p, $(RELEASED_MAPPINGS), $(MAPPINGDIR)/$(p).sssom.tsv) @@ -339,9 +346,12 @@ prepare_release_fast: .PHONY: copy_release_files copy_release_files: rsync -R $(RELEASE_ASSETS) $(RELEASEDIR) + mkdir -p $(RELEASEDIR)/mappings + cp -rf $(RELEASED_MAPPING_FILES) $(RELEASEDIR)/mappings # All released assets, in their final location within the release -RELEASE_ASSETS_AFTER_RELEASE=$(foreach n,$(RELEASE_ASSETS), $(RELEASEDIR)/$(n)) +RELEASE_ASSETS_AFTER_RELEASE=$(foreach n,$(RELEASE_ASSETS), $(RELEASEDIR)/$(n)) \ + $(foreach n,$(RELEASED_MAPPINGS), $(RELEASEDIR)/mappings/$(n).sssom.tsv) .PHONY: show_release_assets show_release_assets: @@ -559,6 +569,38 @@ ifneq ($(SPARQL_EXPORTS_ARGS),) $(ROBOT) query -f tsv --use-graphs true -i $< $(SPARQL_EXPORTS_ARGS) endif +# ---------------------------------------- +# SSSOM Mapping Files +# ---------------------------------------- + +validate-sssom-%: + tsvalid $(MAPPINGDIR)/$*.sssom.tsv --comment "#" + sssom validate $(MAPPINGDIR)/$*.sssom.tsv + +validate_mappings: + $(MAKE_FAST) $(foreach n,$(MAPPINGS),validate-sssom-$(n)) + +normalize-sssom-%: + sssom-cli --output $(MAPPINGDIR)/$*.sssom.tsv $(MAPPINGDIR)/$*.sssom.tsv + +normalize_mappings: + $(MAKE_FAST) $(foreach n,$(MAPPINGS),normalize-sssom-$(n)) + +# This mappingset is manually curated, so we only check that the file actually exists. +$(MAPPINGDIR)/agr.sssom.tsv: + test -f $@ + +# This mappingset is manually curated, so we only check that the file actually exists. +$(MAPPINGDIR)/agr-vocabs.sssom.tsv: + test -f $@ + +# This mappingset is manually curated, so we only check that the file actually exists. +$(MAPPINGDIR)/common.sssom.tsv: + test -f $@ + +$(MAPPINGDIR)/fbcv.sssom.tsv: $(MAPPINGDIR)/common.sssom.tsv $(MAPPINGDIR)/agr.sssom.tsv $(MAPPINGDIR)/agr-vocabs.sssom.tsv + sssom-cli --output $@ $^ + # ---------------------------------------- # Release artefacts: export formats # ---------------------------------------- diff --git a/src/ontology/fbcv-odk.yaml b/src/ontology/fbcv-odk.yaml index 965ea9d..621793a 100644 --- a/src/ontology/fbcv-odk.yaml +++ b/src/ontology/fbcv-odk.yaml @@ -48,3 +48,19 @@ release_date: TRUE components: products: - filename: dpo-simple.owl +use_mappings: true +sssom_mappingset_group: + products: + - id: agr + maintenance: manual + - id: agr-vocabs + maintenance: manual + - id: common + maintenance: manual + - id: fbcv + maintenance: merged + source_mappings: + - common + - agr + - agr-vocabs + release_mappings: true From 3cd46fb676d68b7a21de98c256367cf8b174ab26 Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Thu, 2 Oct 2025 17:40:43 +0100 Subject: [PATCH 2/2] Add the mappings to the Alliance ontologies and vocabularies. Add initial mappings to some of the ontologies and "vocabularies" used at the Alliance. For now, this covers only the terms needed for migrating the scRNAseq data. The mappings to the "vocabularies" are kept in a separate set for convenience, since they are "literal" mappings (vocabulary terms at the Alliance are not proper semantic entities and do not have any kind of public identifier). --- src/mappings/agr-vocabs.sssom.tsv | 27 ++++++++++++++++++++ src/mappings/agr.sssom.tsv | 42 +++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 src/mappings/agr-vocabs.sssom.tsv create mode 100644 src/mappings/agr.sssom.tsv diff --git a/src/mappings/agr-vocabs.sssom.tsv b/src/mappings/agr-vocabs.sssom.tsv new file mode 100644 index 0000000..663c9ba --- /dev/null +++ b/src/mappings/agr-vocabs.sssom.tsv @@ -0,0 +1,27 @@ +#curie_map: +# FBcv: http://purl.obolibrary.org/obo/FBcv_ +# ORCID: https://orcid.org/ +# obo: http://purl.obolibrary.org/obo/ +#mapping_set_id: http://purl.obolibrary.org/obo/fbcv/agr-vocabs.sssom.tsv +#mapping_set_description: Mappings between the FlyBase Controlled Vocabulary (FBcv) and vocabulary terms from the Alliance of Genome Resources. +#creator_id: +# - ORCID:0000-0002-6095-8718 +#license: https://creativecommons.org/licenses/by/4.0/ +#subject_source: obo:fbcv.owl +#object_type: rdfs literal +#mapping_date: 2025-10-02 +subject_id subject_label predicate_id object_label object_category mapping_justification comment +FBcv:0000222 male skos:exactMatch male Genetic Sex semapv:ManualMappingCuration +FBcv:0000334 female skos:exactMatch female Genetic Sex semapv:ManualMappingCuration +FBcv:0003124 species study skos:exactMatch species Data Set Category Tags semapv:ManualMappingCuration +FBcv:0003125 strain study skos:exactMatch genome variation Data Set Category Tags semapv:ManualMappingCuration +FBcv:0003127 developmental stage study skos:exactMatch developmental stage Data Set Category Tags semapv:ManualMappingCuration +FBcv:0003128 circadian rhythm study skos:narrowMatch time of day Data Set Category Tags semapv:ManualMappingCuration The Alliance term is specifically about a study that compares samples collected at specified times of the day, while the FBcv term is more broadly about any study of the circadian rhythm (which would likely involve collecting samples at specified times of the day but not necessarily). +FBcv:0003129 cell cycle study skos:exactMatch cell cycle Data Set Category Tags semapv:ManualMappingCuration +FBcv:0003130 tissue type study skos:narrowMatch anatomical structure Data Set Category Tags semapv:ManualMappingCuration The Alliance term is specifically about studying gene expression in tissues, while the FBcv term is more broadly about studying the form or function of tissues (including but not limited to gene expression). +FBcv:0003131 cell type study skos:exactMatch cell type Data Set Category Tags semapv:ManualMappingCuration +FBcv:0003132 subcellular component study skos:exactMatch subcellular component Data Set Category Tags semapv:ManualMappingCuration +FBcv:0003133 gene study skos:narrowMatch gene Data Set Category Tags semapv:ManualMappingCuration The Alliance term is specifically about studying the effects of gene perturbation, while the FBcv term is more broadly about studying the properties of a gene, whether in perturbed conditions or not. +FBcv:0003134 biotic stimulus study skos:exactMatch biotic stimulus Data Set Category Tags semapv:ManualMappingCuration +FBcv:0003135 chemical stimulus study skos:exactMatch chemical stimulus Data Set Category Tags semapv:ManualMappingCuration +FBcv:0003136 physical stimulus study skos:exactMatch physical stimulus Data Set Category Tags semapv:ManualMappingCuration diff --git a/src/mappings/agr.sssom.tsv b/src/mappings/agr.sssom.tsv new file mode 100644 index 0000000..1a29504 --- /dev/null +++ b/src/mappings/agr.sssom.tsv @@ -0,0 +1,42 @@ +#curie_map: +# FBcv: http://purl.obolibrary.org/obo/FBcv_ +# MMO: http://purl.obolibrary.org/obo/MMO_ +# OBI: http://purl.obolibrary.org/obo/OBI_ +# ORCID: https://orcid.org/ +# obo: http://purl.obolibrary.org/obo/ +#mapping_set_id: http://purl.obolibrary.org/obo/fbcv/agr.sssom.tsv +#mapping_set_description: Mappings between the FlyBase Controlled Vocabulary (FBcv) and some of the ontologies used by the Alliance of Genome Resources (AGR). +#creator_id: +# - ORCID:0000-0002-6095-8718 +#license: https://creativecommons.org/licenses/by/4.0/ +#subject_source: obo:fbcv.owl +#mapping_date: 2025-10-02 +subject_id subject_label predicate_id object_id object_label mapping_justification object_source +FBcv:0003047 isolated cells skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration obo:obi.owl +FBcv:0003068 RNA-Seq skos:exactMatch MMO:0000658 ribonucleic acid in situ hybridization assay semapv:ManualMappingCuration obo:mmo.owl +FBcv:0003127 developmental stage study skos:exactMatch OBI:0003713 organism development assay semapv:ManualMappingCuration obo:obi.owl +FBcv:0003130 tissue type study skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration obo:obi.owl +FBcv:0003133 gene study skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration obo:obi.owl +FBcv:0003134 biotic stimulus study skos:broadMatch OBI:0001396 stimulus or stress design semapv:ManualMappingCuration obo:obi.owl +FBcv:0003135 chemical stimulus study skos:broadMatch OBI:0001396 stimulus or stress design semapv:ManualMappingCuration obo:obi.owl +FBcv:0003141 multi-individual sample skos:exactMatch OBI:0302716 pool of specimens semapv:ManualMappingCuration obo:obi.owl +FBcv:0003166 biotic treatment skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration obo:obi.owl +FBcv:0003167 chemical treatment skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration obo:obi.owl +FBcv:0003169 tissue dissection skos:exactMatch OBI:0001504 dissection semapv:ManualMappingCuration obo:obi.owl +FBcv:0003170 cell isolation skos:exactMatch OBI:0000512 isolation of cell population semapv:ManualMappingCuration obo:obi.owl +FBcv:0003213 Illumina sequencing skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration obo:obi.owl +FBcv:0009000 single-cell RNA-Seq skos:exactMatch MMO:0000862 single cell RNA-seq assay semapv:ManualMappingCuration obo:mmo.owl +FBcv:0009000 single-cell RNA-Seq skos:exactMatch OBI:0002631 single-cell RNA sequencing assay semapv:ManualMappingCuration obo:obi.owl +FBcv:0009001 single-nucleus RNA-Seq skos:exactMatch OBI:0003109 single-nucleus RNA sequencing assay semapv:ManualMappingCuration obo:obi.owl +FBcv:0009001 single-nucleus RNA-Seq skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration obo:mmo.owl +FBcv:0009002 cell clustering analysis skos:broadMatch OBI:0200175 class discovery data transformation semapv:ManualMappingCuration obo:obi.owl +FBcv:0009004 isolated nuclei skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration obo:obi.owl +FBcv:0009006 DROP-Seq skos:broadMatch OBI:0002631 single-cell RNA sequencing assay semapv:ManualMappingCuration obo:obi.owl +FBcv:0009007 Smart-seq2 skos:broadMatch OBI:0002631 single-cell RNA sequencing assay semapv:ManualMappingCuration obo:obi.owl +FBcv:0009008 10x sequencing skos:broadMatch OBI:0002631 single-cell RNA sequencing assay semapv:ManualMappingCuration obo:obi.owl +FBcv:0009009 starvation skos:broadMatch OBI:0600030 experimental disease induction semapv:ManualMappingCuration obo:obi.owl +FBcv:0009011 mechanical cell dissociation skos:broadMatch OBI:0000512 isolation of cell population semapv:ManualMappingCuration obo:obi.owl +FBcv:0009012 enzymatic cell dissociation skos:broadMatch OBI:0000512 isolation of cell population semapv:ManualMappingCuration obo:obi.owl +FBcv:0009013 chemical cell dissociation skos:broadMatch OBI:0000512 isolation of cell population semapv:ManualMappingCuration obo:obi.owl +FBcv:0009056 single-cell combinatorial indexing RNA sequencing skos:exactMatch OBI:0003105 single-cell combinatorial indexing RNA sequencing assay semapv:ManualMappingCuration obo:obi.owl +FBcv:0009058 EMBL-EBI SCEA standard analysis method skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration obo:obi.owl