From 6b9bf4dc51815778177f8f4563bf6698a3b2c710 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Mon, 18 Aug 2025 18:56:41 -0700 Subject: [PATCH 1/8] Add instructions for depending upon specific schema commit --- pyproject.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 8b017a4..5e4d83f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,12 @@ description = " Cross-BER Data Integration" readme = "README.md" requires-python = ">=3.10.0,<3.14" dependencies = [ + # Note: To depend upon the package built from the contents of the latest commit on the default branch in the `bertron-schema` repository, use: + # "bertron-schema @ git+https://github.com/ber-data/bertron-schema.git" + # + # Note: To depend upon the package built from the contents of a specific commit in the `bertron-schema` repository, use: + # "bertron-schema @ https://raw.githubusercontent.com/ber-data/bertron-schema/{COMMIT_HASH}/src/schema/jsonschema/bertron_schema.json" + # "bertron-schema @ git+https://github.com/ber-data/bertron-schema.git", # "dtspy @ https://github.com/kbase/dtspy/archive/730828cff3924fc4b2215fe5c1b67bc04aad377f.tar.gz", "fastapi[standard]>=0.115.12", From 8d156fe97be828be011244909b1a03e4ca946fe4 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Mon, 18 Aug 2025 18:57:10 -0700 Subject: [PATCH 2/8] Fix typo in package description --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5e4d83f..df263d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ version = "0.0.0" authors = [ {name = "Chuck Parker", email = "ctparker@lbl.gov"}, ] -description = " Cross-BER Data Integration" +description = "Cross-BER Data Integration" readme = "README.md" requires-python = ">=3.10.0,<3.14" dependencies = [ From 43b12f8adffdbf3bfc5969087fc35b10ae891ea7 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Mon, 18 Aug 2025 19:24:32 -0700 Subject: [PATCH 3/8] Pin schema 
dependency of API (not just default value for ingest script) --- pyproject.toml | 4 ++-- uv.lock | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index df263d6..30f6cfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,9 +25,9 @@ dependencies = [ # "bertron-schema @ git+https://github.com/ber-data/bertron-schema.git" # # Note: To depend upon the package built from the contents of a specific commit in the `bertron-schema` repository, use: - # "bertron-schema @ https://raw.githubusercontent.com/ber-data/bertron-schema/{COMMIT_HASH}/src/schema/jsonschema/bertron_schema.json" + # "bertron-schema @ git+https://github.com/ber-data/bertron-schema.git@{COMMIT_HASH}" # - "bertron-schema @ git+https://github.com/ber-data/bertron-schema.git", + "bertron-schema @ git+https://github.com/ber-data/bertron-schema.git@82498f5f5cbc71ed7abf71b8e2c01d15c003f8d8", # "dtspy @ https://github.com/kbase/dtspy/archive/730828cff3924fc4b2215fe5c1b67bc04aad377f.tar.gz", "fastapi[standard]>=0.115.12", # `httpx` is a dependency of FastAPI's `TestClient` class, which we use diff --git a/uv.lock b/uv.lock index f8bf59b..78c93e9 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10.0, <3.14" resolution-markers = [ "python_full_version >= '3.12'", @@ -129,7 +129,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "bertron-schema", git = "https://github.com/ber-data/bertron-schema.git" }, + { name = "bertron-schema", git = "https://github.com/ber-data/bertron-schema.git?rev=82498f5f5cbc71ed7abf71b8e2c01d15c003f8d8" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.115.12" }, { name = "httpx", specifier = ">=0.28.1" }, { name = "jsonschema", specifier = ">=4.0.0" }, @@ -151,7 +151,7 @@ dev = [ [[package]] name = "bertron-schema" version = "0.1.0" -source = { git = 
"https://github.com/ber-data/bertron-schema.git#96cbe257717d44137440be369a8414d153579926" } +source = { git = "https://github.com/ber-data/bertron-schema.git?rev=82498f5f5cbc71ed7abf71b8e2c01d15c003f8d8#82498f5f5cbc71ed7abf71b8e2c01d15c003f8d8" } dependencies = [ { name = "linkml" }, { name = "linkml-runtime" }, @@ -971,7 +971,7 @@ wheels = [ [[package]] name = "linkml" -version = "1.9.2" +version = "1.9.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "antlr4-python3-runtime" }, @@ -1000,14 +1000,14 @@ dependencies = [ { name = "typing-extensions", marker = "python_full_version < '3.12'" }, { name = "watchdog" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/77/e2/23136b7e063159dc25cb865ef10f1ceca5606136bbed469efe7a061cf707/linkml-1.9.2.tar.gz", hash = "sha256:2f9141d2bc8a93bfe1d4b86a015ad0acbb94c2af099177f5687a50d3331d2b34", size = 260216, upload-time = "2025-05-15T22:21:52.251Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/cf/ab84deb8130d63d9a7baa52cd88b134ea9ad2186eeb4df6cdd3b837e6058/linkml-1.9.3.tar.gz", hash = "sha256:96de208001dae5bde43092ce0f3fab61df4c85231939476dc3f93d0b5b0d4590", size = 263725, upload-time = "2025-07-30T19:58:33.837Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/33/19/430f4907cdad687af4c14e495c7027ed96d1bbb1901609838d55b2b32c4e/linkml-1.9.2-py3-none-any.whl", hash = "sha256:4c9cf217948367df8a20cdf68e8f6da24ba23ab97a551f8ae32e9d4264e702cc", size = 333519, upload-time = "2025-05-15T22:21:50.067Z" }, + { url = "https://files.pythonhosted.org/packages/91/54/5bb8f9fd0fbdd076a631bcae7c5aa3f8c1447e04650b79e45f77fab661e4/linkml-1.9.3-py3-none-any.whl", hash = "sha256:77f2e566ce03f897bc0a9dc49d4d933a859d2a78ef56f080c9a0f8415becb884", size = 336474, upload-time = "2025-07-30T19:58:30.483Z" }, ] [[package]] name = "linkml-runtime" -version = "1.9.3" +version = "1.9.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -1025,9 
+1025,9 @@ dependencies = [ { name = "rdflib" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/24/3c/d0ec2b9b2472a06fb43784f67c89ebcaa6dfb69f9c9bc19c8ed358c88045/linkml_runtime-1.9.3.tar.gz", hash = "sha256:1b65358bf91868b7607675abb98c26597873bb45f73ab309b7d4c31a84e58e1b", size = 479939, upload-time = "2025-06-02T16:52:42.878Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/38/38ac19a5b81982f03709cda0a008327dc775d11f008d0cbdc0f2a5389e24/linkml_runtime-1.9.5.tar.gz", hash = "sha256:78dc1383adf11ad5f20bb11b6adde56ed566fbd2429a292d57699ad4596c738a", size = 480288, upload-time = "2025-08-15T22:22:51.098Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/64/93e53462fc222d6cbf6094781e55a6b3d129184f89dc533237beb544d62b/linkml_runtime-1.9.3-py3-none-any.whl", hash = "sha256:39a8aa51b40decd58fd04f4c02a213aad06b971df4c042aa7764f4b75cc09aa8", size = 577670, upload-time = "2025-06-02T16:52:40.853Z" }, + { url = "https://files.pythonhosted.org/packages/34/28/cdcbe1f0521a98b891dd30249513eef1ddcc7bb406be953b4a8d7825e68f/linkml_runtime-1.9.5-py3-none-any.whl", hash = "sha256:fece3e8aa25a4246165c6528b6a7fe83a929b985d2ce1951cc8a0f4da1a30b90", size = 576405, upload-time = "2025-08-15T22:22:49.264Z" }, ] [[package]] From 22a6fa0f550df77990f83d47cb5b5831a17b5b82 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Mon, 18 Aug 2025 19:25:05 -0700 Subject: [PATCH 4/8] Update pinned schema version used as default value in ingest script --- mongodb/ingest_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongodb/ingest_data.py b/mongodb/ingest_data.py index 8be3e3f..59807e2 100644 --- a/mongodb/ingest_data.py +++ b/mongodb/ingest_data.py @@ -198,7 +198,7 @@ def main(): parser.add_argument("--db-name", default="bertron", help="MongoDB database name") parser.add_argument( "--schema-path", - 
default="https://raw.githubusercontent.com/ber-data/bertron-schema/96cbe257717d44137440be369a8414d153579926/src/schema/jsonschema/bertron_schema.json", + default="https://raw.githubusercontent.com/ber-data/bertron-schema/82498f5f5cbc71ed7abf71b8e2c01d15c003f8d8/src/schema/jsonschema/bertron_schema.json", help="Path or URL to the BERtron schema JSON file", ) parser.add_argument( From e196de209734dbca131f90a30921346889523a12 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Mon, 18 Aug 2025 19:25:30 -0700 Subject: [PATCH 5/8] Update test data so it conforms to pinned schema version --- tests/data/gold-example.json | 4 ++-- tests/data/monet-example.json | 4 ++-- tests/data/nmdc-example.json | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/data/gold-example.json b/tests/data/gold-example.json index 3abe969..b65ada7 100644 --- a/tests/data/gold-example.json +++ b/tests/data/gold-example.json @@ -6,8 +6,8 @@ "altitude": null, "depth": null, "elevation": { - "has_numeric_value": 2280, - "has_unit": "meter (UO:0000008)" + "numeric_value": 2280, + "unit": "meter (UO:0000008)" } }, "entity_type": [ diff --git a/tests/data/monet-example.json b/tests/data/monet-example.json index 1e9044c..d99dcea 100644 --- a/tests/data/monet-example.json +++ b/tests/data/monet-example.json @@ -6,8 +6,8 @@ "altitude": null, "depth": null, "elevation": { - "has_numeric_value": 722.613, - "has_unit": "unknown" + "numeric_value": 722.613, + "unit": "unknown" } }, "entity_type": [ diff --git a/tests/data/nmdc-example.json b/tests/data/nmdc-example.json index 8a9e766..a514de0 100644 --- a/tests/data/nmdc-example.json +++ b/tests/data/nmdc-example.json @@ -5,14 +5,14 @@ "longitude": -81.434174, "altitude": null, "depth": { - "has_minimum_numeric_value": 0, - "has_maximum_numeric_value": 0.1, - "has_unit": "m", - "has_raw_value": "0 - 0.1m" + "minimum_numeric_value": 0, + "maximum_numeric_value": 0.1, + "unit": "m", + "raw_value": "0 - 0.1m" }, "elevation": { - 
"has_numeric_value": 24, - "has_unit": "m" + "numeric_value": 24, + "unit": "m" } }, "entity_type": [ From 548f1d6e70658a078f88c65d69a4f69894489fff Mon Sep 17 00:00:00 2001 From: eecavanna Date: Mon, 18 Aug 2025 19:26:26 -0700 Subject: [PATCH 6/8] Add instructions for running tests from container shell --- CONTRIBUTING.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a9fed31..bb67efb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -103,6 +103,15 @@ Run the tests: docker compose up test ``` +If you plan to run the tests multiple times, we'd recommend running a shell within the `test` container and—from there—running the tests (as many times as you want). That will also enable syntax highlighting of the test results. + +```sh +docker compose run --rm -it test bash + +# In the container: +uv run --active pytest -v +``` +
Show/hide FAQ about the ingest script's role in testing From 073f8405827e3f36514fa5ac4d30a7f428a81954 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Mon, 18 Aug 2025 19:31:15 -0700 Subject: [PATCH 7/8] Clarify comment about specifying dependency version --- pyproject.toml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 30f6cfa..0efddc8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,11 +21,17 @@ description = "Cross-BER Data Integration" readme = "README.md" requires-python = ">=3.10.0,<3.14" dependencies = [ - # Note: To depend upon the package built from the contents of the latest commit on the default branch in the `bertron-schema` repository, use: + # Note: To depend upon the package built from the contents of the _latest commit_ + # on the _default branch_ in the `bertron-schema` repository, use: + # ``` # "bertron-schema @ git+https://github.com/ber-data/bertron-schema.git" - # - # Note: To depend upon the package built from the contents of a specific commit in the `bertron-schema` repository, use: + # ``` + # To depend upon the package built from the contents of a _specific commit_ + # in the `bertron-schema` repository, use: + # ``` # "bertron-schema @ git+https://github.com/ber-data/bertron-schema.git@{COMMIT_HASH}" + # ``` + # Reference: https://pip.pypa.io/en/stable/topics/vcs-support/ # "bertron-schema @ git+https://github.com/ber-data/bertron-schema.git@82498f5f5cbc71ed7abf71b8e2c01d15c003f8d8", # "dtspy @ https://github.com/kbase/dtspy/archive/730828cff3924fc4b2215fe5c1b67bc04aad377f.tar.gz", From 731b715c0fa21901ba8d23801a9e298f9959dd6f Mon Sep 17 00:00:00 2001 From: eecavanna Date: Mon, 18 Aug 2025 19:32:03 -0700 Subject: [PATCH 8/8] Refrain from using deprecated function to get timestamp --- mongodb/ingest_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mongodb/ingest_data.py b/mongodb/ingest_data.py index 59807e2..ceed102 100644 --- 
a/mongodb/ingest_data.py +++ b/mongodb/ingest_data.py @@ -5,7 +5,7 @@ import logging import os import sys -from datetime import datetime +from datetime import datetime, timezone from typing import Dict, Optional from schema.datamodel.bertron_schema_pydantic import Entity @@ -100,7 +100,7 @@ def insert_entity(self, entity: Dict) -> Optional[str]: try: # Add metadata entity["_metadata"] = { - "ingested_at": datetime.utcnow(), + "ingested_at": datetime.now(timezone.utc), "schema_version": self.schema.get("version", "unknown"), }