From 08d7877554faecf1af57f0cee477c08ce642dfe7 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 25 Apr 2025 13:36:17 +0100 Subject: [PATCH 01/14] add first draft of schema --- hepdata/analysis_example.json | 32 ++++++++ hepdata/templates/analysis_schema.json | 101 +++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 hepdata/analysis_example.json create mode 100644 hepdata/templates/analysis_schema.json diff --git a/hepdata/analysis_example.json b/hepdata/analysis_example.json new file mode 100644 index 000000000..694773828 --- /dev/null +++ b/hepdata/analysis_example.json @@ -0,0 +1,32 @@ +{ + "tool": "SModelS", + "version": "3.0.0", + "url_templates": { + "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", + "val_url": "https://smodels.github.io/docs/Validation#{name}_ul" + }, + "analyses" : [ + { + "inspire_id": 1795075, + "implementations": [ + { + "name" : "ATLAS-EXOT-2018-47a" + }, + { + "name" : "ATLAS-EXOT-2018-47b" + } + ] + }, + { + "inspire_id": 1795076, + "signature_type": "prompt", + "pretty_name": "di-top resonance", + "implementations": [ + { + "name" : "ATLAS-EXOT-2018-48", + "path": "13TeV/ATLAS/{name}/" + } + ] + } + ] +} \ No newline at end of file diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json new file mode 100644 index 000000000..72a216191 --- /dev/null +++ b/hepdata/templates/analysis_schema.json @@ -0,0 +1,101 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "HEPData_analysis_tool_schema", + "title": "HEPData analysis tool schema", + "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", + "type": "object", + "required": ["tool", "version", "url_templates", "analyses"], + + "properties": { + "tool": { + "description": "The tool used to implement the analysis", + "type": "string" + }, + + "version": { + "description": "The version of the tool used to implement 
the analysis", + "type": "string" + }, + + "url_templates": { + "description": "Templates for URLs to the main repository and important other pages", + "type": "object", + "required": ["main_url"], + + "properties": { + "main_url": { + "description": "The URL template for the main repository. Should contain e.g. a {name} placeholder for the analysis name.", + "type": "string" + }, + "val_url": { + "description": "The URL template for the validation page. Should contain e.g. a {name} placeholder for the analysis name.", + "type": "string" + } + } + }, + + "analyses": { + "description": "The analyses implemented in the tool", + "type": "array", + "items": { + "type": "object", + "$ref": "#/$defs/Analysis", + "minItems": 1, + "uniqueItems": true + } + } + }, + + "$defs": { + + "Analysis": { + "description": "An analysis, identified by the INSPIRE ID, implemented at least once in a tool", + "type": "object", + "required": ["inspire_id", "implementations"], + + "properties": { + "inspire_id": { + "description": "The INSPIRE ID of the analysis", + "type": "number" + }, + "implementations":{ + "description": "The implementations of the analysis in the tool", + "type": "array", + "item": { + "type": "object", + "$ref": "#/$defs/Implementation", + "minItems": 1, + "uniqueItems": true + } + }, + "signature_type": { + "description": "The signature of the analysis, e.g. 
'prompt', 'displaced'", + "type": "string" + }, + "pretty_name": { + "description": "A pretty name for the analysis", + "type": "string" + } + } + }, + + "Implementation": { + "description": "An implementation of an analysis in a tool, giving the internal name to retrieve information", + "type": "object", + "required": ["name"], + + "properties": { + "name": { + "description": "Internal name of the implementation", + "type": "string" + }, + "path": { + "description": "The path to the implementation in the tool", + "type": "string" + } + } + } + + } + +} From 60e448d838f0e785f8305c3ec86979aa70f51aef Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 25 Apr 2025 14:00:07 +0100 Subject: [PATCH 02/14] move test file and add schema test --- tests/analysis_schema_test.py | 39 +++++++++++++++++++ .../test_data}/analysis_example.json | 0 2 files changed, 39 insertions(+) create mode 100644 tests/analysis_schema_test.py rename {hepdata => tests/test_data}/analysis_example.json (100%) diff --git a/tests/analysis_schema_test.py b/tests/analysis_schema_test.py new file mode 100644 index 000000000..b2000fda3 --- /dev/null +++ b/tests/analysis_schema_test.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# +# This file is part of HEPData. +# Copyright (C) 2016 CERN. +# +# HEPData is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# HEPData is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HEPData; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. 
+# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +import json +import jsonschema + +def test_analysis_json_schema(): + schema_file_name = "../hepdata/templates/analysis_schema.json" + test_file_name = "test_data/analysis_example.json" + + with open(schema_file_name) as f: + schema = json.load(f) + with open(test_file_name) as f: + test = json.load(f) + + jsonschema.validate(instance=test, schema=schema) + +if __name__ == "__main__": + test_analysis_json_schema() diff --git a/hepdata/analysis_example.json b/tests/test_data/analysis_example.json similarity index 100% rename from hepdata/analysis_example.json rename to tests/test_data/analysis_example.json From 5f6e5afef892c16f458d32a624a1ca41a09390a5 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 25 Apr 2025 14:30:52 +0100 Subject: [PATCH 03/14] add jsonschema package to CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a64aede67..3cccfad35 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -137,7 +137,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip setuptools py - python -m pip install twine wheel coveralls requirements-builder + python -m pip install twine wheel coveralls jsonschema requirements-builder python -m pip install --force-reinstall -r requirements.txt python -m pip install -e .[tests] - name: Initialise hepdata From b49198a572c8be2b173b13609c8903827c493074 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 25 Apr 2025 16:06:33 +0100 Subject: [PATCH 04/14] fix schema path --- .github/workflows/ci.yml | 2 +- tests/analysis_schema_test.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 
3cccfad35..a64aede67 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -137,7 +137,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip setuptools py - python -m pip install twine wheel coveralls jsonschema requirements-builder + python -m pip install twine wheel coveralls requirements-builder python -m pip install --force-reinstall -r requirements.txt python -m pip install -e .[tests] - name: Initialise hepdata diff --git a/tests/analysis_schema_test.py b/tests/analysis_schema_test.py index b2000fda3..0a85c27c3 100644 --- a/tests/analysis_schema_test.py +++ b/tests/analysis_schema_test.py @@ -23,10 +23,12 @@ # as an Intergovernmental Organization or submit itself to any jurisdiction. import json import jsonschema +import os def test_analysis_json_schema(): - schema_file_name = "../hepdata/templates/analysis_schema.json" - test_file_name = "test_data/analysis_example.json" + base_dir = os.path.dirname(os.path.realpath(__file__)) + schema_file_name = os.path.join(base_dir, "..", "hepdata", "templates", "analysis_schema.json") + test_file_name = os.path.join(base_dir, "test_data", "analysis_example.json") with open(schema_file_name) as f: schema = json.load(f) From 27564c60494d6cbea64341df53255a53a4e09841 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 2 May 2025 14:46:16 +0100 Subject: [PATCH 05/14] add optional field 'implementations_license' --- hepdata/templates/analysis_schema.json | 17 +++++++ tests/test_data/analysis_example.json | 63 ++++++++++++++------------ 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index 72a216191..b7b48ae04 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -43,6 +43,23 @@ "minItems": 1, "uniqueItems": true } + }, + + "implementations_license": { + "description": "The license for the implementations of the analyses in the tool", 
+ "type": "object", + "required": ["name", "url"], + + "properties": { + "name": { + "description": "The name of the license", + "type": "string" + }, + "url": { + "description": "The URL to the license", + "type": "string" + } + } } }, diff --git a/tests/test_data/analysis_example.json b/tests/test_data/analysis_example.json index 694773828..47dcb68a4 100644 --- a/tests/test_data/analysis_example.json +++ b/tests/test_data/analysis_example.json @@ -1,32 +1,37 @@ { - "tool": "SModelS", - "version": "3.0.0", - "url_templates": { - "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", - "val_url": "https://smodels.github.io/docs/Validation#{name}_ul" + "tool": "SModelS", + "version": "3.0.0", + "url_templates": { + "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", + "val_url": "https://smodels.github.io/docs/Validation#{name}_ul" + }, + "analyses" : [ + { + "inspire_id": 1795075, + "implementations": [ + { + "name" : "ATLAS-EXOT-2018-47" + } + ] }, - "analyses" : [ - { - "inspire_id": 1795075, - "implementations": [ - { - "name" : "ATLAS-EXOT-2018-47a" - }, - { - "name" : "ATLAS-EXOT-2018-47b" - } - ] - }, - { - "inspire_id": 1795076, - "signature_type": "prompt", - "pretty_name": "di-top resonance", - "implementations": [ - { - "name" : "ATLAS-EXOT-2018-48", - "path": "13TeV/ATLAS/{name}/" - } - ] - } - ] + { + "inspire_id": 1795076, + "signature_type": "prompt", + "pretty_name": "di-top resonance", + "implementations": [ + { + "name" : "ATLAS-EXOT-2018-48", + "path": "13TeV/ATLAS/{name}/" + }, + { + "name" : "ATLAS-EXOT-2018-48b", + "path": "13TeV/ATLAS/{name}/" + } + ] + } + ], + "implementations_license": { + "name": "cc-by-4.0", + "url": "https://creativecommons.org/licenses/by/4.0" + } } \ No newline at end of file From 0bddeb9f011d3560733948ed44c315db2d844869 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 2 May 2025 14:50:38 +0100 Subject: [PATCH 06/14] specify used default license --- 
hepdata/templates/analysis_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index b7b48ae04..55bad6ebe 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -46,7 +46,7 @@ }, "implementations_license": { - "description": "The license for the implementations of the analyses in the tool", + "description": "The license for the implementations of the analyses in the tool. Taken to be CC0 if not specified.", "type": "object", "required": ["name", "url"], From f05c92a3446f89870562b3b902939bdbfd35f8d4 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 2 May 2025 16:08:42 +0100 Subject: [PATCH 07/14] make 'license' field consistent with hepdata-validator (https://github.com/HEPData/hepdata-validator/blob/91b182772eac3a6d01451b98e4e24a9e7a865887/hepdata_validator/schemas/1.1.1/additional_resources_schema.json\#L12-L21): limit number of characters in license, add 'description' field --- hepdata/templates/analysis_schema.json | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index 55bad6ebe..837e364a3 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -49,14 +49,21 @@ "description": "The license for the implementations of the analyses in the tool. 
Taken to be CC0 if not specified.", "type": "object", "required": ["name", "url"], + "additionalProperties": false, "properties": { "name": { "description": "The name of the license", - "type": "string" + "type": "string", + "maxLength": 256 }, "url": { "description": "The URL to the license", + "type": "string", + "maxLength": 256 + }, + "description": { + "description": "A description of the license", "type": "string" } } From 51445c1053efd656d0ecea3b1854e2d6508ce164 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Tue, 6 May 2025 16:59:10 +0100 Subject: [PATCH 08/14] add tool_type field --- hepdata/templates/analysis_schema.json | 8 +++++++- tests/test_data/analysis_example.json | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index 837e364a3..05988f77a 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -4,7 +4,7 @@ "title": "HEPData analysis tool schema", "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", "type": "object", - "required": ["tool", "version", "url_templates", "analyses"], + "required": ["tool", "version", "tool_type", "url_templates", "analyses"], "properties": { "tool": { @@ -17,6 +17,12 @@ "type": "string" }, + "tool_type":{ + "description": "The type of the information provided for the analyses by the tool, see enum choices", + "type": "string", + "enum": ["Statistical model", "Simplified analysis"] + }, + "url_templates": { "description": "Templates for URLs to the main repository and important other pages", "type": "object", diff --git a/tests/test_data/analysis_example.json b/tests/test_data/analysis_example.json index 47dcb68a4..0ddc3e1de 100644 --- a/tests/test_data/analysis_example.json +++ b/tests/test_data/analysis_example.json @@ -1,6 +1,7 @@ { "tool": "SModelS", "version": "3.0.0", + "tool_type": "Simplified analysis", "url_templates": { 
"main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", "val_url": "https://smodels.github.io/docs/Validation#{name}_ul" From 503ff8f0bb81e7cacf12bb7705d6468959f8f64e Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Tue, 13 May 2025 10:04:49 +0100 Subject: [PATCH 09/14] rename 'tool_type' field to 'implementations_description' --- hepdata/templates/analysis_schema.json | 9 ++++----- tests/test_data/analysis_example.json | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index 05988f77a..5100c70fb 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -4,7 +4,7 @@ "title": "HEPData analysis tool schema", "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", "type": "object", - "required": ["tool", "version", "tool_type", "url_templates", "analyses"], + "required": ["tool", "version", "implementations_description", "url_templates", "analyses"], "properties": { "tool": { @@ -17,10 +17,9 @@ "type": "string" }, - "tool_type":{ - "description": "The type of the information provided for the analyses by the tool, see enum choices", - "type": "string", - "enum": ["Statistical model", "Simplified analysis"] + "implementations_description":{ + "description": "The type of information provided for the analyses by the tool", + "type": "string" }, "url_templates": { diff --git a/tests/test_data/analysis_example.json b/tests/test_data/analysis_example.json index 0ddc3e1de..ac8b14e2d 100644 --- a/tests/test_data/analysis_example.json +++ b/tests/test_data/analysis_example.json @@ -1,7 +1,7 @@ { "tool": "SModelS", "version": "3.0.0", - "tool_type": "Simplified analysis", + "implementations_description": "SModelS analysis", "url_templates": { "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", "val_url": 
"https://smodels.github.io/docs/Validation#{name}_ul" From 0d972a956c589a4a7f3808f7bfa24414df1071ad Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Tue, 24 Jun 2025 15:22:59 +0100 Subject: [PATCH 10/14] add date_created to schema --- hepdata/templates/analysis_schema.json | 8 +++++++- tests/test_data/analysis_example.json | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index 5100c70fb..b870c6cef 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -4,7 +4,7 @@ "title": "HEPData analysis tool schema", "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", "type": "object", - "required": ["tool", "version", "implementations_description", "url_templates", "analyses"], + "required": ["tool", "version", "date_created", "implementations_description", "url_templates", "analyses"], "properties": { "tool": { @@ -17,6 +17,12 @@ "type": "string" }, + "date_created": { + "description": "The date at which the JSON file was created, formatted as RFC 3339, section 5.6 (https://json-schema.org/understanding-json-schema/reference/type#dates-and-times), e.g. 
2018-11-13T20:20:39+00:00", + "type": "string", + "format": "date-time" + }, + "implementations_description":{ "description": "The type of information provided for the analyses by the tool", "type": "string" diff --git a/tests/test_data/analysis_example.json b/tests/test_data/analysis_example.json index ac8b14e2d..c3b93d6de 100644 --- a/tests/test_data/analysis_example.json +++ b/tests/test_data/analysis_example.json @@ -1,6 +1,7 @@ { "tool": "SModelS", "version": "3.0.0", + "date_created": "2018-11-13T20:20:39+00:00", "implementations_description": "SModelS analysis", "url_templates": { "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", From 9f204bacfcdafe679ab6ed6770b80493c19b3178 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 13 Aug 2025 11:36:52 +0100 Subject: [PATCH 11/14] introduce schema_version field --- .../{analysis_schema.json => analyses_schema.json} | 9 +++++++-- tests/analysis_schema_test.py | 4 ++-- .../{analysis_example.json => analyses_example.json} | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) rename hepdata/templates/{analysis_schema.json => analyses_schema.json} (92%) rename tests/test_data/{analysis_example.json => analyses_example.json} (96%) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analyses_schema.json similarity index 92% rename from hepdata/templates/analysis_schema.json rename to hepdata/templates/analyses_schema.json index b870c6cef..c12e05baf 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analyses_schema.json @@ -1,12 +1,17 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "HEPData_analysis_tool_schema", + "$id": "https://hepdata.net/analyses/schemas/1.0.0/analyses_schema.json", "title": "HEPData analysis tool schema", "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", "type": "object", - "required": ["tool", "version", "date_created", 
"implementations_description", "url_templates", "analyses"], + "required": ["schema_version", "tool", "version", "date_created", "implementations_description", "url_templates", "analyses"], "properties": { + "schema_version": { + "description": "The version of the JSON schema applying to this file", + "const": "1.0.0" + }, + "tool": { "description": "The tool used to implement the analysis", "type": "string" diff --git a/tests/analysis_schema_test.py b/tests/analysis_schema_test.py index 0a85c27c3..25e44810b 100644 --- a/tests/analysis_schema_test.py +++ b/tests/analysis_schema_test.py @@ -27,8 +27,8 @@ def test_analysis_json_schema(): base_dir = os.path.dirname(os.path.realpath(__file__)) - schema_file_name = os.path.join(base_dir, "..", "hepdata", "templates", "analysis_schema.json") - test_file_name = os.path.join(base_dir, "test_data", "analysis_example.json") + schema_file_name = os.path.join(base_dir, "..", "hepdata", "templates", "analyses_schema.json") + test_file_name = os.path.join(base_dir, "test_data", "analyses_example.json") with open(schema_file_name) as f: schema = json.load(f) diff --git a/tests/test_data/analysis_example.json b/tests/test_data/analyses_example.json similarity index 96% rename from tests/test_data/analysis_example.json rename to tests/test_data/analyses_example.json index c3b93d6de..b9ab8ed80 100644 --- a/tests/test_data/analysis_example.json +++ b/tests/test_data/analyses_example.json @@ -1,4 +1,5 @@ { + "schema_version" : "1.0.0", "tool": "SModelS", "version": "3.0.0", "date_created": "2018-11-13T20:20:39+00:00", From f4dd17533a291bb085caf6f2ec0e358d48d682c5 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 13 Aug 2025 14:13:00 +0100 Subject: [PATCH 12/14] more renames, add readme --- hepdata/templates/analyses_schema.json | 6 +- hepdata/templates/readme.md | 111 ++++++++++++++++++ ...schema_test.py => analyses_schema_test.py} | 0 3 files changed, 114 insertions(+), 3 deletions(-) create mode 100644 
hepdata/templates/readme.md rename tests/{analysis_schema_test.py => analyses_schema_test.py} (100%) diff --git a/hepdata/templates/analyses_schema.json b/hepdata/templates/analyses_schema.json index c12e05baf..4a6ff2926 100644 --- a/hepdata/templates/analyses_schema.json +++ b/hepdata/templates/analyses_schema.json @@ -13,12 +13,12 @@ }, "tool": { - "description": "The tool used to implement the analysis", + "description": "The name of the tool used to implement the analyses", "type": "string" }, "version": { - "description": "The version of the tool used to implement the analysis", + "description": "The version of the tool used to implement the analyses", "type": "string" }, @@ -28,7 +28,7 @@ "format": "date-time" }, - "implementations_description":{ + "implementations_description": { "description": "The type of information provided for the analyses by the tool", "type": "string" }, diff --git a/hepdata/templates/readme.md b/hepdata/templates/readme.md new file mode 100644 index 000000000..6b1026c43 --- /dev/null +++ b/hepdata/templates/readme.md @@ -0,0 +1,111 @@ +# About the analyses JSON schema + +This readme details a JSON schema which is used by reinterpretation tools to communicate to HEPData which analyses are implemented in that tool and where to find the implementations. + +## Goals +- **Self-descriptiveness**: the JSON format includes information about the tool and tool version it's valid for as well as basic information of the analyses implemented in the tool. + It also allows tools to include very rough human-readable information instead of just bare identifiers. +- **Standardisation**: a common standard for everyone ensures easy exchange and findability of information. +- **Future-proofness**: the standard aims to foresee future needs such that it doesn't require frequent updates. +- **Redundancy reduction**: the JSON format allows to codify URLs such that the URL stem doesn't have to be repeated. 
+ This makes it more compact, better human-readable and better maintainable. + +## The standard + +### Required fields +The following fields are required by the analyses JSON standard: +- **schema_version** (`const`): the version of the analyses JSON schema applying to the file. + Currently 1.0.0. +- **tool** (`string`): the name of the tool used to implement the analyses. +- **version** (`string`): the version of the tool used to implement the analyses. +- **date_created** (`string` in `date-time` format): the date at which the JSON file was created, formatted as [RFC 3339, section 5.6](https://json-schema.org/understanding-json-schema/reference/type#dates-and-times), e.g. "2018-11-13T20:20:39+00:00". +- **implementations_description** (`string`): the type of information provided for the analyses by the tool. + This information is used to provide text describing links to the analysis implementation on HEPData and INSPIRE. +- **url_templates** (`dict`): a dictionary of templates for URLs to the main tool repository and important other pages. + + It has to include the following fields: + - **main_url** (`string`): the URL template for the main repository. + Should contain e.g. a "{name}" placeholder for the analysis name. +- **analyses** (`array`): an array of analyses implemented in the tool. + All entries have to be unique. + Needs at least one entry. + Each array item has to have the following fields: + - **inspire_id** (`number`): the INSPIRE ID of the analysis. + - **implementations** (`array`): an array of the various implementations of the analysis in the tool. + All entries have to be unique. + Needs at least one entry. + + Each array item has to have the following fields: + - **name** (`string`): the internal name of the implementation used to retrieve information.
+ +### Additional standardised fields +The following fields are included in the standard but not required: + +- **url_templates** (`dict`): the URL templates dict can also have the following fields: + - **val_url** (`string`): the URL template for the validation page. + Should contain e.g. a `{name}` placeholder for the analysis name. +- **analyses** (`array`): the analyses array can also have the following fields: + - **signature_type** (`string`): the signature of the analysis, e.g. 'prompt', 'displaced'. + - **pretty_name** (`string`): a pretty name for the analysis. + - **implementations** (`array`): the implementations array can also have the following fields: + - **path** (`string`): the path to the implementation in the tool. +- **implementations_license** (`dict`): a dictionary describing the license for the implementations of the analyses in the tool. + Taken to be CC0 if not specified. + + It *has to* include the following fields: + - **name** (`string`): the name of the license. + The maximum length for this field is 256 characters. + - **url** (`string`): the URL to the license. + The maximum length for this field is 256 characters. + + It *can* include the following fields: + - **description** (`string`): a description of the license. + + No other fields are allowed. + + +### Additional unknown fields +Apart from the fields mentioned above, the standard allows for any number of additional fields. +These are, however, not standardised and are not checked by the schema.
+ + +## Examples +A minimal example for an analyses JSON adhering to the standard looks like this: +```JSON +{ + "schema_version" : "1.0.0", + "tool": "SModelS", + "version": "3.0.0", + "date_created": "2018-11-13T20:20:39+00:00", + "implementations_description": "SModelS analysis", + "url_templates": { + "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}" + }, + "analyses" : [ + { + "inspire_id": 1795076, + "implementations": [ + { + "name" : "ATLAS-EXOT-2018-48" + } + ] + } + ] +} +``` +See [here](../../tests/test_data/analyses_example.json) for a more elaborate example. + +## Testing an implementation + +Whether an analyses JSON file adheres to the standard defined here can be checked with Python as follows: +```python +import json +import jsonschema + +with open("analyses_schema.json") as f: + schema = json.load(f) +with open("analyses_example.json") as f: + test = json.load(f) + +jsonschema.validate(instance=test, schema=schema) +``` \ No newline at end of file diff --git a/tests/analysis_schema_test.py b/tests/analyses_schema_test.py similarity index 100% rename from tests/analysis_schema_test.py rename to tests/analyses_schema_test.py From 698e8c57d373182140debbcd231fff93f87eed92 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 13 Aug 2025 14:18:50 +0100 Subject: [PATCH 13/14] correct url --- hepdata/templates/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hepdata/templates/readme.md b/hepdata/templates/readme.md index 6b1026c43..f8362ea63 100644 --- a/hepdata/templates/readme.md +++ b/hepdata/templates/readme.md @@ -79,7 +79,7 @@ A minimal example for an analyses JSON adhering to the standard looks like this: "date_created": "2018-11-13T20:20:39+00:00", "implementations_description": "SModelS analysis", "url_templates": { - "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}" + "main_url":
"https://github.com/SModelS/smodels-database-release/tree/main/{name}" }, "analyses" : [ { From 34df6e12dfb33b2f71205cf36f02b3b21c971b3f Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Thu, 14 Aug 2025 14:57:46 +0100 Subject: [PATCH 14/14] include schema validation --- hepdata/modules/records/utils/analyses.py | 207 +++++++++++++++++----- tests/analyses_schema_test.py | 6 +- 2 files changed, 162 insertions(+), 51 deletions(-) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index 3893339fd..7aba654af 100644 --- a/hepdata/modules/records/utils/analyses.py +++ b/hepdata/modules/records/utils/analyses.py @@ -23,27 +23,42 @@ # as an Intergovernmental Organization or submit itself to any jurisdiction. import logging +import os from celery import shared_task from flask import current_app from invenio_db import db import requests +import json +import jsonschema from hepdata.ext.opensearch.api import index_record_ids from hepdata.modules.submission.api import get_latest_hepsubmission, is_resource_added_to_submission from hepdata.modules.submission.models import DataResource, HEPSubmission, data_reference_link +from hepdata.utils.users import get_user_from_id +from hepdata.modules.records.subscribers.rest import subscribe +from hepdata.modules.records.subscribers.api import is_current_user_subscribed_to_record +from hepdata.modules.records.utils.common import get_license logging.basicConfig() log = logging.getLogger(__name__) +def get_analyses_schema(): + schema_path = os.path.join("hepdata", "templates", "analyses_schema.json") + with open(schema_path) as f: + return json.load(f) @shared_task def update_analyses(endpoint=None): """ - Update (Rivet and MadAnalysis 5) analyses and remove outdated resources. + Update (Rivet, MadAnalysis 5, SModelS, CheckMATE, HackAnalysis and Combine) analyses and remove outdated resources. + Allow bulk subscription to record update notifications if "subscribe_user_id" in endpoint. 
+ Add optional "description" and "license" fields if present in endpoint. - :param endpoint: either "Rivet" or "MadAnalysis" or None (default) for both + :param endpoint: either "rivet" or "MadAnalysis" or "SModelS" or "CheckMATE" or "HackAnalysis" or "Combine" or None (default) for all """ + analyses_schema = get_analyses_schema() + endpoints = current_app.config["ANALYSES_ENDPOINTS"] for analysis_endpoint in endpoints: @@ -58,54 +73,133 @@ def update_analyses(endpoint=None): if response and response.status_code == 200: - analyses = response.json() - analysis_resources = DataResource.query.filter_by(file_type=analysis_endpoint).all() - # Check for missing analyses. - for record in analyses: - submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished') - - if submission: - num_new_resources = 0 - - for analysis in analyses[record]: - _resource_url = endpoints[analysis_endpoint]["url_template"].format(analysis) - - if not is_resource_added_to_submission(submission.publication_recid, submission.version, - _resource_url): - - log.info('Adding {} analysis to ins{} with URL {}'.format( - analysis_endpoint, record, _resource_url) - ) - new_resource = DataResource( - file_location=_resource_url, - file_type=analysis_endpoint) - - submission.resources.append(new_resource) - num_new_resources += 1 - - else: - - # Remove resource from 'analysis_resources' list. 
- resource = list(filter(lambda a: a.file_location == _resource_url, analysis_resources))[0] - analysis_resources.remove(resource) - - if num_new_resources: - - try: - db.session.add(submission) - db.session.commit() - latest_submission = get_latest_hepsubmission(inspire_id=record) - if submission.version == latest_submission.version: - index_record_ids([submission.publication_recid]) - except Exception as e: - db.session.rollback() - log.error(e) - - else: - log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format( - analysis_endpoint, record)) + r_json = response.json() + try: + jsonschema.validate(instance=r_json, schema=analyses_schema) + new_json = True + except jsonschema.ValidationError: + new_json = False + + if new_json: + + # Check for missing analyses. + for ana in r_json["analyses"]: + inspire_id = ana["inspire_id"] + submission = get_latest_hepsubmission(inspire_id=str(inspire_id), overall_status='finished') # TODO: make inspire_id an int + + if submission: + num_new_resources = 0 + + for implementation in ana["implementations"]: + ana_name = implementation["name"] + ana_path = implementation["path"] if "path" in implementation else "" + _resource_url = r_json["url_templates"]["main_url"] + prev_url = None + n_tries, max_tries = 0, 10 + while _resource_url!=prev_url and n_tries