From 08d7877554faecf1af57f0cee477c08ce642dfe7 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 25 Apr 2025 13:36:17 +0100 Subject: [PATCH 01/32] add first draft of schema --- hepdata/analysis_example.json | 32 ++++++++ hepdata/templates/analysis_schema.json | 101 +++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 hepdata/analysis_example.json create mode 100644 hepdata/templates/analysis_schema.json diff --git a/hepdata/analysis_example.json b/hepdata/analysis_example.json new file mode 100644 index 000000000..694773828 --- /dev/null +++ b/hepdata/analysis_example.json @@ -0,0 +1,32 @@ +{ + "tool": "SModelS", + "version": "3.0.0", + "url_templates": { + "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", + "val_url": "https://smodels.github.io/docs/Validation#{name}_ul" + }, + "analyses" : [ + { + "inspire_id": 1795075, + "implementations": [ + { + "name" : "ATLAS-EXOT-2018-47a" + }, + { + "name" : "ATLAS-EXOT-2018-47b" + } + ] + }, + { + "inspire_id": 1795076, + "signature_type": "prompt", + "pretty_name": "di-top resonance", + "implementations": [ + { + "name" : "ATLAS-EXOT-2018-48", + "path": "13TeV/ATLAS/{name}/" + } + ] + } + ] +} \ No newline at end of file diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json new file mode 100644 index 000000000..72a216191 --- /dev/null +++ b/hepdata/templates/analysis_schema.json @@ -0,0 +1,101 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "HEPData_analysis_tool_schema", + "title": "HEPData analysis tool schema", + "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", + "type": "object", + "required": ["tool", "version", "url_templates", "analyses"], + + "properties": { + "tool": { + "description": "The tool used to implement the analysis", + "type": "string" + }, + + "version": { + "description": "The version of the tool used to implement 
the analysis", + "type": "string" + }, + + "url_templates": { + "description": "Templates for URLs to the main repository and important other pages", + "type": "object", + "required": ["main_url"], + + "properties": { + "main_url": { + "description": "The URL template for the main repository. Should contain e.g. a {name} placeholder for the analysis name.", + "type": "string" + }, + "val_url": { + "description": "The URL template for the validation page. Should contain e.g. a {name} placeholder for the analysis name.", + "type": "string" + } + } + }, + + "analyses": { + "description": "The analyses implemented in the tool", + "type": "array", + "items": { + "type": "object", + "$ref": "#/$defs/Analysis", + "minItems": 1, + "uniqueItems": true + } + } + }, + + "$defs": { + + "Analysis": { + "description": "An analysis, identified by the INSPIRE ID, implemented at least once in a tool", + "type": "object", + "required": ["inspire_id", "implementations"], + + "properties": { + "inspire_id": { + "description": "The INSPIRE ID of the analysis", + "type": "number" + }, + "implementations":{ + "description": "The implementations of the analysis in the tool", + "type": "array", + "item": { + "type": "object", + "$ref": "#/$defs/Implementation", + "minItems": 1, + "uniqueItems": true + } + }, + "signature_type": { + "description": "The signature of the analysis, e.g. 
'prompt', 'displaced'", + "type": "string" + }, + "pretty_name": { + "description": "A pretty name for the analysis", + "type": "string" + } + } + }, + + "Implementation": { + "description": "An implementation of an analysis in a tool, giving the internal name to retrieve information", + "type": "object", + "required": ["name"], + + "properties": { + "name": { + "description": "Internal name of the implementation", + "type": "string" + }, + "path": { + "description": "The path to the implementation in the tool", + "type": "string" + } + } + } + + } + +} From 60e448d838f0e785f8305c3ec86979aa70f51aef Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 25 Apr 2025 14:00:07 +0100 Subject: [PATCH 02/32] move test file and add schema test --- tests/analysis_schema_test.py | 39 +++++++++++++++++++ .../test_data}/analysis_example.json | 0 2 files changed, 39 insertions(+) create mode 100644 tests/analysis_schema_test.py rename {hepdata => tests/test_data}/analysis_example.json (100%) diff --git a/tests/analysis_schema_test.py b/tests/analysis_schema_test.py new file mode 100644 index 000000000..b2000fda3 --- /dev/null +++ b/tests/analysis_schema_test.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# +# This file is part of HEPData. +# Copyright (C) 2016 CERN. +# +# HEPData is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# HEPData is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HEPData; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. 
+# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +import json +import jsonschema + +def test_analysis_json_schema(): + schema_file_name = "../hepdata/templates/analysis_schema.json" + test_file_name = "test_data/analysis_example.json" + + with open(schema_file_name) as f: + schema = json.load(f) + with open(test_file_name) as f: + test = json.load(f) + + jsonschema.validate(instance=test, schema=schema) + +if __name__ == "__main__": + test_analysis_json_schema() diff --git a/hepdata/analysis_example.json b/tests/test_data/analysis_example.json similarity index 100% rename from hepdata/analysis_example.json rename to tests/test_data/analysis_example.json From 5f6e5afef892c16f458d32a624a1ca41a09390a5 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 25 Apr 2025 14:30:52 +0100 Subject: [PATCH 03/32] add jsonschema package to CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a64aede67..3cccfad35 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -137,7 +137,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip setuptools py - python -m pip install twine wheel coveralls requirements-builder + python -m pip install twine wheel coveralls jsonschema requirements-builder python -m pip install --force-reinstall -r requirements.txt python -m pip install -e .[tests] - name: Initialise hepdata From b49198a572c8be2b173b13609c8903827c493074 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 25 Apr 2025 16:06:33 +0100 Subject: [PATCH 04/32] fix schema path --- .github/workflows/ci.yml | 2 +- tests/analysis_schema_test.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 
3cccfad35..a64aede67 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -137,7 +137,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip setuptools py - python -m pip install twine wheel coveralls jsonschema requirements-builder + python -m pip install twine wheel coveralls requirements-builder python -m pip install --force-reinstall -r requirements.txt python -m pip install -e .[tests] - name: Initialise hepdata diff --git a/tests/analysis_schema_test.py b/tests/analysis_schema_test.py index b2000fda3..0a85c27c3 100644 --- a/tests/analysis_schema_test.py +++ b/tests/analysis_schema_test.py @@ -23,10 +23,12 @@ # as an Intergovernmental Organization or submit itself to any jurisdiction. import json import jsonschema +import os def test_analysis_json_schema(): - schema_file_name = "../hepdata/templates/analysis_schema.json" - test_file_name = "test_data/analysis_example.json" + base_dir = os.path.dirname(os.path.realpath(__file__)) + schema_file_name = os.path.join(base_dir, "..", "hepdata", "templates", "analysis_schema.json") + test_file_name = os.path.join(base_dir, "test_data", "analysis_example.json") with open(schema_file_name) as f: schema = json.load(f) From 27564c60494d6cbea64341df53255a53a4e09841 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 2 May 2025 14:46:16 +0100 Subject: [PATCH 05/32] add optional field 'implementations_license' --- hepdata/templates/analysis_schema.json | 17 +++++++ tests/test_data/analysis_example.json | 63 ++++++++++++++------------ 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index 72a216191..b7b48ae04 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -43,6 +43,23 @@ "minItems": 1, "uniqueItems": true } + }, + + "implementations_license": { + "description": "The license for the implementations of the analyses in the tool", 
+ "type": "object", + "required": ["name", "url"], + + "properties": { + "name": { + "description": "The name of the license", + "type": "string" + }, + "url": { + "description": "The URL to the license", + "type": "string" + } + } } }, diff --git a/tests/test_data/analysis_example.json b/tests/test_data/analysis_example.json index 694773828..47dcb68a4 100644 --- a/tests/test_data/analysis_example.json +++ b/tests/test_data/analysis_example.json @@ -1,32 +1,37 @@ { - "tool": "SModelS", - "version": "3.0.0", - "url_templates": { - "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", - "val_url": "https://smodels.github.io/docs/Validation#{name}_ul" + "tool": "SModelS", + "version": "3.0.0", + "url_templates": { + "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", + "val_url": "https://smodels.github.io/docs/Validation#{name}_ul" + }, + "analyses" : [ + { + "inspire_id": 1795075, + "implementations": [ + { + "name" : "ATLAS-EXOT-2018-47" + } + ] }, - "analyses" : [ - { - "inspire_id": 1795075, - "implementations": [ - { - "name" : "ATLAS-EXOT-2018-47a" - }, - { - "name" : "ATLAS-EXOT-2018-47b" - } - ] - }, - { - "inspire_id": 1795076, - "signature_type": "prompt", - "pretty_name": "di-top resonance", - "implementations": [ - { - "name" : "ATLAS-EXOT-2018-48", - "path": "13TeV/ATLAS/{name}/" - } - ] - } - ] + { + "inspire_id": 1795076, + "signature_type": "prompt", + "pretty_name": "di-top resonance", + "implementations": [ + { + "name" : "ATLAS-EXOT-2018-48", + "path": "13TeV/ATLAS/{name}/" + }, + { + "name" : "ATLAS-EXOT-2018-48b", + "path": "13TeV/ATLAS/{name}/" + } + ] + } + ], + "implementations_license": { + "name": "cc-by-4.0", + "url": "https://creativecommons.org/licenses/by/4.0" + } } \ No newline at end of file From 0bddeb9f011d3560733948ed44c315db2d844869 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 2 May 2025 14:50:38 +0100 Subject: [PATCH 06/32] specify used default license --- 
hepdata/templates/analysis_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index b7b48ae04..55bad6ebe 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -46,7 +46,7 @@ }, "implementations_license": { - "description": "The license for the implementations of the analyses in the tool", + "description": "The license for the implementations of the analyses in the tool. Taken to be CC0 if not specified.", "type": "object", "required": ["name", "url"], From f05c92a3446f89870562b3b902939bdbfd35f8d4 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Fri, 2 May 2025 16:08:42 +0100 Subject: [PATCH 07/32] make 'license' field consistent with hepdata-validator (https://github.com/HEPData/hepdata-validator/blob/91b182772eac3a6d01451b98e4e24a9e7a865887/hepdata_validator/schemas/1.1.1/additional_resources_schema.json\#L12-L21): limit number of characters in license, add 'description' field --- hepdata/templates/analysis_schema.json | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index 55bad6ebe..837e364a3 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -49,14 +49,21 @@ "description": "The license for the implementations of the analyses in the tool. 
Taken to be CC0 if not specified.", "type": "object", "required": ["name", "url"], + "additionalProperties": false, "properties": { "name": { "description": "The name of the license", - "type": "string" + "type": "string", + "maxLength": 256 }, "url": { "description": "The URL to the license", + "type": "string", + "maxLength": 256 + }, + "description": { + "description": "A description of the license", "type": "string" } } From 51445c1053efd656d0ecea3b1854e2d6508ce164 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Tue, 6 May 2025 16:59:10 +0100 Subject: [PATCH 08/32] add tool_type field --- hepdata/templates/analysis_schema.json | 8 +++++++- tests/test_data/analysis_example.json | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index 837e364a3..05988f77a 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -4,7 +4,7 @@ "title": "HEPData analysis tool schema", "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", "type": "object", - "required": ["tool", "version", "url_templates", "analyses"], + "required": ["tool", "version", "tool_type", "url_templates", "analyses"], "properties": { "tool": { @@ -17,6 +17,12 @@ "type": "string" }, + "tool_type":{ + "description": "The type of the information provided for the analyses by the tool, see enum choices", + "type": "string", + "enum": ["Statistical model", "Simplified analysis"] + }, + "url_templates": { "description": "Templates for URLs to the main repository and important other pages", "type": "object", diff --git a/tests/test_data/analysis_example.json b/tests/test_data/analysis_example.json index 47dcb68a4..0ddc3e1de 100644 --- a/tests/test_data/analysis_example.json +++ b/tests/test_data/analysis_example.json @@ -1,6 +1,7 @@ { "tool": "SModelS", "version": "3.0.0", + "tool_type": "Simplified analysis", "url_templates": { 
"main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", "val_url": "https://smodels.github.io/docs/Validation#{name}_ul" From 503ff8f0bb81e7cacf12bb7705d6468959f8f64e Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Tue, 13 May 2025 10:04:49 +0100 Subject: [PATCH 09/32] rename 'tool_type' field to 'implementations_description' --- hepdata/templates/analysis_schema.json | 9 ++++----- tests/test_data/analysis_example.json | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index 05988f77a..5100c70fb 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -4,7 +4,7 @@ "title": "HEPData analysis tool schema", "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", "type": "object", - "required": ["tool", "version", "tool_type", "url_templates", "analyses"], + "required": ["tool", "version", "implementations_description", "url_templates", "analyses"], "properties": { "tool": { @@ -17,10 +17,9 @@ "type": "string" }, - "tool_type":{ - "description": "The type of the information provided for the analyses by the tool, see enum choices", - "type": "string", - "enum": ["Statistical model", "Simplified analysis"] + "implementations_description":{ + "description": "The type of information provided for the analyses by the tool", + "type": "string" }, "url_templates": { diff --git a/tests/test_data/analysis_example.json b/tests/test_data/analysis_example.json index 0ddc3e1de..ac8b14e2d 100644 --- a/tests/test_data/analysis_example.json +++ b/tests/test_data/analysis_example.json @@ -1,7 +1,7 @@ { "tool": "SModelS", "version": "3.0.0", - "tool_type": "Simplified analysis", + "implementations_description": "SModelS analysis", "url_templates": { "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", "val_url": 
"https://smodels.github.io/docs/Validation#{name}_ul" From 0d972a956c589a4a7f3808f7bfa24414df1071ad Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Tue, 24 Jun 2025 15:22:59 +0100 Subject: [PATCH 10/32] add date_created to schema --- hepdata/templates/analysis_schema.json | 8 +++++++- tests/test_data/analysis_example.json | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analysis_schema.json index 5100c70fb..b870c6cef 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analysis_schema.json @@ -4,7 +4,7 @@ "title": "HEPData analysis tool schema", "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", "type": "object", - "required": ["tool", "version", "implementations_description", "url_templates", "analyses"], + "required": ["tool", "version", "date_created", "implementations_description", "url_templates", "analyses"], "properties": { "tool": { @@ -17,6 +17,12 @@ "type": "string" }, + "date_created": { + "description": "The date at which the JSON file was created, formatted as RFC 3339, section 5.6 (https://json-schema.org/understanding-json-schema/reference/type#dates-and-times), e.g. 
2018-11-13T20:20:39+00:00", + "type": "string", + "format": "date-time" + }, + "implementations_description":{ "description": "The type of information provided for the analyses by the tool", "type": "string" diff --git a/tests/test_data/analysis_example.json b/tests/test_data/analysis_example.json index ac8b14e2d..c3b93d6de 100644 --- a/tests/test_data/analysis_example.json +++ b/tests/test_data/analysis_example.json @@ -1,6 +1,7 @@ { "tool": "SModelS", "version": "3.0.0", + "date_created": "2018-11-13T20:20:39+00:00", "implementations_description": "SModelS analysis", "url_templates": { "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", From 9f204bacfcdafe679ab6ed6770b80493c19b3178 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 13 Aug 2025 11:36:52 +0100 Subject: [PATCH 11/32] introduce schema_version field --- .../{analysis_schema.json => analyses_schema.json} | 9 +++++++-- tests/analysis_schema_test.py | 4 ++-- .../{analysis_example.json => analyses_example.json} | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) rename hepdata/templates/{analysis_schema.json => analyses_schema.json} (92%) rename tests/test_data/{analysis_example.json => analyses_example.json} (96%) diff --git a/hepdata/templates/analysis_schema.json b/hepdata/templates/analyses_schema.json similarity index 92% rename from hepdata/templates/analysis_schema.json rename to hepdata/templates/analyses_schema.json index b870c6cef..c12e05baf 100644 --- a/hepdata/templates/analysis_schema.json +++ b/hepdata/templates/analyses_schema.json @@ -1,12 +1,17 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "HEPData_analysis_tool_schema", + "$id": "https://hepdata.net/analyses/schemas/1.0.0/analyses_schema.json", "title": "HEPData analysis tool schema", "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", "type": "object", - "required": ["tool", "version", "date_created", 
"implementations_description", "url_templates", "analyses"], + "required": ["schema_version", "tool", "version", "date_created", "implementations_description", "url_templates", "analyses"], "properties": { + "schema_version": { + "description": "The version of the JSON schema applying to this file", + "const": "1.0.0" + }, + "tool": { "description": "The tool used to implement the analysis", "type": "string" diff --git a/tests/analysis_schema_test.py b/tests/analysis_schema_test.py index 0a85c27c3..25e44810b 100644 --- a/tests/analysis_schema_test.py +++ b/tests/analysis_schema_test.py @@ -27,8 +27,8 @@ def test_analysis_json_schema(): base_dir = os.path.dirname(os.path.realpath(__file__)) - schema_file_name = os.path.join(base_dir, "..", "hepdata", "templates", "analysis_schema.json") - test_file_name = os.path.join(base_dir, "test_data", "analysis_example.json") + schema_file_name = os.path.join(base_dir, "..", "hepdata", "templates", "analyses_schema.json") + test_file_name = os.path.join(base_dir, "test_data", "analyses_example.json") with open(schema_file_name) as f: schema = json.load(f) diff --git a/tests/test_data/analysis_example.json b/tests/test_data/analyses_example.json similarity index 96% rename from tests/test_data/analysis_example.json rename to tests/test_data/analyses_example.json index c3b93d6de..b9ab8ed80 100644 --- a/tests/test_data/analysis_example.json +++ b/tests/test_data/analyses_example.json @@ -1,4 +1,5 @@ { + "schema_version" : "1.0.0", "tool": "SModelS", "version": "3.0.0", "date_created": "2018-11-13T20:20:39+00:00", From f4dd17533a291bb085caf6f2ec0e358d48d682c5 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 13 Aug 2025 14:13:00 +0100 Subject: [PATCH 12/32] more renames, add readme --- hepdata/templates/analyses_schema.json | 6 +- hepdata/templates/readme.md | 111 ++++++++++++++++++ ...schema_test.py => analyses_schema_test.py} | 0 3 files changed, 114 insertions(+), 3 deletions(-) create mode 100644 
hepdata/templates/readme.md rename tests/{analysis_schema_test.py => analyses_schema_test.py} (100%) diff --git a/hepdata/templates/analyses_schema.json b/hepdata/templates/analyses_schema.json index c12e05baf..4a6ff2926 100644 --- a/hepdata/templates/analyses_schema.json +++ b/hepdata/templates/analyses_schema.json @@ -13,12 +13,12 @@ }, "tool": { - "description": "The tool used to implement the analysis", + "description": "The name of the tool used to implement the analyses", "type": "string" }, "version": { - "description": "The version of the tool used to implement the analysis", + "description": "The version of the tool used to implement the analyses", "type": "string" }, @@ -28,7 +28,7 @@ "format": "date-time" }, - "implementations_description":{ + "implementations_description": { "description": "The type of information provided for the analyses by the tool", "type": "string" }, diff --git a/hepdata/templates/readme.md b/hepdata/templates/readme.md new file mode 100644 index 000000000..6b1026c43 --- /dev/null +++ b/hepdata/templates/readme.md @@ -0,0 +1,111 @@ +# About the analyses JSON schema + +This readme details a JSON schema which is used by reinterpretation tools to communicate to HEPData which analyses are implemented in that tool and where to find the implementations. + +## Goals +- **Self-descriptiveness**: the JSON format includes information about the tool and tool version it's valid for as well as basic information of the analyses implemented in the tool. + It also allows tools to include very rough human-readable information instead of just bare identifiers. +- **Standardisation**: a common standard for everyone ensures easy exchange and findability of information. +- **Future-proofness**: the standard aims to foresee future needs such that it doesn't require frequent updates. +- **Redundancy reduction**: the JSON format allows to codify URLs such that the URL stem doesn't have to be repeated. 
+ This makes it more compact, better human-readable and better maintainable. + +## The standard + +### Required fields +The following fields are required by the analyses JSON standard: +- **schema_version** (`const`): the version of the analyses JSON schema applying to the file. + Currently 1.0.0. +- **tool** (`string`): the name of the tool used to implement the analyses. +- **version** (`string`): the version of the tool used to implement the analyses. +- **date_created** (`string` in `date-time` format): the date at which the JSON file was created, formatted as [RFC 3339, section 5.6](https://json-schema.org/understanding-json-schema/reference/type#dates-and-times), e.g. "2018-11-13T20:20:39+00:00". +- **implementations_description** (`string`): the type of information provided for the analyses by the tool. + This information is used to provide text describing links to the analysis implementation on HEPData and INSPIRE. +- **url_templates** (`dict`): a dictionary of templates for URLs to the main tool repository and important other pages. + + It has to include the following fields: + - **main_url** (`string`): the URL template for the main repository. + Should contain e.g. a "{name}" placeholder for the analysis name. +- **analyses** (`array`): an array of analyses implemented in the tool. + All entries have to be unique. + Needs at least one entry. + Each array item has to have the following fields: + - **inspire_id** (`number`): the INSPIRE ID of the analysis. + - **implementations** (`array`): an array of the various implementations of the analysis in the tool. + All entries have to be unique. + Needs at least one entry. + + Each array item has to have the following fields: + - **name** (`string`): the internal name of the implementation used to retrieve information. 
+ +### Additional standardised fields +The following fields are included in the standard but not required: + +- **url_templates** (`dict`): the URL templates dict can also have the following fields: + - **val_url** (`string`): the URL template for the validation page. + Should contain e.g. a `{name}` placeholder for the analysis name. +- **analyses** (`array`): the analyses array can also have the following fields: + - **signature_type** (`string`): the signature of the analysis, e.g. 'prompt', 'displaced'. + - **pretty_name** (`string`): a pretty name for the analysis. + - **implementations** (`array`): the implementations array can also have the following fields: + - **path** (`string`): the path to the implementation in the tool. +- **implementations_license** (`dict`): a dictionary describing the license for the implementations of the analyses in the tool. + Taken to be CC0 if not specified. + + It *has to* include the following fields: + - **name** (`string`): the name of the license. + The maximum length for this field is 256 characters. + - **url** (`string`): the URL to the license. + The maximum length for this field is 256 characters. + + It *can* include the following fields: + - **description** (`string`): a description of the license. + + No other fields are allowed. + + +### Additional unknown fields +Apart from the fields mentioned above, the standard allows for any number of additional fields. +These are, however, not standardised and are not checked by the schema. 
+ + +## Examples +A minimal example for an analyses JSON adhering to the standard looks like this: +```JSON +{ + "schema_version" : "1.0.0", + "tool": "SModelS", + "version": "3.0.0", + "date_created": "2018-11-13T20:20:39+00:00", + "implementations_description": "SModelS analysis", + "url_templates": { + "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}" + }, + "analyses" : [ + { + "inspire_id": 1795076, + "implementations": [ + { + "name" : "ATLAS-EXOT-2018-48" + } + ] + } + ] +} +``` +See [here](../../tests/test_data/analyses_example.json) for a more elaborate example. + +## Testing an implementation + +Whether an analyses JSON file adheres to the standard defined here can be checked with Python as follows: +```python +import json +import jsonschema + +with open("analyses_schema.json") as f: + schema = json.load(f) +with open("analyses_example.json") as f: + test = json.load(f) + +jsonschema.validate(instance=test, schema=schema) +``` \ No newline at end of file diff --git a/tests/analysis_schema_test.py b/tests/analyses_schema_test.py similarity index 100% rename from tests/analysis_schema_test.py rename to tests/analyses_schema_test.py From 698e8c57d373182140debbcd231fff93f87eed92 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 13 Aug 2025 14:18:50 +0100 Subject: [PATCH 13/32] correct url --- hepdata/templates/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hepdata/templates/readme.md b/hepdata/templates/readme.md index 6b1026c43..f8362ea63 100644 --- a/hepdata/templates/readme.md +++ b/hepdata/templates/readme.md @@ -79,7 +79,7 @@ A minimal example for an analyses JSON adhering to the standard looks like this: "date_created": "2018-11-13T20:20:39+00:00", "implementations_description": "SModelS analysis", "url_templates": { - "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}" + "main_url": 
"https://github.com/SModelS/smodels-database-release/tree/main/{name}" }, "analyses" : [ { From 34df6e12dfb33b2f71205cf36f02b3b21c971b3f Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Thu, 14 Aug 2025 14:57:46 +0100 Subject: [PATCH 14/32] include schema validation --- hepdata/modules/records/utils/analyses.py | 207 +++++++++++++++++----- tests/analyses_schema_test.py | 6 +- 2 files changed, 162 insertions(+), 51 deletions(-) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index 3893339fd..7aba654af 100644 --- a/hepdata/modules/records/utils/analyses.py +++ b/hepdata/modules/records/utils/analyses.py @@ -23,27 +23,42 @@ # as an Intergovernmental Organization or submit itself to any jurisdiction. import logging +import os from celery import shared_task from flask import current_app from invenio_db import db import requests +import json +import jsonschema from hepdata.ext.opensearch.api import index_record_ids from hepdata.modules.submission.api import get_latest_hepsubmission, is_resource_added_to_submission from hepdata.modules.submission.models import DataResource, HEPSubmission, data_reference_link +from hepdata.utils.users import get_user_from_id +from hepdata.modules.records.subscribers.rest import subscribe +from hepdata.modules.records.subscribers.api import is_current_user_subscribed_to_record +from hepdata.modules.records.utils.common import get_license logging.basicConfig() log = logging.getLogger(__name__) +def get_analyses_schema(): + schema_path = os.path.join("hepdata", "templates", "analyses_schema.json") + with open(schema_path) as f: + return json.load(f) @shared_task def update_analyses(endpoint=None): """ - Update (Rivet and MadAnalysis 5) analyses and remove outdated resources. + Update (Rivet, MadAnalysis 5, SModelS, CheckMATE, HackAnalysis and Combine) analyses and remove outdated resources. + Allow bulk subscription to record update notifications if "subscribe_user_id" in endpoint. 
+ Add optional "description" and "license" fields if present in endpoint. - :param endpoint: either "Rivet" or "MadAnalysis" or None (default) for both + :param endpoint: either "rivet" or "MadAnalysis" or "SModelS" or "CheckMATE" or "HackAnalysis" or "Combine" or None (default) for all """ + analyses_schema = get_analyses_schema() + endpoints = current_app.config["ANALYSES_ENDPOINTS"] for analysis_endpoint in endpoints: @@ -58,54 +73,133 @@ def update_analyses(endpoint=None): if response and response.status_code == 200: - analyses = response.json() - analysis_resources = DataResource.query.filter_by(file_type=analysis_endpoint).all() - # Check for missing analyses. - for record in analyses: - submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished') - - if submission: - num_new_resources = 0 - - for analysis in analyses[record]: - _resource_url = endpoints[analysis_endpoint]["url_template"].format(analysis) - - if not is_resource_added_to_submission(submission.publication_recid, submission.version, - _resource_url): - - log.info('Adding {} analysis to ins{} with URL {}'.format( - analysis_endpoint, record, _resource_url) - ) - new_resource = DataResource( - file_location=_resource_url, - file_type=analysis_endpoint) - - submission.resources.append(new_resource) - num_new_resources += 1 - - else: - - # Remove resource from 'analysis_resources' list. 
- resource = list(filter(lambda a: a.file_location == _resource_url, analysis_resources))[0] - analysis_resources.remove(resource) - - if num_new_resources: - - try: - db.session.add(submission) - db.session.commit() - latest_submission = get_latest_hepsubmission(inspire_id=record) - if submission.version == latest_submission.version: - index_record_ids([submission.publication_recid]) - except Exception as e: - db.session.rollback() - log.error(e) - - else: - log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format( - analysis_endpoint, record)) + r_json = response.json() + try: + jsonschema.validate(instance=r_json, schema=analyses_schema) + new_json = True + except jsonschema.ValidationError: + new_json = False + + if new_json: + + # Check for missing analyses. + for ana in r_json["analyses"]: + inspire_id = ana["inspire_id"] + submission = get_latest_hepsubmission(inspire_id=str(inspire_id), overall_status='finished') # TODO: make inspire_id an int + + if submission: + num_new_resources = 0 + + for implementation in ana["implementations"]: + ana_name = implementation["name"] + ana_path = implementation["path"] if "path" in implementation else "" + _resource_url = r_json["url_templates"]["main_url"] + prev_url = None + n_tries, max_tries = 0, 10 + while _resource_url!=prev_url and n_tries Date: Tue, 2 Sep 2025 09:45:07 +0100 Subject: [PATCH 15/32] add schema version 0.1.0 --- hepdata/modules/records/utils/analyses.py | 4 +- .../0.1.0/analyses_schema.json | 17 +++++++++ .../templates/analyses_schema/0.1.0/readme.md | 38 +++++++++++++++++++ .../1.0.0}/analyses_schema.json | 0 .../{ => analyses_schema/1.0.0}/readme.md | 0 5 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 hepdata/templates/analyses_schema/0.1.0/analyses_schema.json create mode 100644 hepdata/templates/analyses_schema/0.1.0/readme.md rename hepdata/templates/{ => analyses_schema/1.0.0}/analyses_schema.json (100%) rename hepdata/templates/{ => 
analyses_schema/1.0.0}/readme.md (100%) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index 7aba654af..8e2834c0b 100644 --- a/hepdata/modules/records/utils/analyses.py +++ b/hepdata/modules/records/utils/analyses.py @@ -43,8 +43,8 @@ logging.basicConfig() log = logging.getLogger(__name__) -def get_analyses_schema(): - schema_path = os.path.join("hepdata", "templates", "analyses_schema.json") +def get_analyses_schema(schema_version="1.0.0"): + schema_path = os.path.join("hepdata", "templates", "analyses_schema", schema_version, "analyses_schema.json") with open(schema_path) as f: return json.load(f) diff --git a/hepdata/templates/analyses_schema/0.1.0/analyses_schema.json b/hepdata/templates/analyses_schema/0.1.0/analyses_schema.json new file mode 100644 index 000000000..b71529ada --- /dev/null +++ b/hepdata/templates/analyses_schema/0.1.0/analyses_schema.json @@ -0,0 +1,17 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://hepdata.net/analyses/schemas/0.1.0/analyses_schema.json", + "title": "HEPData analysis tool schema", + "description": "A JSON schema for tracking implementations of HEPData analyses in different tools", + "type": "object", + "patternProperties": { + "^[0-9]+$": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + } + }, + "additionalProperties": false +} diff --git a/hepdata/templates/analyses_schema/0.1.0/readme.md b/hepdata/templates/analyses_schema/0.1.0/readme.md new file mode 100644 index 000000000..26af936a3 --- /dev/null +++ b/hepdata/templates/analyses_schema/0.1.0/readme.md @@ -0,0 +1,38 @@ +# About the analyses JSON schema + +This readme details a JSON schema which is used by reinterpretation tools to communicate to HEPData which analyses are implemented in that tool and where to find the implementations. 
+ +## The standard + +The standard is quite simple: the whole file is basically a dictionary where the keys are the different INSPIRE IDs for the analyses implemented in the tool and the values are lists of tool-internal names for the reimplentations, i.e. +```JSON +{ + "" : ["", ""] +} +``` + +No other fields are allowed. + +## Example +A minimal example for an analyses JSON adhering to the standard looks like this: +```JSON +{ + "100592": ["MARKI_1975_I100592", "MARKI_ALTERNATIVE_IMPLEMENTATION"], + "1081268": ["LHCB_2013_I1081268"] +} +``` + +## Testing an implementation + +Whether an analyses JSON file adheres to the standard defined here, can be with python checked as follows: +```python +import json +import jsonschema + +with open("analyses_schema.json") as f: + schema = json.load(f) +with open("analyses_example.json") as f: + test = json.load(f) + +jsonschema.validate(instance=test, schema=schema) +``` \ No newline at end of file diff --git a/hepdata/templates/analyses_schema.json b/hepdata/templates/analyses_schema/1.0.0/analyses_schema.json similarity index 100% rename from hepdata/templates/analyses_schema.json rename to hepdata/templates/analyses_schema/1.0.0/analyses_schema.json diff --git a/hepdata/templates/readme.md b/hepdata/templates/analyses_schema/1.0.0/readme.md similarity index 100% rename from hepdata/templates/readme.md rename to hepdata/templates/analyses_schema/1.0.0/readme.md From a44e6984a370d9277bb761b3ecee25b1d686575a Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Tue, 2 Sep 2025 10:09:38 +0100 Subject: [PATCH 16/32] test for analyses schema version --- hepdata/modules/records/utils/analyses.py | 23 +++++++++++------------ tests/analyses_schema_test.py | 12 ++++-------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index 8e2834c0b..b4706c7b7 100644 --- a/hepdata/modules/records/utils/analyses.py +++ 
b/hepdata/modules/records/utils/analyses.py @@ -43,10 +43,11 @@ logging.basicConfig() log = logging.getLogger(__name__) -def get_analyses_schema(schema_version="1.0.0"): +def test_analyses_schema(json_file, schema_version="1.0.0"): schema_path = os.path.join("hepdata", "templates", "analyses_schema", schema_version, "analyses_schema.json") with open(schema_path) as f: - return json.load(f) + schema = json.load(f) + jsonschema.validate(instance=json_file, schema=schema) @shared_task def update_analyses(endpoint=None): @@ -57,7 +58,6 @@ def update_analyses(endpoint=None): :param endpoint: either "rivet" or "MadAnalysis" or "SModelS" or "CheckMATE" or "HackAnalysis" or "Combine" or None (default) for all """ - analyses_schema = get_analyses_schema() endpoints = current_app.config["ANALYSES_ENDPOINTS"] for analysis_endpoint in endpoints: @@ -76,14 +76,13 @@ def update_analyses(endpoint=None): analysis_resources = DataResource.query.filter_by(file_type=analysis_endpoint).all() r_json = response.json() - try: - jsonschema.validate(instance=r_json, schema=analyses_schema) - new_json = True - except jsonschema.ValidationError: - new_json = False - if new_json: + schema_version = "0.1.0" # this schema doesn't have "schema_version" field + if "schema_version" in r_json: + schema_version = r_json["schema_version"] + test_analyses_schema(r_json, schema_version=schema_version) + if schema_version=="1.0.0": # Check for missing analyses. for ana in r_json["analyses"]: inspire_id = ana["inspire_id"] @@ -145,7 +144,7 @@ def update_analyses(endpoint=None): log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format( analysis_endpoint, inspire_id)) - else: # old JSON file + else: # schema_version=="0.1.0" analyses = r_json # Check for missing analyses. @@ -235,13 +234,13 @@ def update_analyses(endpoint=None): user = get_user_from_id(endpoints[analysis_endpoint]["subscribe_user_id"]) if user: # Check for missing analyses. 
- if new_json: + if schema_version=="1.0.0": for ana in r_json["analyses"]: submission = get_latest_hepsubmission(inspire_id=str(ana["inspire_id"]), overall_status='finished') if submission and not is_current_user_subscribed_to_record(submission.publication_recid, user): subscribe(submission.publication_recid, user) - else: # old JSON file + else: # schema_version=="0.1.0" for record in analyses: submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished') if submission and not is_current_user_subscribed_to_record(submission.publication_recid, user): diff --git a/tests/analyses_schema_test.py b/tests/analyses_schema_test.py index 3fd48c82c..cde6fd559 100644 --- a/tests/analyses_schema_test.py +++ b/tests/analyses_schema_test.py @@ -22,20 +22,16 @@ # waive the privileges and immunities granted to it by virtue of its status # as an Intergovernmental Organization or submit itself to any jurisdiction. import json -import jsonschema import os -from hepdata.modules.records.utils.analyses import get_analyses_schema +from hepdata.modules.records.utils.analyses import test_analyses_schema -def test_analysis_json_schema(): +def test_analyses_json_schema(): base_dir = os.path.dirname(os.path.realpath(__file__)) test_file_name = os.path.join(base_dir, "test_data", "analyses_example.json") - schema = get_analyses_schema() with open(test_file_name) as f: - test = json.load(f) - - jsonschema.validate(instance=test, schema=schema) + test_analyses_schema(json.load(f)) if __name__ == "__main__": - test_analysis_json_schema() + test_analyses_json_schema() From 2014de37d34d5a8131c2cb673100530b55d3b71b Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Tue, 2 Sep 2025 10:48:27 +0100 Subject: [PATCH 17/32] avoid name starting with 'test' because that is being picked up by pytest --- tests/analyses_schema_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/analyses_schema_test.py b/tests/analyses_schema_test.py index 
cde6fd559..8a6190d87 100644 --- a/tests/analyses_schema_test.py +++ b/tests/analyses_schema_test.py @@ -24,14 +24,14 @@ import json import os -from hepdata.modules.records.utils.analyses import test_analyses_schema +import hepdata.modules.records.utils.analyses as analyses def test_analyses_json_schema(): base_dir = os.path.dirname(os.path.realpath(__file__)) test_file_name = os.path.join(base_dir, "test_data", "analyses_example.json") with open(test_file_name) as f: - test_analyses_schema(json.load(f)) + analyses.test_analyses_schema(json.load(f)) if __name__ == "__main__": test_analyses_json_schema() From 63fb262c5e62304f6a92b7d9ecd62e524831ec4f Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 3 Sep 2025 16:37:14 +0100 Subject: [PATCH 18/32] fix typos in schema --- hepdata/templates/analyses_schema/0.1.0/readme.md | 2 +- .../analyses_schema/1.0.0/analyses_schema.json | 14 +++++++------- hepdata/templates/analyses_schema/1.0.0/readme.md | 6 +++--- tests/test_data/analyses_example.json | 10 +++++----- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/hepdata/templates/analyses_schema/0.1.0/readme.md b/hepdata/templates/analyses_schema/0.1.0/readme.md index 26af936a3..54bd10963 100644 --- a/hepdata/templates/analyses_schema/0.1.0/readme.md +++ b/hepdata/templates/analyses_schema/0.1.0/readme.md @@ -7,7 +7,7 @@ This readme details a JSON schema which is used by reinterpretation tools to com The standard is quite simple: the whole file is basically a dictionary where the keys are the different INSPIRE IDs for the analyses implemented in the tool and the values are lists of tool-internal names for the reimplentations, i.e. 
```JSON { - "" : ["", ""] + "": ["", ""] } ``` diff --git a/hepdata/templates/analyses_schema/1.0.0/analyses_schema.json b/hepdata/templates/analyses_schema/1.0.0/analyses_schema.json index 4a6ff2926..2e7557d01 100644 --- a/hepdata/templates/analyses_schema/1.0.0/analyses_schema.json +++ b/hepdata/templates/analyses_schema/1.0.0/analyses_schema.json @@ -53,11 +53,11 @@ "analyses": { "description": "The analyses implemented in the tool", "type": "array", + "minItems": 1, + "uniqueItems": true, "items": { "type": "object", - "$ref": "#/$defs/Analysis", - "minItems": 1, - "uniqueItems": true + "$ref": "#/$defs/Analysis" } }, @@ -101,11 +101,11 @@ "implementations":{ "description": "The implementations of the analysis in the tool", "type": "array", - "item": { + "minItems": 1, + "uniqueItems": true, + "items": { "type": "object", - "$ref": "#/$defs/Implementation", - "minItems": 1, - "uniqueItems": true + "$ref": "#/$defs/Implementation" } }, "signature_type": { diff --git a/hepdata/templates/analyses_schema/1.0.0/readme.md b/hepdata/templates/analyses_schema/1.0.0/readme.md index f8362ea63..4e5bf5768 100644 --- a/hepdata/templates/analyses_schema/1.0.0/readme.md +++ b/hepdata/templates/analyses_schema/1.0.0/readme.md @@ -73,7 +73,7 @@ These are however not standardised are not being being checked by the schema. 
A minimal example for an analyses JSON adhering to the standard looks like this: ```JSON { - "schema_version" : "1.0.0", + "schema_version": "1.0.0", "tool": "SModelS", "version": "3.0.0", "date_created": "2018-11-13T20:20:39+00:00", @@ -81,12 +81,12 @@ A minimal example for an analyses JSON adhering to the standard looks like this: "url_templates": { "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{name}" }, - "analyses" : [ + "analyses": [ { "inspire_id": 1795076, "implementations": [ { - "name" : "ATLAS-EXOT-2018-48", + "name": "ATLAS-EXOT-2018-48", } ] } diff --git a/tests/test_data/analyses_example.json b/tests/test_data/analyses_example.json index b9ab8ed80..95393cc69 100644 --- a/tests/test_data/analyses_example.json +++ b/tests/test_data/analyses_example.json @@ -1,5 +1,5 @@ { - "schema_version" : "1.0.0", + "schema_version": "1.0.0", "tool": "SModelS", "version": "3.0.0", "date_created": "2018-11-13T20:20:39+00:00", @@ -8,12 +8,12 @@ "main_url": "https://github.com/SModelS/smodels-database-release/tree/main/{path}", "val_url": "https://smodels.github.io/docs/Validation#{name}_ul" }, - "analyses" : [ + "analyses": [ { "inspire_id": 1795075, "implementations": [ { - "name" : "ATLAS-EXOT-2018-47" + "name": "ATLAS-EXOT-2018-47" } ] }, @@ -23,11 +23,11 @@ "pretty_name": "di-top resonance", "implementations": [ { - "name" : "ATLAS-EXOT-2018-48", + "name": "ATLAS-EXOT-2018-48", "path": "13TeV/ATLAS/{name}/" }, { - "name" : "ATLAS-EXOT-2018-48b", + "name": "ATLAS-EXOT-2018-48b", "path": "13TeV/ATLAS/{name}/" } ] From 9b46ff795a3dc6e569c24ca5f77372b9b4125c1a Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 3 Sep 2025 16:51:37 +0100 Subject: [PATCH 19/32] fix implementations_license typo --- hepdata/modules/records/utils/analyses.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index b4706c7b7..a2e427501 100644 --- 
a/hepdata/modules/records/utils/analyses.py +++ b/hepdata/modules/records/utils/analyses.py @@ -114,8 +114,8 @@ def update_analyses(endpoint=None): file_description=r_json["implementations_description"] ) - if "license" in r_json: - resource_license = get_license(r_json["license"]) + if "implementations_license" in r_json: + resource_license = get_license(r_json["implementations_license"]) new_resource.file_license = resource_license.id submission.resources.append(new_resource) From 534e8fc5e4608271425636aa9cf124b0a8e614d0 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Thu, 4 Sep 2025 11:57:43 +0100 Subject: [PATCH 20/32] disallow nested placeholders --- hepdata/modules/records/utils/analyses.py | 14 ++---- .../templates/analyses_schema/1.0.0/readme.md | 45 ++++++++++++++++++- tests/test_data/analyses_example.json | 4 +- 3 files changed, 49 insertions(+), 14 deletions(-) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index a2e427501..2fa6984a3 100644 --- a/hepdata/modules/records/utils/analyses.py +++ b/hepdata/modules/records/utils/analyses.py @@ -85,22 +85,14 @@ def update_analyses(endpoint=None): if schema_version=="1.0.0": # Check for missing analyses. 
for ana in r_json["analyses"]: - inspire_id = ana["inspire_id"] - submission = get_latest_hepsubmission(inspire_id=str(inspire_id), overall_status='finished') # TODO: make inspire_id an int + inspire_id = str(ana["inspire_id"]) # TODO: make inspire_id an int in get_latest_hepsubmission + submission = get_latest_hepsubmission(inspire_id=inspire_id, overall_status='finished') if submission: num_new_resources = 0 for implementation in ana["implementations"]: - ana_name = implementation["name"] - ana_path = implementation["path"] if "path" in implementation else "" - _resource_url = r_json["url_templates"]["main_url"] - prev_url = None - n_tries, max_tries = 0, 10 - while _resource_url!=prev_url and n_tries Date: Thu, 4 Sep 2025 12:13:39 +0100 Subject: [PATCH 21/32] test for v0.1.0 instead of 1.0.0 --- hepdata/modules/records/utils/analyses.py | 73 ++++++++++++----------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index 2fa6984a3..959771336 100644 --- a/hepdata/modules/records/utils/analyses.py +++ b/hepdata/modules/records/utils/analyses.py @@ -82,32 +82,34 @@ def update_analyses(endpoint=None): schema_version = r_json["schema_version"] test_analyses_schema(r_json, schema_version=schema_version) - if schema_version=="1.0.0": + if schema_version=="0.1.0": + analyses = r_json + # Check for missing analyses. 
- for ana in r_json["analyses"]: - inspire_id = str(ana["inspire_id"]) # TODO: make inspire_id an int in get_latest_hepsubmission - submission = get_latest_hepsubmission(inspire_id=inspire_id, overall_status='finished') + for record in analyses: + submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished') if submission: num_new_resources = 0 - for implementation in ana["implementations"]: - _resource_url = r_json["url_templates"]["main_url"].format(**implementation) + for analysis in analyses[record]: + _resource_url = endpoints[analysis_endpoint]["url_template"].format(analysis) if not is_resource_added_to_submission(submission.publication_recid, submission.version, _resource_url): log.info('Adding {} analysis to ins{} with URL {}'.format( - analysis_endpoint, inspire_id, _resource_url) + analysis_endpoint, record, _resource_url) ) new_resource = DataResource( file_location=_resource_url, - file_type=analysis_endpoint, - file_description=r_json["implementations_description"] - ) + file_type=analysis_endpoint) - if "implementations_license" in r_json: - resource_license = get_license(r_json["implementations_license"]) + if "description" in endpoints[analysis_endpoint]: + new_resource.file_description = str(endpoints[analysis_endpoint]["description"]) + + if "license" in endpoints[analysis_endpoint]: + resource_license = get_license(endpoints[analysis_endpoint]["license"]) new_resource.file_license = resource_license.id submission.resources.append(new_resource) @@ -125,7 +127,7 @@ def update_analyses(endpoint=None): try: db.session.add(submission) db.session.commit() - latest_submission = get_latest_hepsubmission(inspire_id=inspire_id) + latest_submission = get_latest_hepsubmission(inspire_id=record) if submission.version == latest_submission.version: index_record_ids([submission.publication_recid]) except Exception as e: @@ -134,36 +136,34 @@ def update_analyses(endpoint=None): else: log.debug("An analysis is available in {0} but with no 
equivalent in HEPData (ins{1}).".format( - analysis_endpoint, inspire_id)) - - else: # schema_version=="0.1.0" - analyses = r_json + analysis_endpoint, record)) + else: # schema_version>="1.0.0" # Check for missing analyses. - for record in analyses: - submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished') + for ana in r_json["analyses"]: + inspire_id = str(ana["inspire_id"]) # TODO: make inspire_id an int in get_latest_hepsubmission + submission = get_latest_hepsubmission(inspire_id=inspire_id, overall_status='finished') if submission: num_new_resources = 0 - for analysis in analyses[record]: - _resource_url = endpoints[analysis_endpoint]["url_template"].format(analysis) + for implementation in ana["implementations"]: + _resource_url = r_json["url_templates"]["main_url"].format(**implementation) if not is_resource_added_to_submission(submission.publication_recid, submission.version, _resource_url): log.info('Adding {} analysis to ins{} with URL {}'.format( - analysis_endpoint, record, _resource_url) + analysis_endpoint, inspire_id, _resource_url) ) new_resource = DataResource( file_location=_resource_url, - file_type=analysis_endpoint) - - if "description" in endpoints[analysis_endpoint]: - new_resource.file_description = str(endpoints[analysis_endpoint]["description"]) + file_type=analysis_endpoint, + file_description=r_json["implementations_description"] + ) - if "license" in endpoints[analysis_endpoint]: - resource_license = get_license(endpoints[analysis_endpoint]["license"]) + if "implementations_license" in r_json: + resource_license = get_license(r_json["implementations_license"]) new_resource.file_license = resource_license.id submission.resources.append(new_resource) @@ -181,7 +181,7 @@ def update_analyses(endpoint=None): try: db.session.add(submission) db.session.commit() - latest_submission = get_latest_hepsubmission(inspire_id=record) + latest_submission = get_latest_hepsubmission(inspire_id=inspire_id) if 
submission.version == latest_submission.version: index_record_ids([submission.publication_recid]) except Exception as e: @@ -190,7 +190,7 @@ def update_analyses(endpoint=None): else: log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format( - analysis_endpoint, record)) + analysis_endpoint, inspire_id)) if analysis_resources: # Extra resources that were not found in the analyses JSON file. @@ -226,17 +226,18 @@ def update_analyses(endpoint=None): user = get_user_from_id(endpoints[analysis_endpoint]["subscribe_user_id"]) if user: # Check for missing analyses. - if schema_version=="1.0.0": - for ana in r_json["analyses"]: - submission = get_latest_hepsubmission(inspire_id=str(ana["inspire_id"]), overall_status='finished') + if schema_version=="0.1.0": + for record in analyses: + submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished') if submission and not is_current_user_subscribed_to_record(submission.publication_recid, user): subscribe(submission.publication_recid, user) - else: # schema_version=="0.1.0" - for record in analyses: - submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished') + else: # schema_version>="1.0.0" + for ana in r_json["analyses"]: + submission = get_latest_hepsubmission(inspire_id=str(ana["inspire_id"]), overall_status='finished') if submission and not is_current_user_subscribed_to_record(submission.publication_recid, user): subscribe(submission.publication_recid, user) + else: log.debug("No endpoint url configured for {0}".format(analysis_endpoint)) From 98c072aa52cfb439a4f621562b1600be36674fb9 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Thu, 4 Sep 2025 12:14:43 +0100 Subject: [PATCH 22/32] use SModelS as test case, test for implementation license --- hepdata/config.py | 4 +--- tests/records_test.py | 5 +++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/hepdata/config.py b/hepdata/config.py index cbbc39de3..9eb482177 100644 
--- a/hepdata/config.py +++ b/hepdata/config.py @@ -331,9 +331,7 @@ def _(x): 'description': 'MadAnalysis 5 analysis' }, 'SModelS': { - 'endpoint_url': 'https://zenodo.org/records/13952092/files/smodels-analyses.hepdata.json?download=1', - 'url_template': '{0}', - 'description': 'SModelS analysis', + 'endpoint_url': 'https://smodels.github.io/docs/smodels-analyses.hepdata.json', 'subscribe_user_id': 7766 }, 'CheckMATE': { diff --git a/tests/records_test.py b/tests/records_test.py index 1f6e2cc6f..1175b8f8e 100644 --- a/tests/records_test.py +++ b/tests/records_test.py @@ -1075,8 +1075,9 @@ def test_update_analyses(app): db.session.commit() update_analyses('SModelS') analysis_resources = DataResource.query.filter_by(file_type='SModelS').all() - assert len(analysis_resources) == 1 - assert analysis_resources[0].file_location == 'https://smodels.github.io/docs/ListOfAnalyses#ATLAS-EXOT-2018-06' + assert len(analysis_resources) == 2 + assert analysis_resources[0].file_location == 'https://github.com/SModelS/smodels-database-release/tree/main/13TeV/ATLAS/ATLAS-EXOT-2018-06/' + assert analysis_resources[0].file_license == 'cc-by-4.0' submission = get_latest_hepsubmission(inspire_id='1847779', overall_status='finished') assert is_current_user_subscribed_to_record(submission.publication_recid, user) From e747050154687d11092663a702e0c33003f42509 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Thu, 4 Sep 2025 12:20:02 +0100 Subject: [PATCH 23/32] fix license id --- tests/records_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/records_test.py b/tests/records_test.py index 1175b8f8e..d4cd81a9b 100644 --- a/tests/records_test.py +++ b/tests/records_test.py @@ -1077,7 +1077,7 @@ def test_update_analyses(app): analysis_resources = DataResource.query.filter_by(file_type='SModelS').all() assert len(analysis_resources) == 2 assert analysis_resources[0].file_location == 
'https://github.com/SModelS/smodels-database-release/tree/main/13TeV/ATLAS/ATLAS-EXOT-2018-06/' - assert analysis_resources[0].file_license == 'cc-by-4.0' + assert analysis_resources[0].file_license == 1 submission = get_latest_hepsubmission(inspire_id='1847779', overall_status='finished') assert is_current_user_subscribed_to_record(submission.publication_recid, user) From 9657ff6521d65bf5703b586de7f6f7dc406c860b Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Thu, 4 Sep 2025 12:33:22 +0100 Subject: [PATCH 24/32] use license name instead of id --- tests/records_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/records_test.py b/tests/records_test.py index d4cd81a9b..18ed8262c 100644 --- a/tests/records_test.py +++ b/tests/records_test.py @@ -1077,7 +1077,7 @@ def test_update_analyses(app): analysis_resources = DataResource.query.filter_by(file_type='SModelS').all() assert len(analysis_resources) == 2 assert analysis_resources[0].file_location == 'https://github.com/SModelS/smodels-database-release/tree/main/13TeV/ATLAS/ATLAS-EXOT-2018-06/' - assert analysis_resources[0].file_license == 1 + assert License.query.filter_by(id=analysis_resources[0].file_license).first().name == 'cc-by-4.0' submission = get_latest_hepsubmission(inspire_id='1847779', overall_status='finished') assert is_current_user_subscribed_to_record(submission.publication_recid, user) From 5add1d92c1b0c510afc4eadc56016cee70e8d7b2 Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Fri, 5 Sep 2025 11:03:08 +0100 Subject: [PATCH 25/32] theme: add Flask route to analyses JSON schema * --- hepdata/modules/theme/views.py | 10 +++++++++- hepdata/version.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hepdata/modules/theme/views.py b/hepdata/modules/theme/views.py index 5ecc16035..99ccc9ddb 100644 --- a/hepdata/modules/theme/views.py +++ b/hepdata/modules/theme/views.py @@ -25,8 +25,9 @@ """Theme blueprint in order for template and static files to be 
loaded.""" import re +import json -from flask import Blueprint, render_template, current_app, redirect, request, url_for +from flask import Blueprint, render_template, current_app, redirect, request, url_for, jsonify from hepdata_validator import LATEST_SCHEMA_VERSION, RAW_SCHEMAS_URL from hepdata.modules.email.utils import send_flask_message_email @@ -84,6 +85,13 @@ def submission_schema(jsonschema): return redirect(RAW_SCHEMAS_URL + '/' + jsonschema) +@blueprint.route('/analyses/schemas/<path:jsonschema>') +def analyses_schema(jsonschema): + with current_app.open_resource('templates/analyses_schema/' + jsonschema) as jsonfile: + schema = json.load(jsonfile) + return jsonify(schema) + + @blueprint.route('/cookies') def cookie_policy(): return render_template('hepdata_theme/pages/cookies.html') diff --git a/hepdata/version.py b/hepdata/version.py index b4655d31a..7e09878b0 100644 --- a/hepdata/version.py +++ b/hepdata/version.py @@ -28,4 +28,4 @@ and parsed by ``setup.py``. """ -__version__ = "0.9.4dev20250903" +__version__ = "0.9.4dev20250905" From ee2d5b7315c682e1f4b2a11cc804320026c384db Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Fri, 5 Sep 2025 11:07:23 +0100 Subject: [PATCH 26/32] tests: extend e2e tests for analyses JSON schema * Simplify test_general_pages to avoid calling flask.url_for twice. * Access URLs for submission_schema, analyses_schema, formats, ping. 
--- tests/e2e/test_general.py | 41 +++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/tests/e2e/test_general.py b/tests/e2e/test_general.py index c31c6e738..2dee10b84 100644 --- a/tests/e2e/test_general.py +++ b/tests/e2e/test_general.py @@ -37,6 +37,7 @@ from hepdata.ext.opensearch.api import reindex_all from hepdata.modules.submission.api import get_latest_hepsubmission from hepdata.modules.records.utils.submission import unload_submission +from hepdata_validator import LATEST_SCHEMA_VERSION, RAW_SCHEMAS_URL def test_home(app, live_server, env_browser, e2e_identifiers): @@ -219,21 +220,37 @@ def test_general_pages(live_server, env_browser): """Test general pages can be loaded without errors""" browser = env_browser - browser.get(flask.url_for('hepdata_theme.about', _external=True)) - assert (flask.url_for('hepdata_theme.about', _external=True) in - browser.current_url) + url = flask.url_for('hepdata_theme.about', _external=True) + browser.get(url) + assert url in browser.current_url - browser.get(flask.url_for('hepdata_theme.submission_help', _external=True)) - assert (flask.url_for('hepdata_theme.submission_help', _external=True) in - browser.current_url) + url = flask.url_for('hepdata_theme.submission_help', _external=True) + browser.get(url) + assert url in browser.current_url - browser.get(flask.url_for('hepdata_theme.terms', _external=True)) - assert (flask.url_for('hepdata_theme.terms', _external=True) in - browser.current_url) + url = flask.url_for('hepdata_theme.terms', _external=True) + browser.get(url) + assert url in browser.current_url - browser.get(flask.url_for('hepdata_theme.cookie_policy', _external=True)) - assert (flask.url_for('hepdata_theme.cookie_policy', _external=True) in - browser.current_url) + url = flask.url_for('hepdata_theme.cookie_policy', _external=True) + browser.get(url) + assert url in browser.current_url + + url = flask.url_for('hepdata_theme.submission_schema', 
jsonschema='submission_schema.json', _external=True) + browser.get(url) + assert RAW_SCHEMAS_URL + '/' + LATEST_SCHEMA_VERSION + '/submission_schema.json' in browser.current_url + + url = flask.url_for('hepdata_theme.analyses_schema', jsonschema='1.0.0/analyses_schema.json', _external=True) + browser.get(url) + assert url in browser.current_url + + url = flask.url_for('hepdata_theme.formats', _external=True) + browser.get(url) + assert url in browser.current_url + + url = flask.url_for('hepdata_theme.ping', _external=True) + browser.get(url) + assert url in browser.current_url def test_accept_headers(app, live_server, e2e_identifiers): From 3d8f340969c0f054023fa1d5ed0f6df78dedb696 Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Fri, 5 Sep 2025 11:10:38 +0100 Subject: [PATCH 27/32] tests: extend test_update_analyses for coverage * Test case of updating SModelS analyses with no analyses to add. * Test case of updating analyses for an endpoint with no endpoint_url. --- hepdata/modules/records/utils/analyses.py | 2 +- tests/records_test.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index 959771336..9777c9bc1 100644 --- a/hepdata/modules/records/utils/analyses.py +++ b/hepdata/modules/records/utils/analyses.py @@ -240,4 +240,4 @@ def update_analyses(endpoint=None): else: - log.debug("No endpoint url configured for {0}".format(analysis_endpoint)) + log.debug("No endpoint_url configured for {0}".format(analysis_endpoint)) diff --git a/tests/records_test.py b/tests/records_test.py index 18ed8262c..143831c8e 100644 --- a/tests/records_test.py +++ b/tests/records_test.py @@ -33,6 +33,7 @@ import tempfile import datetime +from flask import current_app from flask_login import login_user from invenio_accounts.models import User from invenio_db import db @@ -1081,6 +1082,12 @@ def test_update_analyses(app): submission = 
get_latest_hepsubmission(inspire_id='1847779', overall_status='finished') assert is_current_user_subscribed_to_record(submission.publication_recid, user) + # Call update_analyses() again: should be no further changes (but covers more lines of code) + update_analyses('SModelS') + analysis_resources = DataResource.query.filter_by(file_type='SModelS').all() + assert len(analysis_resources) == 2 + assert analysis_resources[0].file_location == 'https://github.com/SModelS/smodels-database-release/tree/main/13TeV/ATLAS/ATLAS-EXOT-2018-06/' + # ins1847779 also has a CheckMATE analysis, so don't need to import another record analysis_resources = DataResource.query.filter_by(file_type='CheckMATE').all() assert len(analysis_resources) == 0 @@ -1124,6 +1131,10 @@ def test_update_analyses(app): assert license_data.name == 'cc-by-4.0' assert license_data.url == 'https://creativecommons.org/licenses/by/4.0' + # Call update_analyses using an endpoint with no endpoint_url + current_app.config["ANALYSES_ENDPOINTS"]["TestAnalysis"] = {} + update_analyses('TestAnalysis') + def test_generate_license_data_by_id(app): """ From d25ee478aba0a851ad8625475e9ea53a0f129a84 Mon Sep 17 00:00:00 2001 From: mhabedan <67378401+mhabedan@users.noreply.github.com> Date: Fri, 5 Sep 2025 11:37:15 +0100 Subject: [PATCH 28/32] Space around == according to Python style guide Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- hepdata/modules/records/utils/analyses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index 9777c9bc1..3bcfb5fbc 100644 --- a/hepdata/modules/records/utils/analyses.py +++ b/hepdata/modules/records/utils/analyses.py @@ -82,7 +82,7 @@ def update_analyses(endpoint=None): schema_version = r_json["schema_version"] test_analyses_schema(r_json, schema_version=schema_version) - if schema_version=="0.1.0": + if schema_version == "0.1.0": analyses = r_json # Check
for missing analyses. From 17a2b31c89b165d1bc75c5b9649c5dd94426d62b Mon Sep 17 00:00:00 2001 From: mhabedan <67378401+mhabedan@users.noreply.github.com> Date: Fri, 5 Sep 2025 11:38:31 +0100 Subject: [PATCH 29/32] Another space around == according to Python style guide Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- hepdata/modules/records/utils/analyses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index 3bcfb5fbc..9128f7308 100644 --- a/hepdata/modules/records/utils/analyses.py +++ b/hepdata/modules/records/utils/analyses.py @@ -226,7 +226,7 @@ def update_analyses(endpoint=None): user = get_user_from_id(endpoints[analysis_endpoint]["subscribe_user_id"]) if user: # Check for missing analyses. - if schema_version=="0.1.0": + if schema_version == "0.1.0": for record in analyses: submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished') if submission and not is_current_user_subscribed_to_record(submission.publication_recid, user): From 091b8ca32a7e1bd17e76e446aea8c2a48989575d Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Fri, 5 Sep 2025 12:15:10 +0100 Subject: [PATCH 30/32] records: address review by Copilot and codecov-ai * Clarify some code comments and use spaces around comparison operators. * Catch ValidationError exception and add a test for coverage. * Also replace "is not" by "!=" twice in records_test.py. 
--- hepdata/modules/records/utils/analyses.py | 16 +++++++++++----- tests/records_test.py | 8 ++++++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py index 9128f7308..a33f3328c 100644 --- a/hepdata/modules/records/utils/analyses.py +++ b/hepdata/modules/records/utils/analyses.py @@ -77,10 +77,16 @@ def update_analyses(endpoint=None): r_json = response.json() - schema_version = "0.1.0" # this schema doesn't have "schema_version" field + schema_version = "0.1.0" # default to 0.1.0 for backward compatibility when schema_version field is missing if "schema_version" in r_json: schema_version = r_json["schema_version"] - test_analyses_schema(r_json, schema_version=schema_version) + + # Validate analyses JSON file against the schema. + try: + test_analyses_schema(r_json, schema_version=schema_version) + except jsonschema.exceptions.ValidationError as e: + log.error("Validation error for analyses schema {0} in {1}: {2}".format(schema_version, analysis_endpoint, e)) + continue if schema_version == "0.1.0": analyses = r_json @@ -138,10 +144,10 @@ def update_analyses(endpoint=None): log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format( analysis_endpoint, record)) - else: # schema_version>="1.0.0" + else: # schema_version >= "1.0.0" # Check for missing analyses. 
for ana in r_json["analyses"]: - inspire_id = str(ana["inspire_id"]) # TODO: make inspire_id an int in get_latest_hepsubmission + inspire_id = str(ana["inspire_id"]) # inspire_id is stored as a string in the database submission = get_latest_hepsubmission(inspire_id=inspire_id, overall_status='finished') if submission: @@ -232,7 +238,7 @@ def update_analyses(endpoint=None): if submission and not is_current_user_subscribed_to_record(submission.publication_recid, user): subscribe(submission.publication_recid, user) - else: # schema_version>="1.0.0" + else: # schema_version >= "1.0.0" for ana in r_json["analyses"]: submission = get_latest_hepsubmission(inspire_id=str(ana["inspire_id"]), overall_status='finished') if submission and not is_current_user_subscribed_to_record(submission.publication_recid, user): diff --git a/tests/records_test.py b/tests/records_test.py index 143831c8e..3f422da99 100644 --- a/tests/records_test.py +++ b/tests/records_test.py @@ -1135,6 +1135,10 @@ def test_update_analyses(app): current_app.config["ANALYSES_ENDPOINTS"]["TestAnalysis"] = {} update_analyses('TestAnalysis') + # Call update_analyses using an endpoint_url that will fail validation. 
+ current_app.config["ANALYSES_ENDPOINTS"]["TestAnalysis"]['endpoint_url'] = 'https://www.hepdata.net/search/?format=json&size=1' + update_analyses('TestAnalysis') + def test_generate_license_data_by_id(app): """ @@ -1371,7 +1375,7 @@ def test_version_related_functions(app): expected_backward_sub_relations = [] # Finished records will have other record references appear - if test["overall_status"] is not "todo": + if test["overall_status"] != "todo": expected_backward_sub_relations.append(test["other_recid"]) assert [sub.publication_recid for sub in backward_sub_relations] == expected_backward_sub_relations @@ -1394,7 +1398,7 @@ def test_version_related_functions(app): expected_backward_dt_relations = [] # We expect unfinished records to NOT have `other_recid` tables - if test["overall_status"] is not "todo": + if test["overall_status"] != "todo": expected_backward_dt_relations.append(f"10.17182/hepdata.{test['other_recid']}.v2/t{table_number}") # Here we expect the second table to reference ITS OWN table one From 597566964538282d3e2c4b6a5c6c4ea58624f136 Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Fri, 5 Sep 2025 13:31:18 +0100 Subject: [PATCH 31/32] tests: filter out error message for favicon.ico * submission_schema, analyses_schema and ping in test_general_pages return either JSON or "OK" where favicon.ico is not present. 
--- tests/e2e/conftest.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 7973ca477..8757f3e84 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -247,11 +247,16 @@ def finalizer(): # Filter out error message for: # WARNING: security - Error with Permissions-Policy header: # Origin trial controlled feature not enabled: 'interest-cohort' - temp_log = [t for t in log if 'interest-cohort' not in t['message']] + log = [t for t in log if 'interest-cohort' not in t['message']] - assert len(temp_log) == 0, \ + # Filter out error message for: + # SEVERE: http://localhost:5555/favicon.ico - Failed to load resource: + # the server responded with a status of 404 (Not Found) + log = [t for t in log if 'favicon.ico' not in t['message']] + + assert len(log) == 0, \ "Errors in browser log:\n" + \ - "\n".join([f"{line['level']}: {line['message']}" for line in temp_log]) + "\n".join([f"{line['level']}: {line['message']}" for line in log]) @pytest.fixture() From 3897cffcbcf96aedfd6b942b374151836033d520 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Mon, 8 Sep 2025 16:24:30 +0100 Subject: [PATCH 32/32] fix grammar/typos --- hepdata/templates/analyses_schema/0.1.0/readme.md | 2 +- hepdata/templates/analyses_schema/1.0.0/analyses_schema.json | 4 ++-- hepdata/templates/analyses_schema/1.0.0/readme.md | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hepdata/templates/analyses_schema/0.1.0/readme.md b/hepdata/templates/analyses_schema/0.1.0/readme.md index 54bd10963..40f80d227 100644 --- a/hepdata/templates/analyses_schema/0.1.0/readme.md +++ b/hepdata/templates/analyses_schema/0.1.0/readme.md @@ -24,7 +24,7 @@ A minimal example for an analyses JSON adhering to the standard looks like this: ## Testing an implementation -Whether an analyses JSON file adheres to the standard defined here, can be with python checked as follows: +Whether an analyses JSON file adheres to 
the standard defined here, can be checked with python as follows: ```python import json import jsonschema diff --git a/hepdata/templates/analyses_schema/1.0.0/analyses_schema.json b/hepdata/templates/analyses_schema/1.0.0/analyses_schema.json index 2e7557d01..d86771cbd 100644 --- a/hepdata/templates/analyses_schema/1.0.0/analyses_schema.json +++ b/hepdata/templates/analyses_schema/1.0.0/analyses_schema.json @@ -89,7 +89,7 @@ "$defs": { "Analysis": { - "description": "An analysis, identified by the INSPIRE ID, implemented at least once in a tool", + "description": "An analysis, identified by the INSPIRE ID, implemented at least once in the tool", "type": "object", "required": ["inspire_id", "implementations"], @@ -120,7 +120,7 @@ }, "Implementation": { - "description": "An implementation of an analysis in a tool, giving the internal name to retrieve information", + "description": "An implementation of an analysis in the tool, giving the internal name to retrieve information", "type": "object", "required": ["name"], diff --git a/hepdata/templates/analyses_schema/1.0.0/readme.md b/hepdata/templates/analyses_schema/1.0.0/readme.md index ba5abae97..0a3132242 100644 --- a/hepdata/templates/analyses_schema/1.0.0/readme.md +++ b/hepdata/templates/analyses_schema/1.0.0/readme.md @@ -109,7 +109,7 @@ The following fields are included in the standard but not required: ### Additional unknown fields Apart from the fields mentioned above, the standard allows for any number of additional fields. -These are however not standardised are not being being checked by the schema. +These are however not standardised and not checked by the schema. 
## Examples @@ -140,7 +140,7 @@ See [here](../../../../tests/test_data/analyses_example.json) for a more elabora ## Testing an implementation -Whether an analyses JSON file adheres to the standard defined here, can be with python checked as follows: +Whether an analyses JSON file adheres to the standard defined here, can be checked with python as follows: ```python import json import jsonschema