Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
08d7877
add first draft of schema
mhabedan Apr 25, 2025
60e448d
move test file and add schema test
mhabedan Apr 25, 2025
5f6e5af
add jsonschema package to CI
mhabedan Apr 25, 2025
b49198a
fix schema path
mhabedan Apr 25, 2025
27564c6
add optional field 'implementations_license'
mhabedan May 2, 2025
0bddeb9
specify used default license
mhabedan May 2, 2025
f05c92a
make 'license' field consistent with hepdata-validator (https://githu…
mhabedan May 2, 2025
51445c1
add tool_type field
mhabedan May 6, 2025
503ff8f
rename 'tool_type' field to 'implementations_description'
mhabedan May 13, 2025
0d972a9
add date_created to schema
mhabedan Jun 24, 2025
9f204ba
introduce schema_version field
mhabedan Aug 13, 2025
f4dd175
more renames, add readme
mhabedan Aug 13, 2025
698e8c5
correct url
mhabedan Aug 13, 2025
34df6e1
include schema validation
mhabedan Aug 14, 2025
ab9fce0
Merge branch 'main' into analysisSchema
mhabedan Aug 14, 2025
788c6bb
add schema version 0.1.0
mhabedan Sep 2, 2025
a44e698
test for analyses schema version
mhabedan Sep 2, 2025
87b94b6
Merge branch 'main' into analysisSchema
mhabedan Sep 2, 2025
2014de3
avoid name starting with 'test' because that is being picked up by py…
mhabedan Sep 2, 2025
63fb262
fix typos in schema
mhabedan Sep 3, 2025
9b46ff7
fix implementations_license typo
mhabedan Sep 3, 2025
534e8fc
disallow nested placeholders
mhabedan Sep 4, 2025
c2c5d76
test for v0.1.0 instead of 1.0.0
mhabedan Sep 4, 2025
98c072a
use SModelS as test case, test for implementation license
mhabedan Sep 4, 2025
e747050
fix license id
mhabedan Sep 4, 2025
9657ff6
use license name instead of id
mhabedan Sep 4, 2025
ce4b4ca
Merge branch 'main' into analysisSchema
GraemeWatt Sep 4, 2025
5add1d9
theme: add Flask route to analyses JSON schema
GraemeWatt Sep 5, 2025
ee2d5b7
tests: extend e2e tests for analyses JSON schema
GraemeWatt Sep 5, 2025
3d8f340
tests: extend test_update_analyses for coverage
GraemeWatt Sep 5, 2025
d25ee47
Space around == according to Python style guide
mhabedan Sep 5, 2025
17a2b31
Another space around == according to Python style guide
mhabedan Sep 5, 2025
091b8ca
records: address review by Copilot and codecov-ai
GraemeWatt Sep 5, 2025
5975669
tests: filter out error message for favicon.ico
GraemeWatt Sep 5, 2025
3897cff
fix grammar/typos
mhabedan Sep 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions hepdata/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,7 @@ def _(x):
'description': 'MadAnalysis 5 analysis'
},
'SModelS': {
'endpoint_url': 'https://zenodo.org/records/13952092/files/smodels-analyses.hepdata.json?download=1',
'url_template': '{0}',
'description': 'SModelS analysis',
'endpoint_url': 'https://smodels.github.io/docs/smodels-analyses.hepdata.json',
'subscribe_user_id': 7766
},
Comment thread
GraemeWatt marked this conversation as resolved.
'CheckMATE': {
Expand Down
198 changes: 142 additions & 56 deletions hepdata/modules/records/utils/analyses.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@
# as an Intergovernmental Organization or submit itself to any jurisdiction.

import logging
import os

from celery import shared_task
from flask import current_app
from invenio_db import db
import requests
import json
import jsonschema

from hepdata.ext.opensearch.api import index_record_ids
from hepdata.modules.submission.api import get_latest_hepsubmission, is_resource_added_to_submission
Expand All @@ -40,6 +43,11 @@
logging.basicConfig()
log = logging.getLogger(__name__)

def test_analyses_schema(json_file, schema_version="1.0.0"):
    """Validate a parsed analyses JSON document against a bundled schema.

    The schema is read from
    ``templates/analyses_schema/<schema_version>/analyses_schema.json``
    inside the ``hepdata`` package.

    :param json_file: the already-parsed JSON content (not a path or file object)
    :param schema_version: schema version in ``MAJOR.MINOR.PATCH`` form
    :raises jsonschema.exceptions.ValidationError: if ``schema_version`` is not
        a well-formed version string, or if ``json_file`` does not conform to
        the selected schema
    :raises FileNotFoundError: if no schema is bundled for ``schema_version``
    """
    # The version may come straight from a remotely fetched document, so only a
    # plain "N.N.N" string is allowed to become part of a filesystem path.
    # Anything else (non-string, "..", path separators) is reported as a
    # validation failure, which callers already handle.
    parts = schema_version.split(".") if isinstance(schema_version, str) else []
    if len(parts) != 3 or not all(part.isascii() and part.isdigit() for part in parts):
        raise jsonschema.exceptions.ValidationError(
            "Invalid analyses schema version: {0!r}".format(schema_version)
        )

    # Resolve the schema relative to the package rather than the current
    # working directory. This module is hepdata/modules/records/utils/analyses.py,
    # so the package root is three directories above this file's directory.
    package_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir)
    )
    schema_path = os.path.join(
        package_root, "templates", "analyses_schema", schema_version, "analyses_schema.json"
    )
    with open(schema_path, encoding="utf-8") as f:
        schema = json.load(f)
    jsonschema.validate(instance=json_file, schema=schema)
Comment thread
GraemeWatt marked this conversation as resolved.

@shared_task
def update_analyses(endpoint=None):
Expand All @@ -50,6 +58,7 @@ def update_analyses(endpoint=None):

:param endpoint: either "rivet" or "MadAnalysis" or "SModelS" or "CheckMATE" or "HackAnalysis" or "Combine" or None (default) for all
"""

endpoints = current_app.config["ANALYSES_ENDPOINTS"]
for analysis_endpoint in endpoints:

Expand All @@ -64,62 +73,130 @@ def update_analyses(endpoint=None):

if response and response.status_code == 200:

analyses = response.json()

analysis_resources = DataResource.query.filter_by(file_type=analysis_endpoint).all()

# Check for missing analyses.
for record in analyses:
submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished')

if submission:
num_new_resources = 0

for analysis in analyses[record]:
_resource_url = endpoints[analysis_endpoint]["url_template"].format(analysis)

if not is_resource_added_to_submission(submission.publication_recid, submission.version,
_resource_url):

log.info('Adding {} analysis to ins{} with URL {}'.format(
analysis_endpoint, record, _resource_url)
)
new_resource = DataResource(
file_location=_resource_url,
file_type=analysis_endpoint)

if "description" in endpoints[analysis_endpoint]:
new_resource.file_description = str(endpoints[analysis_endpoint]["description"])
r_json = response.json()

if "license" in endpoints[analysis_endpoint]:
resource_license = get_license(endpoints[analysis_endpoint]["license"])
new_resource.file_license = resource_license.id
schema_version = "0.1.0" # default to 0.1.0 for backward compatibility when schema_version field is missing
if "schema_version" in r_json:
schema_version = r_json["schema_version"]

submission.resources.append(new_resource)
num_new_resources += 1
# Validate analyses JSON file against the schema.
try:
test_analyses_schema(r_json, schema_version=schema_version)
except jsonschema.exceptions.ValidationError as e:
log.error("Validation error for analyses schema {0} in {1}: {2}".format(schema_version, analysis_endpoint, e))
continue

else:

# Remove resources from 'analysis_resources' list.
resources = list(filter(lambda a: a.file_location == _resource_url, analysis_resources))
for resource in resources:
analysis_resources.remove(resource)

if num_new_resources:

try:
db.session.add(submission)
db.session.commit()
latest_submission = get_latest_hepsubmission(inspire_id=record)
if submission.version == latest_submission.version:
index_record_ids([submission.publication_recid])
except Exception as e:
db.session.rollback()
log.error(e)

else:
log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format(
analysis_endpoint, record))
if schema_version == "0.1.0":
analyses = r_json

# Check for missing analyses.
for record in analyses:
submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished')

if submission:
num_new_resources = 0

for analysis in analyses[record]:
_resource_url = endpoints[analysis_endpoint]["url_template"].format(analysis)

if not is_resource_added_to_submission(submission.publication_recid, submission.version,
_resource_url):

log.info('Adding {} analysis to ins{} with URL {}'.format(
analysis_endpoint, record, _resource_url)
)
new_resource = DataResource(
file_location=_resource_url,
file_type=analysis_endpoint)

if "description" in endpoints[analysis_endpoint]:
new_resource.file_description = str(endpoints[analysis_endpoint]["description"])

if "license" in endpoints[analysis_endpoint]:
resource_license = get_license(endpoints[analysis_endpoint]["license"])
new_resource.file_license = resource_license.id

submission.resources.append(new_resource)
num_new_resources += 1

else:

# Remove resources from 'analysis_resources' list.
resources = list(filter(lambda a: a.file_location == _resource_url, analysis_resources))
for resource in resources:
analysis_resources.remove(resource)

if num_new_resources:

try:
db.session.add(submission)
db.session.commit()
latest_submission = get_latest_hepsubmission(inspire_id=record)
if submission.version == latest_submission.version:
index_record_ids([submission.publication_recid])
except Exception as e:
db.session.rollback()
log.error(e)

else:
log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format(
analysis_endpoint, record))

else: # schema_version >= "1.0.0"
# Check for missing analyses.
for ana in r_json["analyses"]:
inspire_id = str(ana["inspire_id"]) # inspire_id is stored as a string in the database
submission = get_latest_hepsubmission(inspire_id=inspire_id, overall_status='finished')

if submission:
num_new_resources = 0

for implementation in ana["implementations"]:
_resource_url = r_json["url_templates"]["main_url"].format(**implementation)

if not is_resource_added_to_submission(submission.publication_recid, submission.version,
_resource_url):

log.info('Adding {} analysis to ins{} with URL {}'.format(
analysis_endpoint, inspire_id, _resource_url)
)
new_resource = DataResource(
file_location=_resource_url,
file_type=analysis_endpoint,
file_description=r_json["implementations_description"]
)

if "implementations_license" in r_json:
resource_license = get_license(r_json["implementations_license"])
new_resource.file_license = resource_license.id

submission.resources.append(new_resource)
num_new_resources += 1

else:

# Remove resources from 'analysis_resources' list.
resources = list(filter(lambda a: a.file_location == _resource_url, analysis_resources))
for resource in resources:
analysis_resources.remove(resource)

if num_new_resources:

try:
db.session.add(submission)
db.session.commit()
latest_submission = get_latest_hepsubmission(inspire_id=inspire_id)
if submission.version == latest_submission.version:
index_record_ids([submission.publication_recid])
except Exception as e:
db.session.rollback()
log.error(e)

else:
log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format(
analysis_endpoint, inspire_id))

if analysis_resources:
# Extra resources that were not found in the analyses JSON file.
Expand Down Expand Up @@ -154,10 +231,19 @@ def update_analyses(endpoint=None):
if "subscribe_user_id" in endpoints[analysis_endpoint]:
user = get_user_from_id(endpoints[analysis_endpoint]["subscribe_user_id"])
if user:
for record in analyses:
submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished')
if submission and not is_current_user_subscribed_to_record(submission.publication_recid, user):
subscribe(submission.publication_recid, user)
# Check for missing analyses.
if schema_version == "0.1.0":
for record in analyses:
submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished')
if submission and not is_current_user_subscribed_to_record(submission.publication_recid, user):
subscribe(submission.publication_recid, user)

else: # schema_version >= "1.0.0"
for ana in r_json["analyses"]:
submission = get_latest_hepsubmission(inspire_id=str(ana["inspire_id"]), overall_status='finished')
if submission and not is_current_user_subscribed_to_record(submission.publication_recid, user):
subscribe(submission.publication_recid, user)


else:
log.debug("No endpoint url configured for {0}".format(analysis_endpoint))
log.debug("No endpoint_url configured for {0}".format(analysis_endpoint))
10 changes: 9 additions & 1 deletion hepdata/modules/theme/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@
"""Theme blueprint in order for template and static files to be loaded."""

import re
import json

from flask import Blueprint, render_template, current_app, redirect, request, url_for
from flask import Blueprint, render_template, current_app, redirect, request, url_for, jsonify
from hepdata_validator import LATEST_SCHEMA_VERSION, RAW_SCHEMAS_URL

from hepdata.modules.email.utils import send_flask_message_email
Expand Down Expand Up @@ -84,6 +85,13 @@ def submission_schema(jsonschema):
return redirect(RAW_SCHEMAS_URL + '/' + jsonschema)


@blueprint.route('/analyses/schemas/<path:jsonschema>')
def analyses_schema(jsonschema):
    """Serve a bundled analyses JSON schema as a JSON response.

    :param jsonschema: path of the schema file relative to
        ``templates/analyses_schema``, e.g. ``0.1.0/analyses_schema.json``
    :return: the schema content as a JSON response; aborts with 404 if the
        path is not an acceptable schema path or the file cannot be read
        and parsed as JSON
    """
    from flask import abort  # local import: not in this module's flask import list

    # The path comes from the URL, so refuse anything that could leave the
    # schema directory (empty, "." or ".." segments, backslashes) and anything
    # that is not a JSON file.
    segments = jsonschema.split('/')
    if (not jsonschema.endswith('.json')
            or '\\' in jsonschema
            or any(segment in ('', '.', '..') for segment in segments)):
        abort(404)

    try:
        with current_app.open_resource('templates/analyses_schema/' + jsonschema) as jsonfile:
            schema = json.load(jsonfile)
    except (OSError, ValueError):
        # Missing/unreadable file or invalid JSON: report "not found" rather
        # than letting the exception surface as a server error.
        abort(404)
    return jsonify(schema)
Comment thread
GraemeWatt marked this conversation as resolved.


@blueprint.route('/cookies')
def cookie_policy():
return render_template('hepdata_theme/pages/cookies.html')
Expand Down
17 changes: 17 additions & 0 deletions hepdata/templates/analyses_schema/0.1.0/analyses_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://hepdata.net/analyses/schemas/0.1.0/analyses_schema.json",
"title": "HEPData analysis tool schema",
"description": "A JSON schema for tracking implementations of HEPData analyses in different tools",
"type": "object",
"patternProperties": {
"^[0-9]+$": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 1
}
},
"additionalProperties": false
}
38 changes: 38 additions & 0 deletions hepdata/templates/analyses_schema/0.1.0/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# About the analyses JSON schema

This readme details a JSON schema which is used by reinterpretation tools to communicate to HEPData which analyses are implemented in that tool and where to find the implementations.

## The standard

The standard is quite simple: the whole file is a dictionary where the keys are the INSPIRE IDs of the analyses implemented in the tool and the values are lists of tool-internal names for the reimplementations, i.e.
```JSON
{
"<INSPIRE ID>": ["<implementation 1>", "<implementation 2>"]
}
```

No other fields are allowed.

## Example
A minimal example for an analyses JSON adhering to the standard looks like this:
```JSON
{
"100592": ["MARKI_1975_I100592", "MARKI_ALTERNATIVE_IMPLEMENTATION"],
"1081268": ["LHCB_2013_I1081268"]
}
```

## Testing an implementation

Whether an analyses JSON file adheres to the standard defined here can be checked with Python as follows:
```python
import json
import jsonschema

with open("analyses_schema.json") as f:
schema = json.load(f)
with open("analyses_example.json") as f:
test = json.load(f)

jsonschema.validate(instance=test, schema=schema)
```
Loading