From 94d9fdb27272c3d04f887412c5110cdb9959e568 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 21 May 2025 10:10:16 +0100 Subject: [PATCH 1/9] allow download as yoda.h5 format --- hepdata/config.py | 2 +- hepdata/ext/opensearch/document_enhancers.py | 2 +- hepdata/modules/converter/views.py | 14 +++++++------- .../modules/records/utils/data_processing_utils.py | 6 ++++++ .../templates/hepdata_theme/pages/formats.html | 4 +++- 5 files changed, 18 insertions(+), 10 deletions(-) diff --git a/hepdata/config.py b/hepdata/config.py index 9065b9d39..cbbc39de3 100644 --- a/hepdata/config.py +++ b/hepdata/config.py @@ -196,7 +196,7 @@ def _(x): CFG_SEARCH_RANGE_TERMS = ["recid", "publication_recid", "inspire_id"] # Possible terms used to OpenSearch API range searches CFG_CONVERTER_URL = 'https://converter.hepdata.net' -CFG_SUPPORTED_FORMATS = ['yaml', 'root', 'csv', 'yoda', 'yoda1', 'original'] +CFG_SUPPORTED_FORMATS = ['yaml', 'root', 'csv', 'yoda', 'yoda1', 'yoda.h5', 'original'] CFG_CONVERTER_TIMEOUT = 220 # timeout in seconds CFG_TMPDIR = tempfile.gettempdir() diff --git a/hepdata/ext/opensearch/document_enhancers.py b/hepdata/ext/opensearch/document_enhancers.py index 52d68fdf5..a2e217cd6 100644 --- a/hepdata/ext/opensearch/document_enhancers.py +++ b/hepdata/ext/opensearch/document_enhancers.py @@ -37,7 +37,7 @@ from hepdata.modules.submission.models import DataSubmission from hepdata.utils.miscellaneous import get_resource_data -FORMATS = ['json', 'root', 'yaml', 'csv', 'yoda'] +FORMATS = ['json', 'root', 'yaml', 'csv', 'yoda', 'yoda.h5'] logging.basicConfig() log = logging.getLogger(__name__) diff --git a/hepdata/modules/converter/views.py b/hepdata/modules/converter/views.py index 8b808deea..be6dc9cda 100644 --- a/hepdata/modules/converter/views.py +++ b/hepdata/modules/converter/views.py @@ -60,7 +60,7 @@ @blueprint.route(f'/submission//') @blueprint.route(f'/submission///') -@blueprint.route('/submission////') +@blueprint.route('/submission////') def 
download_submission_with_inspire_id(*args, **kwargs): """ Gets the submission file and either serves it back directly from YAML, or converts it @@ -71,7 +71,7 @@ def download_submission_with_inspire_id(*args, **kwargs): :param inspire_id: inspire id :param version: version of submission to export. If absent, returns the latest. - :param file_format: json, yaml, csv, root, yoda, yoda1 or original + :param file_format: json, yaml, csv, root, yoda, yoda1, yoda.h5 or original :param rivet: Rivet analysis name to override default written in YODA export :return: download_submission """ @@ -115,7 +115,7 @@ def download_submission_with_inspire_id(*args, **kwargs): @blueprint.route(f'/submission//') @blueprint.route(f'/submission///') -@blueprint.route('/submission////') +@blueprint.route('/submission////') def download_submission_with_recid(*args, **kwargs): """ Gets the submission file and either serves it back directly from YAML, or converts it @@ -126,7 +126,7 @@ def download_submission_with_recid(*args, **kwargs): :param recid: submissions recid :param version: version of submission to export. If absent, returns the latest. - :param file_format: json, yaml, csv, root, yoda, yoda1 or original + :param file_format: json, yaml, csv, root, yoda, yoda1, yoda.h5 or original :param rivet: Rivet analysis name to override default written in YODA export :return: download_submission """ @@ -160,7 +160,7 @@ def download_submission(submission, file_format, offline=False, force=False, riv for other formats. 
:param submission: HEPSubmission - :param file_format: json, yaml, csv, root, yoda, yoda1 or original + :param file_format: json, yaml, csv, root, yoda, yoda1, yoda.h5 or original :param offline: offline creation of the conversion when a record is finalised :param force: force recreation of the conversion :param rivet_analysis_name: Rivet analysis name to override default written in YODA export @@ -253,7 +253,7 @@ def download_submission(submission, file_format, offline=False, force=False, riv @blueprint.route(f'/table///') @blueprint.route(f'/table////') -@blueprint.route('/table/////') +@blueprint.route('/table/////') def download_data_table_by_inspire_id(*args, **kwargs): """ Downloads the latest data file given the url ``/download/submission/ins1283842/Table 1/yaml`` or @@ -324,7 +324,7 @@ def download_data_table_by_inspire_id(*args, **kwargs): @blueprint.route(f'/table///') @blueprint.route(f'/table////') -@blueprint.route('/table/////') +@blueprint.route('/table/////') def download_data_table_by_recid(*args, **kwargs): """ Record ID download. 
diff --git a/hepdata/modules/records/utils/data_processing_utils.py b/hepdata/modules/records/utils/data_processing_utils.py index d8c492049..4f3279174 100644 --- a/hepdata/modules/records/utils/data_processing_utils.py +++ b/hepdata/modules/records/utils/data_processing_utils.py @@ -311,6 +311,11 @@ def process_ctx(ctx, light_mode=False): _cleaned_table_name = data_table['name'].replace('%', '%25').replace('\\', '%5C') + #data_table['data'] = {} + #for format in ['json', 'root', 'csv', 'yoda', 'yoda1', 'yoda.h5', 'yaml']: + # data_table[format] = '{0}/download/table/{1}/{2}/{4}'.format( + # site_url, _recid, _cleaned_table_name, format) + data_table['data'] = { 'json': '{0}/download/table/{1}/{2}/json'.format( site_url, _recid, _cleaned_table_name), @@ -325,4 +330,5 @@ def process_ctx(ctx, light_mode=False): 'yaml': '{0}/download/table/{1}/{2}/yaml'.format( site_url, _recid, _cleaned_table_name)} + return ctx diff --git a/hepdata/modules/theme/templates/hepdata_theme/pages/formats.html b/hepdata/modules/theme/templates/hepdata_theme/pages/formats.html index 054d8b26b..70dc4c011 100644 --- a/hepdata/modules/theme/templates/hepdata_theme/pages/formats.html +++ b/hepdata/modules/theme/templates/hepdata_theme/pages/formats.html @@ -109,6 +109,7 @@

Detailed record

toolkit. Again, the appropriate YODA object is written according to the number of independent variables in a table. The default yoda output is now in the YODA2 format, but there is still an option yoda1 to output the legacy YODA1 format. + Alternatively, the output can be written in the HDF5-based yoda.h5 format, which is designed for parallel computing.

@@ -123,6 +124,7 @@

Detailed record

  • ?format=root
  • ?format=yoda
  • ?format=yoda1
  • +
  • ?format=yoda.h5
  • Optional parameters can also be added (separated by a & symbol):

      @@ -140,7 +142,7 @@

      Detailed record

      the data tables from the response.
    • - rivet=ALICE_2016_I1419244: when using format=yoda or format=yoda1, specify the desired + rivet=ALICE_2016_I1419244: when using format=yoda, format=yoda1 or format=yoda.h5, specify the desired Rivet analysis name to be written in the YODA files if it does not match the automatically generated name.
    From 14993364dd7daffd3187e2ce6d38ad1fcced1fa2 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 21 May 2025 16:40:12 +0100 Subject: [PATCH 2/9] remove unnecessary change --- hepdata/modules/records/utils/data_processing_utils.py | 6 ------ .../theme/templates/hepdata_theme/pages/formats.html | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/hepdata/modules/records/utils/data_processing_utils.py b/hepdata/modules/records/utils/data_processing_utils.py index 4f3279174..d8c492049 100644 --- a/hepdata/modules/records/utils/data_processing_utils.py +++ b/hepdata/modules/records/utils/data_processing_utils.py @@ -311,11 +311,6 @@ def process_ctx(ctx, light_mode=False): _cleaned_table_name = data_table['name'].replace('%', '%25').replace('\\', '%5C') - #data_table['data'] = {} - #for format in ['json', 'root', 'csv', 'yoda', 'yoda1', 'yoda.h5', 'yaml']: - # data_table[format] = '{0}/download/table/{1}/{2}/{4}'.format( - # site_url, _recid, _cleaned_table_name, format) - data_table['data'] = { 'json': '{0}/download/table/{1}/{2}/json'.format( site_url, _recid, _cleaned_table_name), @@ -330,5 +325,4 @@ def process_ctx(ctx, light_mode=False): 'yaml': '{0}/download/table/{1}/{2}/yaml'.format( site_url, _recid, _cleaned_table_name)} - return ctx diff --git a/hepdata/modules/theme/templates/hepdata_theme/pages/formats.html b/hepdata/modules/theme/templates/hepdata_theme/pages/formats.html index 70dc4c011..d3fd2d3f3 100644 --- a/hepdata/modules/theme/templates/hepdata_theme/pages/formats.html +++ b/hepdata/modules/theme/templates/hepdata_theme/pages/formats.html @@ -142,7 +142,7 @@

    Detailed record

    the data tables from the response.
  • - rivet=ALICE_2016_I1419244: when using format=yoda, format=yoda1 or format=yoda.h5, specify the desired + rivet=ALICE_2016_I1419244: when using format=yoda, format=yoda1, or format=yoda.h5, specify the desired Rivet analysis name to be written in the YODA files if it does not match the automatically generated name.
  • From 83adff102bdc071fe04b95cf2a741ac67dba29c2 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 21 May 2025 17:14:37 +0100 Subject: [PATCH 3/9] add yoda-h5 to test cases --- tests/e2e/test_general.py | 3 ++- tests/records_test.py | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/e2e/test_general.py b/tests/e2e/test_general.py index dfe919eb4..edf45eeab 100644 --- a/tests/e2e/test_general.py +++ b/tests/e2e/test_general.py @@ -269,7 +269,8 @@ def test_accept_headers(app, live_server, e2e_identifiers): {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/root', 'description': 'ROOT file', 'encodingFormat': 'https://root.cern'}, {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/yaml', 'description': 'YAML file', 'encodingFormat': 'https://yaml.org'}, {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/csv', 'description': 'CSV file', 'encodingFormat': 'text/csv'}, - {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/yoda', 'description': 'YODA file', 'encodingFormat': 'https://yoda.hepforge.org'} + {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/yoda', 'description': 'YODA file', 'encodingFormat': 'https://yoda.hepforge.org'}, + {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/yoda.h5', 'description': 'YODA-H5 file', 'encodingFormat': 'https://yoda.hepforge.org'} ] # Data resource landing page (use last submission, which has resources) diff --git a/tests/records_test.py b/tests/records_test.py index d701ae69f..ebd44f57f 100644 --- a/tests/records_test.py +++ b/tests/records_test.py @@ -919,6 +919,12 @@ def test_get_json_ld(app, load_default_data, identifiers): 'contentUrl': f'http://localhost:5000/download/table/2/yoda', 'description': 'YODA file', 'encodingFormat': 'https://yoda.hepforge.org' + }, + { + '@type': 'DataDownload', + 'contentUrl': 
f'http://localhost:5000/download/table/2/yoda.h5', + 'description': 'YODA-H5 file', + 'encodingFormat': 'https://yoda.hepforge.org' } ] assert table_data['includedInDataCatalog'] == { From 671021b8c35c9823a915d5ca17ae413d766d4008 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 6 Aug 2025 14:13:43 +0100 Subject: [PATCH 4/9] compactify creation of links to output tables in JSON, add yoda.h5 to it --- .../records/utils/data_processing_utils.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/hepdata/modules/records/utils/data_processing_utils.py b/hepdata/modules/records/utils/data_processing_utils.py index d8c492049..06991135c 100644 --- a/hepdata/modules/records/utils/data_processing_utils.py +++ b/hepdata/modules/records/utils/data_processing_utils.py @@ -311,18 +311,9 @@ def process_ctx(ctx, light_mode=False): _cleaned_table_name = data_table['name'].replace('%', '%25').replace('\\', '%5C') - data_table['data'] = { - 'json': '{0}/download/table/{1}/{2}/json'.format( - site_url, _recid, _cleaned_table_name), - 'root': '{0}/download/table/{1}/{2}/root'.format( - site_url, _recid, _cleaned_table_name), - 'csv': '{0}/download/table/{1}/{2}/csv'.format( - site_url, _recid, _cleaned_table_name), - 'yoda': '{0}/download/table/{1}/{2}/yoda'.format( - site_url, _recid, _cleaned_table_name), - 'yoda1': '{0}/download/table/{1}/{2}/yoda1'.format( - site_url, _recid, _cleaned_table_name), - 'yaml': '{0}/download/table/{1}/{2}/yaml'.format( - site_url, _recid, _cleaned_table_name)} + data_table['data'] = {} + for file_format in ['json', 'root', 'csv', 'yoda', 'yoda1', 'yoda.h5', 'yaml']: + data_table['data'][file_format] = '{0}/download/table/{1}/{2}/{3}'.format( + site_url, _recid, _cleaned_table_name, file_format) return ctx From 2ce7ef28e62ca29b95025dde5516a12a81d084b7 Mon Sep 17 00:00:00 2001 From: Martin Habedank Date: Wed, 6 Aug 2025 14:35:17 +0100 Subject: [PATCH 5/9] used dashes instead of dots in file names --- 
hepdata/modules/converter/views.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hepdata/modules/converter/views.py b/hepdata/modules/converter/views.py index be6dc9cda..fd3c0654b 100644 --- a/hepdata/modules/converter/views.py +++ b/hepdata/modules/converter/views.py @@ -188,7 +188,8 @@ def download_submission(submission, file_format, offline=False, force=False, riv if file_format == 'original': file_format_and_extension = os.path.splitext(data_filepath)[1] else: - file_format_and_extension = '-{0}.tar.gz'.format(file_format) + file_format_dashed = file_format.replace('.', '-') + file_format_and_extension = '-{0}.tar.gz'.format(file_format_dashed) output_file = 'HEPData-{0}-v{1}{2}'.format(file_identifier, submission.version, file_format_and_extension) @@ -219,10 +220,11 @@ def download_submission(submission, file_format, offline=False, force=False, riv print('File created at {0}'.format(output_path)) return + file_format_dashed = file_format.replace('.', '-') converter_options = { 'input_format': 'yaml', 'output_format': file_format, - 'filename': 'HEPData-{0}-v{1}-{2}'.format(file_identifier, submission.version, file_format), + 'filename': 'HEPData-{0}-v{1}-{2}'.format(file_identifier, submission.version, file_format_dashed), 'validator_schema_version': '0.1.0', } From 06ac6aedf33d6b02fc5ffd3890c020cc485a82a6 Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Tue, 19 Aug 2025 14:42:02 +0100 Subject: [PATCH 6/9] global: fix tests --- hepdata/ext/opensearch/document_enhancers.py | 4 ++-- hepdata/modules/records/utils/json_ld.py | 3 ++- hepdata/version.py | 2 +- tests/e2e/test_general.py | 2 +- tests/records_test.py | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/hepdata/ext/opensearch/document_enhancers.py b/hepdata/ext/opensearch/document_enhancers.py index a2e217cd6..50540d9f2 100644 --- a/hepdata/ext/opensearch/document_enhancers.py +++ b/hepdata/ext/opensearch/document_enhancers.py @@ -55,7 +55,7 @@ def 
add_data_submission_urls(doc): doc['access_urls'] = {'links': {}} for format in FORMATS: - doc['access_urls']['links'][format] = '{0}/download/submission/ins{1}/{2}/{3}'.format( + doc['access_urls']['links'][format.replace('.', '-')] = '{0}/download/submission/ins{1}/{2}/{3}'.format( current_app.config.get('SITE_URL', 'https://www.hepdata.net'), doc['inspire_id'], doc['version'] if 'version' in doc else 1, format) @@ -67,7 +67,7 @@ def add_data_table_urls(doc): _cleaned_table_name = doc['title'].replace('%', '%25').replace('\\', '%5C') - doc['access_urls']['links'][format] = '{0}/download/table/ins{1}/{2}/{3}'.format( + doc['access_urls']['links'][format.replace('.', '-')] = '{0}/download/table/ins{1}/{2}/{3}'.format( current_app.config.get('SITE_URL', 'https://www.hepdata.net'), doc['inspire_id'], _cleaned_table_name, format) diff --git a/hepdata/modules/records/utils/json_ld.py b/hepdata/modules/records/utils/json_ld.py index cb4458e7c..51326f78f 100644 --- a/hepdata/modules/records/utils/json_ld.py +++ b/hepdata/modules/records/utils/json_ld.py @@ -183,7 +183,8 @@ def _add_table_info(data, ctx, data_submission): 'root': 'https://root.cern', 'yaml': 'https://yaml.org', 'csv': 'text/csv', - 'yoda': 'https://yoda.hepforge.org' + 'yoda': 'https://yoda.hepforge.org', + 'yoda.h5': 'https://yoda.hepforge.org' } for download_type, format in download_types.items(): data_downloads.append({ diff --git a/hepdata/version.py b/hepdata/version.py index 809d9ade7..2b90216b4 100644 --- a/hepdata/version.py +++ b/hepdata/version.py @@ -28,4 +28,4 @@ and parsed by ``setup.py``. 
""" -__version__ = "0.9.4dev20250813" +__version__ = "0.9.4dev20250819" diff --git a/tests/e2e/test_general.py b/tests/e2e/test_general.py index edf45eeab..ef9473fac 100644 --- a/tests/e2e/test_general.py +++ b/tests/e2e/test_general.py @@ -270,7 +270,7 @@ def test_accept_headers(app, live_server, e2e_identifiers): {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/yaml', 'description': 'YAML file', 'encodingFormat': 'https://yaml.org'}, {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/csv', 'description': 'CSV file', 'encodingFormat': 'text/csv'}, {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/yoda', 'description': 'YODA file', 'encodingFormat': 'https://yoda.hepforge.org'}, - {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/yoda.h5', 'description': 'YODA-H5 file', 'encodingFormat': 'https://yoda.hepforge.org'} + {'@type': 'DataDownload', 'contentUrl': 'http://localhost:5000/download/table/1/yoda.h5', 'description': 'YODA.H5 file', 'encodingFormat': 'https://yoda.hepforge.org'} ] # Data resource landing page (use last submission, which has resources) diff --git a/tests/records_test.py b/tests/records_test.py index ebd44f57f..1f6e2cc6f 100644 --- a/tests/records_test.py +++ b/tests/records_test.py @@ -923,7 +923,7 @@ def test_get_json_ld(app, load_default_data, identifiers): { '@type': 'DataDownload', 'contentUrl': f'http://localhost:5000/download/table/2/yoda.h5', - 'description': 'YODA-H5 file', + 'description': 'YODA.H5 file', 'encodingFormat': 'https://yoda.hepforge.org' } ] From 6bb9fb665243161de9607fe6fbf62d5850ee6912 Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Tue, 19 Aug 2025 17:21:46 +0100 Subject: [PATCH 7/9] records: simplify using CFG_SUPPORTED_FORMATS * Implement suggestion from review by Codecov AI. 
--- hepdata/modules/records/utils/data_processing_utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hepdata/modules/records/utils/data_processing_utils.py b/hepdata/modules/records/utils/data_processing_utils.py index 06991135c..6d60585b0 100644 --- a/hepdata/modules/records/utils/data_processing_utils.py +++ b/hepdata/modules/records/utils/data_processing_utils.py @@ -24,7 +24,7 @@ from flask import current_app from collections import OrderedDict -import re +from hepdata.config import CFG_SUPPORTED_FORMATS from hepdata.utils.miscellaneous import sanitize_html @@ -312,8 +312,9 @@ def process_ctx(ctx, light_mode=False): _cleaned_table_name = data_table['name'].replace('%', '%25').replace('\\', '%5C') data_table['data'] = {} - for file_format in ['json', 'root', 'csv', 'yoda', 'yoda1', 'yoda.h5', 'yaml']: - data_table['data'][file_format] = '{0}/download/table/{1}/{2}/{3}'.format( - site_url, _recid, _cleaned_table_name, file_format) + for file_format in ['json'] + CFG_SUPPORTED_FORMATS: + if file_format != 'original': + data_table['data'][file_format] = '{0}/download/table/{1}/{2}/{3}'.format( + site_url, _recid, _cleaned_table_name, file_format) return ctx From e42a7e775898453a73e65a43d106fc4eedd9b671 Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Tue, 19 Aug 2025 17:23:03 +0100 Subject: [PATCH 8/9] CONTRIBUTING: clarify not to open PRs from forks --- CONTRIBUTING.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 5fb573258..f8ba64de3 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -80,7 +80,12 @@ Ready to contribute? Here's how to set up ``hepdata`` for local development. $ git commit -s -m "Your detailed description of your changes." $ git push origin name-of-your-bugfix-or-feature -7. Submit a pull request through the GitHub website, perhaps initially as a +7. 
The continuous integration (CI) run via GitHub Actions requires access to secrets only available to the +https://github.com/HEPData/hepdata repository, not to forks. Therefore, pull requests should be made only +from branches of this repository, not from forks. You can request Write access to the repository by +sending an email to ``info@hepdata.net``. + +8. Submit a pull request through the GitHub website, perhaps initially as a `draft pull request `_ until you make sure that all checks have passed. From 716624c806740e273c0a9b3d400fe9764edcea2a Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Tue, 19 Aug 2025 19:24:21 +0100 Subject: [PATCH 9/9] tests: access JSON format to increase coverage --- tests/e2e/test_general.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/e2e/test_general.py b/tests/e2e/test_general.py index ef9473fac..c31c6e738 100644 --- a/tests/e2e/test_general.py +++ b/tests/e2e/test_general.py @@ -94,6 +94,18 @@ def test_home(app, live_server, env_browser, e2e_identifiers): # Close download dropdown by clicking again browser.find_element(By.ID, 'dLabel').click() + # Access the JSON format of the record + json_url = browser.find_element(By.ID, 'jsonLabel').get_attribute('href') + assert json_url.endswith('?format=json') + response = requests.get(json_url) + assert response.status_code == 200 + json_data = response.json() + assert len(json_data['data_tables']) == 14 + response = requests.get(json_url + '&light=true') + assert response.status_code == 200 + json_data = response.json() + assert 'data_tables' not in json_data + # Go back to homepage and click on 1st link - should be record with resources browser.back() latest_item = browser.find_elements(By.CSS_SELECTOR, '.latest-record .title')[0]