HEPData · GraemeWatt · Oct 17, 2025 · Oct 15, 2025 · Oct 15, 2025 · Oct 16, 2025
diff --git a/fixes/add_analyses.py b/fixes/add_analyses.py
@@ -0,0 +1,71 @@
+import click
+import logging
+
+from celery import shared_task
+from flask import current_app
+from flask.cli import with_appcontext
+from invenio_db import db
+
+from hepdata.celery import dynamic_tasks
+from hepdata.config import SIMPLEANALYSIS_FILE_TYPE, HS3_FILE_TYPE
+from hepdata.cli import fix
+from hepdata.ext.opensearch.api import reindex_batch
+from hepdata.modules.submission.api import get_latest_hepsubmission
+from hepdata.modules.submission.models import HEPSubmission
+from hepdata.modules.records.utils.common import is_analysis
+
+logging.basicConfig()  # attaches a default handler to the root logger if it has none yet
+log = logging.getLogger(__name__)  # module-level logger shared by the command and the batch task
+
+@fix.command()
+@with_appcontext
+@click.option('--analyses-type', '-a', type=str, help=f"e.g. '{SIMPLEANALYSIS_FILE_TYPE}' or '{HS3_FILE_TYPE}'.")
+@click.option('--batch-size', '-b', type=click.IntRange(min=1), default=20,
+              help='Number of hepsubmission entries to check at a time.')
+@click.option('--synchronous', '-s', type=bool, default=False)
+def add_analyses(analyses_type, batch_size, synchronous=False):
+    """Check all submissions for resources with analyses_type in the description but not as the type.
+
+    Ids are processed in slices of batch_size: inline if synchronous, else via dynamic_tasks.delay.
+    """
+    if analyses_type not in (SIMPLEANALYSIS_FILE_TYPE, HS3_FILE_TYPE):
+        log.error(f"analyses-type must be '{SIMPLEANALYSIS_FILE_TYPE}' or '{HS3_FILE_TYPE}'")
+        return
+
+    all_ids = [row[0] for row in db.session.query(HEPSubmission.id).order_by(HEPSubmission.id).all()]
+
+    # click guarantees batch_size >= 1, so the step is valid and the loop always advances
+    for start in range(0, len(all_ids), batch_size):
+        batch_ids = all_ids[start:start + batch_size]
+        if synchronous:
+            _add_analyses_batch(analyses_type, batch_ids)
+        else:
+            log.info('Sending batch of IDs {0} to {1} to celery'.format(batch_ids[0], batch_ids[-1]))
+            dynamic_tasks.delay('_add_analyses_batch', 'add_analyses', analyses_type, batch_ids)
+
+
+@shared_task
+def _add_analyses_batch(analyses_type, ids):
+    """Give resources whose description mentions analyses_type that file type, then reindex."""
+    log.info(f"Checking for {analyses_type} resources in submission ids {ids}")
+    ids_to_reindex = set()  # a set, so each submission is passed to reindex_batch at most once
+    for submission_id in ids:
+        hepsubmission = HEPSubmission.query.get(submission_id)
+        if not hepsubmission:
+            continue  # no submission found for this id
+        for resource in hepsubmission.resources:
+            # A None description would crash is_analysis, which lowercases it
+            if resource.file_type != analyses_type and is_analysis(analyses_type, resource.file_description or ''):
+                log.info(f"Found {analyses_type} for resource {resource.file_location}")
+                # Update resource to have type analyses_type
+                resource.file_type = analyses_type
+                db.session.add(resource)
+                db.session.commit()
+                # Check if this is the latest finished submission - reindex if so
+                latest = get_latest_hepsubmission(publication_recid=hepsubmission.publication_recid, overall_status='finished')
+                if latest and latest.version == hepsubmission.version:
+                    ids_to_reindex.add(hepsubmission.id)
+
+    if ids_to_reindex:
+        ids_to_reindex = sorted(ids_to_reindex)  # stable order for the log line and the call
+        log.info(f"Reindexing records: {ids_to_reindex}")
+        reindex_batch(ids_to_reindex, current_app.config['OPENSEARCH_INDEX'])
diff --git a/fixes/add_histfactory_analyses.py b/fixes/add_histfactory_analyses.py
diff --git a/hepdata/config.py b/hepdata/config.py
@@ -369,6 +369,8 @@ def _(x):
 }
 
 HISTFACTORY_FILE_TYPE = 'HistFactory'
+HS3_FILE_TYPE = 'HS3'
+SIMPLEANALYSIS_FILE_TYPE = 'SimpleAnalysis'
 NUISANCE_FILE_TYPE = 'ProSelecta'
 
 ADMIN_EMAIL = 'info@hepdata.net'

diff --git a/hepdata/ext/opensearch/document_enhancers.py b/hepdata/ext/opensearch/document_enhancers.py
@@ -30,7 +30,8 @@
 from dateutil.parser import parse
 from flask import current_app
 
-from hepdata.config import CFG_PUB_TYPE, CFG_DATA_TYPE, HISTFACTORY_FILE_TYPE, NUISANCE_FILE_TYPE
+from hepdata.config import (CFG_PUB_TYPE, CFG_DATA_TYPE, HISTFACTORY_FILE_TYPE,
+                            HS3_FILE_TYPE, SIMPLEANALYSIS_FILE_TYPE, NUISANCE_FILE_TYPE)
 from hepdata.ext.opensearch.config.record_mapping import mapping as os_mapping
 from hepdata.modules.permissions.models import SubmissionParticipant
 from hepdata.modules.submission.api import get_latest_hepsubmission
@@ -104,12 +105,12 @@ def add_analyses(doc):
     if latest_submission:
         doc["analyses"] = []
         for reference in latest_submission.resources:
-            if reference.file_type in current_app.config['ANALYSES_ENDPOINTS']:
+            if reference.file_type in current_app.config['ANALYSES_ENDPOINTS'] and reference.file_location.lower().startswith('http'):
                 doc["analyses"].append({'type': reference.file_type, 'analysis': reference.file_location})
             else:
                 site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
                 landing_page_url = f"{site_url}/record/resource/{reference.id}?landing_page=true"
-                if reference.file_type == HISTFACTORY_FILE_TYPE:
+                if reference.file_type in (HISTFACTORY_FILE_TYPE, HS3_FILE_TYPE, SIMPLEANALYSIS_FILE_TYPE):
                     doc["analyses"].append({'type': reference.file_type, 'analysis': landing_page_url,
                                             'filename': os.path.basename(reference.file_location)})
                 elif reference.file_type == NUISANCE_FILE_TYPE:

diff --git a/hepdata/modules/records/utils/analyses.py b/hepdata/modules/records/utils/analyses.py
@@ -205,6 +205,8 @@ def update_analyses(endpoint=None):
                     try:
                         recids_to_reindex = []
                         for extra_analysis_resource in analysis_resources:
+                            if not extra_analysis_resource.file_location.lower().startswith('http'):
+                                continue  # don't delete local files from database
                             query = db.select([data_reference_link.columns.submission_id]).where(
                                 data_reference_link.columns.dataresource_id == extra_analysis_resource.id)
                             results = db.session.execute(query)

diff --git a/hepdata/modules/records/utils/common.py b/hepdata/modules/records/utils/common.py
@@ -30,7 +30,8 @@
 import os
 from sqlalchemy.orm.exc import NoResultFound
 
-from hepdata.config import HISTFACTORY_FILE_TYPE, NUISANCE_FILE_TYPE, SIZE_LOAD_CHECK_THRESHOLD
+from hepdata.config import (HISTFACTORY_FILE_TYPE, HS3_FILE_TYPE, SIMPLEANALYSIS_FILE_TYPE,
+                            NUISANCE_FILE_TYPE, SIZE_LOAD_CHECK_THRESHOLD)
 from hepdata.ext.opensearch.api import get_record
 from hepdata.modules.submission.models import HEPSubmission, License, DataSubmission, DataResource
 
@@ -74,9 +75,6 @@
 
 ALLOWED_EXTENSIONS = ('.zip', '.tar', '.tar.gz', '.tgz', '.oldhepdata', '.yaml', '.yaml.gz')
 
-HISTFACTORY_EXTENSIONS = ALLOWED_EXTENSIONS[:4] + ('.tar.xz', '.json')
-HISTFACTORY_TERMS = ("histfactory", "pyhf", "likelihoods", "workspaces")
-
 
 def contains_accepted_url(file):
     for pattern in URL_PATTERNS:
@@ -96,17 +94,12 @@ def is_image(filename):
     return False
 
 
-def is_histfactory(filename, description, type=None):
-    if type and type.lower() == HISTFACTORY_FILE_TYPE.lower():
+def is_analysis(analyses_type, description, type=None):
+    if type and type.lower() == analyses_type.lower():  # an explicit matching type wins
         return True
 
-    if filename.endswith(HISTFACTORY_EXTENSIONS):
-        description_lc = description.lower()
-        for term in HISTFACTORY_TERMS:
-            if term in description_lc:
-                return True
-
-    return False
+    description_lc = (description or '').lower()  # a missing (None) description never matches
+    return analyses_type.lower() in description_lc  # case-insensitive substring test
 
 
 def infer_file_type(file, description, type=None):
@@ -115,7 +108,11 @@ def infer_file_type(file, description, type=None):
         if result:
             return pattern
         else:
-            if is_histfactory(file, description, type):
+            if is_analysis(SIMPLEANALYSIS_FILE_TYPE, description, type):
+                return SIMPLEANALYSIS_FILE_TYPE
+            elif is_analysis(HS3_FILE_TYPE, description, type):
+                return HS3_FILE_TYPE
+            elif type and type.lower() == HISTFACTORY_FILE_TYPE.lower():
                 return HISTFACTORY_FILE_TYPE
             elif type and type.lower() == NUISANCE_FILE_TYPE.lower():
                 return NUISANCE_FILE_TYPE

diff --git a/hepdata/modules/search/templates/hepdata_search/modals/search_help.html b/hepdata/modules/search/templates/hepdata_search/modals/search_help.html
@@ -277,6 +277,20 @@ <h4>Other useful searches</h4>
                                        (likelihoods in HistFactory format)
                                     </span>
                                 </li>
+                                <li>
+                                    <a href='/search?q=analysis:HS3&sort_by=latest'
+                                       target="_new">analysis:HS3</a>
+                                    <span class="text-muted">
+                                       (likelihoods in HS3 format)
+                                    </span>
+                                </li>
+                                <li>
+                                    <a href='/search?q=analysis:SimpleAnalysis&sort_by=latest'
+                                       target="_new">analysis:SimpleAnalysis</a>
+                                    <span class="text-muted">
+                                       (code snippets in SimpleAnalysis format)
+                                    </span>
+                                </li>
                                 <li>
                                     <a href='/search?q=analysis:NUISANCE&sort_by=latest'
                                        target="_new">analysis:NUISANCE</a>

diff --git a/hepdata/version.py b/hepdata/version.py
@@ -28,4 +28,4 @@
 and parsed by ``setup.py``.
 """
 
-__version__ = "0.9.4dev20251013"
+__version__ = "0.9.4dev20251015"
diff --git a/tests/search_test.py b/tests/search_test.py
@@ -335,7 +335,7 @@ def test_search(app, load_default_data, identifiers):
 
     # Test searching of the resources field by type.
     # A bunch of different types to be checked for
-    resource_types = ['png', 'html', 'zenodo', 'dat', 'C++', None]
+    resource_types = ['png', 'html', 'zenodo', 'dat', 'SimpleAnalysis', None]
     for res_type in resource_types:
         # Execute search for the current type
         results = os_api.search(f'resources.type:{res_type}', index=index)
@@ -774,8 +774,9 @@ def test_add_analyses(app):
             "filename": "test.tar.gz"
         },
     ]
-    # This should probably be changed to use SITE_URL or some similar concept
-    analysis_url = "http://localhost:5000/record/resource/%s?landing_page=true"
+
+    site_url = app.config.get('SITE_URL', 'http://localhost:5000')
+    analysis_url = site_url + "/record/resource/%s?landing_page=true"
 
     with app.app_context():
         # Creating and submitting the test submission containing resources
@@ -802,7 +803,7 @@ def test_add_analyses(app):
 
         # Add MadAnalysis DataResource object separately
         mad_analysis_resource = DataResource(
-            file_location = "placeholder",
+            file_location = "https://placeholder",
             file_type = "MadAnalysis",
             file_description = "placeholder"
         )
@@ -1072,36 +1073,36 @@ def test_reindex_batch_large_submission(app, mocker):
     # Mock methods called so we can check they're called with correct parameters
     mock_index_record_ids = mocker.patch('hepdata.ext.opensearch.api.index_record_ids')
     mock_push_data_keywords = mocker.patch('hepdata.ext.opensearch.api.push_data_keywords')
-    
+
     # Mock database query to return a large number of records (250 total)
     mock_db_result = [(1, i) for i in range(2, 252)]  # pub_recid=1, data_recids=2-251
     mocker.patch('hepdata.ext.opensearch.api.db.session.query').return_value.join.return_value.filter.return_value.all.return_value = mock_db_result
-    
+
     # Set up return values for batched calls
     mock_index_record_ids.return_value = {'publication': [1], 'datatable': []}
-    
+
     # Call reindex_batch with a mock submission ID
     os_api.reindex_batch([999], index)
-    
-    # Should be called 3 times: 100 records, 100 records, 51 records  
+
+    # Should be called 3 times: 100 records, 100 records, 51 records
     assert mock_index_record_ids.call_count == 3
-    
+
     # Check the call arguments for batching
     calls = mock_index_record_ids.call_args_list
-    
+
     # First batch: 100 records (1 + first 99 from 2-100)
     first_batch = calls[0][0][0]  # First positional argument of first call
     assert len(first_batch) == 100
     assert 1 in first_batch  # publication record
-    
+
     # Second batch: 100 records (101-200)
     second_batch = calls[1][0][0]
     assert len(second_batch) == 100
-    
-    # Third batch: 51 records (201-251)  
+
+    # Third batch: 51 records (201-251)
     third_batch = calls[2][0][0]
     assert len(third_batch) == 51
-    
+
     # push_data_keywords should be called once at the end
     mock_push_data_keywords.assert_called_once_with(pub_ids=[1, 1, 1])  # Called with accumulated publication IDs