From 79e5f320bca607031f7d04944b8146da60ca4402 Mon Sep 17 00:00:00 2001 From: nextstrain-bot <78992647+nextstrain-bot@users.noreply.github.com> Date: Tue, 14 Apr 2026 22:15:26 +0000 Subject: [PATCH 1/2] git subrepo pull (merge) --force ingest/vendored subrepo: subdir: "ingest/vendored" merged: "c29898f" upstream: origin: "https://github.com/nextstrain/ingest" branch: "main" commit: "c29898f" git-subrepo: version: "0.4.9" origin: "https://github.com/ingydotnet/git-subrepo" commit: "5e0f401" --- ingest/vendored/.github/dependabot.yml | 2 +- ingest/vendored/.github/workflows/ci.yaml | 2 +- .../.github/workflows/pre-commit.yaml | 4 +- ingest/vendored/.gitrepo | 6 +- ingest/vendored/README.md | 70 ++++---- ingest/vendored/notify-slack | 56 ------ ingest/vendored/scripts/assign-colors | 96 +++++++++++ .../{ => scripts}/cloudfront-invalidate | 0 .../vendored/{ => scripts}/download-from-s3 | 0 .../{ => scripts}/fetch-from-ncbi-entrez | 0 ingest/vendored/{ => scripts}/notify-on-diff | 0 .../vendored/{ => scripts}/notify-on-job-fail | 0 .../{ => scripts}/notify-on-job-start | 0 .../{ => scripts}/notify-on-record-change | 0 ingest/vendored/scripts/notify-slack | 93 ++++++++++ .../vendored/{ => scripts}/s3-object-exists | 0 ingest/vendored/{ => scripts}/sha256sum | 0 ingest/vendored/{ => scripts}/trigger | 0 .../{ => scripts}/trigger-on-new-data | 0 ingest/vendored/{ => scripts}/upload-to-s3 | 0 ingest/vendored/snakemake/config.smk | 156 +++++++++++++++++ ingest/vendored/snakemake/remote_files.smk | 159 ++++++++++++++++++ 22 files changed, 551 insertions(+), 93 deletions(-) delete mode 100755 ingest/vendored/notify-slack create mode 100755 ingest/vendored/scripts/assign-colors rename ingest/vendored/{ => scripts}/cloudfront-invalidate (100%) rename ingest/vendored/{ => scripts}/download-from-s3 (100%) rename ingest/vendored/{ => scripts}/fetch-from-ncbi-entrez (100%) rename ingest/vendored/{ => scripts}/notify-on-diff (100%) rename ingest/vendored/{ => 
scripts}/notify-on-job-fail (100%) rename ingest/vendored/{ => scripts}/notify-on-job-start (100%) rename ingest/vendored/{ => scripts}/notify-on-record-change (100%) create mode 100755 ingest/vendored/scripts/notify-slack rename ingest/vendored/{ => scripts}/s3-object-exists (100%) rename ingest/vendored/{ => scripts}/sha256sum (100%) rename ingest/vendored/{ => scripts}/trigger (100%) rename ingest/vendored/{ => scripts}/trigger-on-new-data (100%) rename ingest/vendored/{ => scripts}/upload-to-s3 (100%) create mode 100644 ingest/vendored/snakemake/config.smk create mode 100644 ingest/vendored/snakemake/remote_files.smk diff --git a/ingest/vendored/.github/dependabot.yml b/ingest/vendored/.github/dependabot.yml index 89bd084..0a50ee1 100644 --- a/ingest/vendored/.github/dependabot.yml +++ b/ingest/vendored/.github/dependabot.yml @@ -4,7 +4,7 @@ # Each ecosystem is checked on a scheduled interval defined below. To trigger # a check manually, go to # -# https://github.com/nextstrain/ingest/network/updates +# https://github.com/nextstrain/shared/network/updates # # and look for a "Check for updates" button. You may need to click around a # bit first. 
diff --git a/ingest/vendored/.github/workflows/ci.yaml b/ingest/vendored/.github/workflows/ci.yaml index c716277..6dba82b 100644 --- a/ingest/vendored/.github/workflows/ci.yaml +++ b/ingest/vendored/.github/workflows/ci.yaml @@ -11,5 +11,5 @@ jobs: shellcheck: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - uses: nextstrain/.github/actions/shellcheck@master diff --git a/ingest/vendored/.github/workflows/pre-commit.yaml b/ingest/vendored/.github/workflows/pre-commit.yaml index 70da533..c63b890 100644 --- a/ingest/vendored/.github/workflows/pre-commit.yaml +++ b/ingest/vendored/.github/workflows/pre-commit.yaml @@ -7,8 +7,8 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 with: python-version: "3.12" - uses: pre-commit/action@v3.0.1 diff --git a/ingest/vendored/.gitrepo b/ingest/vendored/.gitrepo index 544d60f..53c414e 100644 --- a/ingest/vendored/.gitrepo +++ b/ingest/vendored/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/nextstrain/ingest branch = main - commit = 258ab8ce898a88089bc88caee336f8d683a0e79a - parent = a7af2c05fc4ccc822c8ef38f0001dc5e8bee803b + commit = c29898f7c32c3f85d65db235d23a78e776f89120 + parent = 2d06e5de2f761a090d45e0bfcb8b1d510fffdc83 method = merge - cmdver = 0.4.7 + cmdver = 0.4.9 diff --git a/ingest/vendored/README.md b/ingest/vendored/README.md index a2b54cb..d4b1874 100644 --- a/ingest/vendored/README.md +++ b/ingest/vendored/README.md @@ -1,6 +1,6 @@ -# ingest +# shared -Shared internal tooling for pathogen data ingest. Used by our individual +Shared internal tooling for pathogen workflows. Used by our individual pathogen repos which produce Nextstrain builds. Expected to be vendored by each pathogen repo using `git subrepo`. 
@@ -9,47 +9,47 @@ Some tools may only live here temporarily before finding a permanent home in ## Vendoring -Nextstrain maintained pathogen repos will use [`git subrepo`](https://github.com/ingydotnet/git-subrepo) to vendor ingest scripts. -(See discussion on this decision in https://github.com/nextstrain/ingest/issues/3) +Nextstrain maintained pathogen repos will use [`git subrepo`](https://github.com/ingydotnet/git-subrepo) to vendor shared scripts. +(See discussion on this decision in https://github.com/nextstrain/shared/issues/3) For a list of Nextstrain repos that are currently using this method, use [this GitHub code search](https://github.com/search?type=code&q=org%3Anextstrain+subrepo+%22remote+%3D+https%3A%2F%2Fgithub.com%2Fnextstrain%2Fingest%22). If you don't already have `git subrepo` installed, follow the [git subrepo installation instructions](https://github.com/ingydotnet/git-subrepo#installation). -Then add the latest ingest scripts to the pathogen repo by running: +Then add the latest shared scripts to the pathogen repo by running: ``` -git subrepo clone https://github.com/nextstrain/ingest ingest/vendored +git subrepo clone https://github.com/nextstrain/shared shared/vendored ``` -Any future updates of ingest scripts can be pulled in with: +Any future updates of shared scripts can be pulled in with: ``` -git subrepo pull ingest/vendored +git subrepo pull shared/vendored ``` If you run into merge conflicts and would like to pull in a fresh copy of the -latest ingest scripts, pull with the `--force` flag: +latest shared scripts, pull with the `--force` flag: ``` -git subrepo pull ingest/vendored --force +git subrepo pull shared/vendored --force ``` > **Warning** > Beware of rebasing/dropping the parent commit of a `git subrepo` update -`git subrepo` relies on metadata in the `ingest/vendored/.gitrepo` file, +`git subrepo` relies on metadata in the `shared/vendored/.gitrepo` file, which includes the hash for the parent commit in the pathogen repos. 
If this hash no longer exists in the commit history, there will be errors when running future `git subrepo pull` commands. If you run into an error similar to the following: ``` -$ git subrepo pull ingest/vendored -git-subrepo: Command failed: 'git branch subrepo/ingest/vendored '. +$ git subrepo pull shared/vendored +git-subrepo: Command failed: 'git branch subrepo/shared/vendored '. fatal: not a valid object name: '' ``` -Check the parent commit hash in the `ingest/vendored/.gitrepo` file and make +Check the parent commit hash in the `shared/vendored/.gitrepo` file and make sure the commit exists in the commit history. Update to the appropriate parent commit hash if needed. @@ -84,39 +84,49 @@ approach to "ingest" has been discussed in various internal places, including: ## Scripts -Scripts for supporting ingest workflow automation that don’t really belong in any of our existing tools. +Scripts for supporting workflow automation that don’t really belong in any of our existing tools. -- [notify-on-diff](notify-on-diff) - Send Slack message with diff of a local file and an S3 object -- [notify-on-job-fail](notify-on-job-fail) - Send Slack message with details about failed workflow job on GitHub Actions and/or AWS Batch -- [notify-on-job-start](notify-on-job-start) - Send Slack message with details about workflow job on GitHub Actions and/or AWS Batch -- [notify-on-record-change](notify-on-recod-change) - Send Slack message with details about line count changes for a file compared to an S3 object's metadata `recordcount`. +- [assign-colors](scripts/assign-colors) - Generate colors.tsv for augur export based on ordering, color schemes, and what exists in the metadata. Used in the phylogenetic or nextclade workflows. 
+- [notify-on-diff](scripts/notify-on-diff) - Send Slack message with diff of a local file and an S3 object +- [notify-on-job-fail](scripts/notify-on-job-fail) - Send Slack message with details about failed workflow job on GitHub Actions and/or AWS Batch +- [notify-on-job-start](scripts/notify-on-job-start) - Send Slack message with details about workflow job on GitHub Actions and/or AWS Batch +- [notify-on-record-change](scripts/notify-on-record-change) - Send Slack message with details about line count changes for a file compared to an S3 object's metadata `recordcount`. If the S3 object's metadata does not have `recordcount`, then will attempt to download S3 object to count lines locally, which only supports `xz` compressed S3 objects. -- [notify-slack](notify-slack) - Send message or file to Slack -- [s3-object-exists](s3-object-exists) - Used to prevent 404 errors during S3 file comparisons in the notify-* scripts -- [trigger](trigger) - Triggers downstream GitHub Actions via the GitHub API using repository_dispatch events. -- [trigger-on-new-data](trigger-on-new-data) - Triggers downstream GitHub Actions if the provided `upload-to-s3` outputs do not contain the `identical_file_message` +- [notify-slack](scripts/notify-slack) - Send message or file to Slack +- [s3-object-exists](scripts/s3-object-exists) - Used to prevent 404 errors during S3 file comparisons in the notify-* scripts +- [trigger](scripts/trigger) - Triggers downstream GitHub Actions via the GitHub API using repository_dispatch events. +- [trigger-on-new-data](scripts/trigger-on-new-data) - Triggers downstream GitHub Actions if the provided `upload-to-s3` outputs do not contain the `identical_file_message` A hacky way to ensure that we only trigger downstream phylogenetic builds if the S3 objects have been updated. + NCBI interaction scripts that are useful for fetching public metadata and sequences. 
-- [fetch-from-ncbi-entrez](fetch-from-ncbi-entrez) - Fetch metadata and nucleotide sequences from [NCBI Entrez](https://www.ncbi.nlm.nih.gov/books/NBK25501/) and output to a GenBank file. +- [fetch-from-ncbi-entrez](scripts/fetch-from-ncbi-entrez) - Fetch metadata and nucleotide sequences from [NCBI Entrez](https://www.ncbi.nlm.nih.gov/books/NBK25501/) and output to a GenBank file. Useful for pathogens with metadata and annotations in custom fields that are not part of the standard [NCBI Datasets](https://www.ncbi.nlm.nih.gov/datasets/) outputs. -Historically, some pathogen repos used the undocumented NCBI Virus API through [fetch-from-ncbi-virus](https://github.com/nextstrain/ingest/blob/c97df238518171c2b1574bec0349a55855d1e7a7/fetch-from-ncbi-virus) to fetch data. However we've opted to drop the NCBI Virus scripts due to https://github.com/nextstrain/ingest/issues/18. +Historically, some pathogen repos used the undocumented NCBI Virus API through [fetch-from-ncbi-virus](https://github.com/nextstrain/shared/blob/c97df238518171c2b1574bec0349a55855d1e7a7/fetch-from-ncbi-virus) to fetch data. However we've opted to drop the NCBI Virus scripts due to https://github.com/nextstrain/shared/issues/18. Potential Nextstrain CLI scripts -- [sha256sum](sha256sum) - Used to check if files are identical in upload-to-s3 and download-from-s3 scripts. -- [cloudfront-invalidate](cloudfront-invalidate) - CloudFront invalidation is already supported in the [nextstrain remote command for S3 files](https://github.com/nextstrain/cli/blob/a5dda9c0579ece7acbd8e2c32a4bbe95df7c0bce/nextstrain/cli/remote/s3.py#L104). +- [sha256sum](scripts/sha256sum) - Used to check if files are identical in upload-to-s3 and download-from-s3 scripts. 
+- [cloudfront-invalidate](scripts/cloudfront-invalidate) - CloudFront invalidation is already supported in the [nextstrain remote command for S3 files](https://github.com/nextstrain/cli/blob/a5dda9c0579ece7acbd8e2c32a4bbe95df7c0bce/nextstrain/cli/remote/s3.py#L104). This exists as a separate script to support CloudFront invalidation when using the upload-to-s3 script. -- [upload-to-s3](upload-to-s3) - Upload file to AWS S3 bucket with compression based on file extension in S3 URL. +- [upload-to-s3](scripts/upload-to-s3) - Upload file to AWS S3 bucket with compression based on file extension in S3 URL. Skips upload if the local file's hash is identical to the S3 object's metadata `sha256sum`. Adds the following user defined metadata to uploaded S3 object: - - `sha256sum` - hash of the file generated by [sha256sum](sha256sum) + - `sha256sum` - hash of the file generated by [sha256sum](scripts/sha256sum) - `recordcount` - the line count of the file -- [download-from-s3](download-from-s3) - Download file from AWS S3 bucket with decompression based on file extension in S3 URL. +- [download-from-s3](scripts/download-from-s3) - Download file from AWS S3 bucket with decompression based on file extension in S3 URL. Skips download if the local file already exists and has a hash identical to the S3 object's metadata `sha256sum`. +## Snakemake + +Snakemake workflow functions that are shared across many pathogen workflows that don’t really belong in any of our existing tools. + +- [config.smk](snakemake/config.smk) - Shared functions for handling workflow configs. +- [remote_files.smk](snakemake/remote_files.smk) - Exposes the `path_or_url` function which will use Snakemake's storage plugins to download/upload files to remote providers as needed. + + ## Software requirements Some scripts may require Bash ≥4. If you are running these scripts on macOS, the builtin Bash (`/bin/bash`) does not meet this requirement. 
You can install [Homebrew's Bash](https://formulae.brew.sh/formula/bash) which is more up to date. diff --git a/ingest/vendored/notify-slack b/ingest/vendored/notify-slack deleted file mode 100755 index a343435..0000000 --- a/ingest/vendored/notify-slack +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -: "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" -: "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" - -upload=0 -output=/dev/null -thread_ts="" -broadcast=0 -args=() - -for arg; do - case "$arg" in - --upload) - upload=1;; - --output=*) - output="${arg#*=}";; - --thread-ts=*) - thread_ts="${arg#*=}";; - --broadcast) - broadcast=1;; - *) - args+=("$arg");; - esac -done - -set -- "${args[@]}" - -text="${1:?Some message text is required.}" - -if [[ "$upload" == 1 ]]; then - echo "Uploading data to Slack with the message: $text" - curl https://slack.com/api/files.upload \ - --header "Authorization: Bearer $SLACK_TOKEN" \ - --form-string channels="$SLACK_CHANNELS" \ - --form-string title="$text" \ - --form-string filename="$text" \ - --form-string thread_ts="$thread_ts" \ - --form file=@/dev/stdin \ - --form filetype=text \ - --fail --silent --show-error \ - --http1.1 \ - --output "$output" -else - echo "Posting Slack message: $text" - curl https://slack.com/api/chat.postMessage \ - --header "Authorization: Bearer $SLACK_TOKEN" \ - --form-string channel="$SLACK_CHANNELS" \ - --form-string text="$text" \ - --form-string thread_ts="$thread_ts" \ - --form-string reply_broadcast="$broadcast" \ - --fail --silent --show-error \ - --http1.1 \ - --output "$output" -fi diff --git a/ingest/vendored/scripts/assign-colors b/ingest/vendored/scripts/assign-colors new file mode 100755 index 0000000..e42a44d --- /dev/null +++ b/ingest/vendored/scripts/assign-colors @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +""" +Generate colors.tsv for augur export based on ordering, color schemes, and +traits that exists in the 
metadata. +""" +import argparse +import pandas as pd + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="Assign colors based on defined ordering of traits.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument('--ordering', type=str, required=True, + help="""Input TSV file defining the color ordering where the first + column is the field and the second column is the trait in that field. + Blank lines are ignored. Lines starting with '#' will be ignored as comments.""") + parser.add_argument('--color-schemes', type=str, required=True, + help="Input color schemes where each line is a different color scheme separated by tabs.") + parser.add_argument('--metadata', type=str, + help="""If provided, restrict colors to only those traits found in + metadata. If the metadata includes a 'focal' column that only contains + boolean values, then restrict colors to traits for rows where 'focal' + is set to True.""") + parser.add_argument('--output', type=str, required=True, + help="Output colors TSV file to be passed to augur export.") + args = parser.parse_args() + + assignment = {} + with open(args.ordering) as f: + for line in f.readlines(): + array = line.strip().split("\t") + # Ignore empty lines or commented lines + if not array or not array[0] or array[0].startswith('#'): + continue + # Throw a warning if encountering a line not matching the expected number of columns, ignore line + elif len(array)!=2: + print(f"WARNING: Could not decode color ordering line: {line}") + continue + # Otherwise, process color ordering where we expect 2 columns: name, traits + else: + name = array[0] + trait = array[1] + if name not in assignment: + assignment[name] = [trait] + else: + assignment[name].append(trait) + + # if metadata supplied, go through and + # 1. remove assignments that don't exist in metadata + # 2. 
remove assignments that have 'focal' set to 'False' in metadata + if args.metadata: + metadata = pd.read_csv(args.metadata, delimiter='\t') + for name, trait in assignment.items(): + if name in metadata: + if 'focal' in metadata and metadata['focal'].dtype == 'bool': + focal_list = metadata.loc[metadata['focal'], name].unique() + subset_focal = [x for x in assignment[name] if x in focal_list] + assignment[name] = subset_focal + else: # no 'focal' present + subset_present = [x for x in assignment[name] if x in metadata[name].unique()] + assignment[name] = subset_present + + + schemes = {} + counter = 0 + with open(args.color_schemes) as f: + for line in f.readlines(): + counter += 1 + array = line.lstrip().rstrip().split("\t") + schemes[counter] = array + + with open(args.output, 'w') as f: + for trait_name, trait_array in assignment.items(): + if len(trait_array)==0: + print(f"No traits found for {trait_name}") + continue + if len(schemes) < len(trait_array): + color_array = schemes[len(schemes)] + remain = len(trait_array) - len(schemes) + while (remain > 0): + if (remain>len(schemes)): + color_array = [*color_array, *schemes[len(schemes)]] + remain -= len(schemes) + else: + color_array = [*color_array, *schemes[remain]] + remain = 0 + else: + color_array = schemes[len(trait_array)] + + zipped = list(zip(trait_array, color_array)) + for trait_value, color in zipped: + f.write(trait_name + "\t" + trait_value + "\t" + color + "\n") + f.write("\n") diff --git a/ingest/vendored/cloudfront-invalidate b/ingest/vendored/scripts/cloudfront-invalidate similarity index 100% rename from ingest/vendored/cloudfront-invalidate rename to ingest/vendored/scripts/cloudfront-invalidate diff --git a/ingest/vendored/download-from-s3 b/ingest/vendored/scripts/download-from-s3 similarity index 100% rename from ingest/vendored/download-from-s3 rename to ingest/vendored/scripts/download-from-s3 diff --git a/ingest/vendored/fetch-from-ncbi-entrez b/ingest/vendored/scripts/fetch-from-ncbi-entrez similarity index 100% rename from ingest/vendored/fetch-from-ncbi-entrez rename to
ingest/vendored/scripts/fetch-from-ncbi-entrez diff --git a/ingest/vendored/notify-on-diff b/ingest/vendored/scripts/notify-on-diff similarity index 100% rename from ingest/vendored/notify-on-diff rename to ingest/vendored/scripts/notify-on-diff diff --git a/ingest/vendored/notify-on-job-fail b/ingest/vendored/scripts/notify-on-job-fail similarity index 100% rename from ingest/vendored/notify-on-job-fail rename to ingest/vendored/scripts/notify-on-job-fail diff --git a/ingest/vendored/notify-on-job-start b/ingest/vendored/scripts/notify-on-job-start similarity index 100% rename from ingest/vendored/notify-on-job-start rename to ingest/vendored/scripts/notify-on-job-start diff --git a/ingest/vendored/notify-on-record-change b/ingest/vendored/scripts/notify-on-record-change similarity index 100% rename from ingest/vendored/notify-on-record-change rename to ingest/vendored/scripts/notify-on-record-change diff --git a/ingest/vendored/scripts/notify-slack b/ingest/vendored/scripts/notify-slack new file mode 100755 index 0000000..c6f1a87 --- /dev/null +++ b/ingest/vendored/scripts/notify-slack @@ -0,0 +1,93 @@ +#!/usr/bin/env bash +set -euo pipefail + +: "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" +: "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" + +upload=0 +output=/dev/null +thread_ts="" +broadcast=0 +fail_on_error=0 +args=() + +for arg; do + case "$arg" in + --upload) + upload=1;; + --output=*) + output="${arg#*=}";; + --thread-ts=*) + thread_ts="${arg#*=}";; + --broadcast) + broadcast=1;; + --fail-on-error) + fail_on_error=1;; + *) + args+=("$arg");; + esac +done + +set -- "${args[@]}" + +text="${1:?Some message text is required.}" + +send_slack_message() { + if [[ "$upload" == 1 ]]; then + echo "Uploading data to Slack with the message: $text" + + upload_file="$(mktemp -t upload-file-XXXXXX)" + trap "rm -f '$upload_file'" EXIT + + cat /dev/stdin > "$upload_file" + # printf used to strip whitespace from output of 
macOS/BSD wc + # See + length=$(printf '%d' "$(<"$upload_file" wc -c)") + + upload_info=$(curl https://slack.com/api/files.getUploadURLExternal \ + --header "Authorization: Bearer $SLACK_TOKEN" \ + --form-string filename="$text" \ + --form-string length="$length" \ + --fail --silent --show-error \ + --http1.1 ) + + upload_url="$(jq -r .upload_url <<< "$upload_info")" + curl "$upload_url" \ + --form-string filename="$text" \ + --form file="@$upload_file" \ + --fail --silent --show-error \ + --http1.1 > /dev/null + + files_uploaded="$(jq -r "[{id: .file_id}]" <<< "$upload_info")" + curl -X POST https://slack.com/api/files.completeUploadExternal \ + --header "Authorization: Bearer $SLACK_TOKEN" \ + --form-string channel_id="$SLACK_CHANNELS" \ + --form-string thread_ts="$thread_ts" \ + --form-string files="$files_uploaded" \ + --fail --silent --show-error \ + --http1.1 \ + --output "$output" + + else + echo "Posting Slack message: $text" + curl https://slack.com/api/chat.postMessage \ + --header "Authorization: Bearer $SLACK_TOKEN" \ + --form-string channel="$SLACK_CHANNELS" \ + --form-string text="$text" \ + --form-string thread_ts="$thread_ts" \ + --form-string reply_broadcast="$broadcast" \ + --fail --silent --show-error \ + --http1.1 \ + --output "$output" + fi +} + +if ! send_slack_message; then + if [[ "$fail_on_error" == 1 ]]; then + echo "Sending Slack message failed" + exit 1 + else + echo "Sending Slack message failed, but exiting with success anyway." 
+ exit 0 + fi +fi diff --git a/ingest/vendored/s3-object-exists b/ingest/vendored/scripts/s3-object-exists similarity index 100% rename from ingest/vendored/s3-object-exists rename to ingest/vendored/scripts/s3-object-exists diff --git a/ingest/vendored/sha256sum b/ingest/vendored/scripts/sha256sum similarity index 100% rename from ingest/vendored/sha256sum rename to ingest/vendored/scripts/sha256sum diff --git a/ingest/vendored/trigger b/ingest/vendored/scripts/trigger similarity index 100% rename from ingest/vendored/trigger rename to ingest/vendored/scripts/trigger diff --git a/ingest/vendored/trigger-on-new-data b/ingest/vendored/scripts/trigger-on-new-data similarity index 100% rename from ingest/vendored/trigger-on-new-data rename to ingest/vendored/scripts/trigger-on-new-data diff --git a/ingest/vendored/upload-to-s3 b/ingest/vendored/scripts/upload-to-s3 similarity index 100% rename from ingest/vendored/upload-to-s3 rename to ingest/vendored/scripts/upload-to-s3 diff --git a/ingest/vendored/snakemake/config.smk b/ingest/vendored/snakemake/config.smk new file mode 100644 index 0000000..76b2b75 --- /dev/null +++ b/ingest/vendored/snakemake/config.smk @@ -0,0 +1,156 @@ +""" +Shared functions to be used within a Snakemake workflow for handling +workflow configs. +""" +import os +import sys +import yaml +from collections.abc import Callable +from typing import Optional +from textwrap import dedent, indent + + +# Set search paths for Augur +if "AUGUR_SEARCH_PATHS" in os.environ: + print(dedent(f"""\ + Using existing search paths in AUGUR_SEARCH_PATHS: + + {os.environ["AUGUR_SEARCH_PATHS"]!r} + """), file=sys.stderr) +else: + # Note that this differs from the search paths used in + # resolve_config_path(). + # This is the preferred default moving forwards, and the plan is to + # eventually update resolve_config_path() to use AUGUR_SEARCH_PATHS. 
+ search_paths = [ + # User analysis directory + Path.cwd(), + + # Workflow defaults folder + Path(workflow.basedir) / "defaults", + + # Workflow root (contains Snakefile) + Path(workflow.basedir), + ] + + # This should work for majority of workflows, but we could consider doing a + # more thorough search for the nextstrain-pathogen.yaml. This would likely + # replicate how CLI searches for the root.¹ + # ¹ + repo_root = Path(workflow.basedir) / ".." + if (repo_root / "nextstrain-pathogen.yaml").is_file(): + search_paths.extend([ + # Pathogen repo root + repo_root, + ]) + + search_paths = [path.resolve() for path in search_paths if path.is_dir()] + + os.environ["AUGUR_SEARCH_PATHS"] = ":".join(map(str, search_paths)) + + +class InvalidConfigError(Exception): + pass + + +def resolve_config_path(path: str, defaults_dir: Optional[str] = None) -> Callable: + """ + Resolve a relative *path* given in a configuration value. Will always try to + resolve *path* after expanding wildcards with Snakemake's `expand` functionality. + + Returns the path for the first existing file, checked in the following order: + 1. relative to the analysis directory or workdir, usually given by ``--directory`` (``-d``) + 2. relative to *defaults_dir* if it's provided + 3. relative to the workflow's ``defaults/`` directory if *defaults_dir* is _not_ provided + + This behaviour allows a default configuration value to point to a default + auxiliary file while also letting the file used be overridden either by + setting an alternate file path in the configuration or by creating a file + with the conventional name in the workflow's analysis directory. 
+ """ + global workflow + + def _resolve_config_path(wildcards): + try: + expanded_path = expand(path, **wildcards)[0] + except snakemake.exceptions.WildcardError as e: + available_wildcards = "\n".join(f" - {wildcard}" for wildcard in wildcards) + raise snakemake.exceptions.WildcardError(indent(dedent(f"""\ + {str(e)} + + However, resolve_config_path({{path}}) requires the wildcard. + + Wildcards available for this path are: + + {{available_wildcards}} + + Hint: Check that the config path value does not misspell the wildcard name + and that the rule actually uses the wildcard name. + """.lstrip("\n").rstrip()).format(path=repr(path), available_wildcards=available_wildcards), " " * 4)) + + if os.path.exists(expanded_path): + return expanded_path + + if defaults_dir: + defaults_path = os.path.join(defaults_dir, expanded_path) + else: + # Special-case defaults/… for backwards compatibility with older + # configs. We could achieve the same behaviour with a symlink + # (defaults/defaults → .) but that seems less clear. + if path.startswith("defaults/"): + defaults_path = os.path.join(workflow.basedir, expanded_path) + else: + defaults_path = os.path.join(workflow.basedir, "defaults", expanded_path) + + if os.path.exists(defaults_path): + return defaults_path + + raise InvalidConfigError(indent(dedent(f"""\ + Unable to resolve the config-provided path {path!r}, + expanded to {expanded_path!r} after filling in wildcards. + The workflow does not include the default file {defaults_path!r}. + + Hint: Check that the file {expanded_path!r} exists in your analysis + directory or remove the config param to use the workflow defaults. + """), " " * 4)) + + return _resolve_config_path + + +def write_config(path, section=None): + """ + Write Snakemake's 'config' variable, or a section of it, to a file. + + *section* is an optional list of keys to navigate to a specific section of + config. If provided, only that section will be written. 
+ """ + global config + + os.makedirs(os.path.dirname(path), exist_ok=True) + + data = config + section_str = "config" + + if section: + # Navigate to the specified section + for key in section: + # Error if key doesn't exist + if key not in data: + raise Exception(f"ERROR: Key {key!r} not found in {section_str!r}.") + + data = data[key] + section_str += f".{key}" + + # Error if value is not a mapping + if not isinstance(data, dict): + raise Exception(f"ERROR: {section_str!r} is not a mapping of key/value pairs.") + + with open(path, 'w') as f: + yaml.dump(data, f, sort_keys=False, Dumper=NoAliasDumper) + + print(f"Saved {section_str!r} to {path!r}.", file=sys.stderr) + + +class NoAliasDumper(yaml.SafeDumper): + def ignore_aliases(self, data): + return True diff --git a/ingest/vendored/snakemake/remote_files.smk b/ingest/vendored/snakemake/remote_files.smk new file mode 100644 index 0000000..844f80e --- /dev/null +++ b/ingest/vendored/snakemake/remote_files.smk @@ -0,0 +1,159 @@ +""" +Helper functions to set-up storage plugins for remote inputs/outputs. See the +docstring of `path_or_url` for usage instructions. + +The errors raised by storage plugins are often confusing. For instance, a HTTP +404 error will result in a `MissingInputException` with little hint as to the +underlying issue. S3 credentials errors are similarly confusing and we attempt +to check these ourselves to improve UX here. +""" + +from urllib.parse import urlparse + +# Keep a list of known public buckets, which we'll allow uncredentialled (unsigned) access to +# We could make this config-definable in the future +PUBLIC_BUCKETS = set(['nextstrain-data']) + +# Keep track of registered storage plugins to enable reuse +_storage_registry = {} + +class RemoteFilesMissingCredentials(Exception): + pass + +def _storage_s3(*, bucket, keep_local, retries) -> snakemake.storage.StorageProviderProxy: + """ + Registers and returns an instance of snakemake-storage-plugin-s3. 
Typically AWS + credentials are required for _any_ request however we allow requests to known + public buckets (see `PUBLIC_BUCKETS`) to be unsigned which allows for a nice user + experience in the common case of downloading inputs from s3://nextstrain-data. + + The intended behaviour for various (S3) URIs supplied to `path_or_url` is: + + | | S3 buckets | credentials present | credentials missing | + |----------|----------------------------|---------------------|---------------------| + | download | private / private + public | signed | Credentials Error | + | | public | signed | unsigned | + | upload | private / private + public | signed | Credentials Error | + | | public | signed | AccessDenied Error | + """ + # If the bucket is public then we may use an unsigned request which has the nice UX + # of not needing credentials to be present. If we've made other signed requests _or_ + # credentials are present then we just sign everything. This has implications for upload: + # if you attempt to upload to a public bucket without credentials then we allow that here + # and you'll get a subsequent `AccessDenied` error when the upload is attempted. 
+ if bucket in PUBLIC_BUCKETS and \ + "s3_signed" not in _storage_registry and \ + ("s3_unsigned" in _storage_registry or not _aws_credentials_present()): + + if provider:=_storage_registry.get('s3_unsigned', None): + return provider + + from botocore import UNSIGNED # dependency of snakemake-storage-plugin-s3 + storage s3_unsigned: + provider="s3", + signature_version=UNSIGNED, + retries=retries, + keep_local=keep_local, + + _storage_registry['s3_unsigned'] = storage.s3_unsigned + return _storage_registry['s3_unsigned'] + + # Resource fetched/uploaded via a signed request, which will require AWS credentials + if provider:=_storage_registry.get('s3_signed', None): + return provider + + # Enforce the presence of credentials to paper over + if not _aws_credentials_present(): + raise RemoteFilesMissingCredentials() + + # the tag appears in the local file path, so reference 'signed' to give a hint about credential errors + storage s3_signed: + provider="s3", + retries=retries, + keep_local=keep_local, + + _storage_registry['s3_signed'] = storage.s3_signed + return _storage_registry['s3_signed'] + +def _aws_credentials_present() -> bool: + import boto3 # dependency of snakemake-storage-plugin-s3 + session = boto3.Session() + creds = session.get_credentials() + return creds is not None + +def _storage_http(*, keep_local, retries) -> snakemake.storage.StorageProviderProxy: + """ + Registers and returns an instance of snakemake-storage-plugin-http + """ + if provider:=_storage_registry.get('http', None): + return provider + + storage: + provider="http", + allow_redirects=True, + supports_head=True, + keep_local=keep_local, + retries=retries, + + _storage_registry['http'] = storage.http + return _storage_registry['http'] + + +def path_or_url(uri, *, keep_local=True, retries=2) -> str: + """ + Intended for use in Snakemake inputs / outputs to transparently use remote + resources. Returns the URI wrapped by an applicable storage plugin. 
Local + filepaths will be returned unchanged. + + For example, the following rule will download inputs from HTTPS and upload + the output to S3: + + rule filter: + input: + sequences = path_or_url("https://data.nextstrain.org/..."), + metadata = path_or_url("https://data.nextstrain.org/..."), + output: + sequences = path_or_url("s3://...") + shell: + r''' + augur filter \ + --sequences {input.sequences:q} \ + --metadata {input.metadata:q} \ + --metadata-id-columns accession \ + --output-sequences {output.sequences:q} + ''' + + If *keep_local* is True (the default) then downloaded/uploaded files will + remain in `.snakemake/storage/`. The presence of a previously downloaded + file (via `keep_local=True`) does not guarantee that the file will not be + re-downloaded if the storage plugin decides the local file is out of date. + + Depending on the *uri*, authentication may be required. See the specific + helper functions (such as `_storage_s3`) for more details. + + See <https://snakemake.readthedocs.io/en/stable/snakefiles/storage.html> for + more information on Snakemake storage plugins. Note: various snakemake + plugins will be required depending on the URIs provided. 
+ """ + info = urlparse(uri) + + if info.scheme=='': # local + return uri # no storage wrapper + + if info.scheme=='s3': + try: + return _storage_s3(bucket=info.netloc, keep_local=keep_local, retries=retries)(uri) + except RemoteFilesMissingCredentials as e: + raise Exception(f"AWS credentials are required to access {uri!r}") from e + + if info.scheme=='https': + return _storage_http(keep_local=keep_local, retries=retries)(uri) + elif info.scheme=='http': + raise Exception(f"HTTP remote file support is not implemented in nextstrain workflows (attempting to access {uri!r}).\n" + "Please use an HTTPS address instead.") + + if info.scheme in ['gs', 'gcs']: + raise Exception(f"Google Storage is not yet implemented for nextstrain workflows (attempting to access {uri!r}).\n" + "Please get in touch if you require this functionality and we can add it to our workflows") + + raise Exception(f"Input address {uri!r} (scheme={info.scheme!r}) is from a non-supported remote") From 23e46bcf26357abf9108bb32d3e2baa4ab8b9dd1 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 15 Apr 2026 10:19:19 -0700 Subject: [PATCH 2/2] ingest: fix vendored script paths Follow up to automated update of the vendored subrepo in the previous commit. 
--- ingest/build-configs/nextstrain-automation/upload.smk | 2 +- ingest/rules/fetch_from_ncbi.smk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ingest/build-configs/nextstrain-automation/upload.smk b/ingest/build-configs/nextstrain-automation/upload.smk index 1f1929b..d51ec53 100644 --- a/ingest/build-configs/nextstrain-automation/upload.smk +++ b/ingest/build-configs/nextstrain-automation/upload.smk @@ -30,7 +30,7 @@ rule upload_to_s3: cloudfront_domain=config["cloudfront_domain"], shell: r""" - ./vendored/upload-to-s3 \ + ./vendored/scripts/upload-to-s3 \ {params.quiet} \ {input.file_to_upload:q} \ {params.s3_dst:q}/{wildcards.remote_file:q} \ diff --git a/ingest/rules/fetch_from_ncbi.smk b/ingest/rules/fetch_from_ncbi.smk index db26308..8370670 100644 --- a/ingest/rules/fetch_from_ncbi.smk +++ b/ingest/rules/fetch_from_ncbi.smk @@ -95,7 +95,7 @@ rule fetch_ncbi_entrez_data: r""" exec &> >(tee {log:q}) - vendored/fetch-from-ncbi-entrez \ + vendored/scripts/fetch-from-ncbi-entrez \ --term {params.term:q} \ --output {output.genbank:q} """