From bc7518079d0f47e120213528c435082718d361ea Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 31 Aug 2023 13:00:19 -0700 Subject: [PATCH 1/5] Use the environment's Bash We definitely don't need "the system bash", which is what /bin/bash is. We want "the bash the user wants", which will often be the system bash, but not always. The main motivation is that /bin/bash on macOS is stuck on an ancient version due to licensing issues, and users might prefer another Bash in their environments. --- cloudfront-invalidate | 2 +- download-from-s3 | 2 +- fetch-from-ncbi-virus | 2 +- notify-on-diff | 2 +- notify-on-job-fail | 2 +- notify-on-job-start | 2 +- notify-on-record-change | 2 +- notify-slack | 2 +- s3-object-exists | 2 +- trigger | 2 +- trigger-on-new-data | 2 +- upload-to-s3 | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cloudfront-invalidate b/cloudfront-invalidate index dec4852..dbea398 100755 --- a/cloudfront-invalidate +++ b/cloudfront-invalidate @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Originally from @tsibley's gist: https://gist.github.com/tsibley/a66262d341dedbea39b02f27e2837ea8 set -euo pipefail diff --git a/download-from-s3 b/download-from-s3 index 44f7ff3..4981186 100755 --- a/download-from-s3 +++ b/download-from-s3 @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail bin="$(dirname "$0")" diff --git a/fetch-from-ncbi-virus b/fetch-from-ncbi-virus index 0c5f3e5..06f7d69 100755 --- a/fetch-from-ncbi-virus +++ b/fetch-from-ncbi-virus @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # usage: fetch-from-ncbi-virus [options] <ncbi_taxon_id> <github_repo> # # Fetch metadata and nucleotide sequences from [NCBI Virus](https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/) diff --git a/notify-on-diff b/notify-on-diff index c304d6b..ddbe7da 100755 --- a/notify-on-diff +++ b/notify-on-diff @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail diff --git a/notify-on-job-fail 
b/notify-on-job-fail index 02cb6ba..7dd2409 100755 --- a/notify-on-job-fail +++ b/notify-on-job-fail @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail : "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" diff --git a/notify-on-job-start b/notify-on-job-start index 3e44bb0..1c8ce7d 100755 --- a/notify-on-job-start +++ b/notify-on-job-start @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail : "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" diff --git a/notify-on-record-change b/notify-on-record-change index c0bf8f7..f424252 100755 --- a/notify-on-record-change +++ b/notify-on-record-change @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail : "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" diff --git a/notify-slack b/notify-slack index db98bfb..a343435 100755 --- a/notify-slack +++ b/notify-slack @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail : "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" diff --git a/s3-object-exists b/s3-object-exists index faac421..679c20a 100755 --- a/s3-object-exists +++ b/s3-object-exists @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail url="${1#s3://}" diff --git a/trigger b/trigger index 11d1b63..586f9cc 100755 --- a/trigger +++ b/trigger @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail : "${PAT_GITHUB_DISPATCH:=}" diff --git a/trigger-on-new-data b/trigger-on-new-data index ef71d88..470d2f4 100755 --- a/trigger-on-new-data +++ b/trigger-on-new-data @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail : "${PAT_GITHUB_DISPATCH:?The PAT_GITHUB_DISPATCH environment variable is required.}" diff --git a/upload-to-s3 b/upload-to-s3 index 31cd49b..36d171c 100755 --- a/upload-to-s3 +++ b/upload-to-s3 @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail bin="$(dirname "$0")" From f9ec8311ed45ce79eebc75a858c59705d58e1ca0 Mon Sep 17 
00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 31 Aug 2023 15:19:51 -0700 Subject: [PATCH 2/5] README: Add software requirements section --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index a5dd3d1..118fd97 100644 --- a/README.md +++ b/README.md @@ -97,3 +97,7 @@ Potential augur curate scripts - [transform-authors](transform-authors) - Abbreviates full author lists to '<first author> et al.' - [transform-field-names](transform-field-names) - Rename fields of NDJSON records - [transform-genbank-location](transform-genbank-location) - Parses `location` field with the expected pattern `"<country_value>[:<region>][, <locality>]"` based on [GenBank's country field](https://www.ncbi.nlm.nih.gov/genbank/collab/country/) + +## Software requirements + +Some scripts may require Bash ≥4. If you are running these scripts on macOS, the builtin Bash (`/bin/bash`) does not meet this requirement. You can install [Homebrew's Bash](https://formulae.brew.sh/formula/bash) which is more up to date. 
From ae064612880d527ef0545e783ed5fffdaa3fdbe7 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Wed, 30 Aug 2023 15:06:01 -0700 Subject: [PATCH 3/5] Set up Cram tests --- .cramrc | 3 +++ .github/workflows/ci.yaml | 8 ++++++++ README.md | 9 +++++++++ 3 files changed, 20 insertions(+) create mode 100644 .cramrc diff --git a/.cramrc b/.cramrc new file mode 100644 index 0000000..153d20f --- /dev/null +++ b/.cramrc @@ -0,0 +1,3 @@ +[cram] +shell = /bin/bash +indent = 2 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dcb3b89..bbf40f7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -11,3 +11,11 @@ jobs: steps: - uses: actions/checkout@v3 - uses: nextstrain/.github/actions/shellcheck@master + + cram: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + - run: pip install cram + - run: cram tests/ diff --git a/README.md b/README.md index 118fd97..533b39a 100644 --- a/README.md +++ b/README.md @@ -101,3 +101,12 @@ Potential augur curate scripts ## Software requirements Some scripts may require Bash ≥4. If you are running these scripts on macOS, the builtin Bash (`/bin/bash`) does not meet this requirement. You can install [Homebrew's Bash](https://formulae.brew.sh/formula/bash) which is more up to date. + +## Testing + +Most scripts are untested within this repo, relying on "testing in production". That is the only practical testing option for some scripts, such as the ones interacting with S3 and Slack. + +For more locally testable scripts, Cram-style functional tests live in `tests` and are run as part of CI. To run these locally: + +1. Install Cram: `pip install cram` +2. 
Run the tests: `cram tests/` From 86be613009e148227273fabe2e9a2386de37ed35 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Wed, 30 Aug 2023 15:09:22 -0700 Subject: [PATCH 4/5] fetch-from-ncbi-virus: Add tests to show current behavior --- .../fetch-from-ncbi-virus/filter-and-fields.t | 20 +++++++++++++++++++ .../fetch-from-ncbi-virus/invalid-taxon-id.t | 4 ++++ 2 files changed, 24 insertions(+) create mode 100644 tests/fetch-from-ncbi-virus/filter-and-fields.t create mode 100644 tests/fetch-from-ncbi-virus/invalid-taxon-id.t diff --git a/tests/fetch-from-ncbi-virus/filter-and-fields.t b/tests/fetch-from-ncbi-virus/filter-and-fields.t new file mode 100644 index 0000000..30e8695 --- /dev/null +++ b/tests/fetch-from-ncbi-virus/filter-and-fields.t @@ -0,0 +1,20 @@ +Get the virus lineage IDs for 4 early Dengue sequences, testing the options --filter and --field. + + $ $TESTDIR/../../fetch-from-ncbi-virus \ + > --filter='CreateDate_dt:([1987-11-29T00:00:00Z TO 1987-11-29T00:00:01Z])' \ + > --field='viruslineage_ids:VirusLineageId_ss' \ + > 12637 nextstrain/ingest + {"genbank_accession":"X05375","genbank_accession_rev":"X05375.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for envelope protein E N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., 
Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"GTAACTTATGGGACGTGTACCACCACAGGAGAACACAGAAGAGAAAAAAGATCAGTGGCACTCGTTCCACATGTGGGAATGGGACTGGAGACACGAACTGAAACATGGATGTCATCAGAAGGGGCCTGGAAACATGCCCAGAGAATTGAAACTTGGATCTTGAGACATCCAGGCTTTACCATAATGGCAGCAATCCTGGCATACACCATAGGAACGACACATTTCCAAAGAGCCCTGATTTTCATCTTACTGACAGCTGTCGCTCCTTCAATGACAATGCGTTGCATAGGAATATCAAATAGAGACTTTGTAGAAGGGGTTTCAGGAGGAAGCTGGGTTGACATAGTCTTAGAACATGGA","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"} + {"genbank_accession":"X05376","genbank_accession_rev":"X05376.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS1 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"ACAACAATGAGGGGAGCGAAGAGAATGGCCATTTTAGGTGACACAGCTTGGGATTTTGGATCCCTGGGAGGAGTGTTTACATCTATAGGAAAGGCTCTCCACCAAGTTTTCGGAGCAATCTATGGGGCTGCCTTCAGTGGGGTCTCATGGACTATGAAAATCCTCATAGGAGTCATTATCACATGGATAGGAATGAATTCACGCAGCACCTCACTTTCTGTGTCACTAGTATTGGTGGGAGTCGTGACGCTGTATTTGGGAGTTATGGTGCAGGCCGATAGTGGTTGCGTTGTGAGCTGGAAAAACAAAGAACTGAAGTGTGGCAGTGGGATTTTCATCACAGACAACGTGCACACATGG","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"} + {"genbank_accession":"X05377","genbank_accession_rev":"X05377.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS3 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., 
Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"CTCACTGTGTGCTACGTGCTCACTGGACGATCGGCCGATTTGGAACTGGAGAGAGCCGCCGATGTCAAATGGGAAGATCAGGCAGAGATATCAGGAAGCAGTCCAATCCTGTCAATAACAATATCAGAAGATGGTAGCATGTCGATAAAAAACGAAGAGGAAGAACAAACACTGACCATACTCATTAGAACAGGATTGCTGGTGATCTCAGGACTTTTTCCTGTATCAATACCAATCACGGCAGCAGCATGGTACCTGTGGGAAGTGAAGAAACAACGGGCTGGAGTATTGTGGGATGTCCCTTCACCCCCACCCGTGGGAAAGGCTGAACTGGAAGATGGAGCCTATAGAATCAAGCAA","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"} + {"genbank_accession":"X05378","genbank_accession_rev":"X05378.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS5 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"GATCCAATACCCTATGATCCAAAGTTTGAAAAGCAGTTGGGACAAGTAATGCTCCTAGTCCTCTGCGGGACTCAAGTGTTGATGATGAGGACTACATGGGCTCTGTGTGAGGCTTTAACCTTAGCGACCGGGCCTATCTCCACATTGTGGGAAGGAAATCCAGGGAGGTTTTGGAACACTACCATTGCAGTGTCAATGGCTAACATTTTTAGAGGGAGTTACTTGGCCGGAGCTGGACTTCTCTTTTCCATCATGAAGAACACAACCAACACGAGAAGGGGAACTGGCAACATAGGAGAGACGCTTGGAGAGAAATGGAAAAGCCGATTGAACGCATTGGGGAAAAGTGAATTCCAGATC","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"} + +Do the same but without --field. 
+ + $ $TESTDIR/../../fetch-from-ncbi-virus \ + > --filter='CreateDate_dt:([1987-11-29T00:00:00Z TO 1987-11-29T00:00:01Z])' \ + > 12637 nextstrain/ingest + {"genbank_accession":"X05375","genbank_accession_rev":"X05375.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for envelope protein E N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"GTAACTTATGGGACGTGTACCACCACAGGAGAACACAGAAGAGAAAAAAGATCAGTGGCACTCGTTCCACATGTGGGAATGGGACTGGAGACACGAACTGAAACATGGATGTCATCAGAAGGGGCCTGGAAACATGCCCAGAGAATTGAAACTTGGATCTTGAGACATCCAGGCTTTACCATAATGGCAGCAATCCTGGCATACACCATAGGAACGACACATTTCCAAAGAGCCCTGATTTTCATCTTACTGACAGCTGTCGCTCCTTCAATGACAATGCGTTGCATAGGAATATCAAATAGAGACTTTGTAGAAGGGGTTTCAGGAGGAAGCTGGGTTGACATAGTCTTAGAACATGGA"} + {"genbank_accession":"X05376","genbank_accession_rev":"X05376.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS1 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"ACAACAATGAGGGGAGCGAAGAGAATGGCCATTTTAGGTGACACAGCTTGGGATTTTGGATCCCTGGGAGGAGTGTTTACATCTATAGGAAAGGCTCTCCACCAAGTTTTCGGAGCAATCTATGGGGCTGCCTTCAGTGGGGTCTCATGGACTATGAAAATCCTCATAGGAGTCATTATCACATGGATAGGAATGAATTCACGCAGCACCTCACTTTCTGTGTCACTAGTATTGGTGGGAGTCGTGACGCTGTATTTGGGAGTTATGGTGCAGGCCGATAGTGGTTGCGTTGTGAGCTGGAAAAACAAAGAACTGAAGTGTGGCAGTGGGATTTTCATCACAGACAACGTGCACACATGG"} + 
{"genbank_accession":"X05377","genbank_accession_rev":"X05377.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS3 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"CTCACTGTGTGCTACGTGCTCACTGGACGATCGGCCGATTTGGAACTGGAGAGAGCCGCCGATGTCAAATGGGAAGATCAGGCAGAGATATCAGGAAGCAGTCCAATCCTGTCAATAACAATATCAGAAGATGGTAGCATGTCGATAAAAAACGAAGAGGAAGAACAAACACTGACCATACTCATTAGAACAGGATTGCTGGTGATCTCAGGACTTTTTCCTGTATCAATACCAATCACGGCAGCAGCATGGTACCTGTGGGAAGTGAAGAAACAACGGGCTGGAGTATTGTGGGATGTCCCTTCACCCCCACCCGTGGGAAAGGCTGAACTGGAAGATGGAGCCTATAGAATCAAGCAA"} + {"genbank_accession":"X05378","genbank_accession_rev":"X05378.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS5 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"GATCCAATACCCTATGATCCAAAGTTTGAAAAGCAGTTGGGACAAGTAATGCTCCTAGTCCTCTGCGGGACTCAAGTGTTGATGATGAGGACTACATGGGCTCTGTGTGAGGCTTTAACCTTAGCGACCGGGCCTATCTCCACATTGTGGGAAGGAAATCCAGGGAGGTTTTGGAACACTACCATTGCAGTGTCAATGGCTAACATTTTTAGAGGGAGTTACTTGGCCGGAGCTGGACTTCTCTTTTCCATCATGAAGAACACAACCAACACGAGAAGGGGAACTGGCAACATAGGAGAGACGCTTGGAGAGAAATGGAAAAGCCGATTGAACGCATTGGGGAAAAGTGAATTCCAGATC"} diff --git a/tests/fetch-from-ncbi-virus/invalid-taxon-id.t b/tests/fetch-from-ncbi-virus/invalid-taxon-id.t new file mode 100644 index 0000000..7a0d522 --- /dev/null +++ 
b/tests/fetch-from-ncbi-virus/invalid-taxon-id.t @@ -0,0 +1,4 @@ +Fetch from an invalid Taxon ID without any additional options. +This should neither error nor return any output. + + $ $TESTDIR/../../fetch-from-ncbi-virus INVALID_TAXID nextstrain/ingest From 966ebcf0e9afc88674999bb5bb075c9ea6176ff3 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Wed, 30 Aug 2023 15:10:00 -0700 Subject: [PATCH 5/5] fetch-from-ncbi-virus: Pass options directly to ncbi-virus-url MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify the bash script by directly passing these options to the script that uses them. This requires changing the order so that required arguments are specified before options. This also removes reliance on the Bash ≥4 feature that allows unset arrays to be accessed by [@] with -u. --- fetch-from-ncbi-virus | 30 +++---------------- .../fetch-from-ncbi-virus/filter-and-fields.t | 12 ++++---- 2 files changed, 9 insertions(+), 33 deletions(-) diff --git a/fetch-from-ncbi-virus b/fetch-from-ncbi-virus index 06f7d69..39733e6 100755 --- a/fetch-from-ncbi-virus +++ b/fetch-from-ncbi-virus @@ -1,16 +1,10 @@ #!/usr/bin/env bash -# usage: fetch-from-ncbi-virus [options] <ncbi_taxon_id> <github_repo> +# usage: fetch-from-ncbi-virus <ncbi_taxon_id> <github_repo> [options] # # Fetch metadata and nucleotide sequences from [NCBI Virus](https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/) # and output NDJSON records to stdout. # -# options: -# -# --filter=<filter> Filter criteria to add as `fq` param values for the NCBI Virus URL -# May be specified multiple times. -# -# --field=<name>:<ncbi_field> Metadata fields to add as `fl` param values for the NCBI Virus URL -# May be specified multiple times. +# [options] are passed directly to ncbi-virus-url. See that script for usage details. 
# # Originally copied from "bin/fetch-from-genbank" in nextstrain/ncov-ingest: # https://github.com/nextstrain/ncov-ingest/blob/2a5f255329ee5bdf0cabc8b8827a700c92becbe4/bin/fetch-from-genbank @@ -21,27 +15,11 @@ bin="$(dirname "$0")" main() { - declare -a filters - declare -a fields - - for arg; do - case "$arg" in - --filter=*) - filters+=("${arg#*=}") - shift;; - --field=*) - fields+=("${arg#*=}") - shift;; - *) - break;; - esac - done - local ncbi_taxon_id="${1:?NCBI taxon id is required.}" local github_repo="${2:?A GitHub repository with owner and repository name is required as the second argument}" - local ncbi_virus_url - ncbi_virus_url="$("$bin"/ncbi-virus-url --ncbi-taxon-id "$ncbi_taxon_id" --filters "${filters[@]}" --fields "${fields[@]}")" + # "${@:3}" represents all other options, if any. + ncbi_virus_url="$("$bin"/ncbi-virus-url --ncbi-taxon-id "$ncbi_taxon_id" "${@:3}")" fetch "$ncbi_virus_url" "$github_repo" | "$bin"/csv-to-ndjson } diff --git a/tests/fetch-from-ncbi-virus/filter-and-fields.t b/tests/fetch-from-ncbi-virus/filter-and-fields.t index 30e8695..2fd7020 100644 --- a/tests/fetch-from-ncbi-virus/filter-and-fields.t +++ b/tests/fetch-from-ncbi-virus/filter-and-fields.t @@ -1,9 +1,8 @@ Get the virus lineage IDs for 4 early Dengue sequences, testing the options --filter and --field. 
- $ $TESTDIR/../../fetch-from-ncbi-virus \ - > --filter='CreateDate_dt:([1987-11-29T00:00:00Z TO 1987-11-29T00:00:01Z])' \ - > --field='viruslineage_ids:VirusLineageId_ss' \ - > 12637 nextstrain/ingest + $ $TESTDIR/../../fetch-from-ncbi-virus 12637 nextstrain/ingest \ + > --filters 'CreateDate_dt:([1987-11-29T00:00:00Z TO 1987-11-29T00:00:01Z])' \ + > --fields 'viruslineage_ids:VirusLineageId_ss' {"genbank_accession":"X05375","genbank_accession_rev":"X05375.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for envelope protein E N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"GTAACTTATGGGACGTGTACCACCACAGGAGAACACAGAAGAGAAAAAAGATCAGTGGCACTCGTTCCACATGTGGGAATGGGACTGGAGACACGAACTGAAACATGGATGTCATCAGAAGGGGCCTGGAAACATGCCCAGAGAATTGAAACTTGGATCTTGAGACATCCAGGCTTTACCATAATGGCAGCAATCCTGGCATACACCATAGGAACGACACATTTCCAAAGAGCCCTGATTTTCATCTTACTGACAGCTGTCGCTCCTTCAATGACAATGCGTTGCATAGGAATATCAAATAGAGACTTTGTAGAAGGGGTTTCAGGAGGAAGCTGGGTTGACATAGTCTTAGAACATGGA","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"} {"genbank_accession":"X05376","genbank_accession_rev":"X05376.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS1 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., 
Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"ACAACAATGAGGGGAGCGAAGAGAATGGCCATTTTAGGTGACACAGCTTGGGATTTTGGATCCCTGGGAGGAGTGTTTACATCTATAGGAAAGGCTCTCCACCAAGTTTTCGGAGCAATCTATGGGGCTGCCTTCAGTGGGGTCTCATGGACTATGAAAATCCTCATAGGAGTCATTATCACATGGATAGGAATGAATTCACGCAGCACCTCACTTTCTGTGTCACTAGTATTGGTGGGAGTCGTGACGCTGTATTTGGGAGTTATGGTGCAGGCCGATAGTGGTTGCGTTGTGAGCTGGAAAAACAAAGAACTGAAGTGTGGCAGTGGGATTTTCATCACAGACAACGTGCACACATGG","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"} {"genbank_accession":"X05377","genbank_accession_rev":"X05377.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS3 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"CTCACTGTGTGCTACGTGCTCACTGGACGATCGGCCGATTTGGAACTGGAGAGAGCCGCCGATGTCAAATGGGAAGATCAGGCAGAGATATCAGGAAGCAGTCCAATCCTGTCAATAACAATATCAGAAGATGGTAGCATGTCGATAAAAAACGAAGAGGAAGAACAAACACTGACCATACTCATTAGAACAGGATTGCTGGTGATCTCAGGACTTTTTCCTGTATCAATACCAATCACGGCAGCAGCATGGTACCTGTGGGAAGTGAAGAAACAACGGGCTGGAGTATTGTGGGATGTCCCTTCACCCCCACCCGTGGGAAAGGCTGAACTGGAAGATGGAGCCTATAGAATCAAGCAA","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"} @@ -11,9 +10,8 @@ Get the virus lineage IDs for 4 early Dengue sequences, testing the options --fi Do the same but without --field. 
- $ $TESTDIR/../../fetch-from-ncbi-virus \ - > --filter='CreateDate_dt:([1987-11-29T00:00:00Z TO 1987-11-29T00:00:01Z])' \ - > 12637 nextstrain/ingest + $ $TESTDIR/../../fetch-from-ncbi-virus 12637 nextstrain/ingest \ + > --filters 'CreateDate_dt:([1987-11-29T00:00:00Z TO 1987-11-29T00:00:01Z])' {"genbank_accession":"X05375","genbank_accession_rev":"X05375.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for envelope protein E N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"GTAACTTATGGGACGTGTACCACCACAGGAGAACACAGAAGAGAAAAAAGATCAGTGGCACTCGTTCCACATGTGGGAATGGGACTGGAGACACGAACTGAAACATGGATGTCATCAGAAGGGGCCTGGAAACATGCCCAGAGAATTGAAACTTGGATCTTGAGACATCCAGGCTTTACCATAATGGCAGCAATCCTGGCATACACCATAGGAACGACACATTTCCAAAGAGCCCTGATTTTCATCTTACTGACAGCTGTCGCTCCTTCAATGACAATGCGTTGCATAGGAATATCAAATAGAGACTTTGTAGAAGGGGTTTCAGGAGGAAGCTGGGTTGACATAGTCTTAGAACATGGA"} {"genbank_accession":"X05376","genbank_accession_rev":"X05376.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS1 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., 
Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"ACAACAATGAGGGGAGCGAAGAGAATGGCCATTTTAGGTGACACAGCTTGGGATTTTGGATCCCTGGGAGGAGTGTTTACATCTATAGGAAAGGCTCTCCACCAAGTTTTCGGAGCAATCTATGGGGCTGCCTTCAGTGGGGTCTCATGGACTATGAAAATCCTCATAGGAGTCATTATCACATGGATAGGAATGAATTCACGCAGCACCTCACTTTCTGTGTCACTAGTATTGGTGGGAGTCGTGACGCTGTATTTGGGAGTTATGGTGCAGGCCGATAGTGGTTGCGTTGTGAGCTGGAAAAACAAAGAACTGAAGTGTGGCAGTGGGATTTTCATCACAGACAACGTGCACACATGG"} {"genbank_accession":"X05377","genbank_accession_rev":"X05377.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS3 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"CTCACTGTGTGCTACGTGCTCACTGGACGATCGGCCGATTTGGAACTGGAGAGAGCCGCCGATGTCAAATGGGAAGATCAGGCAGAGATATCAGGAAGCAGTCCAATCCTGTCAATAACAATATCAGAAGATGGTAGCATGTCGATAAAAAACGAAGAGGAAGAACAAACACTGACCATACTCATTAGAACAGGATTGCTGGTGATCTCAGGACTTTTTCCTGTATCAATACCAATCACGGCAGCAGCATGGTACCTGTGGGAAGTGAAGAAACAACGGGCTGGAGTATTGTGGGATGTCCCTTCACCCCCACCCGTGGGAAAGGCTGAACTGGAAGATGGAGCCTATAGAATCAAGCAA"}