diff --git a/CHANGES.md b/CHANGES.md index 215b236a..5424a2f1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,15 @@ development source code and as such may not be routinely kept up to date. # __NEXT__ +## Improvements + +* Files downloaded by Snakemake's storage support (stored in + `.snakemake/storage/`) are now downloaded from AWS Batch builds by default. + + The runtime image used must be at least `nextstrain/base:build-20250721T201347Z` + for these Snakemake storage files to be available for download from the AWS + Batch job. + ([#460](https://github.com/nextstrain/cli/pull/460)) # 10.2.1.post1 (1 July 2025) diff --git a/doc/changes.md b/doc/changes.md index 9ba2d8ad..74d99443 100644 --- a/doc/changes.md +++ b/doc/changes.md @@ -16,6 +16,16 @@ development source code and as such may not be routinely kept up to date. (v-next)= ## __NEXT__ +(v-next-improvements)= +### Improvements + +* Files downloaded by Snakemake's storage support (stored in + `.snakemake/storage/`) are now downloaded from AWS Batch builds by default. + + The runtime image used must be at least `nextstrain/base:build-20250721T201347Z` + for these Snakemake storage files to be available for download from the AWS + Batch job. + ([#460](https://github.com/nextstrain/cli/pull/460)) (v10-2-1-post1)= ## 10.2.1.post1 (1 July 2025) diff --git a/nextstrain/cli/command/build.py b/nextstrain/cli/command/build.py index 5e15b178..0aa35d7a 100644 --- a/nextstrain/cli/command/build.py +++ b/nextstrain/cli/command/build.py @@ -297,6 +297,14 @@ def run(opts): based on its --memory option. This may or may not be what you expect. """ % (snakemake_opts["--resources"][0],))) + if opts.__runner__ is runner.aws_batch and snakemake_opts["--local-storage-prefix"]: + warn(dedent(""" + Warning: The explicit %s option passed to Snakemake overrides the + default prefix (.snakemake/storage/) expected for the AWS Batch + runtime. 
This may prevent you from downloading the storage files + that were downloaded during the remote build. + """ % (snakemake_opts["--local-storage-prefix"][0],))) + return runner.run(opts, working_volume = working_volume, cpus = opts.cpus, memory = opts.memory) @@ -441,19 +449,22 @@ def parse_snakemake_args(args): their presence or absence in our invocation. >>> sorted(parse_snakemake_args(["--cores"]).items()) - [('--cores', ['--cores']), ('--resources', [])] + [('--cores', ['--cores']), ('--local-storage-prefix', []), ('--resources', [])] + + >>> sorted(parse_snakemake_args(["--local-storage-prefix=.snakemake/foo"]).items()) + [('--cores', []), ('--local-storage-prefix', ['--local-storage-prefix']), ('--resources', [])] >>> sorted(parse_snakemake_args(["--resources=mem_mb=100"]).items()) - [('--cores', []), ('--resources', ['--resources'])] + [('--cores', []), ('--local-storage-prefix', []), ('--resources', ['--resources'])] >>> sorted(parse_snakemake_args(["-j", "8", "--res", "mem_mb=100"]).items()) - [('--cores', ['-j']), ('--resources', ['--res'])] + [('--cores', ['-j']), ('--local-storage-prefix', []), ('--resources', ['--res'])] >>> sorted(parse_snakemake_args(["-j8"]).items()) - [('--cores', ['-j']), ('--resources', [])] + [('--cores', ['-j']), ('--local-storage-prefix', []), ('--resources', [])] >>> sorted(parse_snakemake_args([]).items()) - [('--cores', []), ('--resources', [])] + [('--cores', []), ('--local-storage-prefix', []), ('--resources', [])] """ opts = { "-j" if re.search(r"^-j\d+$", arg) else arg @@ -480,7 +491,25 @@ def parse_snakemake_args(args): "--res", # documented } + storage_prefix = { + "--local-storage-prefix", # documented + "--local-storage-prefi", + "--local-storage-pref", + "--local-storage-pre", + "--local-storage-pr", + "--local-storage-p", + "--local-storage-", + "--local-storage", + "--local-storag", + "--local-stora", + "--local-stor", + "--local-sto", + "--local-st", + "--local-s", + } + return { "--cores": list(cores & opts), 
"--resources": list(resources & opts), + "--local-storage-prefix": list(storage_prefix & opts), } diff --git a/nextstrain/cli/runner/aws_batch/s3.py b/nextstrain/cli/runner/aws_batch/s3.py index 1a084bcd..b06303e8 100644 --- a/nextstrain/cli/runner/aws_batch/s3.py +++ b/nextstrain/cli/runner/aws_batch/s3.py @@ -141,8 +141,21 @@ def download_workdir(remote_workdir: S3Object, workdir: Path, patterns: List[str ".snakemake/log/", # …and the input/output metadata Snakemake tracks (akin to mtimes, - # which we also preserve). + # which we also preserve)… ".snakemake/metadata/", + + # …and the remote files downloaded via Snakemake's storage support. + # Note this is the default path used by Snakemake, but the storage path + # is configurable via `--local-storage-prefix`.¹ So if someone configures + # the storage path to a custom path within `.snakemake`, e.g. `.snakemake/foo`, + # then it would not be available in their downloaded workdir. + # I'm not even sure it's possible to configure the path in entrypoint-aws-batch + # for the docker-base image, so I've added a warning against using the + # Snakemake option when using aws-batch runtimes. + # -Jover, 18 July 2025 + # + # ¹ + ".snakemake/storage/", ]) if patterns: