From 9f7783d02d8650db113521ccf87e8246a335396c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 12:53:09 +0100 Subject: [PATCH 01/60] ignore s3 exploratory tests in normal runs --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 8bbd9484..728ce2d3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,6 +2,7 @@ addopts = # --doctest-modules --ignore=old_code/ + --ignore=tests/s3_exploratory --cov=activestorage --cov-report=xml:test-reports/coverage.xml --cov-report=html:test-reports/coverage_html From 8069cef11514cfccb50b895b002c19cbdc51e5a3 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 12:53:35 +0100 Subject: [PATCH 02/60] add s3 test run GA workflow --- .github/workflows/test_s3_minio.yml | 68 +++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 .github/workflows/test_s3_minio.yml diff --git a/.github/workflows/test_s3_minio.yml b/.github/workflows/test_s3_minio.yml new file mode 100644 index 00000000..33002f5b --- /dev/null +++ b/.github/workflows/test_s3_minio.yml @@ -0,0 +1,68 @@ +# adapted GA workflow from https://github.com/stackhpc/s3-active-storage-rs +--- +name: S3/Minio Exploratory Test + +on: + push: + branches: + - main + - add_s3_tests + schedule: + - cron: '0 0 * * *' # nightly + +jobs: + linux-test: + runs-on: "ubuntu-latest" + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11"] + fail-fast: false + name: Linux Python ${{ matrix.python-version }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python-version }} + miniforge-version: "latest" + miniforge-variant: Mambaforge + use-mamba: true + - shell: bash -l {0} + run: conda --version + - shell: bash -l {0} + run: python -V + - name: Export proxy + run: | + echo 'PROXY_URL = "http://localhost:8080"' >> config.py + - name: Start minio object storage + run: tests/s3_exploratory/minio-start + - name: Wait for minio object storage to start + run: | + until curl -if http://localhost:9001; do + sleep 1; + done + #- name: Run container + # run: docker run -it --detach --rm --net=host --name s3-active-storage ghcr.io/stackhpc/s3-active-storage-rs:latest + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: activestorage-minio + environment-file: environment.yml + python-version: ${{ matrix.python-version }} + miniforge-version: "latest" + miniforge-variant: Mambaforge + use-mamba: true + - name: Install PyActiveStorage and run tests + shell: bash -l {0} + run: | + conda --version + python -V + which python + pip install -e . + pytest tests/s3_exploratory/test_s3_reduction.py + - name: Stop minio object storage + run: tests/s3_exploratory/minio-stop + if: always() From d854997b92f8994022de493d49aae4356213f85b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 12:54:20 +0100 Subject: [PATCH 03/60] change GA test feature branch --- .github/workflows/test_s3_minio.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_s3_minio.yml b/.github/workflows/test_s3_minio.yml index 33002f5b..b59ab5e5 100644 --- a/.github/workflows/test_s3_minio.yml +++ b/.github/workflows/test_s3_minio.yml @@ -6,7 +6,7 @@ on: push: branches: - main - - add_s3_tests + - real_world_s3_tests schedule: - cron: '0 0 * * *' # nightly From ea2cf98df272ae3e66cc2a9e26b6997eb1877e19 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 12:54:41 +0100 Subject: [PATCH 04/60] add s3 exploratory test repo --- tests/s3_exploratory/config_minio.py | 19 ++++++++ .../s3_exploratory/minio_scripts/minio-start | 4 ++ tests/s3_exploratory/minio_scripts/minio-stop | 3 ++ tests/s3_exploratory/test_s3_reduction.py | 43 +++++++++++++++++++ 4 files changed, 69 insertions(+) create mode 100644 tests/s3_exploratory/config_minio.py create mode 100755 tests/s3_exploratory/minio_scripts/minio-start create mode 100755 tests/s3_exploratory/minio_scripts/minio-stop create mode 100644 tests/s3_exploratory/test_s3_reduction.py diff --git a/tests/s3_exploratory/config_minio.py b/tests/s3_exploratory/config_minio.py new file mode 100644 index 00000000..9a004aba --- /dev/null +++ b/tests/s3_exploratory/config_minio.py @@ -0,0 +1,19 @@ +# This file contains configuration for PyActiveStorage. + +# Force True for S3 exploratory tests +USE_S3 = True + +# URL of S3 Active Storage server. +S3_ACTIVE_STORAGE_URL = "http://localhost:8080" + +# URL of S3 object store. +S3_URL = "http://localhost:9000" + +# S3 access key / username. +S3_ACCESS_KEY = "minioadmin" + +# S3 secret key / password. +S3_SECRET_KEY = "minioadmin" + +# S3 bucket. +S3_BUCKET = "pyactivestorage" diff --git a/tests/s3_exploratory/minio_scripts/minio-start b/tests/s3_exploratory/minio_scripts/minio-start new file mode 100755 index 00000000..16aecaa4 --- /dev/null +++ b/tests/s3_exploratory/minio_scripts/minio-start @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +# Use anon storage volume so that test data is removed when container is stopped +exec docker run --detach --rm -p 9000:9000 -p 9001:9001 -v :/data --name minio minio/minio server data --console-address ":9001" diff --git a/tests/s3_exploratory/minio_scripts/minio-stop b/tests/s3_exploratory/minio_scripts/minio-stop new file mode 100755 index 00000000..663b8b4e --- /dev/null +++ b/tests/s3_exploratory/minio_scripts/minio-stop @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +exec docker stop minio diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py new file mode 100644 index 00000000..5d8a6dcc --- /dev/null +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -0,0 +1,43 @@ +import os +import numpy as np +import pytest +import s3fs + +import activestorage.storage as st +from activestorage.s3 import reduce_chunk as s3_reduce_chunk + +from config_minio import * + + +def upload_to_s3(server, username, password, bucket, object, rfile): + """Upload a file to an S3 object store.""" + s3_fs = s3fs.S3FileSystem(key=username, secret=password, client_kwargs={'endpoint_url': server}) + # Make sure s3 bucket exists + try: + s3_fs.mkdir(bucket) + except FileExistsError: + pass + + s3_fs.put_file(rfile, os.path.join(bucket, object)) + + +def test_s3_reduce_chunk(): + """Unit test for s3_reduce_chunk.""" + rfile = "tests/test_data/cesm2_native.nc" + offset = 2 + size = 128 + object = os.path.basename(rfile) + + # create bucket and upload to Minio's S3 bucket + upload_to_s3(S3_URL, S3_ACCESS_KEY, S3_SECRET_KEY, + S3_BUCKET, object, rfile) + + tmp, count = s3_reduce_chunk(S3_ACTIVE_STORAGE_URL, S3_ACCESS_KEY, + S3_SECRET_KEY, S3_URL, S3_BUCKET, + object, offset, size, + None, None, [], + np.dtype("int32"), (32, ), + "C", [slice(0, 2, 1), ], + "min") + assert tmp == 134351386 + assert count == None From e84f21b05979abf32e05282bad2847132db10766 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 12:59:16 +0100 Subject: [PATCH 05/60] correct path to minio scripts --- .github/workflows/test_s3_minio.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_s3_minio.yml b/.github/workflows/test_s3_minio.yml index b59ab5e5..d4e62ef6 100644 --- a/.github/workflows/test_s3_minio.yml +++ b/.github/workflows/test_s3_minio.yml @@ -36,7 +36,7 @@ jobs: run: | echo 'PROXY_URL = "http://localhost:8080"' >> config.py - name: Start minio object storage - run: tests/s3_exploratory/minio-start + run: tests/s3_exploratory/minio_scripts/minio-start - name: Wait for minio object storage to start run: | until curl -if http://localhost:9001; do @@ -64,5 +64,5 @@ jobs: pip install -e . pytest tests/s3_exploratory/test_s3_reduction.py - name: Stop minio object storage - run: tests/s3_exploratory/minio-stop + run: tests/s3_exploratory/minio_scripts/minio-stop if: always() From fc5766a630857ec65d576c0766d4796b44470445 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 13:04:50 +0100 Subject: [PATCH 06/60] run s3 AS container --- .github/workflows/test_s3_minio.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_s3_minio.yml b/.github/workflows/test_s3_minio.yml index d4e62ef6..0e805b04 100644 --- a/.github/workflows/test_s3_minio.yml +++ b/.github/workflows/test_s3_minio.yml @@ -42,8 +42,8 @@ jobs: until curl -if http://localhost:9001; do sleep 1; done - #- name: Run container - # run: docker run -it --detach --rm --net=host --name s3-active-storage ghcr.io/stackhpc/s3-active-storage-rs:latest + - name: Run S3ActiveStorage container + run: docker run -it --detach --rm --net=host --name s3-active-storage ghcr.io/stackhpc/s3-active-storage-rs:latest - uses: actions/checkout@v3 with: fetch-depth: 0 From 37ca643f08f646a0c835ecabed38ee18c66e0d19 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 13:31:00 +0100 Subject: [PATCH 07/60] run Active component too --- tests/s3_exploratory/test_s3_reduction.py | 57 +++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index 5d8a6dcc..bd868806 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -2,13 +2,35 @@ import numpy as np import pytest import s3fs +import tempfile +from activestorage.active import Active +from activestorage.dummy_data import make_vanilla_ncdata import activestorage.storage as st from activestorage.s3 import reduce_chunk as s3_reduce_chunk +from numpy.testing import assert_array_equal from config_minio import * +def make_tempfile(): + """Make dummy data.""" + temp_folder = tempfile.mkdtemp() + s3_testfile = os.path.join(temp_folder, + 's3_test_bizarre.nc') # Bryan likes this name + print(f"S3 Test file is {s3_testfile}") + if not os.path.exists(s3_testfile): + make_vanilla_ncdata(filename=s3_testfile) + + local_testfile = os.path.join(temp_folder, + 'local_test_bizarre.nc') # Bryan again + print(f"Local Test file is {local_testfile}") + if not os.path.exists(local_testfile): + make_vanilla_ncdata(filename=local_testfile) + + return s3_testfile, local_testfile + + def upload_to_s3(server, username, password, bucket, object, rfile): """Upload a file to an S3 object store.""" s3_fs = s3fs.S3FileSystem(key=username, secret=password, client_kwargs={'endpoint_url': server}) @@ -21,6 +43,36 @@ def upload_to_s3(server, username, password, bucket, object, rfile): s3_fs.put_file(rfile, os.path.join(bucket, object)) +def test_Active(): + """ + Shows what we expect an active example test to achieve and provides "the right answer" + """ + # make dummy data + s3_testfile, local_testfile = make_tempfile() + + # put s3 dummy data onto S3. then rm from local + object = os.path.basename(s3_testfile) + upload_to_s3(S3_URL, S3_ACCESS_KEY, S3_SECRET_KEY, + S3_BUCKET, object, s3_testfile) + os.remove(s3_testfile) + + # run Active on s3 file + # Active should be run with USE_S3 set to true here + active = Active(s3_testfile, "data") + active._version = 0 + d = active[0:2, 4:6, 7:9] + mean_result = np.mean(d) + + # run Active on local file + active = Active(local_testfile, "data") + active._version = 2 + active.method = "mean" + active.components = True + result2 = active[0:2, 4:6, 7:9] + print(result2) + assert_array_equal(mean_result, result2["sum"]/result2["n"]) + + def test_s3_reduce_chunk(): """Unit test for s3_reduce_chunk.""" rfile = "tests/test_data/cesm2_native.nc" @@ -32,6 +84,11 @@ def test_s3_reduce_chunk(): upload_to_s3(S3_URL, S3_ACCESS_KEY, S3_SECRET_KEY, S3_BUCKET, object, rfile) + # remove file during test session to be sure + # workflow uses uploaded file to S3 bucket + os.remove(rfile) + + # call s3_reduce_chunk tmp, count = s3_reduce_chunk(S3_ACTIVE_STORAGE_URL, S3_ACCESS_KEY, S3_SECRET_KEY, S3_URL, S3_BUCKET, object, offset, size, From cccc8d25f1d82d345b74671fb7bc025bc6e05e39 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 13:35:53 +0100 Subject: [PATCH 08/60] temp deactivate check on physical file in active class --- activestorage/active.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index 102d5f88..d9dce12b 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -48,8 +48,9 @@ def __init__(self, uri, ncvar, missing_value=None, _FillValue=None, valid_min=No self.uri = uri if self.uri is None: raise ValueError(f"Must use a valid file for uri. Got {self.uri}") - if not os.path.isfile(self.uri): - raise ValueError(f"Must use existing file for uri. {self.uri} not found") + # TODO this needs tweaking for S3 storage; S3 file is not an URI + # if not os.path.isfile(self.uri): + # raise ValueError(f"Must use existing file for uri. {self.uri} not found") self.ncvar = ncvar if self.ncvar is None: raise ValueError("Must set a netCDF variable name to slice") From d0b98051b2b8079e692c57b8da31bc576510f99e Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 14:14:51 +0100 Subject: [PATCH 09/60] shove in use s3 --- activestorage/active.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index d9dce12b..49f51021 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -37,7 +37,7 @@ def __new__(cls, *args, **kwargs): } return instance - def __init__(self, uri, ncvar, missing_value=None, _FillValue=None, valid_min=None, valid_max=None): + def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue=None, valid_min=None, valid_max=None): """ Instantiate with a NetCDF4 dataset and the variable of interest within that file. (We need the variable, because we need variable specific metadata from within that @@ -48,9 +48,11 @@ def __init__(self, uri, ncvar, missing_value=None, _FillValue=None, valid_min=No self.uri = uri if self.uri is None: raise ValueError(f"Must use a valid file for uri. Got {self.uri}") - # TODO this needs tweaking for S3 storage; S3 file is not an URI - # if not os.path.isfile(self.uri): - # raise ValueError(f"Must use existing file for uri. {self.uri} not found") + self.storage_type = storage_type + if self.storage_type == "s3": + USE_S3 = True + if not os.path.isfile(self.uri) and not self.storage_type: + raise ValueError(f"Must use existing file for uri. {self.uri} not found") self.ncvar = ncvar if self.ncvar is None: raise ValueError("Must set a netCDF variable name to slice") @@ -337,6 +339,10 @@ def _process_chunk(self, fsref, chunk_coords, chunk_selection, out, counts, key = f"{self.ncvar}/{coord}" rfile, offset, size = tuple(fsref[key]) + if self.storage_type == "s3": + USE_S3 = True + else: + USE_S3 = False if USE_S3: object = os.path.basename(rfile) tmp, count = s3_reduce_chunk(S3_ACTIVE_STORAGE_URL, S3_ACCESS_KEY, From 1c10fb86289f501ab2076056a2e785285639698b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 14:15:14 +0100 Subject: [PATCH 10/60] run with s3 file --- tests/s3_exploratory/test_s3_reduction.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index bd868806..9afe7f44 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -57,11 +57,12 @@ def test_Active(): os.remove(s3_testfile) # run Active on s3 file - # Active should be run with USE_S3 set to true here - active = Active(s3_testfile, "data") - active._version = 0 - d = active[0:2, 4:6, 7:9] - mean_result = np.mean(d) + active = Active(s3_testfile, "data", "s3") + active._version = 2 + active.method = "mean" + active.components = True + result1 = active[0:2, 4:6, 7:9] + print(result1) # run Active on local file active = Active(local_testfile, "data") @@ -70,7 +71,8 @@ def test_Active(): active.components = True result2 = active[0:2, 4:6, 7:9] print(result2) - assert_array_equal(mean_result, result2["sum"]/result2["n"]) + + assert_array_equal(result1, result2["sum"]/result2["n"]) def test_s3_reduce_chunk(): From 405b5ebc659311e9a0e341ca84911549444727c1 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 14:57:23 +0100 Subject: [PATCH 11/60] try load netCDF from s3 --- activestorage/active.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index 49f51021..a7162ca3 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -68,7 +68,11 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue # If the user actually wrote the data with no fill value, or the # default fill value is in play, then this might go wrong. if (missing_value, _FillValue, valid_min, valid_max) == (None, None, None, None): - ds = Dataset(uri) + if storage_type is None: + ds = Dataset(uri) + elif storage_type == "s3": + fs = fsspec.filesystem("s3", anon=True) + ds = Dataset(fs.open(uri)) try: ds_var = ds[ncvar] except IndexError as exc: From 60e1cb3042087ec92e6de3b016ff4c221dd88ee3 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 14:57:36 +0100 Subject: [PATCH 12/60] pass correct s3 uri --- tests/s3_exploratory/test_s3_reduction.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index 9afe7f44..993314f3 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -42,6 +42,8 @@ def upload_to_s3(server, username, password, bucket, object, rfile): s3_fs.put_file(rfile, os.path.join(bucket, object)) + return os.path.join(bucket, object) + def test_Active(): """ @@ -52,12 +54,13 @@ def test_Active(): # put s3 dummy data onto S3. then rm from local object = os.path.basename(s3_testfile) - upload_to_s3(S3_URL, S3_ACCESS_KEY, S3_SECRET_KEY, - S3_BUCKET, object, s3_testfile) + bucket_file = upload_to_s3(S3_URL, S3_ACCESS_KEY, S3_SECRET_KEY, + S3_BUCKET, object, s3_testfile) os.remove(s3_testfile) + s3_testfile_uri = os.path.join("s3://", bucket_file) # run Active on s3 file - active = Active(s3_testfile, "data", "s3") + active = Active(s3_testfile_uri, "data", "s3") active._version = 2 active.method = "mean" active.components = True From bf280abc75088ca92ad9e0f0db1cda547af9ec5f Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 15:01:16 +0100 Subject: [PATCH 13/60] of course I forgot import --- activestorage/active.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/activestorage/active.py b/activestorage/active.py index a7162ca3..c553916c 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -2,6 +2,8 @@ import numpy as np import pathlib +import fsspec + #FIXME: Consider using h5py throughout, for more generality from netCDF4 import Dataset from zarr.indexing import ( From 68a77772453bf541840642fb4387d9902596b823 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 15:07:57 +0100 Subject: [PATCH 14/60] try without s3 prefit for 3 file path --- tests/s3_exploratory/test_s3_reduction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index 993314f3..b79ede9f 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -57,7 +57,7 @@ def test_Active(): bucket_file = upload_to_s3(S3_URL, S3_ACCESS_KEY, S3_SECRET_KEY, S3_BUCKET, object, s3_testfile) os.remove(s3_testfile) - s3_testfile_uri = os.path.join("s3://", bucket_file) + s3_testfile_uri = bucket_file # run Active on s3 file active = Active(s3_testfile_uri, "data", "s3") From 0fa23f25f94cfd138677e59c864ec72d0196e186 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 15:15:52 +0100 Subject: [PATCH 15/60] possible correct s3 path --- tests/s3_exploratory/test_s3_reduction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index b79ede9f..2421432f 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -57,7 +57,8 @@ def test_Active(): bucket_file = upload_to_s3(S3_URL, S3_ACCESS_KEY, S3_SECRET_KEY, S3_BUCKET, object, s3_testfile) os.remove(s3_testfile) - s3_testfile_uri = bucket_file + s3_testfile_uri = os.path.join(S3_URL, bucket_file) + print("S3 file uri", s3_testfile_uri) # run Active on s3 file active = Active(s3_testfile_uri, "data", "s3") From 66dce9271459f8a3f1ca92d1d996b31a3ef48358 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 15:21:03 +0100 Subject: [PATCH 16/60] possible correct s3 path --- tests/s3_exploratory/test_s3_reduction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index 2421432f..849db464 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -57,7 +57,7 @@ def test_Active(): bucket_file = upload_to_s3(S3_URL, S3_ACCESS_KEY, S3_SECRET_KEY, S3_BUCKET, object, s3_testfile) os.remove(s3_testfile) - s3_testfile_uri = os.path.join(S3_URL, bucket_file) + s3_testfile_uri = os.path.join("s3://localhost", bucket_file) print("S3 file uri", s3_testfile_uri) # run Active on s3 file From de1939d14d1ff28bd224ff5ed18717e43763fec0 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 15:29:42 +0100 Subject: [PATCH 17/60] try with context manager --- activestorage/active.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index c553916c..b7a243a0 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -74,7 +74,8 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue ds = Dataset(uri) elif storage_type == "s3": fs = fsspec.filesystem("s3", anon=True) - ds = Dataset(fs.open(uri)) + with fs.open(uri) as s3file: + ds = Dataset(s3file) try: ds_var = ds[ncvar] except IndexError as exc: From 6c78fcee9a45202515ba0d617df695d71ee10a61 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 15:58:11 +0100 Subject: [PATCH 18/60] use s3fs directly --- activestorage/active.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index b7a243a0..45a6c998 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -2,7 +2,7 @@ import numpy as np import pathlib -import fsspec +import s3fs #FIXME: Consider using h5py throughout, for more generality from netCDF4 import Dataset @@ -73,8 +73,8 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue if storage_type is None: ds = Dataset(uri) elif storage_type == "s3": - fs = fsspec.filesystem("s3", anon=True) - with fs.open(uri) as s3file: + fs = s3fs.S3FileSystem(key="minioadmin", secret="minioadmin") + with fs.open(uri, 'rb') as s3file: ds = Dataset(s3file) try: ds_var = ds[ncvar] From fee58b35f9819cc42d8210ff1d1cfa4aa5569d6c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 16:07:19 +0100 Subject: [PATCH 19/60] add client server arg --- activestorage/active.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index 45a6c998..e2062945 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -73,7 +73,9 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue if storage_type is None: ds = Dataset(uri) elif storage_type == "s3": - fs = s3fs.S3FileSystem(key="minioadmin", secret="minioadmin") + fs = s3fs.S3FileSystem(key="minioadmin", + secret="minioadmin", + client_kwargs={'endpoint_url': "http://localhost:9000"}) with fs.open(uri, 'rb') as s3file: ds = Dataset(s3file) try: From a1d3ac02d2bd0886f785380251190867b2dfe326 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 16:12:14 +0100 Subject: [PATCH 20/60] try without localhost --- tests/s3_exploratory/test_s3_reduction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index 849db464..6e96e790 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -57,7 +57,7 @@ def test_Active(): bucket_file = upload_to_s3(S3_URL, S3_ACCESS_KEY, S3_SECRET_KEY, S3_BUCKET, object, s3_testfile) os.remove(s3_testfile) - s3_testfile_uri = os.path.join("s3://localhost", bucket_file) + s3_testfile_uri = os.path.join("s3://", bucket_file) print("S3 file uri", s3_testfile_uri) # run Active on s3 file From f076c6c647bc1036832789e7b258b9ac4f1321c6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 16:45:30 +0100 Subject: [PATCH 21/60] add xarray for test --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 20d90712..fa039fb4 100644 --- a/environment.yml +++ b/environment.yml @@ -17,6 +17,7 @@ dependencies: # pin Zarr to avoid using old KVStore interface # see github.com/zarr-developers/zarr-python/issues/1362 - zarr >=2.13.6 # KVStore to FSStore + - xarray # for testing only # Python packages for testing - pytest - pytest-cov >=2.10.1 From 1efe1e77c3525459bdca7c8cb074f33a68b25780 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 16:45:35 +0100 Subject: [PATCH 22/60] add xarray for test --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 44c12b50..4bd6f969 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ 's3fs', # pin Zarr to use new FSStore instead of KVStore 'zarr>=2.13.3', # github.com/zarr-developers/zarr-python/issues/1362 + 'xarray', # for testing only # for testing 'pytest', 'pytest-cov>=2.10.1', From 6981740c882244ef632d8365a253adc434198af5 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 16:45:48 +0100 Subject: [PATCH 23/60] use xarray for test --- activestorage/active.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index e2062945..de79c5ec 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -3,6 +3,7 @@ import pathlib import s3fs +import xarray as xr #FIXME: Consider using h5py throughout, for more generality from netCDF4 import Dataset @@ -73,11 +74,16 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue if storage_type is None: ds = Dataset(uri) elif storage_type == "s3": + # correct settings for Minio; need be imported from config.py + # calling open returns a File-like object S3FileSystem fs = s3fs.S3FileSystem(key="minioadmin", secret="minioadmin", client_kwargs={'endpoint_url': "http://localhost:9000"}) with fs.open(uri, 'rb') as s3file: - ds = Dataset(s3file) + # this will throw a FileNotFoundError: [Errno 2] No such file or directory: '' + # ds = Dataset(s3file) + # try use xarray for now + ds = xr.open_dataset(s3file, engine='h5netcdf') try: ds_var = ds[ncvar] except IndexError as exc: From 482ffb5c2e1030cde686f4f75568c35ba5c5348d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 16:49:17 +0100 Subject: [PATCH 24/60] wrong engine --- activestorage/active.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index de79c5ec..f142b121 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -83,7 +83,7 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue # this will throw a FileNotFoundError: [Errno 2] No such file or directory: '' # ds = Dataset(s3file) # try use xarray for now - ds = xr.open_dataset(s3file, engine='h5netcdf') + ds = xr.open_dataset(s3file, engine='netcdf4') try: ds_var = ds[ncvar] except IndexError as exc: From f17d08f61ac90bb598b2f1890f938e26ae304d77 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 17:04:02 +0100 Subject: [PATCH 25/60] try mfdataset --- activestorage/active.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index f142b121..fd361a1b 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -83,7 +83,7 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue # this will throw a FileNotFoundError: [Errno 2] No such file or directory: '' # ds = Dataset(s3file) # try use xarray for now - ds = xr.open_dataset(s3file, engine='netcdf4') + ds = xr.open_mfdataset(s3file, engine='h5netcdf') try: ds_var = ds[ncvar] except IndexError as exc: From ab0674bf239976d219c924396146304cf066cc53 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 17:09:40 +0100 Subject: [PATCH 26/60] forget about xarray --- environment.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/environment.yml b/environment.yml index fa039fb4..20d90712 100644 --- a/environment.yml +++ b/environment.yml @@ -17,7 +17,6 @@ dependencies: # pin Zarr to avoid using old KVStore interface # see github.com/zarr-developers/zarr-python/issues/1362 - zarr >=2.13.6 # KVStore to FSStore - - xarray # for testing only # Python packages for testing - pytest - pytest-cov >=2.10.1 From 77bfef932a0a94882c14b1e0aa8cf02d2e0a6926 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 17:09:44 +0100 Subject: [PATCH 27/60] forget about xarray --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 4bd6f969..44c12b50 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,6 @@ 's3fs', # pin Zarr to use new FSStore instead of KVStore 'zarr>=2.13.3', # github.com/zarr-developers/zarr-python/issues/1362 - 'xarray', # for testing only # for testing 'pytest', 'pytest-cov>=2.10.1', From 5b4f64e59cefc34590eb17bc9c12205c4fa167f5 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 17:10:04 +0100 Subject: [PATCH 28/60] hat does that s3 file like look like --- activestorage/active.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index fd361a1b..003f4f73 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -3,7 +3,6 @@ import pathlib import s3fs -import xarray as xr #FIXME: Consider using h5py throughout, for more generality from netCDF4 import Dataset @@ -82,8 +81,9 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue with fs.open(uri, 'rb') as s3file: # this will throw a FileNotFoundError: [Errno 2] No such file or directory: '' # ds = Dataset(s3file) - # try use xarray for now - ds = xr.open_mfdataset(s3file, engine='h5netcdf') + print("S3 file looks like:") + print(s3file) + print(dir(s3file)) try: ds_var = ds[ncvar] except IndexError as exc: From 6003b74c69186d9a3d2bffda38677d7aa3f6ae94 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Jun 2023 17:22:10 +0100 Subject: [PATCH 29/60] how does that s3 file look like --- activestorage/active.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index 003f4f73..efe9cce9 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -82,8 +82,8 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue # this will throw a FileNotFoundError: [Errno 2] No such file or directory: '' # ds = Dataset(s3file) print("S3 file looks like:") - print(s3file) - print(dir(s3file)) + for l in s3file.readlines(): + print(l) try: ds_var = ds[ncvar] except IndexError as exc: From 71051e47c5c4b750a88f099d270154f2af788be0 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 12:32:24 +0100 Subject: [PATCH 30/60] asdd h5netcdf to env --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 20d90712..debd6755 100644 --- a/environment.yml +++ b/environment.yml @@ -8,6 +8,7 @@ dependencies: - python >=3.9 - dask - fsspec + - h5netcdf - h5py # needed by Kerchunk - kerchunk - netcdf4 From 624c849770835062f53a13cc1e9c1fc0a0753bca Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 12:32:30 +0100 Subject: [PATCH 31/60] asdd h5netcdf to env --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 44c12b50..bbc48d44 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ 'install': [ 'dask', 'fsspec', + 'h5netcdf', 'h5py', # needed by Kerchunk 'kerchunk', 'netcdf4', From 30c33d35d9a3d4a135d4f94017d8443743f9a539 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 12:32:42 +0100 Subject: [PATCH 32/60] try h5netcdf layer --- activestorage/active.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index efe9cce9..f6d44849 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -2,6 +2,7 @@ import numpy as np import pathlib +import h5netcdf import s3fs #FIXME: Consider using h5py throughout, for more generality @@ -81,9 +82,9 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue with fs.open(uri, 'rb') as s3file: # this will throw a FileNotFoundError: [Errno 2] No such file or directory: '' # ds = Dataset(s3file) - print("S3 file looks like:") - for l in s3file.readlines(): - print(l) + # instead, try h5netcdf + nc = h5netcdf.File(s3file,'r', invalid_netcdf=True) + ds = Dataset(nc) try: ds_var = ds[ncvar] except IndexError as exc: From 514b1b6e455fae5514b22f7a40c60bcc58e8691f Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 12:41:13 +0100 Subject: [PATCH 33/60] dont call netcdf4 --- activestorage/active.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index f6d44849..70ae683d 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -80,11 +80,15 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue secret="minioadmin", client_kwargs={'endpoint_url': "http://localhost:9000"}) with fs.open(uri, 'rb') as s3file: - # this will throw a FileNotFoundError: [Errno 2] No such file or directory: '' - # ds = Dataset(s3file) + # s3file is a File-like object: a memory view but wih all the metadata + # gubbins inside it (no data!) + # >> ds = Dataset(s3file) << + # calling netCDF4.Dataset will throw a FileNotFoundError: + # [Errno 2] No such file or directory: + # '' # instead, try h5netcdf - nc = h5netcdf.File(s3file,'r', invalid_netcdf=True) - ds = Dataset(nc) + ds = h5netcdf.File(s3file,'r', invalid_netcdf=True) + print(f"Dataset loaded from S3 via h5netcdf: {ds}") try: ds_var = ds[ncvar] except IndexError as exc: From 9dcb03d94d6534c40605a883017edd26b14d3530 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 12:50:25 +0100 Subject: [PATCH 34/60] skeip filers xtraction --- activestorage/active.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index 70ae683d..24ab7c61 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -87,7 +87,7 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue # [Errno 2] No such file or directory: # '' # instead, try h5netcdf - ds = h5netcdf.File(s3file,'r', invalid_netcdf=True) + ds = h5netcdf.File(s3file, 'r', invalid_netcdf=True) print(f"Dataset loaded from S3 via h5netcdf: {ds}") try: ds_var = ds[ncvar] @@ -95,7 +95,11 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue print(f"Dataset {ds} does not contain ncvar {ncvar!r}.") raise exc - self._filters = ds_var.filters() + try: + self._filters = ds_var.filters() + # ds from h5netcdf may not have _filters and other such metadata + except AttributeError: + self._filters = None self._missing = getattr(ds_var, 'missing_value', None) self._fillvalue = getattr(ds_var, '_FillValue', None) valid_min = getattr(ds_var, 'valid_min', None) From 1e0ee8d8aa8e9eac0e29e771c72d9eb1a044b16f Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 13:20:59 +0100 Subject: [PATCH 35/60] fix test --- tests/s3_exploratory/test_s3_reduction.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index 6e96e790..8d6d047e 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -62,9 +62,7 @@ def test_Active(): # run Active on s3 file active = Active(s3_testfile_uri, "data", "s3") - active._version = 2 active.method = "mean" - active.components = True result1 = active[0:2, 4:6, 7:9] print(result1) From 83e3e853d2dbd06c2ef60ea1141f991ce8a27ed3 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 13:58:48 +0100 Subject: [PATCH 36/60] tweak call --- activestorage/active.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index 24ab7c61..c418e8ef 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -256,7 +256,11 @@ def _via_kerchunk(self, index): """ # FIXME: Order of calls is hardcoded' if self.zds is None: - ds = nz.load_netcdf_zarr_generic(self.uri, self.ncvar) + print(f"Kerchunking file {self.uri} with variable " + f"{self.ncvar} for storage type {self.storage_type}") + ds = nz.load_netcdf_zarr_generic(self.uri, + self.ncvar, + self.storage_type) # The following is a hangove from exploration # and is needed if using the original doing it ourselves # self.zds = make_an_array_instance_active(ds) From 04ee2315bbe96a567661b994de0102a784846e9b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 13:59:02 +0100 Subject: [PATCH 37/60] switch to s3fs --- activestorage/netcdf_to_zarr.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/activestorage/netcdf_to_zarr.py b/activestorage/netcdf_to_zarr.py index 44f78c1c..16c7b073 100644 --- a/activestorage/netcdf_to_zarr.py +++ b/activestorage/netcdf_to_zarr.py @@ -3,6 +3,7 @@ import zarr import ujson import fsspec +import s3fs from kerchunk.hdf import SingleHdf5ToZarr @@ -61,13 +62,20 @@ def open_zarr_group(out_json, varname): return zarr_array -def load_netcdf_zarr_generic(fileloc, varname, build_dummy=True): +def load_netcdf_zarr_generic(fileloc, varname, storage_type, build_dummy=True): """Pass a netCDF4 file to be shaped as Zarr file by kerchunk.""" - so = dict(mode='rb', anon=True, default_fill_cache=False, - default_cache_type='first') # args to fs.open() - # default_fill_cache=False avoids caching data in between - # file chunks to lower memory usage - fs = fsspec.filesystem('') # local, for S3: ('s3', anon=True) + if storage_type not in [None, "s3"]: + so = dict(mode='rb', anon=True, default_fill_cache=False, + default_cache_type='first') # args to fs.open() + # default_fill_cache=False avoids caching data in between + # file chunks to lower memory usage + fs = fsspec.filesystem('') # local, for S3: ('s3', anon=True) + elif storage_type == "s3": + # TODO of course s3 connection params must be off the config + fs = s3fs.S3FileSystem(key="minioadmin", + secret="minioadmin", + client_kwargs={'endpoint_url': "http://localhost:9000"}) + so = None fs2 = fsspec.filesystem('') # local file system to save final json to out_json = gen_json(fileloc, fs, fs2, varname) From b8f276155dfa87a5e8155c45ac60c7718863f3de Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 14:13:02 +0100 Subject: [PATCH 38/60] give it a slash free name --- activestorage/netcdf_to_zarr.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/activestorage/netcdf_to_zarr.py b/activestorage/netcdf_to_zarr.py index 16c7b073..449cba44 100644 --- a/activestorage/netcdf_to_zarr.py +++ b/activestorage/netcdf_to_zarr.py @@ -12,6 +12,8 @@ def gen_json(file_url, fs, fs2, varname, **so): """Generate a json file that contains the kerchunk-ed data for Zarr.""" # set some name for the output json file fname = os.path.splitext(file_url)[0] + if os.sep in fname: + fname = os.path.basename(fname) outf = f'{fname}_{varname}.json' # vanilla file name # write it out if it's not there From 4af7c4c515804284138a46affef177788baa1f6c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 14:19:42 +0100 Subject: [PATCH 39/60] bleghh --- activestorage/netcdf_to_zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activestorage/netcdf_to_zarr.py b/activestorage/netcdf_to_zarr.py index 449cba44..6b25ccfd 100644 --- a/activestorage/netcdf_to_zarr.py +++ b/activestorage/netcdf_to_zarr.py @@ -66,7 +66,7 @@ def open_zarr_group(out_json, varname): def load_netcdf_zarr_generic(fileloc, varname, storage_type, build_dummy=True): """Pass a netCDF4 file to be shaped as Zarr file by kerchunk.""" - if storage_type not in [None, "s3"]: + if storage_type not in ["s3"]: so = dict(mode='rb', anon=True, default_fill_cache=False, default_cache_type='first') # args to fs.open() # default_fill_cache=False avoids caching data in between From c7b4fef851851d552813b6950d1abd3286d1c7c6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 15:38:24 +0100 Subject: [PATCH 40/60] pytested test --- tests/test_missing.py | 105 ++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 60 deletions(-) diff --git a/tests/test_missing.py b/tests/test_missing.py index 4c236698..a0a7a267 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -1,6 +1,7 @@ import os from this import d import numpy as np +import pytest import shutil import tempfile import unittest @@ -9,65 +10,49 @@ from activestorage import dummy_data as dd -class TestActive(unittest.TestCase): +def _doit(testfile): """ - Test basic functionality + Compare and contrast vanilla mean with actual means """ - - def setUp(self): - """ - Ensure there is test data - """ - self.temp_folder = tempfile.mkdtemp() - - def tearDown(self): - """Remove temp folder.""" - shutil.rmtree(self.temp_folder) - - - def _doit(self, testfile): - """ - Compare and contrast vanilla mean with actual means - """ - active = Active(testfile, "data") - active._version = 0 - d = active[0:2, 4:6, 7:9] - mean_result = np.mean(d) - - active = Active(testfile, "data") - active._version = 2 - active.method = "mean" - active.components = True - result2 = active[0:2, 4:6, 7:9] - self.assertEqual(mean_result, result2["sum"]/result2["n"]) - - - def test_partially_missing_data(self): - testfile = os.path.join(self.temp_folder, 'test_partially_missing_data.nc') - r = dd.make_partially_missing_ncdata(testfile) - self._doit(testfile) - - def test_missing(self): - testfile = os.path.join(self.temp_folder, 'test_missing.nc') - r = dd.make_partially_missing_ncdata(testfile) - self._doit(testfile) - - def test_fillvalue(self): - testfile = os.path.join(self.temp_folder, 'test_fillvalue.nc') - r = dd.make_fillvalue_ncdata(testfile) - self._doit(testfile) - - def test_validmin(self): - testfile = os.path.join(self.temp_folder, 'test_validmin.nc') - r = dd.make_validmin_ncdata(testfile) - self._doit(testfile) - - def test_validmax(self): - testfile = os.path.join(self.temp_folder, 'test_validmax.nc') - r = dd.make_validmax_ncdata(testfile) - self._doit(testfile) - - def test_validrange(self): - testfile = os.path.join(self.temp_folder, 'test_validrange.nc') - r = dd.make_validrange_ncdata(testfile) - self._doit(testfile) + active = Active(testfile, "data") + active._version = 0 + d = active[0:2, 4:6, 7:9] + mean_result = np.mean(d) + + active = Active(testfile, "data") + active._version = 2 + active.method = "mean" + active.components = True + result2 = active[0:2, 4:6, 7:9] + np.testing.assert_array_equal(mean_result, result2["sum"]/result2["n"]) + + +def test_partially_missing_data(tmp_path): + testfile = tmp_path / 'test_partially_missing_data.nc' + r = dd.make_partially_missing_ncdata(testfile) + _doit(testfile) + +def test_missing(tmp_path): + testfile = tmp_path / 'test_missing.nc' + r = dd.make_partially_missing_ncdata(testfile) + _doit(testfile) + +def test_fillvalue(tmp_path): + testfile = tmp_path / 'test_fillvalue.nc' + r = dd.make_fillvalue_ncdata(testfile) + _doit(testfile) + +def test_validmin(tmp_path): + testfile = tmp_path / 'test_validmin.nc' + r = dd.make_validmin_ncdata(testfile) + _doit(testfile) + +def test_validmax(tmp_path): + testfile = tmp_path / 'test_validmax.nc' + r = dd.make_validmax_ncdata(testfile) + _doit(testfile) + +def test_validrange(tmp_path): + testfile = tmp_path / 'test_validrange.nc' + r = dd.make_validrange_ncdata(testfile) + _doit(testfile) From f2a1478ffc5d2bae8ecf14fb85eb56955186402b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 15:41:09 +0100 Subject: [PATCH 41/60] refine a bit --- activestorage/netcdf_to_zarr.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/activestorage/netcdf_to_zarr.py b/activestorage/netcdf_to_zarr.py index 6b25ccfd..0c859f2c 100644 --- a/activestorage/netcdf_to_zarr.py +++ b/activestorage/netcdf_to_zarr.py @@ -66,18 +66,21 @@ def open_zarr_group(out_json, varname): def load_netcdf_zarr_generic(fileloc, varname, storage_type, build_dummy=True): """Pass a netCDF4 file to be shaped as Zarr file by kerchunk.""" - if storage_type not in ["s3"]: + print(f"Storage type {storage_type}") + object_filesystems = ["s3"] + if storage_type not in object_filesystems: so = dict(mode='rb', anon=True, default_fill_cache=False, default_cache_type='first') # args to fs.open() # default_fill_cache=False avoids caching data in between # file chunks to lower memory usage - fs = fsspec.filesystem('') # local, for S3: ('s3', anon=True) + fs = fsspec.filesystem('') elif storage_type == "s3": # TODO of course s3 connection params must be off the config fs = s3fs.S3FileSystem(key="minioadmin", secret="minioadmin", client_kwargs={'endpoint_url': "http://localhost:9000"}) - so = None + so = {} + fs2 = fsspec.filesystem('') # local file system to save final json to out_json = gen_json(fileloc, fs, fs2, varname) @@ -86,5 +89,3 @@ def load_netcdf_zarr_generic(fileloc, varname, storage_type, build_dummy=True): ref_ds = open_zarr_group(out_json, varname) return ref_ds - - From 1a43bf14285e61157e9addd77488b4fb1f5aa209 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 15:56:34 +0100 Subject: [PATCH 42/60] corrected check for file --- activestorage/netcdf_to_zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activestorage/netcdf_to_zarr.py b/activestorage/netcdf_to_zarr.py index 0c859f2c..11f18f92 100644 --- a/activestorage/netcdf_to_zarr.py +++ b/activestorage/netcdf_to_zarr.py @@ -12,7 +12,7 @@ def gen_json(file_url, fs, fs2, varname, **so): """Generate a json file that contains the kerchunk-ed data for Zarr.""" # set some name for the output json file fname = os.path.splitext(file_url)[0] - if os.sep in fname: + if "s3:" in fname: fname = os.path.basename(fname) outf = f'{fname}_{varname}.json' # vanilla file name From 20a806ee1d2e5563f164b0efc0d48cc8336aa9cc Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 15:56:45 +0100 Subject: [PATCH 43/60] posix to str paths --- tests/test_missing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_missing.py b/tests/test_missing.py index a0a7a267..96bad655 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -28,31 +28,31 @@ def _doit(testfile): def test_partially_missing_data(tmp_path): - testfile = tmp_path / 'test_partially_missing_data.nc' + testfile = str(tmp_path / 'test_partially_missing_data.nc') r = dd.make_partially_missing_ncdata(testfile) _doit(testfile) def test_missing(tmp_path): - testfile = tmp_path / 'test_missing.nc' + testfile = str(tmp_path / 'test_missing.nc') r = dd.make_partially_missing_ncdata(testfile) _doit(testfile) def test_fillvalue(tmp_path): - testfile = tmp_path / 'test_fillvalue.nc' + testfile = str(tmp_path / 'test_fillvalue.nc') r = dd.make_fillvalue_ncdata(testfile) _doit(testfile) def test_validmin(tmp_path): - testfile = tmp_path / 'test_validmin.nc' + testfile = str(tmp_path / 'test_validmin.nc') r = dd.make_validmin_ncdata(testfile) _doit(testfile) def test_validmax(tmp_path): - testfile = tmp_path / 'test_validmax.nc' + testfile = str(tmp_path / 'test_validmax.nc') r = dd.make_validmax_ncdata(testfile) _doit(testfile) def test_validrange(tmp_path): - testfile = tmp_path / 'test_validrange.nc' + testfile = str(tmp_path / 'test_validrange.nc') r = dd.make_validrange_ncdata(testfile) _doit(testfile) From 4b5c2d8bee4f006665d111dba5edc06ec65374e2 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 16:16:25 +0100 Subject: [PATCH 44/60] made an s3 loader function --- activestorage/active.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index c418e8ef..4cbd325a 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -16,6 +16,29 @@ from activestorage import netcdf_to_zarr as nz +def load_from_s3(uri) + """ + Load a netCDF4-like object from S3. + + First, set up an S3 filesystem with s3fs.S3FileSystem. + Then open the uri with this FS -> s3file + s3file is a File-like object: a memory view but wih all the metadata + gubbins inside it (no data!) + calling >> ds = netCDF4.Dataset(s3file) << + will throw a FileNotFoundError because the netCDF4 library is always looking for + a local file, resulting in [Errno 2] No such file or directory: + '' + instead, we use h5netcdf: https://github.com/h5netcdf/h5netcdf + a Python binder straight to HDF5-netCDF4 interface, that doesn't need a "local" file + """ + fs = s3fs.S3FileSystem(key=S3_ACCESS_KEY, # eg "minioadmin" for Minio + secret=S3_SECRET_KEY, # eg "minioadmin" for Minio + client_kwargs={'endpoint_url': S3_URL}) # eg "http://localhost:9000" for Minio + with fs.open(uri, 'rb') as s3file: + ds = h5netcdf.File(s3file, 'r', invalid_netcdf=True) + print(f"Dataset loaded from S3 via h5netcdf: {ds}") + + class Active: """ Instantiates an interface to active storage which contains either zarr files @@ -74,21 +97,7 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue if storage_type is None: ds = Dataset(uri) elif storage_type == "s3": - # correct settings for Minio; need be imported from config.py - # calling open returns a File-like object S3FileSystem - fs = s3fs.S3FileSystem(key="minioadmin", - secret="minioadmin", - client_kwargs={'endpoint_url': "http://localhost:9000"}) - with fs.open(uri, 'rb') as s3file: - # s3file is a File-like object: a memory view but wih all the metadata - # gubbins inside it (no data!) - # >> ds = Dataset(s3file) << - # calling netCDF4.Dataset will throw a FileNotFoundError: - # [Errno 2] No such file or directory: - # '' - # instead, try h5netcdf - ds = h5netcdf.File(s3file, 'r', invalid_netcdf=True) - print(f"Dataset loaded from S3 via h5netcdf: {ds}") + ds = load_from_s3(uri) try: ds_var = ds[ncvar] except IndexError as exc: From 770a9d012da76b0b73836ce93fe61aa95fa73bf6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 16:21:05 +0100 Subject: [PATCH 45/60] blegh colon dude --- activestorage/active.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index 4cbd325a..f62bbd2d 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -16,7 +16,7 @@ from activestorage import netcdf_to_zarr as nz -def load_from_s3(uri) +def load_from_s3(uri): """ Load a netCDF4-like object from S3. From 621f85d394f51ff6c2bc2481d1b34394f07d86ff Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 16:24:51 +0100 Subject: [PATCH 46/60] return something too --- activestorage/active.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/activestorage/active.py b/activestorage/active.py index f62bbd2d..f612821b 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -38,6 +38,8 @@ def load_from_s3(uri): ds = h5netcdf.File(s3file, 'r', invalid_netcdf=True) print(f"Dataset loaded from S3 via h5netcdf: {ds}") + return ds + class Active: """ From 945b32fd59ef76f8a2c87c84b2c247a3e327a564 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Jun 2023 16:25:09 +0100 Subject: [PATCH 47/60] load s3 connection params from config --- activestorage/netcdf_to_zarr.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/activestorage/netcdf_to_zarr.py b/activestorage/netcdf_to_zarr.py index 11f18f92..c0605419 100644 --- a/activestorage/netcdf_to_zarr.py +++ b/activestorage/netcdf_to_zarr.py @@ -5,6 +5,7 @@ import fsspec import s3fs +from activestorage.config import * from kerchunk.hdf import SingleHdf5ToZarr @@ -68,17 +69,19 @@ def load_netcdf_zarr_generic(fileloc, varname, storage_type, build_dummy=True): """Pass a netCDF4 file to be shaped as Zarr file by kerchunk.""" print(f"Storage type {storage_type}") object_filesystems = ["s3"] + + # "local"/POSIX files; use a local FS with fsspec if storage_type not in object_filesystems: so = dict(mode='rb', anon=True, default_fill_cache=False, default_cache_type='first') # args to fs.open() # default_fill_cache=False avoids caching data in between # file chunks to lower memory usage fs = fsspec.filesystem('') + # open file in memory view mode straight from the S3 object storage elif storage_type == "s3": - # TODO of course s3 connection params must be off the config - fs = s3fs.S3FileSystem(key="minioadmin", - secret="minioadmin", - client_kwargs={'endpoint_url': "http://localhost:9000"}) + fs = s3fs.S3FileSystem(key=S3_ACCESS_KEY, # eg "minioadmin" for Minio + secret=S3_SECRET_KEY, # eg "minioadmin" for Minio + client_kwargs={'endpoint_url': S3_URL}) # eg "http://localhost:9000" for Minio so = {} fs2 = fsspec.filesystem('') # local file system to save final json to From 0d3492f943b1865173c62971b3845597d219b43d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 15:27:18 +0100 Subject: [PATCH 48/60] add test data file --- tests/s3_exploratory/test_data/cesm2_native.nc | Bin 0 -> 67544 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/s3_exploratory/test_data/cesm2_native.nc diff --git a/tests/s3_exploratory/test_data/cesm2_native.nc b/tests/s3_exploratory/test_data/cesm2_native.nc new file mode 100644 index 0000000000000000000000000000000000000000..61a4ecf359d387c3e2592c1b06a6c6dc5e55c7c2 GIT binary patch literal 67544 zcmeHQ31E{&_Mh|y3c?D43K9etkTy+Afr6429xVk5B1J_-Luk@A&?YrW5j+bNdU3T$ z5D^ej5ET_s)Kx)LJn&#q7sPc}Sq~IE)>}MQ|8Hi#mp0QD=~_Vd{|%(?n|bri@#cLq zznSlSGgaB6vf8&jqisZ793%IO=Jv-LM37lybm;NUk_)m&O^RsW-VkxN2#oBk@v{5K zi^w*TuKtsxZ6laSq)T@eg&~pM7^SY#_)KKuzYZ*pN3~9Xi5_=}!{c*0yzErGm**IP zzgZ5C*Xec*Pf2h&e0lc#;i;xHQ*uI~J%7;fWK(jADLKABM#H4y@gGmhl{e7p^?96m zKGqSd$qT}mQ6$C-*Rdn_n;rZwW8X-^*g163fOZO9M;6(9huTW-EqC2+mx5sigOEss z!C;8j;!;IXR+w8SV>(uly9}1vj-~z*$yl@yQe-R%sVjROL%DOvva%YD#&JQg$cTtZ zBl!h7l!!=fzVx2-zrkCkEFC#HdlZ%mi^k<;@3&)_ELIz)X$QM;`CYkLxw)Avr?+Go zo-?pvcmrdr-!J$MtF^jwrW9#x1Cfdd7>B4P(q277ZQ3=Cb-fW|Xd-PZ+cs0n_d=cl zIudkD^V&Fo$BSe!QX3&>L~gFoLar-3`%z=YXHLi+bHM~_E{w>@Wbv0VY~Dx~EpM1G zVtl5EpP|K{TA@xVKS0S!BlRV z^nN=bvvD2JhtBY$f!7n=yI)-{RGwe6tV!7ug-%2;q1YlYR8gaN92t8+(gX4k45h@?8(lx7hx^f8>rP+QUDb!kXvu9C%4O%yJ8l-S+Hm2vN4C9RWULzN z>90@Ul$DFR#i$lqWwe|rPhO;nY&MpJw+VOsH3c|R_!uL4`QsPAGjeiH7Ru`@&O&YE zWy{%U?8tolR_p}$u`)W^OgRjR#;Fkw_fz7C$?W-!hajQG4f_2iQYAAi=;+ zLvq^JpFS=w*a-BY%X?+dq2dYu)&wfU^uH~VfN1|VVNwli*|0VXYp<~O4pX48h7WJj zhH4e2Qeh4K#F{1JTT}}@@_#D<(eFg_euwvY_I_RF$do!W0ym}(@Vw}GsnaxI|8qd} zyt!X*n3Q_~WRK$mO%z)@T;97yD@;5-*%A0`chHc?1c6vmM>&g~_9n3iW20Zb{<;vD z=)=>AKr}&MLS%|z1S-fc$GO9)GT&TDXcUZ%>&gQnBO>RGl^ous@LPG`MuTBwC{aqB zD2Haehw6;AK$%#zT7cW_u{&Kh9|E*6T!r9=%0-kXz%!NQd{;w&>AXXCNnv&~7()6f%--S!Ku7Q{gsVuFlFiy`^w$X97P?a=Es$9sy3f6OZKaCL zR%?3PHk6BLciNNcBe+__mG!aSy1eiriOi@LT4jV(DMwCKWcG~q=o2C%nsw@$0~#{> zmfv@N17vm?PaGq1h}1L?dx_tIeJ>Y)0Ai*57OXn&@alosSq*^LTI7TQu@rs_LP(C? z7zbhk|KVeRJxbF+?9C0|pTZ;Su5~1EY{*P&M7EY^ifzu3G;62;tf2z2ux4$jNvxrc zgf({0PkV<}1$g4RJ24Uv{Z0%AD*UlRWOl90w;q|jl{kJ-_?#H~pL4_@6q&NRcIvgY zf8yXQek$+YBQcqd9Kh7i-E+p|PG+ZLs=PmW zH~UJ@Xt*GUhtsrMT%(t#$=w(&fYMaMMLHOd!dGb#sTZekT6UsIpBgSYtR~VCGs*_x z!v~?KfuGW$aBa@qs5~?PQG6;7?-#kFaH=n1S2bM2PNDdejy48`!YLiuQ~7<(OL*! zYk)m#pA}2ne1V}!qXP^Vj$S8(mQ`peMB~#;Tk|$ca783^ra9yK!b-bO_H`0g%qTl7+kk|WB?&;k>Y#>OFhWrrt%wnUiFF});|d1^*d zUZ2fnw|VSVm#x?#%uV>*UrW*HE44c!aUoyWQx&;g1@+N-XwhhJ$f1XO{A^CL^vCwl8 zG7jUtDSu9UXbX0%9e~WV*$#lZ6rN-=4}W-+#t$f8_oY&4PpD=FSKkrfOBdM@*yebR zUZ*S1VN4!6WN4x}8UGv2=3(N0B-%=e$3M?jiGrM#;k;ewP9J(qHNmUv7Q-{Sc*a#vR5J zwBz=0nxOfKj??@^otmF$rRFEPLGu%RR`U~mU-J` z+cp=nC*A;6x#bXjq#V+3oeR6(SqR_Ft%CvM>!8ym%iw`4{jhOp0EWF@2kvX@pvTz( zxM`#x*xCS$?Hqt>ZV1Bc^#MqY4!}Qi>R@+q0H)nt2|o>}gvwJYVZt+&aAZUcWUQ-( zZJ$R{n#eps>s^S9JN=|(@ayW0;FZV$lBssLDS4np^z0x-!Qfb>u5z`QE}7oA%L_fM;Y zy=PX!lmnIU(LFU#HLwQa2Gl^!ni_chk}8NByb#vjH4hSRDTBd=a)_M206KkjJv?=7 zIc%I!2WO3}gK1lrLH7l9U|Z{loRxKO(S$lEyV?&Q!~|f|jsUo?4#49Zf-v;YL0I-= z04{O*;hKL2py0(yNPW5j2F6#w)DD%f9P>Tcvj+0I)xaGIH86H*H9TOefR1J5aCXvs zn7L^Il&@L<1rz7OhvOH(8;|*+Q=%WT66&BA*0t-yez^G7Iyi)K|M(_9tcnc4Kh^}` z-O>QOGbRX`?Sk;xR{_|0u^+Z>48VwwE8wn!3nA~qg;3tL0+wD>4c1$#VZ(*hu;Ne^ zbh*C^4I*3I5PlCFAhNU-2s^QVgR=J1MoBRL*S|aY?-|fZYWv+>%S_8FRohv z7xk!u7)KSPH`_lsfs${M(MOEv78T?K7j)iC+GYPfJxHEgdA!uP|M z!?sNU@O>SCC#DAAh70}h5w>m5a{{pUbU$427eCyCI(OFde&}*#0OC*=i!ZK&t@Hda z?B5HZ%ZhTila<3YKbFIqZ!6$G|EYjC7FL4gPz4;icM0UayA%@Am%^~WF9xHt1}=Ge z5p3AD7#6-)12_E;gfUB(!+&G{i1oYlsUWgZUmmOnb=> zt5f}un_Ua(_xs_lf0e`3P4j_mnGZWrr;CPGz(a=?f_-cSEV_3g^etTkKeSl{1=vqn z9IQ3+kH@WX4p{BZBeT9|Qj0LE_%fWv`# zobQMCo<*H_t`0W03&7{)sCR9G;K>a@=PiCH-d76`cMD)UR=|qVh0xEk5EgcXmm?isD*#h+PsZQ%Xn2zRRg*JVqRtJWk}Ceo?5c#b%p7SFu;d|5$SJP+0k z!^xynOoX)Jgpv5KM*bLX%>09a#RwT(vVE+!T+!f;quZ3CA+&I087&MVc}SEDr@v&< zL;mI{-WIx3Ce3Y1Cs`=|8;;i=S8Yna(Db<8TC`wYr*5iE=@-Lf8uhWNe936m&A;}c zT4(5gdAsYLt2=+`ux((m|v*d}}iF244;oF|SExvn0?rZk~}P3bIt zOCPD2$7oZU%5Ul0z@~I154esAbK53A?wNH<@3S$A?pjF#ztyHxQvlXbfmm3xHq<26 zP)At9#=c<5yo+0@N_sLakN}4PF{kkx3{kX7-0)l87|~f6qC!TLxNt5O!Kwz!TrM$w zvbOc#tMI{%cLnrbg)dJ1y~o*mc}_Hp(c2Rm2>B-&(YS1Q{FcvYjf|I!J6cXBKgopr zSM_7P#5yZs8F81T_|xxx(>g0}r{A>B%Ev4|gkoDB>oTj%v)N@7i6t2_%bqP$Kl$mM3 zzKnV+qGT1X*8j1|N{FU1)XOPc-K$GLmw+w-T>`oUbP4F+_(phvoBkZlwKYjD z#iKchde1N=UUP_atP+nVhiZD4De*$Fjm|x%fw#-@fQg3IowW{8$2EBhL806s39d|Jid|OfV4e) zyF_96SBU$+V&ZZ558wXa!45vRXO6Jw9*0r*i;OIf)0pchF&c*&lLrn5SuBf59l`oWsH z^%(v(4Zm;bfTVoi{tj}ld#h}$ZsQkqVo&m0Gv)5BMFant$iCuoM8kdlerS7#S>sqb zk8!CqlRS{Vbtc=$6U~x`j10W)Z4ayEjn`Y+a0hO>!ONm}Ku>A1{np_J=Q1O|rN2C3 z;`syd^Vux^x~)uZKz`x8XUpX=02qTi^A|&^*=by#Fk8Ocqxpc_&qnY!Z_60pv4EZT z{fOJx3cTP(-MqjbniKC~r}C@S@}P*A6`Ss3Wjxz{avRfkze{+6oyOl9eq_M?)ET?x z`rgm8pZV)SwC?9`GY=I^eTi-6X@E2g-(|4wXT5n{P`^aPtNOgdzU4*70R9a8?fDTO zGM83;9)9Z8Q@=T!G*NpjFyJJ)hUV6qRKprFOn6~R5Y~8MwGXRkSfhujRai5G$ums1 z!rCHCL0Chr4Qs=&<_gopFog zC3~|BEQnC}B4;t4Ctw_fdwv)j_h~8Erwuap;~kk7d$QsZ+~mM#DlRE7*&RuFcI>Wn zr%OPW!0$%_;m3)g51`ZjPk`}^VGraMJg><`?00q1|IGvDt?wW3p!S7uak@ zkE7UDV)f>EoF%^Dt~}ulV->o+KECtP@FK@78*Z7DY(fa;H5IvWyOXepbYz{xWQmEM z0*?{*e6qTVt?p9aaJ1Zc8|ycRqh z0h{>cxm}1HO(jly%HU*ks@a%Sh$t{=deZcP=Ap@{$w^ncJu{Oy+DwAX-0>+%`6fq7 zvI&m`$(?K+IT}p)=Al;pD=tVLVuJBwCJs&Ka3Q%zN=Oqwg1-EM!Zg+;~@X6iDICvHr zWF;n>%&l@9W$4=+ue;QfC)JTp2t+m`#z!Kg*EUJ=N@lXgY_VE~`qd(LL1;@js?iLG zYi5JpmU!nSMA}BnEw9iq$Ki50QX=sHK27DEKBujyUg`Kp%)HdovA-~zM)Lkb^cB3D zNHO~g!IUy|h!s1DSt+82aEhKmZVHvidsaTPjl@ZgNZ#izaTj=OC53b9bKoC1^Bj^2 zifoAKN<=TkcYsUE^EwKW#6uvwHeYcPc0x%!k)-@tR||5QAb!{<=3|}Uz1p0b zVx8den$2leaGM94tyxaqfn7Gq>MbZ7Xtt)L&MdZ$o-`rHnwLDpoNOK_3a5SYRg5#^ zBCFl)w3d2`gq)08(FlDa&*b(LBqbvzH4h$?G-~jW?Bs0o#gTaGrK}&KQ!CEPoo>4| z-{ZJSq{1hUyn@AUm#@&89Fldz!6q!C*2(j=43VmgNJEc^%4dK)%_fGSkOIrwv_4PV z#V3;dE-@%Y;wc#7a4$ZF;IXSN98!FR9)}J07A!5E?(j4^;fqf;xg*MxYV71^{i_?C z;mI)ocSL{vGk*C6{S!;&eI7|d$CgDmvK*!_j^an~sK4pULtd$P96I>nAx&ZG75JW zY09%sGcG2bXnd|KyIkIJ-2U#p^opCrqD6Gi2iLS`X;}C(*zM1BIfoq-m+sS?e`YiB z@^rT1hMazEJ(lD&w&utwBiXrl*_A#2{j^c68ZXae@0T9P69;v6WmV~4*jc%_?P*(t zgYClzr?d0_QCq|&iO^Rz|8p_(iNwDB^4nX*a#I(!DrN1RY&Ayc#y$(?-@{%Jaev&o z=qWY@<925k&AIVu)(b7fv=IJsX-L@UB)ksV$+TWxIqd z`S}uK7x9>g$Wll6@GP8cTI~tJr%5yHuF)6i(`Gd-6&hFT2r-l zNm!Og9;?riYf|fG$(!!+6@#*69w+jz9Aj!5fC1818dIp_Z~lyQ2?ids%W`|Q;?t0F zT)_a(D<0ot5d=8*?dAL4lqMFT2yqg zt*F$&nmQDopF&1`4pCkLW?Zx*ezY9+JIIuZod^bSd*@mIRbrUr-v{pA-g=GG<$6(0 z_B(AHcgi2Dac=9e_GZ~QR~_xiiRVbX;eObJ$K(@J`TK5^QjZ5`;YOU=IgI@-MK^x@ zs8Rjn)98tj2M^ajF%2u))RS`EPrv*Ue;z$0;_mRLh2u&-o3)-w`-j zPXx{nn(eOe#i;DAG~s(4xNvCq@*PU@8nnmBZ-oB#BktDgo(2X_R%0B?*pZ){{Kx8^ z25idow_f9H_?D8U=T3 zY<6rpQyE)upMPV&e2=@>I2{l3z;S-g?_h90J7asUY=GInp#gq5X$^qO@y#{>Z9GOJ zd6R?toRdGc_g?Q@D%)VjZ)gM0O(&~oN;*m#@D&v?AJotkKa<1jbCjHXsgLrXvQ|oc z>(Qj%na?Sjnw*BuEk0;Q`Z1xkVw&F*ZH1+I-r(_rjw1cjH0k@?K71YTbr;z@MyJPP zv*WsXo+BR7FQ1GzI@yFTxbQ~1B&j%s&)E6$dh5*Cxj|}c&Dg=1bhcPC66x!Atq@$4+!2*NJQL>F9NreXQ?7vU&4#C}pu3;A zH*bz+U&`$Y7*6+iG_jp;PkxmlttG#%?%9W};{gfMHnh#kW%Jp?=x4ZS$yaVasJvhS zv+@`q_vRfnt<9}$C6CcvY6Ekwd4TQaF^c3eV4lDBkTez@!=vG%&ntTlv48O^oOAJV zzc{o1PXph2`mz`6OQ?2x5 zk5vL;>*oEQk+<>3ls|#3yKdeYQ(TrXeq>mdQ(mgivi08%@E1bwm*ia?mSyGZ$1_Vl zYA%0p{bylWKKj=2%#tsg@7R6%=&&q5dapjq)^E#hnE0Rw?Dom=%<^>UQlu3?!2d*dQIgEwE2E*uc6pIgJ&@7_~$j3#&Qyv$x zmI)bBG;4YB+iWhHhLnxX9hzF&X3Vy;Ss>YxBs43pJUsH#igxscZtZ|hj`p}X_;h8M zvDjgAwd6x8YA&K&xbz_ve*xXl&)+~d^oL$SW4P}A$Py6Ge|DexDm=LOoW6!AGIfY3 zxSEb)HSpeDxG1i0W4Jz`Gl*}u_*E1?Bz6^7NOXoM?Si%8vBl2(pXdC+|GR%m?ecXrRWKw29_8D;)8n*YpA&U2y+{H-CXWHi65Ji)k7<~CVtJp)Q zM;fBYU?V2aoq~+EW#WFU2$^m~@t0n7nxr9$UWv!@U|hB7GoNlCYmOLvM}R5O0h5L( zvg<$w&w@^xG(?ewM+~lwB7=^cG(^#Z7VWa*0udcOX^0}Dk7$<{!|4c0Lljwm#JKqS zMIoIr=ia>}#_q*I!=5jm{HCq-)%Q45&CZGO>}iND$bG1_%g1FkNh(R%BwOhfV7_YDm|>kr-wm}NB$Y9bR{Xpr*@#{ zlKkPZb)PN)T>`oUbP4DZ&?TTtK$pPpT>{O`9|-5E<=?1H7m2UV7d9(=nnhANk1Bk1 zKCnaKt2pxog|FiMJqn+un3Vs1g|FiI0}7w&N%w!G@KwD2xx!cR`4NRr_?psZ(cuy$ zzFLk@eEmX~fGz=D0=fir37h~4v=lGIY2z7>XU0?pwCb)2NKyr7L|#8>gc9)+*sh5ZU& z#R~@%zB*2Rr0`X|@VUZQ@xl>>ui^z3BWpzUBTDs;Q}`-g=&0~jywFYI6CR-SjT)c) z#8=Bfe4XkN&?TTtK$n0nffFHtmg0qe%6O;Z1+&6e@j{xySMfrI!dLM^w!&BO!bF9y z;)N*+U&Ra46uvr67b<)eFO(>J6)((I_=E?jt;;k%`H8QVgZMhtC7??{mw+w-T>>XU z0xiW0mCAUh;)P`jU&RY66uycV)+&4zFKkr!Dqh&E@KwCvhzFH3A>r|J3E&*Kvx&(9yoCpathZodwn#%H#Hl7h*#S5P( zd=)QzuJBd7@U_CHd?^1T3SY$wKPh|_FR)lyVoF!V3sDMR9jD_IzKR#(6~2lWIx2j^ z1604x8lU{cr}9#`PIU?B63``}OF);viIhM~@j^Fcyi@T)cZILw1*5`O@j@Skua1-b z6uycV5){6Q7t9J@#S5tlU&RY)3SY$w=PP^_FJvfu!UNRiqclGGiLc6m_&U`kpi4lP zfGz=D0w+QOEyWAj%6O;Zg&c*i;)RI{U&RZP6}~!7PEq(OUYM%zRlG1w;j4JTuJBd7 zP^j=#yila@RlHE5@Cgr4Tl+LV`H8Q}f%rPrC7??{mw+w-T>>XU0?pwCb)2NK%+|&; z;;VR}OyR3|p;FJrc;pi4lPfG&X(Apvv3Ib9F_ZJTA=JLh=*hIByn=y^Ml zJ~BM}pIeX~T=Da++mL#5e(AFe>1}sz?Q|p3d6BWZkG#Ik@{8Skrwi%a!Xe*$^cwni zP1}7X(z&k`bbs^JZI+w!et6#W=d^U+yfqIS`lY2mInG{sXaBVHRWsu3*9=HY&wl!m zk(R`?^xprvJh@X+TKb-}b{+Sc)6y+B{InuwU|Rb4m8oOS8{^l0P#}A#CmhMPQHtfP*qAA%>i@$I1cWF|xVH^H>nUf6*@OKD*l-nd~1H{kiW!QR;D^%>$N@B^7JF?rgVKK)^f!s?LL0*`B=*Wcji@Zz8q^g>yG;( zH@q5aIpvYqF|jvCSk}fLeDuMU5tcW*F5fbERfJ_mvG45nR!3Ogo(wzY-Wp+X-`&e> zx-G);UhU_vyu2;~#dGAKnB5&RR74{Qg0Ub=x(2*PI))l(+42Zr_BUC6 zeg6h?&~jkM+-IIh4qDnDT=Dkql%VC)4QHIae_)WOJN$5J(DK8g2g|-16tsNN1NZdA zGqDr!Hx7SJ{8i#_1OA@H-~0HBz$1Zs;V&J3Ep_JAhzM_wW*Hl!S;zaL+2#kMS=nEr kneoYJ_W90ew&TTUHf>KdGw+LLd)^}S`1zAEpVp`U2Z;hS=l}o! literal 0 HcmV?d00001 From ea910801d3a84b416ea9027345722e1860e9cd73 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 15:27:38 +0100 Subject: [PATCH 49/60] add netCDF real file test --- tests/s3_exploratory/test_s3_reduction.py | 41 ++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index 8d6d047e..c0d1fcd2 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -47,7 +47,8 @@ def upload_to_s3(server, username, password, bucket, object, rfile): def test_Active(): """ - Shows what we expect an active example test to achieve and provides "the right answer" + Shows what we expect an active example test to achieve and provides "the right answer" + Done twice: POSIX active and S3 active; we compare results. """ # make dummy data s3_testfile, local_testfile = make_tempfile() @@ -77,6 +78,44 @@ def test_Active(): assert_array_equal(result1, result2["sum"]/result2["n"]) +@pytest.fixture +def test_data_path(): + """Path to test data for CMOR fixes.""" + return Path(__file__).resolve().parent / 'test_data' + + +def test_with_valid_netCDF_file(test_data_path): + """ + Test as above but with an actual netCDF4 file. + Also, this has _FillValue and missing_value + """ + ncfile = str(test_data_path / "cesm2_native.nc") + + # run POSIX (local) Active + active = Active(ncfile, "TREFHT") + active._version = 2 + active.method = "mean" + active.components = True + result2 = active[0:2, 4:6, 7:9] + print(result2) + + # put data onto S3. then rm from local + object = os.path.basename(ncfile) + bucket_file = upload_to_s3(S3_URL, S3_ACCESS_KEY, S3_SECRET_KEY, + S3_BUCKET, object, ncfile) + os.remove(ncfile) + s3_testfile_uri = os.path.join("s3://", bucket_file) + print("S3 file uri", s3_testfile_uri) + + # run Active on s3 file + active = Active(s3_testfile_uri, "data", "s3") + active.method = "mean" + result1 = active[0:2, 4:6, 7:9] + print(result1) + + assert_array_equal(result1, result2["sum"]/result2["n"]) + + def test_s3_reduce_chunk(): """Unit test for s3_reduce_chunk.""" rfile = "tests/test_data/cesm2_native.nc" From d1fa7fbeaa1ef89009d69024e97aeab2f42c8e1c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 15:43:51 +0100 Subject: [PATCH 50/60] missing import --- tests/s3_exploratory/test_s3_reduction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index c0d1fcd2..f378e369 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -9,6 +9,7 @@ import activestorage.storage as st from activestorage.s3 import reduce_chunk as s3_reduce_chunk from numpy.testing import assert_array_equal +from pathlib import Path from config_minio import * From 5272d7da7ebad7986bca105e1dffc8101210ad05 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 15:50:40 +0100 Subject: [PATCH 51/60] pass correct var --- tests/s3_exploratory/test_s3_reduction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index f378e369..ada08eb4 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -109,7 +109,7 @@ def test_with_valid_netCDF_file(test_data_path): print("S3 file uri", s3_testfile_uri) # run Active on s3 file - active = Active(s3_testfile_uri, "data", "s3") + active = Active(s3_testfile_uri, "TREFHT", "s3") active.method = "mean" result1 = active[0:2, 4:6, 7:9] print(result1) From 4c5b311fb9df87724c7037049842ae92d205cd9c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 15:56:59 +0100 Subject: [PATCH 52/60] fail to visualize results --- tests/s3_exploratory/test_s3_reduction.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index ada08eb4..c440b643 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -114,6 +114,9 @@ def test_with_valid_netCDF_file(test_data_path): result1 = active[0:2, 4:6, 7:9] print(result1) + print("xxx", result1, result2, result2["sum"], result2["n"]) + print(x) + assert_array_equal(result1, result2["sum"]/result2["n"]) From 5ae2666a24ea7b1855114d2023d70e949bb8d0b6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 16:07:39 +0100 Subject: [PATCH 53/60] fixed test --- tests/s3_exploratory/test_s3_reduction.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index c440b643..cebebced 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -97,7 +97,7 @@ def test_with_valid_netCDF_file(test_data_path): active._version = 2 active.method = "mean" active.components = True - result2 = active[0:2, 4:6, 7:9] + result2 = active[4:5, 1:2] print(result2) # put data onto S3. then rm from local @@ -110,8 +110,10 @@ def test_with_valid_netCDF_file(test_data_path): # run Active on s3 file active = Active(s3_testfile_uri, "TREFHT", "s3") + active._version = 2 active.method = "mean" - result1 = active[0:2, 4:6, 7:9] + active.components = True + result1 = active[4:5, 1:2] print(result1) print("xxx", result1, result2, result2["sum"], result2["n"]) From b0dc57230298fb9a8b4ace652616382cda95293c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 16:16:40 +0100 Subject: [PATCH 54/60] correct version of test --- tests/s3_exploratory/test_s3_reduction.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index cebebced..bee51316 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -89,6 +89,9 @@ def test_with_valid_netCDF_file(test_data_path): """ Test as above but with an actual netCDF4 file. Also, this has _FillValue and missing_value + + identical to tests/test_bigger_data.py::test_cesm2_native + """ ncfile = str(test_data_path / "cesm2_native.nc") @@ -116,10 +119,12 @@ def test_with_valid_netCDF_file(test_data_path): result1 = active[4:5, 1:2] print(result1) - print("xxx", result1, result2, result2["sum"], result2["n"]) - print(x) + # expect {'sum': array([[[2368.3232]]], dtype=float32), 'n': array([[[8]]])} + # check for typing and structure + np.testing.assert_array_equal(result1["sum"], np.array([[[2368.3232]]], dtype="float32")) + np.testing.assert_array_equal(result1["n"], np.array([[[8]]])) - assert_array_equal(result1, result2["sum"]/result2["n"]) + assert_array_equal(result1, result2) def test_s3_reduce_chunk(): From 4ef10cb111d9e5f4fb62079c7d90c554c54bf8c4 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 16:21:48 +0100 Subject: [PATCH 55/60] add to local s3 unit test --- tests/unit/test_s3.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/unit/test_s3.py b/tests/unit/test_s3.py index 3537dc85..9341b9f4 100644 --- a/tests/unit/test_s3.py +++ b/tests/unit/test_s3.py @@ -2,8 +2,11 @@ import numpy as np import pytest import requests +import tempfile from unittest import mock +from activestorage.active import Active +from activestorage.dummy_data import make_vanilla_ncdata from activestorage import s3 @@ -102,3 +105,14 @@ def test_s3_reduce_chunk_not_found(mock_request): assert str(exc.value) == 'S3 Active Storage error: HTTP 404: "Not found"' + + +def test_s3_storage_execution(): + """Test stack when call to Active contains storage_type == s3.""" + temp_folder = tempfile.mkdtemp() + s3_testfile = os.path.join(temp_folder, + 's3_test_bizarre.nc') + if not os.path.exists(s3_testfile): + make_vanilla_ncdata(filename=s3_testfile) + + active = Active(s3_testfile_uri, "data", "s3") From 2fb70fd833ade78f2caad5ad7a922693eabe7815 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 16:24:50 +0100 Subject: [PATCH 56/60] add mention in test --- tests/s3_exploratory/test_s3_reduction.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/s3_exploratory/test_s3_reduction.py b/tests/s3_exploratory/test_s3_reduction.py index bee51316..c7ecaa04 100644 --- a/tests/s3_exploratory/test_s3_reduction.py +++ b/tests/s3_exploratory/test_s3_reduction.py @@ -50,6 +50,9 @@ def test_Active(): """ Shows what we expect an active example test to achieve and provides "the right answer" Done twice: POSIX active and S3 active; we compare results. + + identical to tests/test_harness.py::testActive() + """ # make dummy data s3_testfile, local_testfile = make_tempfile() From 273f0348d8c759293a9db63f72281afabad17fac Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 16:25:04 +0100 Subject: [PATCH 57/60] correct file passed eh --- tests/unit/test_s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_s3.py b/tests/unit/test_s3.py index 9341b9f4..db705232 100644 --- a/tests/unit/test_s3.py +++ b/tests/unit/test_s3.py @@ -115,4 +115,4 @@ def test_s3_storage_execution(): if not os.path.exists(s3_testfile): make_vanilla_ncdata(filename=s3_testfile) - active = Active(s3_testfile_uri, "data", "s3") + active = Active(s3_testfile, "data", "s3") From 8bf14545827e6b01f5d6d6104fd70c05dd15bd32 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 14 Jun 2023 17:39:29 +0100 Subject: [PATCH 58/60] simple raiser test --- tests/unit/test_s3.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/unit/test_s3.py b/tests/unit/test_s3.py index db705232..666b8128 100644 --- a/tests/unit/test_s3.py +++ b/tests/unit/test_s3.py @@ -1,3 +1,4 @@ +import botocore import os import numpy as np import pytest @@ -5,8 +6,7 @@ import tempfile from unittest import mock -from activestorage.active import Active -from activestorage.dummy_data import make_vanilla_ncdata +from activestorage import active from activestorage import s3 @@ -107,12 +107,10 @@ def test_s3_reduce_chunk_not_found(mock_request): assert str(exc.value) == 'S3 Active Storage error: HTTP 404: "Not found"' -def test_s3_storage_execution(): +def test_s3_load(): """Test stack when call to Active contains storage_type == s3.""" - temp_folder = tempfile.mkdtemp() - s3_testfile = os.path.join(temp_folder, - 's3_test_bizarre.nc') - if not os.path.exists(s3_testfile): - make_vanilla_ncdata(filename=s3_testfile) + active_url = "https://s3.example.com" + s3_testfile = "s3_test_bizarre.nc" - active = Active(s3_testfile, "data", "s3") + with pytest.raises(botocore.exceptions.ParamValidationError): + s3load = active.Active(os.path.join(active_url, s3_testfile), "data", "s3") From 04f29a4d8ddbd76a712e72978843a0bb65220779 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 15 Jun 2023 12:52:05 +0100 Subject: [PATCH 59/60] remove unnecessary test --- tests/unit/test_s3.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tests/unit/test_s3.py b/tests/unit/test_s3.py index 666b8128..81f7d23a 100644 --- a/tests/unit/test_s3.py +++ b/tests/unit/test_s3.py @@ -1,12 +1,9 @@ -import botocore import os import numpy as np import pytest import requests -import tempfile from unittest import mock -from activestorage import active from activestorage import s3 @@ -54,7 +51,7 @@ def test_s3_reduce_chunk(mock_request): assert tmp == result # count is None; no missing data yet in S3 - assert count == None + assert count is None expected_url = f"{active_url}/v1/{operation}/" expected_data = { @@ -105,12 +102,3 @@ def test_s3_reduce_chunk_not_found(mock_request): assert str(exc.value) == 'S3 Active Storage error: HTTP 404: "Not found"' - - -def test_s3_load(): - """Test stack when call to Active contains storage_type == s3.""" - active_url = "https://s3.example.com" - s3_testfile = "s3_test_bizarre.nc" - - with pytest.raises(botocore.exceptions.ParamValidationError): - s3load = active.Active(os.path.join(active_url, s3_testfile), "data", "s3") From 5c7f86b116a1c6a8ad6f035b24741948147b4bb5 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 15 Jun 2023 12:52:36 +0100 Subject: [PATCH 60/60] start a unit test for storage types --- tests/unit/test_storage_types.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tests/unit/test_storage_types.py diff --git a/tests/unit/test_storage_types.py b/tests/unit/test_storage_types.py new file mode 100644 index 00000000..d265d059 --- /dev/null +++ b/tests/unit/test_storage_types.py @@ -0,0 +1,15 @@ +import botocore +import os +import numpy as np +import pytest + +from activestorage.active import Active + + +def test_s3_active(): + """Test stack when call to Active contains storage_type == s3.""" + active_url = "https://s3.example.com" + s3_testfile = "s3_test_bizarre.nc" + + with pytest.raises(botocore.exceptions.ParamValidationError): + Active(os.path.join(active_url, s3_testfile), "data", "s3")