diff --git a/tests/amodes/test_sentinel2.py b/tests/amodes/test_sentinel2.py index bb1f34e..278307b 100644 --- a/tests/amodes/test_sentinel2.py +++ b/tests/amodes/test_sentinel2.py @@ -4,9 +4,11 @@ from unittest import TestCase +import fsspec import numpy as np import pytest import xarray as xr +import zarr.storage from tests.helpers import make_s2_msi_l1c, make_s2_msi_l2a from xarray_eopf.amode import AnalysisModeRegistry @@ -100,9 +102,22 @@ def assert_convert_datatree_fail(self, original_dt: xr.DataTree): class MSIL1CTest(MSITestMixin, TestCase): mode = MSIL1C() - def test_is_valid_source(self): - self.assertTrue(self.mode.is_valid_source("S2A_MSIL1C_20240201.zarr")) - self.assertFalse(self.mode.is_valid_source("S2A_MSIL2A_20240201.zarr")) + def test_is_valid_source_ok(self): + self.assertTrue(self.mode.is_valid_source("data/S2A_MSIL1C_20240201.zarr")) + self.assertTrue( + self.mode.is_valid_source( + zarr.storage.DirectoryStore("data/S2A_MSIL1C_20240201.zarr") + ) + ) + fs: fsspec.AbstractFileSystem = fsspec.filesystem("local") + self.assertTrue( + self.mode.is_valid_source( + fs.get_mapper(root="data/S2A_MSIL1C_20240201.zarr") + ) + ) + + def test_is_no_valid_source(self): + self.assertFalse(self.mode.is_valid_source("data/S2A_MSIL2A_20240201.zarr")) self.assertFalse(self.mode.is_valid_source(dict())) def test_transform_datatree(self): diff --git a/tests/test_amode.py b/tests/test_amode.py index af2557a..9e49ec4 100644 --- a/tests/test_amode.py +++ b/tests/test_amode.py @@ -1,6 +1,11 @@ # Copyright (c) 2025 by EOPF Sample Service team and contributors # Permissions are hereby granted under the terms of the Apache 2.0 License: # https://opensource.org/license/apache-2-0. +import fsspec +import zarr.storage +from pathlib import Path + +import pytest from typing import Any, Iterable from unittest import TestCase @@ -14,7 +19,7 @@ class TestMode(AnalysisMode): product_type = "TEST" def is_valid_source(self, source: Any) -> bool: - return source == "TEST" + return isinstance(source, str) and "TEST" in source def get_applicable_params(self, **kwargs) -> dict[str, any]: return {} @@ -39,14 +44,56 @@ def setUp(self): def tearDown(self): AnalysisMode.registry.unregister(TestMode) + def test_guess_ok(self): + self.assertIsInstance(AnalysisMode.guess("TEST.zarr"), TestMode) + self.assertIsInstance(AnalysisMode.guess({}, product_type="TEST"), TestMode) + + # noinspection PyMethodMayBeStatic + def test_guess_fail(self): + with pytest.raises( + ValueError, match="Unable to detect analysis mode for input" + ): + AnalysisMode.guess("REST.zarr") + + with pytest.raises( + ValueError, match="Unable to detect analysis mode for input" + ): + AnalysisMode.guess({}, product_type="REST") + + with pytest.raises( + ValueError, match="Unable to detect analysis mode for input" + ): + AnalysisMode.guess("TEST.zarr", product_type="REST"), TestMode + def test_from_source(self): - self.assertIsInstance(AnalysisMode.from_source("TEST"), TestMode) - self.assertIsNone(AnalysisMode.from_source("REST")) + self.assertIsInstance(AnalysisMode.from_source("TEST.zarr"), TestMode) + self.assertIsNone(AnalysisMode.from_source("REST.zarr")) + self.assertIsNone(AnalysisMode.from_source({})) def test_from_product_type(self): self.assertIsInstance(AnalysisMode.from_product_type("TEST"), TestMode) self.assertIsNone(AnalysisMode.from_product_type("REST")) + def test_source_to_path(self): + # From str + self.assertEqual("test1.zarr", AnalysisMode._source_to_path("test1.zarr")) + + # From pathlib.Path + self.assertEqual("test2.zarr", AnalysisMode._source_to_path(Path("test2.zarr"))) + + # From fsspec.FSMap + path = AnalysisMode._source_to_path( + fsspec.filesystem("local").get_mapper("test3.zarr") + ) + self.assertEqual("test3.zarr", Path(path).name) + + # From zarr.storage.DirectoryStore + path = AnalysisMode._source_to_path(zarr.storage.DirectoryStore("test4.zarr")) + self.assertEqual("test4.zarr", Path(path).name) + + # From dict + self.assertEqual(None, AnalysisMode._source_to_path({"path": "test5.zarr"})) + class AnalysisModeRegistryTest(TestCase): # noinspection PyMethodMayBeStatic diff --git a/tests/test_backend.py b/tests/test_backend.py index 7a887fa..05b9869 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -1,7 +1,7 @@ # Copyright (c) 2025 by EOPF Sample Service team and contributors # Permissions are hereby granted under the terms of the Apache 2.0 License: # https://opensource.org/license/apache-2-0. - +from typing import Any from unittest import TestCase import fsspec @@ -78,6 +78,21 @@ def setUpClass(cls): def test_open_dataset_ok(self): # noinspection PyTypeChecker dataset = xr.open_dataset(self.path, engine="eopf-zarr", op_mode="analysis") + self.assert_dataset_ok(dataset) + + fs: fsspec.AbstractFileSystem = fsspec.filesystem("memory") + store = fs.get_mapper(root=self.path) + # noinspection PyTypeChecker + dataset = xr.open_dataset(store, engine="eopf-zarr", op_mode="analysis") + self.assert_dataset_ok(dataset) + + # noinspection PyMethodMayBeStatic + def test_open_dataset_fail(self): + with pytest.raises(FileNotFoundError): + # noinspection PyTypeChecker + xr.open_dataset("test.zarr", engine="eopf-zarr", op_mode="analysis") + + def assert_dataset_ok(self, dataset: Any): self.assertIsInstance(dataset, xr.Dataset) # Note, more detailed analysis is done in `tests/amodes` self.assertEqual( @@ -101,30 +116,14 @@ def test_open_dataset_ok(self): ) self.assertEqual(["spatial_ref", "x", "y"], sorted(dataset.coords.keys())) - # noinspection PyMethodMayBeStatic - def test_open_dataset_fail(self): - fs: fsspec.AbstractFileSystem = fsspec.filesystem("memory") - store = fs.get_mapper(root=self.path) - with pytest.raises( - ValueError, match="Unable to detect analysis mode for input" - ): - # noinspection PyTypeChecker - _dataset = xr.open_dataset(store, engine="eopf-zarr", op_mode="analysis") - # noinspection PyMethodMayBeStatic def test_open_datatree_ok(self): with pytest.raises(NotImplementedError): # noinspection PyTypeChecker - _data_tree = xr.open_datatree( - self.path, engine="eopf-zarr", op_mode="analysis" - ) + _dt = xr.open_datatree(self.path, engine="eopf-zarr", op_mode="analysis") - # noinspection PyMethodMayBeStatic - def test_open_datatree_fail(self): fs: fsspec.AbstractFileSystem = fsspec.filesystem("memory") store = fs.get_mapper(root=self.path) - with pytest.raises( - ValueError, match="Unable to detect analysis mode for input" - ): + with pytest.raises(NotImplementedError): # noinspection PyTypeChecker - _data_tree = xr.open_datatree(store, engine="eopf-zarr", op_mode="analysis") + _dt = xr.open_datatree(store, engine="eopf-zarr", op_mode="analysis") diff --git a/xarray_eopf/amode.py b/xarray_eopf/amode.py index 20d7761..75cb38e 100644 --- a/xarray_eopf/amode.py +++ b/xarray_eopf/amode.py @@ -2,6 +2,8 @@ # Permissions are hereby granted under the terms of the Apache 2.0 License: # https://opensource.org/license/apache-2-0. +from pathlib import Path + from abc import ABC, abstractmethod from collections.abc import Iterable from typing import Any, Optional, Type @@ -20,6 +22,36 @@ class AnalysisMode(ABC): # Product type name, e.g., "MSIL2A" product_type: str + @classmethod + def guess( + cls, source: Any, product_type: str | None = None + ) -> Optional["AnalysisMode"]: + """Guess the suitable analysis mode for the backend xarray input. + + Args: + source: A path or URL or dict-like mapping that acts as a + Zarr store. + product_type: If provided, it must be a valid product type name + for which an analysis mode has been registered. + + Returns: + The analysis mode. + + Raises: + ValueError: if guessing the analysis mode failed. + """ + if product_type: + analysis_mode = AnalysisMode.from_product_type(product_type) + else: + analysis_mode = AnalysisMode.from_source(source) + if analysis_mode is None: + raise ValueError( + "Unable to detect analysis mode for input." + " Use product_type argument to pass one of" + f" {', '.join(map(repr, cls.registry.keys()))}." + ) + return analysis_mode + @classmethod def from_product_type(cls, product_type: str | None) -> Optional["AnalysisMode"]: """Get the analysis mode for given `product_type`.""" @@ -89,6 +121,24 @@ def convert_datatree( A transformed data tree. """ + @classmethod + def _source_to_path(cls, source: Any) -> Optional[str]: + """Derive a path from given `source` object. + This is an implementation helper that may be used by + derived classes in `is_valid_source()`. + """ + path: str | None = None + if isinstance(source, (str, Path)): + path = source + elif hasattr(source, "path"): + path = source.path + elif hasattr(source, "root"): + path = source.root + if isinstance(path, (str, Path)): + return str(path) + else: + return None + class AnalysisModeRegistry: """A simple registry for `AnalysisMode` instances.""" diff --git a/xarray_eopf/amodes/sentinel2.py b/xarray_eopf/amodes/sentinel2.py index 87239b5..38459f1 100644 --- a/xarray_eopf/amodes/sentinel2.py +++ b/xarray_eopf/amodes/sentinel2.py @@ -1,6 +1,7 @@ # Copyright (c) 2025 by EOPF Sample Service team and contributors # Permissions are hereby granted under the terms of the Apache 2.0 License: # https://opensource.org/license/apache-2-0. +from pathlib import Path from abc import ABC from collections.abc import Iterable @@ -67,11 +68,11 @@ class MSI(AnalysisMode, ABC): def is_valid_source(self, source: Any) -> bool: - if not isinstance(source, str): - return False - path: str = source + path = self._source_to_path(source) return ( - f"S2A_{self.product_type}_" in path or f"S2B_{self.product_type}_" in path + (f"S2A_{self.product_type}_" in path or f"S2B_{self.product_type}_" in path) + if path + else False ) def get_applicable_params(self, **kwargs) -> dict[str, any]: diff --git a/xarray_eopf/backend.py b/xarray_eopf/backend.py index 7c271ad..c5672ba 100644 --- a/xarray_eopf/backend.py +++ b/xarray_eopf/backend.py @@ -51,15 +51,16 @@ def open_datatree( [`xarray.open_datatree()`](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html). Args: - filename_or_obj: File path, or URL, or path-like string. + filename_or_obj: File path, or URL, a path-like string, or + a Zarr store, or other key to object mapping. op_mode: Mode of operation, either "analysis" or "native". Defaults to "analysis". - product_type: Product type name, such as `"S2B_MSIL1C"`. - Only used if `op_mode="analysis"` and - only required if `filename_or_obj` is not a path or URL - that refers to a product path adhering to EOPF naming conventions. + product_type: Optional product type name, such as `"MSIL1C"`. + Only used if `op_mode="analysis"`; typically not required + if the filename inherent to `filename_or_obj` + adheres to EOPF naming conventions. protocol: If `filename_or_obj` is a file path or URL, - forces using the filesystem protocol. + it forces using the specified filesystem protocol. Otherwise, the protocol will be derived from the file path or URL. Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html). storage_options: If `filename_or_obj` is a file path or URL, @@ -95,7 +96,9 @@ def open_datatree( if op_mode == OP_MODE_NATIVE: return datatree else: # op_mode == OP_MODE_ANALYSIS - analysis_mode = _guess_analysis_mode(filename_or_obj, product_type) + analysis_mode = AnalysisMode.guess( + filename_or_obj, product_type=product_type + ) return analysis_mode.transform_datatree(datatree) def open_dataset( @@ -126,12 +129,12 @@ def open_dataset( filename_or_obj: File path, or URL, or path-like string. op_mode: Mode of operation, either "analysis" or "native". Defaults to "analysis". - product_type: Product type name, such as `"S2B_MSIL1C"`. - Only used if `op_mode="analysis"` and - only required if `filename_or_obj` is not a path or URL - that refers to a product path adhering to EOPF naming conventions. + product_type: Optional product type name, such as `"MSIL1C"`. + Only used if `op_mode="analysis"`; typically not required + if the filename inherent to `filename_or_obj` + adheres to EOPF naming conventions. protocol: If `filename_or_obj` is a file path or URL, - forces using the filesystem protocol. + it forces using the specified filesystem protocol. Otherwise, the protocol will be derived from the file path or URL. Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html). storage_options: If `filename_or_obj` is a file path or URL, @@ -178,7 +181,9 @@ def open_dataset( dataset = flatten_datatree(datatree, sep=group_sep) dataset = filter_dataset(dataset, variables) else: # op_mode == OP_MODE_ANALYSIS - analysis_mode = _guess_analysis_mode(filename_or_obj, product_type) + analysis_mode = AnalysisMode.guess( + filename_or_obj, product_type=product_type + ) params = analysis_mode.get_applicable_params( resolution=resolution, spline_order=spline_order ) @@ -207,19 +212,6 @@ def guess_can_open( return False -def _guess_analysis_mode( - filename_or_obj: Any, product_type: str | None -) -> AnalysisMode: - analysis_mode: AnalysisMode | None = None - if product_type: - analysis_mode = AnalysisMode.from_product_type(product_type) - if analysis_mode is None: - analysis_mode = AnalysisMode.from_source(filename_or_obj) - if analysis_mode is None: - raise ValueError("Unable to detect analysis mode for input") - return analysis_mode - - def _assert_datatree_is_chunked(datatree: xr.DataTree): for ds_name, ds in flatten_datatree_as_dict(datatree).items(): _assert_dataset_is_chunked(ds, name=ds_name)