Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions tests/amodes/test_sentinel2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

from unittest import TestCase

import fsspec
import numpy as np
import pytest
import xarray as xr
import zarr.storage

from tests.helpers import make_s2_msi_l1c, make_s2_msi_l2a
from xarray_eopf.amode import AnalysisModeRegistry
Expand Down Expand Up @@ -100,9 +102,22 @@ def assert_convert_datatree_fail(self, original_dt: xr.DataTree):
class MSIL1CTest(MSITestMixin, TestCase):
mode = MSIL1C()

def test_is_valid_source(self):
self.assertTrue(self.mode.is_valid_source("S2A_MSIL1C_20240201.zarr"))
self.assertFalse(self.mode.is_valid_source("S2A_MSIL2A_20240201.zarr"))
def test_is_valid_source_ok(self):
    """An L1C product location is accepted as path, Zarr store, and mapper."""
    product_path = "data/S2A_MSIL1C_20240201.zarr"

    # Plain path string
    self.assertTrue(self.mode.is_valid_source(product_path))

    # Zarr directory store
    directory_store = zarr.storage.DirectoryStore(product_path)
    self.assertTrue(self.mode.is_valid_source(directory_store))

    # fsspec key-value mapper on the local filesystem
    local_fs: fsspec.AbstractFileSystem = fsspec.filesystem("local")
    fs_mapper = local_fs.get_mapper(root=product_path)
    self.assertTrue(self.mode.is_valid_source(fs_mapper))

def test_is_no_valid_source(self):
    """An L2A product path and a plain dict are rejected by this mode."""
    is_valid = self.mode.is_valid_source
    self.assertFalse(is_valid("data/S2A_MSIL2A_20240201.zarr"))
    self.assertFalse(is_valid({}))

def test_transform_datatree(self):
Expand Down
53 changes: 50 additions & 3 deletions tests/test_amode.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Copyright (c) 2025 by EOPF Sample Service team and contributors
# Permissions are hereby granted under the terms of the Apache 2.0 License:
# https://opensource.org/license/apache-2-0.
import fsspec
import zarr.storage
from pathlib import Path

import pytest
from typing import Any, Iterable
from unittest import TestCase

Expand All @@ -14,7 +19,7 @@ class TestMode(AnalysisMode):
product_type = "TEST"

def is_valid_source(self, source: Any) -> bool:
return source == "TEST"
return isinstance(source, str) and "TEST" in source

def get_applicable_params(self, **kwargs) -> dict[str, Any]:
    """Return the parameters applicable to this test mode.

    The test mode defines no parameters: all keyword arguments are
    ignored and an empty dictionary is returned.
    """
    # Note: the value type is `typing.Any`, not the builtin function `any`.
    return {}
Expand All @@ -39,14 +44,56 @@ def setUp(self):
def tearDown(self):
    # Unregister `TestMode` from the `AnalysisMode` registry after each test.
    AnalysisMode.registry.unregister(TestMode)

def test_guess_ok(self):
    """`AnalysisMode.guess()` finds the test mode by path or product type."""
    # Detected from the source path
    mode_from_path = AnalysisMode.guess("TEST.zarr")
    self.assertIsInstance(mode_from_path, TestMode)

    # Detected from the explicitly given product type
    mode_from_type = AnalysisMode.guess({}, product_type="TEST")
    self.assertIsInstance(mode_from_type, TestMode)

# noinspection PyMethodMayBeStatic
def test_guess_fail(self):
    """`AnalysisMode.guess()` raises `ValueError` if no mode can be found."""
    expected_msg = "Unable to detect analysis mode for input"

    # The source path matches no registered analysis mode.
    with pytest.raises(ValueError, match=expected_msg):
        AnalysisMode.guess("REST.zarr")

    # The source is not a path and the product type is unknown.
    with pytest.raises(ValueError, match=expected_msg):
        AnalysisMode.guess({}, product_type="REST")

    # An explicit but unknown product type is not rescued by a
    # source path that would otherwise match.
    with pytest.raises(ValueError, match=expected_msg):
        AnalysisMode.guess("TEST.zarr", product_type="REST")

def test_from_source(self):
self.assertIsInstance(AnalysisMode.from_source("TEST"), TestMode)
self.assertIsNone(AnalysisMode.from_source("REST"))
self.assertIsInstance(AnalysisMode.from_source("TEST.zarr"), TestMode)
self.assertIsNone(AnalysisMode.from_source("REST.zarr"))
self.assertIsNone(AnalysisMode.from_source({}))

def test_from_product_type(self):
    """A registered product type yields its mode, an unknown one `None`."""
    known_mode = AnalysisMode.from_product_type("TEST")
    self.assertIsInstance(known_mode, TestMode)

    unknown_mode = AnalysisMode.from_product_type("REST")
    self.assertIsNone(unknown_mode)

def test_source_to_path(self):
# From str
self.assertEqual("test1.zarr", AnalysisMode._source_to_path("test1.zarr"))

# From pathlib.Path
self.assertEqual("test2.zarr", AnalysisMode._source_to_path(Path("test2.zarr")))

# From fsspec.FSMap
path = AnalysisMode._source_to_path(
fsspec.filesystem("local").get_mapper("test3.zarr")
)
self.assertEqual("test3.zarr", Path(path).name)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.assertEqual("test3.zarr", Path(path).name)
self.assertEqual("test3.zarr", path)

Or am I missing something?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine! Copy/paste.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, it was correct! Path(path).name gets the filename of an absolute path.


# From zarr.storage.DirectoryStore
path = AnalysisMode._source_to_path(zarr.storage.DirectoryStore("test4.zarr"))
self.assertEqual("test4.zarr", Path(path).name)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.assertEqual("test4.zarr", Path(path).name)
self.assertEqual("test4.zarr", path)

See comment above

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, it is correct.


# From dict
self.assertEqual(None, AnalysisMode._source_to_path({"path": "test5.zarr"}))


class AnalysisModeRegistryTest(TestCase):
# noinspection PyMethodMayBeStatic
Expand Down
39 changes: 19 additions & 20 deletions tests/test_backend.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2025 by EOPF Sample Service team and contributors
# Permissions are hereby granted under the terms of the Apache 2.0 License:
# https://opensource.org/license/apache-2-0.

from typing import Any
from unittest import TestCase

import fsspec
Expand Down Expand Up @@ -78,6 +78,21 @@ def setUpClass(cls):
def test_open_dataset_ok(self):
    """Open the test product in analysis mode from a path and a mapper."""
    open_kwargs = {"engine": "eopf-zarr", "op_mode": "analysis"}

    # Open directly from `self.path`
    # noinspection PyTypeChecker
    ds_from_path = xr.open_dataset(self.path, **open_kwargs)
    self.assert_dataset_ok(ds_from_path)

    # Open from a key-value mapper on the "memory" filesystem
    memory_fs: fsspec.AbstractFileSystem = fsspec.filesystem("memory")
    mapper = memory_fs.get_mapper(root=self.path)
    # noinspection PyTypeChecker
    ds_from_mapper = xr.open_dataset(mapper, **open_kwargs)
    self.assert_dataset_ok(ds_from_mapper)

# noinspection PyMethodMayBeStatic
def test_open_dataset_fail(self):
    """Opening a path expected not to exist raises `FileNotFoundError`."""
    missing_path = "test.zarr"
    with pytest.raises(FileNotFoundError):
        # noinspection PyTypeChecker
        xr.open_dataset(missing_path, engine="eopf-zarr", op_mode="analysis")

def assert_dataset_ok(self, dataset: Any):
self.assertIsInstance(dataset, xr.Dataset)
# Note, more detailed analysis is done in `tests/amodes`
self.assertEqual(
Expand All @@ -101,30 +116,14 @@ def test_open_dataset_ok(self):
)
self.assertEqual(["spatial_ref", "x", "y"], sorted(dataset.coords.keys()))

# noinspection PyMethodMayBeStatic
def test_open_dataset_fail(self):
fs: fsspec.AbstractFileSystem = fsspec.filesystem("memory")
store = fs.get_mapper(root=self.path)
with pytest.raises(
ValueError, match="Unable to detect analysis mode for input"
):
# noinspection PyTypeChecker
_dataset = xr.open_dataset(store, engine="eopf-zarr", op_mode="analysis")

# noinspection PyMethodMayBeStatic
def test_open_datatree_ok(self):
with pytest.raises(NotImplementedError):
# noinspection PyTypeChecker
_data_tree = xr.open_datatree(
self.path, engine="eopf-zarr", op_mode="analysis"
)
_dt = xr.open_datatree(self.path, engine="eopf-zarr", op_mode="analysis")

# noinspection PyMethodMayBeStatic
def test_open_datatree_fail(self):
fs: fsspec.AbstractFileSystem = fsspec.filesystem("memory")
store = fs.get_mapper(root=self.path)
with pytest.raises(
ValueError, match="Unable to detect analysis mode for input"
):
with pytest.raises(NotImplementedError):
# noinspection PyTypeChecker
_data_tree = xr.open_datatree(store, engine="eopf-zarr", op_mode="analysis")
_dt = xr.open_datatree(store, engine="eopf-zarr", op_mode="analysis")
50 changes: 50 additions & 0 deletions xarray_eopf/amode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# Permissions are hereby granted under the terms of the Apache 2.0 License:
# https://opensource.org/license/apache-2-0.

from pathlib import Path

from abc import ABC, abstractmethod
from collections.abc import Iterable
from typing import Any, Optional, Type
Expand All @@ -20,6 +22,36 @@ class AnalysisMode(ABC):
# Product type name, e.g., "MSIL2A"
product_type: str

@classmethod
def guess(cls, source: Any, product_type: str | None = None) -> "AnalysisMode":
    """Guess the suitable analysis mode for the backend xarray input.

    If `product_type` is given, it alone determines the analysis mode;
    `source` is only inspected if no product type is provided.

    Args:
        source: A path or URL or dict-like mapping that acts as a
            Zarr store.
        product_type: If provided, it must be a valid product type name
            for which an analysis mode has been registered.

    Returns:
        The analysis mode. This method never returns `None`.

    Raises:
        ValueError: if guessing the analysis mode failed.
    """
    if product_type:
        # An explicit product type takes precedence; there is no
        # fallback to `source` if the product type is unknown.
        analysis_mode = AnalysisMode.from_product_type(product_type)
    else:
        analysis_mode = AnalysisMode.from_source(source)

    if analysis_mode is None:
        registered_types = ", ".join(map(repr, cls.registry.keys()))
        raise ValueError(
            "Unable to detect analysis mode for input."
            " Use product_type argument to pass one of"
            f" {registered_types}."
        )
    return analysis_mode

@classmethod
def from_product_type(cls, product_type: str | None) -> Optional["AnalysisMode"]:
"""Get the analysis mode for given `product_type`."""
Expand Down Expand Up @@ -89,6 +121,24 @@ def convert_datatree(
A transformed data tree.
"""

@classmethod
def _source_to_path(cls, source: Any) -> Optional[str]:
"""Derive a path from given `source` object.
This is an implementation helper that may be used by
derived classes in `is_valid_source()`.
"""
path: str | None = None
if isinstance(source, (str, Path)):
path = source
elif hasattr(source, "path"):
path = source.path
elif hasattr(source, "root"):
path = source.root
if isinstance(path, (str, Path)):
return str(path)
else:
return None


class AnalysisModeRegistry:
"""A simple registry for `AnalysisMode` instances."""
Expand Down
9 changes: 5 additions & 4 deletions xarray_eopf/amodes/sentinel2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2025 by EOPF Sample Service team and contributors
# Permissions are hereby granted under the terms of the Apache 2.0 License:
# https://opensource.org/license/apache-2-0.
from pathlib import Path

from abc import ABC
from collections.abc import Iterable
Expand Down Expand Up @@ -67,11 +68,11 @@

class MSI(AnalysisMode, ABC):
def is_valid_source(self, source: Any) -> bool:
if not isinstance(source, str):
return False
path: str = source
path = self._source_to_path(source)
return (
f"S2A_{self.product_type}_" in path or f"S2B_{self.product_type}_" in path
(f"S2A_{self.product_type}_" in path or f"S2B_{self.product_type}_" in path)
if path
else False
)

def get_applicable_params(self, **kwargs) -> dict[str, any]:
Expand Down
44 changes: 18 additions & 26 deletions xarray_eopf/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,16 @@ def open_datatree(
[`xarray.open_datatree()`](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

Args:
filename_or_obj: File path, or URL, or path-like string.
filename_or_obj: File path, URL, path-like string, Zarr store,
or other key-to-object mapping.
op_mode: Mode of operation, either "analysis" or "native".
Defaults to "analysis".
product_type: Product type name, such as `"S2B_MSIL1C"`.
Only used if `op_mode="analysis"` and
only required if `filename_or_obj` is not a path or URL
that refers to a product path adhering to EOPF naming conventions.
product_type: Optional product type name, such as `"MSIL1C"`.
Only used if `op_mode="analysis"`; typically not required
if the filename inherent to `filename_or_obj`
adheres to EOPF naming conventions.
protocol: If `filename_or_obj` is a file path or URL,
forces using the filesystem protocol.
it forces using the specified filesystem protocol.
Otherwise, the protocol will be derived from the file path or URL.
Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
storage_options: If `filename_or_obj` is a file path or URL,
Expand Down Expand Up @@ -95,7 +96,9 @@ def open_datatree(
if op_mode == OP_MODE_NATIVE:
return datatree
else: # op_mode == OP_MODE_ANALYSIS
analysis_mode = _guess_analysis_mode(filename_or_obj, product_type)
analysis_mode = AnalysisMode.guess(
filename_or_obj, product_type=product_type
)
return analysis_mode.transform_datatree(datatree)

def open_dataset(
Expand Down Expand Up @@ -126,12 +129,12 @@ def open_dataset(
filename_or_obj: File path, URL, path-like string, Zarr store,
or other key-to-object mapping.
op_mode: Mode of operation, either "analysis" or "native".
Defaults to "analysis".
product_type: Product type name, such as `"S2B_MSIL1C"`.
Only used if `op_mode="analysis"` and
only required if `filename_or_obj` is not a path or URL
that refers to a product path adhering to EOPF naming conventions.
product_type: Optional product type name, such as `"MSIL1C"`.
Only used if `op_mode="analysis"`; typically not required
if the filename inherent to `filename_or_obj`
adheres to EOPF naming conventions.
protocol: If `filename_or_obj` is a file path or URL,
forces using the filesystem protocol.
it forces using the specified filesystem protocol.
Otherwise, the protocol will be derived from the file path or URL.
Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
storage_options: If `filename_or_obj` is a file path or URL,
Expand Down Expand Up @@ -178,7 +181,9 @@ def open_dataset(
dataset = flatten_datatree(datatree, sep=group_sep)
dataset = filter_dataset(dataset, variables)
else: # op_mode == OP_MODE_ANALYSIS
analysis_mode = _guess_analysis_mode(filename_or_obj, product_type)
analysis_mode = AnalysisMode.guess(
filename_or_obj, product_type=product_type
)
params = analysis_mode.get_applicable_params(
resolution=resolution, spline_order=spline_order
)
Expand Down Expand Up @@ -207,19 +212,6 @@ def guess_can_open(
return False


def _guess_analysis_mode(
filename_or_obj: Any, product_type: str | None
) -> AnalysisMode:
analysis_mode: AnalysisMode | None = None
if product_type:
analysis_mode = AnalysisMode.from_product_type(product_type)
if analysis_mode is None:
analysis_mode = AnalysisMode.from_source(filename_or_obj)
if analysis_mode is None:
raise ValueError("Unable to detect analysis mode for input")
return analysis_mode


def _assert_datatree_is_chunked(datatree: xr.DataTree):
for ds_name, ds in flatten_datatree_as_dict(datatree).items():
_assert_dataset_is_chunked(ds, name=ds_name)
Expand Down