diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9320babba..9d80add18 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,6 @@ jobs: matrix: include: - { python: "3.13", os: "ubuntu-latest", session: "pre-commit" } - # - { python: "3.13", os: "ubuntu-latest", session: "safety" } # - { python: "3.13", os: "ubuntu-latest", session: "mypy" } # - { python: "3.12", os: "ubuntu-latest", session: "mypy" } # - { python: "3.11", os: "ubuntu-latest", session: "mypy" } diff --git a/docs/api_reference.md b/docs/api_reference.md index 0ceba4f56..f819ea3eb 100644 --- a/docs/api_reference.md +++ b/docs/api_reference.md @@ -1,12 +1,5 @@ # API Reference -## Readers / Writers - -```{eval-rst} -.. automodule:: mdio.api.accessor - :members: -``` - ## Data Converters ### Seismic Data @@ -33,21 +26,10 @@ and ```{eval-rst} .. automodule:: mdio.converters.segy :members: - :exclude-members: grid_density_qc, parse_index_types, get_compressor + :exclude-members: grid_density_qc, parse_index_types, get_compressor, populate_dim_coordinates, populate_non_dim_coordinates .. automodule:: mdio.converters.mdio :members: - -.. automodule:: mdio.converters.numpy - :members: -``` - -## Convenience Functions - -```{eval-rst} -.. automodule:: mdio.api.convenience - :members: - :exclude-members: create_rechunk_plan, write_rechunked_values ``` ## Core Functionality @@ -58,17 +40,3 @@ and .. automodule:: mdio.core.dimension :members: ``` - -### Creation - -```{eval-rst} -.. automodule:: mdio.core.factory - :members: -``` - -### Data I/O - -```{eval-rst} -.. automodule:: mdio.core.serialization - :members: -``` diff --git a/docs/data_models/chunk_grids.md b/docs/data_models/chunk_grids.md index 5f178a04b..c9b47a724 100644 --- a/docs/data_models/chunk_grids.md +++ b/docs/data_models/chunk_grids.md @@ -2,7 +2,7 @@ :tocdepth: 3 ``` -```{currentModule} mdio.schemas.chunk_grid +```{currentModule} mdio.builder.schemas.chunk_grid ``` diff --git a/docs/data_models/compressors.md b/docs/data_models/compressors.md index 17a1afd87..eeee98841 100644 --- a/docs/data_models/compressors.md +++ b/docs/data_models/compressors.md @@ -2,7 +2,7 @@ :tocdepth: 3 ``` -```{currentModule} mdio.schemas.compressors +```{currentModule} mdio.builder.schemas.compressors ``` @@ -65,20 +65,6 @@ For more details about compression modes, see [ZFP Documentation]. ```{eval-rst} .. autopydantic_model:: Blosc - ----------- - -.. autoclass:: BloscAlgorithm() - :members: - :undoc-members: - :member-order: bysource - ----------- - -.. autoclass:: BloscShuffle() - :members: - :undoc-members: - :member-order: bysource ``` ::: diff --git a/docs/data_models/data_types.md b/docs/data_models/data_types.md index b58ff5357..eeed5b80a 100644 --- a/docs/data_models/data_types.md +++ b/docs/data_models/data_types.md @@ -2,7 +2,7 @@ :tocdepth: 3 ``` -```{currentModule} mdio.schemas.dtype +```{currentModule} mdio.builder.schemas.dtype ``` diff --git a/docs/data_models/dimensions.md b/docs/data_models/dimensions.md index c04b9206b..7f4a443f7 100644 --- a/docs/data_models/dimensions.md +++ b/docs/data_models/dimensions.md @@ -2,7 +2,7 @@ :tocdepth: 3 ``` -```{currentModule} mdio.schemas.dimension +```{currentModule} mdio.builder.schemas.dimension ``` diff --git a/docs/data_models/index.md b/docs/data_models/index.md index 191f937b7..861ea190b 100644 --- a/docs/data_models/index.md +++ b/docs/data_models/index.md @@ -5,6 +5,5 @@ This section contains the data models for the MDIO format. 
```{toctree} :maxdepth: 2 -version_0 version_1 ``` diff --git a/docs/data_models/version_0.md b/docs/data_models/version_0.md deleted file mode 100644 index bac21ff6b..000000000 --- a/docs/data_models/version_0.md +++ /dev/null @@ -1,55 +0,0 @@ -```{eval-rst} -:tocdepth: 3 -``` - -```{currentModule} mdio.schemas.v0.dataset - -``` - -# MDIO v0 - -```{article-info} -:author: Altay Sansal -:date: "{sub-ref}`today`" -:read-time: "{sub-ref}`wordcount-minutes` min read" -:class-container: sd-p-0 sd-outline-muted sd-rounded-3 sd-font-weight-light -``` - -## Intro - -```{eval-rst} -.. autosummary:: - - DatasetModelV0 - VariableModelV0 - DatasetMetadataModelV0 - DimensionModelV0 -``` - -## Reference - -:::{dropdown} Dataset -:open: - -```{eval-rst} -.. autopydantic_model:: DatasetModelV0 - :inherited-members: BaseModel - -.. autopydantic_model:: DatasetMetadataModelV0 - :inherited-members: BaseModel - -.. autopydantic_model:: DimensionModelV0 - :inherited-members: BaseModel -``` - -::: - -:::{dropdown} Variable -:open: - -```{eval-rst} -.. autopydantic_model:: VariableModelV0 - :inherited-members: BaseModel -``` - -::: diff --git a/docs/data_models/version_1.md b/docs/data_models/version_1.md index 2fd397538..e92f3441c 100644 --- a/docs/data_models/version_1.md +++ b/docs/data_models/version_1.md @@ -2,7 +2,7 @@ :tocdepth: 3 ``` -```{currentModule} mdio.schemas.v1.dataset +```{currentModule} mdio.builder.schemas.v1.dataset ``` @@ -39,19 +39,16 @@ :::{dropdown} Variable ```{eval-rst} -.. autopydantic_model:: mdio.schemas.v1.variable.Variable +.. autopydantic_model:: mdio.builder.schemas.v1.variable.Variable :inherited-members: BaseModel -.. autopydantic_model:: mdio.schemas.v1.variable.Coordinate +.. autopydantic_model:: mdio.builder.schemas.v1.variable.Coordinate :inherited-members: BaseModel -.. autopydantic_model:: mdio.schemas.v1.variable.CoordinateMetadata +.. autopydantic_model:: mdio.builder.schemas.v1.variable.CoordinateMetadata :inherited-members: BaseModel -.. automodule:: mdio.schemas.metadata - :members: UserAttributes - -.. autopydantic_model:: mdio.schemas.v1.variable.VariableMetadata +.. autopydantic_model:: mdio.builder.schemas.v1.variable.VariableMetadata :inherited-members: BaseModel ``` @@ -60,11 +57,7 @@ :::{dropdown} Units ```{eval-rst} -.. autopydantic_model:: mdio.schemas.v1.units.AllUnits -``` - -```{eval-rst} -.. automodule:: mdio.schemas.v1.units +.. automodule:: mdio.builder.schemas.v1.units :members: LengthUnitModel, TimeUnitModel, AngleUnitModel, @@ -79,14 +72,12 @@ :::{dropdown} Stats ```{eval-rst} -.. autopydantic_model:: mdio.schemas.v1.stats.StatisticsMetadata - -.. autopydantic_model:: mdio.schemas.v1.stats.SummaryStatistics +.. autopydantic_model:: mdio.builder.schemas.v1.stats.SummaryStatistics -.. autopydantic_model:: mdio.schemas.v1.stats.EdgeDefinedHistogram +.. autopydantic_model:: mdio.builder.schemas.v1.stats.EdgeDefinedHistogram :inherited-members: BaseModel -.. autopydantic_model:: mdio.schemas.v1.stats.CenteredBinHistogram +.. autopydantic_model:: mdio.builder.schemas.v1.stats.CenteredBinHistogram :inherited-members: BaseModel ``` @@ -95,37 +86,37 @@ :::{dropdown} Enums ```{eval-rst} -.. autoclass:: mdio.schemas.v1.units.AngleUnitEnum() +.. autoclass:: mdio.builder.schemas.v1.units.AngleUnitEnum() :members: :undoc-members: :member-order: bysource -.. autoclass:: mdio.schemas.v1.units.DensityUnitEnum() +.. autoclass:: mdio.builder.schemas.v1.units.DensityUnitEnum() :members: :undoc-members: :member-order: bysource -.. 
autoclass:: mdio.schemas.v1.units.FrequencyUnitEnum() +.. autoclass:: mdio.builder.schemas.v1.units.FrequencyUnitEnum() :members: :undoc-members: :member-order: bysource -.. autoclass:: mdio.schemas.v1.units.LengthUnitEnum() +.. autoclass:: mdio.builder.schemas.v1.units.LengthUnitEnum() :members: :undoc-members: :member-order: bysource -.. autoclass:: mdio.schemas.v1.units.SpeedUnitEnum() +.. autoclass:: mdio.builder.schemas.v1.units.SpeedUnitEnum() :members: :undoc-members: :member-order: bysource -.. autoclass:: mdio.schemas.v1.units.TimeUnitEnum() +.. autoclass:: mdio.builder.schemas.v1.units.TimeUnitEnum() :members: :undoc-members: :member-order: bysource -.. autoclass:: mdio.schemas.v1.units.VoltageUnitEnum() +.. autoclass:: mdio.builder.schemas.v1.units.VoltageUnitEnum() :members: :undoc-members: :member-order: bysource diff --git a/docs/template_registry.md b/docs/template_registry.md index 3b51de811..41ce2fb76 100644 --- a/docs/template_registry.md +++ b/docs/template_registry.md @@ -1,4 +1,4 @@ -# Template Registry Singleton +# Template Registry A thread-safe singleton registry for managing dataset templates in MDIO applications. @@ -27,7 +27,7 @@ The `TemplateRegistry` implements the singleton pattern to ensure there's only o ### Basic Usage ```python -from mdio.schemas.v1.templates.template_registry import TemplateRegistry +from mdio.builder.template_registry import TemplateRegistry # Get the singleton instance registry = TemplateRegistry() @@ -37,7 +37,7 @@ registry = TemplateRegistry.get_instance() # Register a template template = MyDatasetTemplate() -template_name=registry.register(template) +template_name = registry.register(template) print(f"Registered template named {template_name}") # Retrieve a template using a well-known name @@ -58,7 +58,7 @@ template_names = registry.list_all_templates() For convenience, you can use global functions that operate on the singleton instance: ```python -from mdio.schemas.v1.templates.template_registry import ( +from mdio.builder.template_registry import ( register_template, get_template, is_template_registered, @@ -94,81 +94,10 @@ assert registry1 is registry2 is registry3 ## API Reference -### Core Methods - -#### `register(instance: AbstractDatasetTemplate) -> str` - -Registers a template instance and returns its normalized name. - -- **Parameters:** - - `instance`: Template instance implementing `AbstractDatasetTemplate` -- **Returns:** The template name -- **Raises:** `ValueError` if template name is already registered - -#### `get(template_name: str) -> AbstractDatasetTemplate` - -Retrieves a registered template by name. - -- **Parameters:** - - `template_name`: Name of the template (case-insensitive) -- **Returns:** The registered template instance -- **Raises:** `KeyError` if template is not registered - -#### `unregister(template_name: str) -> None` - -Removes a template from the registry. - -- **Parameters:** - - `template_name`: Name of the template to remove -- **Raises:** `KeyError` if template is not registered - -#### `is_registered(template_name: str) -> bool` - -Checks if a template is registered. - -- **Parameters:** - - `template_name`: Name of the template to check -- **Returns:** `True` if template is registered, `False` otherwise - -#### `list_all_templates() -> List[str]` - -Returns a list of all registered template names. - -- **Returns:** List of template names - -#### `clear() -> None` - -Removes all registered templates. Useful for testing. 
- -### Class Methods - -#### `get_instance() -> TemplateRegistry` - -Alternative way to get the singleton instance. - -- **Returns:** The singleton registry instance - -### Global Functions - -#### `get_template_registry() -> TemplateRegistry` - -Returns the global singleton registry instance. - -#### `register_template(template: AbstractDatasetTemplate) -> str` - -Registers a template in the global registry. - -#### `get_template(name: str) -> AbstractDatasetTemplate` - -Gets a template from the global registry. - -#### `is_template_registered(name: str) -> bool` - -Checks if a template is registered in the global registry. - -#### `list_templates() -> List[str]` - -Lists all templates in the global registry. +```{eval-rst} +.. automodule:: mdio.builder.template_registry + :members: +``` ## Thread Safety @@ -201,10 +130,11 @@ for thread in threads: ## Example: Complete Template Management ```python -from mdio.schemas.v1.templates.template_registry import TemplateRegistry -from mdio.schemas.v1.templates.seismic_3d_poststack import Seismic3DPostStackTemplate -from mdio.schemas.v1.templates.seismic_3d_prestack_time import Seismic3DPostStackTimeTemplate -from mdio.schemas.v1.templates.seismic_3d_prestack import Seismic3DPreStackTemplate +from mdio.builder.template_registry import TemplateRegistry +from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate +from mdio.builder.schemas.v1 import Seismic3DPostStackTimeTemplate +from mdio.builder.schemas.v1 import Seismic3DPreStackTemplate + def setup_templates(): """Register MDIO templates runtime. @@ -221,17 +151,14 @@ def setup_templates(): print(f"Registered templates: {list_templates()}") + # Application startup setup_standard_templates() # Later in the application template = TemplateRegistry().get_template("PostStack3DDepth") dataset = template.create_dataset(name="Seismic 3d m/m/ft", - sizes = [256, 512, 384] - coord_units = [ - AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)), - AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)), - AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.FOOT))] + sizes=[256, 512, 384]) ``` ## Error Handling diff --git a/pyproject.toml b/pyproject.toml index 3045670bc..9716378b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,22 +20,21 @@ dependencies = [ "click (>=8.2.1,<9.0.0)", "click-params (>=0.5.0,<0.6.0)", "dask (>=2025.7.0)", - "fsspec (>=2025.7.0)", - "pint>=0.24.4,<0.25", + "fsspec (>=2025.9.0)", + "pint>=0.25.0,<1.0.0", "psutil (>=7.0.0,<8.0.0)", "pydantic (>=2.11.7,<3.0.0)", - "pydantic-settings (>=2.10.1,<3.0.0)", "rich (>=14.1.0,<15.0.0)", "segy (>=0.4.2,<0.5.0)", "tqdm (>=4.67.1,<5.0.0)", "universal-pathlib>=0.2.6", - "xarray>=2025.7.1", + "xarray>=2025.9.0", "zarr (>=3.1.2,<4.0.0)", ] [project.optional-dependencies] -cloud = ["s3fs (>=2025.7.0)", "gcsfs (>=2025.7.0)", "adlfs (>=2024.12.0)"] -distributed = ["distributed (>=2025.7.0)", "bokeh (>=3.7.3,<4.0.0)"] +cloud = ["s3fs (>=2025.9.0)", "gcsfs (>=2025.9.0)", "adlfs (>=2025.8.0)"] +distributed = ["distributed (>=2025.7.0)", "bokeh (>=3.8.0,<4.0.0)"] lossy = ["zfpy (>=1.0.1,<2.0.0)"] [project.urls] @@ -48,32 +47,31 @@ mdio = "mdio.__main__:main" [dependency-groups] dev = [ - "ruff (>=0.12.1)", - "coverage[toml] (>=7.9.1,<8)", - "mypy (>=1.16.1,<2)", - "pre-commit (>=4.2.0,<5)", - "pre-commit-hooks (>=5.0.0,<6)", - "pytest (>=8.4.1,<9)", + "ruff (>=0.12.2)", + "coverage[toml] (>=7.10.6,<8)", + "mypy (>=1.17.1,<2)", + "pre-commit (>=4.3.0,<5)", + "pre-commit-hooks (>=6.0.0,<7)", + 
"pytest (>=8.4.2,<9)", "pytest-dependency (>=0.6.0,<0.7)", - # "safety (>=3.5.2,<4)", # too tight pydantic and psutil dependency "typeguard (>=4.4.4,<5)", "xdoctest[colors] (>=1.2.0,<2)", "Pygments (>=2.19.2,<3)", ] docs = [ - "furo (>=2024.8.6)", + "furo (>=2025.7.19)", "linkify-it-py (>=2.0.3)", - "myst-nb (>=1.2.0)", + "myst-nb (>=1.3.0)", "sphinx (>=8.2.3,<9)", - "sphinx-autobuild (>=2024.10.3)", + "sphinx-autobuild (>=2025.8.25)", "sphinx-click (>=6.0.0,<7)", "sphinx-copybutton (>=0.5.2,<0.6)", "sphinx-design (>=0.6.1,<0.7)", ] [tool.uv] -required-version = ">=0.6.11" +required-version = ">=0.8.15" [tool.ruff] target-version = "py311" @@ -82,37 +80,37 @@ line-length = 120 [tool.ruff.lint] select = [ - "E", # pycodestyle - "F", # pyflakes - "B", # bugbear - "I", # isort - "UP", # pyupgrade - "N", # pep8-naming - "D", # pydocstyle - "ANN", # annotations - "S", # bandit - "A", # builtins - "C4", # comprehensions - "DTZ", # datetimez - "EM", # errmsg - "ICN", # import-conventions - "PIE", # pie - "PT", # pytest-style - "RSE", # raise - "RET", # return - "SIM", # simplify - "TID", # tidy-imports - "TC", # type-checking - "ARG", # unused-arguments - "PTH", # use-pathlib - "TD", # todos - "PL", # pylint - "FLY", # flynt - "NPY201", # numpy - "LOG", # logging - "G", # logging-format - "PERF", # perflint - "FA", # flake8-future-annotations + "E", # pycodestyle + "F", # pyflakes + "B", # bugbear + "I", # isort + "UP", # pyupgrade + "N", # pep8-naming + "D", # pydocstyle + "ANN", # annotations + "S", # bandit + "A", # builtins + "C4", # comprehensions + "DTZ", # datetimez + "EM", # errmsg + "ICN", # import-conventions + "PIE", # pie + "PT", # pytest-style + "RSE", # raise + "RET", # return + "SIM", # simplify + "TID", # tidy-imports + "TC", # type-checking + "ARG", # unused-arguments + "PTH", # use-pathlib + "TD", # todos + "PL", # pylint + "FLY", # flynt + "NPY", # numpy + "LOG", # logging + "G", # logging-format + "PERF", # perflint + "FA", # flake8-future-annotations ] ignore = [ diff --git a/src/mdio/__init__.py b/src/mdio/__init__.py index 5d14853c1..5fed389c8 100644 --- a/src/mdio/__init__.py +++ b/src/mdio/__init__.py @@ -7,15 +7,16 @@ from mdio.converters import mdio_to_segy from mdio.converters import segy_to_mdio +try: + __version__ = metadata.version("multidimio") +except metadata.PackageNotFoundError: + __version__ = "unknown" + + __all__ = [ + "__version__", "open_mdio", "to_mdio", "mdio_to_segy", "segy_to_mdio", ] - - -try: - __version__ = metadata.version("multidimio") -except metadata.PackageNotFoundError: - __version__ = "unknown" diff --git a/src/mdio/builder/__init__.py b/src/mdio/builder/__init__.py new file mode 100644 index 000000000..bcfa2298f --- /dev/null +++ b/src/mdio/builder/__init__.py @@ -0,0 +1 @@ +"""MDIO building utilities.""" diff --git a/src/mdio/schemas/v1/dataset_builder.py b/src/mdio/builder/dataset_builder.py similarity index 79% rename from src/mdio/schemas/v1/dataset_builder.py rename to src/mdio/builder/dataset_builder.py index ba9fe528a..0f3769457 100644 --- a/src/mdio/schemas/v1/dataset_builder.py +++ b/src/mdio/builder/dataset_builder.py @@ -4,30 +4,20 @@ from datetime import datetime from enum import Enum from enum import auto -from importlib import metadata from typing import Any -from typing import TypeAlias - -from pydantic import BaseModel - -from mdio.schemas.compressors import ZFP -from mdio.schemas.compressors import Blosc -from mdio.schemas.dimension import NamedDimension -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype 
import StructuredType -from mdio.schemas.metadata import ChunkGridMetadata -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.dataset import DatasetInfo -from mdio.schemas.v1.stats import StatisticsMetadata -from mdio.schemas.v1.units import AllUnits -from mdio.schemas.v1.variable import Coordinate -from mdio.schemas.v1.variable import Variable - -AnyMetadataList: TypeAlias = list[AllUnits | UserAttributes | ChunkGridMetadata | StatisticsMetadata | DatasetInfo] -CoordinateMetadataList: TypeAlias = list[AllUnits | UserAttributes] -VariableMetadataList: TypeAlias = list[AllUnits | UserAttributes | ChunkGridMetadata | StatisticsMetadata] -DatasetMetadataList: TypeAlias = list[DatasetInfo | UserAttributes] + +from mdio import __version__ +from mdio.builder.schemas.compressors import ZFP +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.dimension import NamedDimension +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.dataset import DatasetMetadata +from mdio.builder.schemas.v1.variable import Coordinate +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.schemas.v1.variable import Variable +from mdio.builder.schemas.v1.variable import VariableMetadata class _BuilderState(Enum): @@ -56,25 +46,6 @@ def _get_named_dimension(dimensions: list[NamedDimension], name: str, size: int return nd -def _to_dictionary(val: BaseModel | dict[str, Any] | AnyMetadataList) -> dict[str, Any]: - """Convert a dictionary, list or pydantic BaseModel to a dictionary.""" - if val is None: - return None - if isinstance(val, BaseModel): - return val.model_dump(mode="json", by_alias=True) - if isinstance(val, dict): - return val - if isinstance(val, list): - metadata_dict = {} - for md in val: - if md is None: - continue - metadata_dict.update(_to_dictionary(md)) - return metadata_dict - msg = f"Expected BaseModel, dict or list, got {type(val).__name__}" - raise TypeError(msg) - - class MDIODatasetBuilder: """Builder for creating MDIO datasets with enforced build order. @@ -86,14 +57,13 @@ class MDIODatasetBuilder: 4. Must call build() to create the dataset. """ - def __init__(self, name: str, attributes: UserAttributes | None = None): - try: - api_version = metadata.version("multidimio") - except metadata.PackageNotFoundError: - api_version = "unknown" - - self._info = DatasetInfo(name=name, api_version=api_version, created_on=datetime.now(UTC)) - self._attributes = attributes + def __init__(self, name: str, attributes: dict[str, Any] | None = None): + self._metadata = DatasetMetadata( + name=name, + api_version=__version__, + created_on=datetime.now(UTC), + attributes=attributes, + ) self._dimensions: list[NamedDimension] = [] self._coordinates: list[Coordinate] = [] self._variables: list[Variable] = [] @@ -139,7 +109,7 @@ def add_coordinate( # noqa: PLR0913 dimensions: tuple[str, ...], data_type: ScalarType, compressor: Blosc | ZFP | None = None, - metadata_info: CoordinateMetadataList | None = None, + metadata: CoordinateMetadata | None = None, ) -> "MDIODatasetBuilder": """Add a coordinate after adding at least one dimension. 
@@ -152,7 +122,7 @@ def add_coordinate( # noqa: PLR0913 dimensions: List of dimension names that the coordinate is associated with data_type: Data type for the coordinate (defaults to FLOAT32) compressor: Compressor used for the variable (defaults to None) - metadata_info: Optional metadata information for the coordinate + metadata: Optional metadata information for the coordinate Raises: ValueError: If no dimensions have been added yet. @@ -188,18 +158,18 @@ def add_coordinate( # noqa: PLR0913 raise ValueError(msg) named_dimensions.append(nd) - meta_dict = _to_dictionary(metadata_info) coord = Coordinate( name=name, - longName=long_name, + long_name=long_name, dimensions=named_dimensions, compressor=compressor, - dataType=data_type, - metadata=meta_dict, + data_type=data_type, + metadata=metadata, ) self._coordinates.append(coord) # Add a coordinate variable to the dataset + var_metadata = None if coord.metadata is None else VariableMetadata(**coord.metadata.model_dump()) self.add_variable( name=coord.name, long_name=coord.long_name, @@ -207,7 +177,7 @@ def add_coordinate( # noqa: PLR0913 data_type=coord.data_type, compressor=compressor, coordinates=[name], # Use the coordinate name as a reference - metadata_info=coord.metadata, + metadata=var_metadata, ) self._state = _BuilderState.HAS_COORDINATES @@ -222,7 +192,7 @@ def add_variable( # noqa: PLR0913 data_type: ScalarType | StructuredType, compressor: Blosc | ZFP | None = None, coordinates: tuple[str, ...] | None = None, - metadata_info: VariableMetadataList | None = None, + metadata: VariableMetadata | None = None, ) -> "MDIODatasetBuilder": """Add a variable after adding at least one dimension and, optionally, coordinate. @@ -241,7 +211,7 @@ def add_variable( # noqa: PLR0913 compressor: Compressor used for the variable (defaults to None) coordinates: List of coordinate names that the variable is associated with (defaults to None, meaning no coordinates) - metadata_info: Optional metadata information for the variable + metadata: Optional metadata information for the variable Raises: ValueError: If no dimensions have been added yet. 
@@ -294,7 +264,6 @@ def add_variable( # noqa: PLR0913 if coordinates is not None and len(coordinates) == 1 and coordinates[0] == name: coordinates = coordinate_objs - meta_dict = _to_dictionary(metadata_info) var = Variable( name=name, long_name=long_name, @@ -302,7 +271,7 @@ def add_variable( # noqa: PLR0913 data_type=data_type, compressor=compressor, coordinates=coordinates, - metadata=meta_dict, + metadata=metadata, ) self._variables.append(var) @@ -325,5 +294,4 @@ def build(self) -> Dataset: msg = "Must add at least one dimension before building" raise ValueError(msg) - var_meta_dict = _to_dictionary([self._info, self._attributes]) - return Dataset(variables=self._variables, metadata=var_meta_dict) + return Dataset(variables=self._variables, metadata=self._metadata) diff --git a/src/mdio/builder/schemas/__init__.py b/src/mdio/builder/schemas/__init__.py new file mode 100644 index 000000000..51ef1ee04 --- /dev/null +++ b/src/mdio/builder/schemas/__init__.py @@ -0,0 +1,3 @@ +"""MDIO schemas for different data types.""" + +__all__ = [] diff --git a/src/mdio/schemas/base.py b/src/mdio/builder/schemas/base.py similarity index 80% rename from src/mdio/schemas/base.py rename to src/mdio/builder/schemas/base.py index 5684fa108..af6665b72 100644 --- a/src/mdio/schemas/base.py +++ b/src/mdio/builder/schemas/base.py @@ -4,11 +4,11 @@ from pydantic import Field from pydantic.json_schema import GenerateJsonSchema -from mdio.schemas.compressors import ZFP -from mdio.schemas.compressors import Blosc -from mdio.schemas.core import CamelCaseStrictModel -from mdio.schemas.dimension import NamedDimension -from mdio.schemas.dtype import DataTypeModel +from mdio.builder.schemas.compressors import ZFP +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.core import CamelCaseStrictModel +from mdio.builder.schemas.dimension import NamedDimension +from mdio.builder.schemas.dtype import DataTypeModel JSON_SCHEMA_DIALECT = GenerateJsonSchema.schema_dialect diff --git a/src/mdio/schemas/chunk_grid.py b/src/mdio/builder/schemas/chunk_grid.py similarity index 95% rename from src/mdio/schemas/chunk_grid.py rename to src/mdio/builder/schemas/chunk_grid.py index 757b6103d..434730d7a 100644 --- a/src/mdio/schemas/chunk_grid.py +++ b/src/mdio/builder/schemas/chunk_grid.py @@ -4,7 +4,7 @@ from pydantic import Field -from mdio.schemas.core import CamelCaseStrictModel +from mdio.builder.schemas.core import CamelCaseStrictModel class RegularChunkShape(CamelCaseStrictModel): diff --git a/src/mdio/schemas/compressors.py b/src/mdio/builder/schemas/compressors.py similarity index 98% rename from src/mdio/schemas/compressors.py rename to src/mdio/builder/schemas/compressors.py index bb4729fbc..7794277a2 100644 --- a/src/mdio/schemas/compressors.py +++ b/src/mdio/builder/schemas/compressors.py @@ -14,7 +14,7 @@ from zarr.codecs import BloscCname from zarr.codecs import BloscShuffle -from mdio.schemas.core import CamelCaseStrictModel +from mdio.builder.schemas.core import CamelCaseStrictModel class Blosc(CamelCaseStrictModel): diff --git a/src/mdio/builder/schemas/core.py b/src/mdio/builder/schemas/core.py new file mode 100644 index 000000000..92be0e358 --- /dev/null +++ b/src/mdio/builder/schemas/core.py @@ -0,0 +1,19 @@ +"""This module implements the core components of the MDIO schemas.""" + +from __future__ import annotations + +from pydantic import BaseModel +from pydantic import ConfigDict +from pydantic.alias_generators import to_camel + + +class CamelCaseStrictModel(BaseModel): + """A model with 
forbidden extras and camel case aliases.""" + + model_config = ConfigDict( + alias_generator=to_camel, + validate_by_name=True, + serialize_by_alias=True, + validate_assignment=True, + extra="forbid", + ) diff --git a/src/mdio/schemas/dimension.py b/src/mdio/builder/schemas/dimension.py similarity index 84% rename from src/mdio/schemas/dimension.py rename to src/mdio/builder/schemas/dimension.py index 62185a395..aa82437c7 100644 --- a/src/mdio/schemas/dimension.py +++ b/src/mdio/builder/schemas/dimension.py @@ -2,7 +2,7 @@ from pydantic import Field -from mdio.schemas.core import CamelCaseStrictModel +from mdio.builder.schemas.core import CamelCaseStrictModel class NamedDimension(CamelCaseStrictModel): diff --git a/src/mdio/schemas/dtype.py b/src/mdio/builder/schemas/dtype.py similarity index 95% rename from src/mdio/schemas/dtype.py rename to src/mdio/builder/schemas/dtype.py index af7f65820..8e8cbcd35 100644 --- a/src/mdio/schemas/dtype.py +++ b/src/mdio/builder/schemas/dtype.py @@ -10,7 +10,7 @@ from pydantic import Field -from mdio.schemas.core import CamelCaseStrictModel +from mdio.builder.schemas.core import CamelCaseStrictModel class ScalarType(StrEnum): diff --git a/src/mdio/schemas/units.py b/src/mdio/builder/schemas/units.py similarity index 96% rename from src/mdio/schemas/units.py rename to src/mdio/builder/schemas/units.py index 6844b4330..64d989c6d 100644 --- a/src/mdio/schemas/units.py +++ b/src/mdio/builder/schemas/units.py @@ -8,7 +8,7 @@ from pydantic import Field from pydantic import create_model -from mdio.schemas.core import CamelCaseStrictModel +from mdio.builder.schemas.core import CamelCaseStrictModel @unique diff --git a/src/mdio/schemas/v1/__init__.py b/src/mdio/builder/schemas/v1/__init__.py similarity index 52% rename from src/mdio/schemas/v1/__init__.py rename to src/mdio/builder/schemas/v1/__init__.py index fb98d55ec..cab620269 100644 --- a/src/mdio/schemas/v1/__init__.py +++ b/src/mdio/builder/schemas/v1/__init__.py @@ -1,5 +1,5 @@ """Schema specific to MDIO v1.""" -from mdio.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.dataset import Dataset __all__ = ["Dataset"] diff --git a/src/mdio/schemas/v1/dataset.py b/src/mdio/builder/schemas/v1/dataset.py similarity index 62% rename from src/mdio/schemas/v1/dataset.py rename to src/mdio/builder/schemas/v1/dataset.py index f101093bb..541f07eae 100644 --- a/src/mdio/schemas/v1/dataset.py +++ b/src/mdio/builder/schemas/v1/dataset.py @@ -1,17 +1,16 @@ """Dataset model for MDIO V1.""" +from typing import Any + from pydantic import AwareDatetime from pydantic import Field -from pydantic import create_model -from mdio.schemas.base import BaseDataset -from mdio.schemas.core import CamelCaseStrictModel -from mdio.schemas.core import model_fields -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.variable import Variable +from mdio.builder.schemas.base import BaseDataset +from mdio.builder.schemas.core import CamelCaseStrictModel +from mdio.builder.schemas.v1.variable import Variable -class DatasetInfo(CamelCaseStrictModel): +class DatasetMetadata(CamelCaseStrictModel): """Contains information about a dataset.""" name: str = Field(..., description="Name or identifier for the dataset.") @@ -29,14 +28,7 @@ class DatasetInfo(CamelCaseStrictModel): ), ) - -DatasetMetadata = create_model( - "DatasetMetadata", - **model_fields(DatasetInfo), - **model_fields(UserAttributes), - __base__=CamelCaseStrictModel, -) -DatasetMetadata.__doc__ = "The metadata about the dataset." 
+ attributes: dict[str, Any] | None = Field(default=None, description="User defined attributes as key/value pairs.") class Dataset(BaseDataset): diff --git a/src/mdio/schemas/v1/stats.py b/src/mdio/builder/schemas/v1/stats.py similarity index 84% rename from src/mdio/schemas/v1/stats.py rename to src/mdio/builder/schemas/v1/stats.py index 29d50add8..ccd12fc79 100644 --- a/src/mdio/schemas/v1/stats.py +++ b/src/mdio/builder/schemas/v1/stats.py @@ -22,8 +22,7 @@ from pydantic import Field -from mdio.schemas.core import CamelCaseStrictModel -from mdio.schemas.metadata import VersionedMetadataConvention +from mdio.builder.schemas.core import CamelCaseStrictModel class BaseHistogram(CamelCaseStrictModel): @@ -57,12 +56,3 @@ class SummaryStatistics(CamelCaseStrictModel): min: float = Field(..., description="The smallest value in the variable.") max: float = Field(..., description="The largest value in the variable.") histogram: Histogram = Field(..., description="Binned frequency distribution.") - - -class StatisticsMetadata(VersionedMetadataConvention): - """Data Model representing metadata for statistics.""" - - stats_v1: SummaryStatistics | list[SummaryStatistics] | None = Field( - default=None, - description="Minimal summary statistics.", - ) diff --git a/src/mdio/schemas/v1/units.py b/src/mdio/builder/schemas/v1/units.py similarity index 87% rename from src/mdio/schemas/v1/units.py rename to src/mdio/builder/schemas/v1/units.py index 5817c8ce7..07fac2778 100644 --- a/src/mdio/schemas/v1/units.py +++ b/src/mdio/builder/schemas/v1/units.py @@ -5,11 +5,9 @@ from typing import TypeAlias from pint import UnitRegistry -from pydantic import Field -from mdio.schemas.metadata import VersionedMetadataConvention -from mdio.schemas.units import UnitEnum -from mdio.schemas.units import create_unit_model +from mdio.builder.schemas.units import UnitEnum +from mdio.builder.schemas.units import create_unit_model ureg = UnitRegistry() ureg.default_format = "~C" # compact, abbreviated (symbol). @@ -108,10 +106,3 @@ class VoltageUnitEnum(UnitEnum): | FrequencyUnitModel | VoltageUnitModel ) - - -# Versioned metadata conventions for units -class AllUnits(VersionedMetadataConvention): - """All Units.""" - - units_v1: AllUnitModel | list[AllUnitModel] | None = Field(default=None) diff --git a/src/mdio/schemas/v1/variable.py b/src/mdio/builder/schemas/v1/variable.py similarity index 53% rename from src/mdio/schemas/v1/variable.py rename to src/mdio/builder/schemas/v1/variable.py index 93b591177..ed317f8b4 100644 --- a/src/mdio/schemas/v1/variable.py +++ b/src/mdio/builder/schemas/v1/variable.py @@ -11,25 +11,38 @@ variable in MDIO format. It can have coordinates and can also hold metadata. 
""" +from typing import Any + from pydantic import Field -from pydantic import create_model - -from mdio.schemas.base import NamedArray -from mdio.schemas.core import CamelCaseStrictModel -from mdio.schemas.core import model_fields -from mdio.schemas.dtype import ScalarType -from mdio.schemas.metadata import ChunkGridMetadata -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.stats import StatisticsMetadata -from mdio.schemas.v1.units import AllUnits - -CoordinateMetadata = create_model( - "CoordinateMetadata", - **model_fields(AllUnits), - **model_fields(UserAttributes), - __base__=CamelCaseStrictModel, - __doc__="Reduced Metadata, perfect for simple Coordinates.", -) + +from mdio.builder.schemas.base import NamedArray +from mdio.builder.schemas.chunk_grid import RectilinearChunkGrid +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.core import CamelCaseStrictModel +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.stats import SummaryStatistics +from mdio.builder.schemas.v1.units import AllUnitModel + + +class CoordinateMetadata(CamelCaseStrictModel): + """Reduced Metadata, perfect for simple Coordinates.""" + + units_v1: AllUnitModel | None = Field(default=None) + attributes: dict[str, Any] | None = Field(default=None) + + +class VariableMetadata(CoordinateMetadata): + """Complete Metadata for Variables and complex or large Coordinates.""" + + chunk_grid: RegularChunkGrid | RectilinearChunkGrid | None = Field( + default=None, + description="Chunk grid specification for the array.", + ) + + stats_v1: SummaryStatistics | list[SummaryStatistics] | None = Field( + default=None, + description="Minimal summary statistics.", + ) class Coordinate(NamedArray): @@ -42,17 +55,6 @@ class Coordinate(NamedArray): metadata: CoordinateMetadata | None = Field(default=None, description="Coordinate Metadata.") -VariableMetadata = create_model( - "VariableMetadata", - **model_fields(ChunkGridMetadata), - **model_fields(AllUnits), - **model_fields(StatisticsMetadata), - **model_fields(UserAttributes), - __base__=CamelCaseStrictModel, - __doc__="Complete Metadata for Variables and complex or large Coordinates.", -) - - class Variable(NamedArray): """An MDIO Variable that has coordinates and metadata.""" diff --git a/src/mdio/schemas/v1/templates/template_registry.py b/src/mdio/builder/template_registry.py similarity index 90% rename from src/mdio/schemas/v1/templates/template_registry.py rename to src/mdio/builder/template_registry.py index 93c572caf..b5be5c104 100644 --- a/src/mdio/schemas/v1/templates/template_registry.py +++ b/src/mdio/builder/template_registry.py @@ -3,14 +3,14 @@ import threading from typing import Optional -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate -from mdio.schemas.v1.templates.seismic_2d_poststack import Seismic2DPostStackTemplate -from mdio.schemas.v1.templates.seismic_2d_prestack_cdp import Seismic2DPreStackCDPTemplate -from mdio.schemas.v1.templates.seismic_2d_prestack_shot import Seismic2DPreStackShotTemplate -from mdio.schemas.v1.templates.seismic_3d_poststack import Seismic3DPostStackTemplate -from mdio.schemas.v1.templates.seismic_3d_prestack_cdp import Seismic3DPreStackCDPTemplate -from mdio.schemas.v1.templates.seismic_3d_prestack_coca import Seismic3DPreStackCocaTemplate -from mdio.schemas.v1.templates.seismic_3d_prestack_shot import Seismic3DPreStackShotTemplate +from mdio.builder.templates.abstract_dataset_template import 
AbstractDatasetTemplate +from mdio.builder.templates.seismic_2d_poststack import Seismic2DPostStackTemplate +from mdio.builder.templates.seismic_2d_prestack_cdp import Seismic2DPreStackCDPTemplate +from mdio.builder.templates.seismic_2d_prestack_shot import Seismic2DPreStackShotTemplate +from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate +from mdio.builder.templates.seismic_3d_prestack_cdp import Seismic3DPreStackCDPTemplate +from mdio.builder.templates.seismic_3d_prestack_coca import Seismic3DPreStackCocaTemplate +from mdio.builder.templates.seismic_3d_prestack_shot import Seismic3DPreStackShotTemplate class TemplateRegistry: diff --git a/src/mdio/builder/templates/__init__.py b/src/mdio/builder/templates/__init__.py new file mode 100644 index 000000000..fb2a37c0f --- /dev/null +++ b/src/mdio/builder/templates/__init__.py @@ -0,0 +1 @@ +"""MDIO templates for known dataset kinds.""" diff --git a/src/mdio/schemas/v1/templates/abstract_dataset_template.py b/src/mdio/builder/templates/abstract_dataset_template.py similarity index 81% rename from src/mdio/schemas/v1/templates/abstract_dataset_template.py rename to src/mdio/builder/templates/abstract_dataset_template.py index 10b0bdbff..feb4186dd 100644 --- a/src/mdio/schemas/v1/templates/abstract_dataset_template.py +++ b/src/mdio/builder/templates/abstract_dataset_template.py @@ -3,17 +3,18 @@ import copy from abc import ABC from abc import abstractmethod +from typing import Any -from mdio.schemas import compressors -from mdio.schemas.chunk_grid import RegularChunkGrid -from mdio.schemas.chunk_grid import RegularChunkShape -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredType -from mdio.schemas.metadata import ChunkGridMetadata -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder -from mdio.schemas.v1.units import AllUnits +from mdio.builder.dataset_builder import MDIODatasetBuilder +from mdio.builder.schemas import compressors +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.chunk_grid import RegularChunkShape +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.schemas.v1.variable import VariableMetadata class AbstractDatasetTemplate(ABC): @@ -38,7 +39,7 @@ def build_dataset( self, name: str, sizes: tuple[int, ...], - horizontal_coord_unit: AllUnits, + horizontal_coord_unit: LengthUnitModel, headers: StructuredType = None, ) -> Dataset: """Template method that builds the dataset. 
@@ -55,9 +56,9 @@ def build_dataset( self._dim_sizes = sizes self._horizontal_coord_unit = horizontal_coord_unit - attr = self._load_dataset_attributes() or UserAttributes(attributes={}) - attr.attributes["defaultVariableName"] = self._default_variable_name - self._builder = MDIODatasetBuilder(name=name, attributes=attr) + attributes = self._load_dataset_attributes() or {} + attributes["defaultVariableName"] = self._default_variable_name + self._builder = MDIODatasetBuilder(name=name, attributes=attributes) self._add_dimensions() self._add_coordinates() self._add_variables() @@ -120,13 +121,13 @@ def _default_variable_name(self) -> str: return "amplitude" @abstractmethod - def _load_dataset_attributes(self) -> UserAttributes: + def _load_dataset_attributes(self) -> dict[str, Any]: """Abstract method to load dataset attributes. Must be implemented by subclasses. Returns: - UserAttributes: The dataset attributes + The dataset attributes as a dictionary """ def _add_dimensions(self) -> None: @@ -150,20 +151,18 @@ def _add_coordinates(self) -> None: name, dimensions=(name,), data_type=ScalarType.INT32, - metadata_info=None, ) # Add non-dimension coordinates # TODO(Dmitriy Repin): do chunked write for non-dimensional coordinates and trace_mask # https://github.com/TGSAI/mdio-python/issues/587 # The chunk size used for trace mask will be different from the _var_chunk_shape - hor_coord_units = [self._horizontal_coord_unit] * len(self._coord_dim_names) for i in range(len(self._coord_names)): self._builder.add_coordinate( self._coord_names[i], dimensions=self._coord_dim_names, data_type=ScalarType.FLOAT64, - metadata_info=hor_coord_units, + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) def _add_trace_mask(self) -> None: @@ -177,24 +176,20 @@ def _add_trace_mask(self) -> None: data_type=ScalarType.BOOL, compressor=compressors.Blosc(cname=compressors.BloscCname.zstd), # also default in zarr3 coordinates=self._coord_names, - metadata_info=None, ) def _add_trace_headers(self, headers: StructuredType) -> None: """Add trace mask variables.""" # headers = StructuredType.model_validate(header_fields) + chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=self._var_chunk_shape[:-1])) self._builder.add_variable( name="headers", dimensions=self._dim_names[:-1], # All dimensions except vertical (the last one) data_type=headers, compressor=compressors.Blosc(cname=compressors.BloscCname.zstd), # also default in zarr3 coordinates=self._coord_names, - metadata_info=[ - ChunkGridMetadata( - chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=self._var_chunk_shape[:-1])) - ) - ], + metadata=VariableMetadata(chunk_grid=chunk_grid), ) def _add_variables(self) -> None: @@ -203,15 +198,12 @@ def _add_variables(self) -> None: A virtual method that can be overwritten by subclasses to add custom variables. Uses the class field 'builder' to add variables to the dataset. 
""" + chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=self._var_chunk_shape)) self._builder.add_variable( name=self.default_variable_name, dimensions=self._dim_names, data_type=ScalarType.FLOAT32, compressor=compressors.Blosc(cname=compressors.BloscCname.zstd), # also default in zarr3 coordinates=self._coord_names, - metadata_info=[ - ChunkGridMetadata( - chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=self._var_chunk_shape)) - ), - ], + metadata=VariableMetadata(chunk_grid=chunk_grid), ) diff --git a/src/mdio/schemas/v1/templates/seismic_2d_poststack.py b/src/mdio/builder/templates/seismic_2d_poststack.py similarity index 58% rename from src/mdio/schemas/v1/templates/seismic_2d_poststack.py rename to src/mdio/builder/templates/seismic_2d_poststack.py index d22e24236..a08fe6489 100644 --- a/src/mdio/schemas/v1/templates/seismic_2d_poststack.py +++ b/src/mdio/builder/templates/seismic_2d_poststack.py @@ -1,7 +1,8 @@ """Seismic2DPostStackTemplate MDIO v1 dataset templates.""" -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate +from typing import Any + +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate class Seismic2DPostStackTemplate(AbstractDatasetTemplate): @@ -19,11 +20,5 @@ def __init__(self, domain: str): def _name(self) -> str: return f"PostStack2D{self._trace_domain.capitalize()}" - def _load_dataset_attributes(self) -> UserAttributes: - return UserAttributes( - attributes={ - "surveyDimensionality": "2D", - "ensembleType": "line", - "processingStage": "post-stack", - } - ) + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyDimensionality": "2D", "ensembleType": "line", "processingStage": "post-stack"} diff --git a/src/mdio/schemas/v1/templates/seismic_2d_prestack_cdp.py b/src/mdio/builder/templates/seismic_2d_prestack_cdp.py similarity index 59% rename from src/mdio/schemas/v1/templates/seismic_2d_prestack_cdp.py rename to src/mdio/builder/templates/seismic_2d_prestack_cdp.py index f52035ab3..c3631b074 100644 --- a/src/mdio/schemas/v1/templates/seismic_2d_prestack_cdp.py +++ b/src/mdio/builder/templates/seismic_2d_prestack_cdp.py @@ -1,7 +1,8 @@ """Seismic2DPreStackCDPTemplate MDIO v1 dataset templates.""" -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate +from typing import Any + +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate class Seismic2DPreStackCDPTemplate(AbstractDatasetTemplate): @@ -19,11 +20,5 @@ def __init__(self, domain: str): def _name(self) -> str: return f"PreStackCdpGathers2D{self._trace_domain.capitalize()}" - def _load_dataset_attributes(self) -> UserAttributes: - return UserAttributes( - attributes={ - "surveyDimensionality": "2D", - "ensembleType": "cdp", - "processingStage": "pre-stack", - } - ) + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyDimensionality": "2D", "ensembleType": "cdp", "processingStage": "pre-stack"} diff --git a/src/mdio/schemas/v1/templates/seismic_2d_prestack_shot.py b/src/mdio/builder/templates/seismic_2d_prestack_shot.py similarity index 63% rename from src/mdio/schemas/v1/templates/seismic_2d_prestack_shot.py rename to src/mdio/builder/templates/seismic_2d_prestack_shot.py index 1db964738..3bfc60450 100644 --- a/src/mdio/schemas/v1/templates/seismic_2d_prestack_shot.py +++ 
b/src/mdio/builder/templates/seismic_2d_prestack_shot.py @@ -1,9 +1,10 @@ """Seismic2DPreStackShotTemplate MDIO v1 dataset templates.""" -from mdio.schemas.dtype import ScalarType -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate -from mdio.schemas.v1.units import AllUnits +from typing import Any + +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate class Seismic2DPreStackShotTemplate(AbstractDatasetTemplate): @@ -21,53 +22,42 @@ def __init__(self, domain: str): def _name(self) -> str: return f"PreStackShotGathers2D{self._trace_domain.capitalize()}" - def _load_dataset_attributes(self) -> UserAttributes: - return UserAttributes( - attributes={ - "surveyDimensionality": "2D", - "ensembleType": "shot_point", - "processingStage": "pre-stack", - } - ) + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyDimensionality": "2D", "ensembleType": "shot_point", "processingStage": "pre-stack"} def _add_coordinates(self) -> None: # Add dimension coordinates for name in self._dim_names: - self._builder.add_coordinate( - name, - dimensions=(name,), - data_type=ScalarType.INT32, - metadata_info=None, - ) + self._builder.add_coordinate(name, dimensions=(name,), data_type=ScalarType.INT32) # Add non-dimension coordinates + coordinate_metadata = CoordinateMetadata(units_v1=self._horizontal_coord_unit) self._builder.add_coordinate( "gun", dimensions=("shot_point", "channel"), data_type=ScalarType.UINT8, - metadata_info=[AllUnits(units_v1=None)], ) self._builder.add_coordinate( "source_coord_x", dimensions=("shot_point", "channel"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=coordinate_metadata, ) self._builder.add_coordinate( "source_coord_y", dimensions=("shot_point", "channel"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=coordinate_metadata, ) self._builder.add_coordinate( "group_coord_x", dimensions=("shot_point", "channel"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=coordinate_metadata, ) self._builder.add_coordinate( "group_coord_y", dimensions=("shot_point", "channel"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=coordinate_metadata, ) diff --git a/src/mdio/schemas/v1/templates/seismic_3d_poststack.py b/src/mdio/builder/templates/seismic_3d_poststack.py similarity index 61% rename from src/mdio/schemas/v1/templates/seismic_3d_poststack.py rename to src/mdio/builder/templates/seismic_3d_poststack.py index f21ffdbe7..77893b0e4 100644 --- a/src/mdio/schemas/v1/templates/seismic_3d_poststack.py +++ b/src/mdio/builder/templates/seismic_3d_poststack.py @@ -1,7 +1,8 @@ """Seismic3DPostStackTemplate MDIO v1 dataset templates.""" -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate +from typing import Any + +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate class Seismic3DPostStackTemplate(AbstractDatasetTemplate): @@ -19,11 +20,5 @@ def __init__(self, domain: str): def _name(self) -> str: return f"PostStack3D{self._trace_domain.capitalize()}" - def _load_dataset_attributes(self) -> UserAttributes: - return UserAttributes( - attributes={ - 
"surveyDimensionality": "3D", - "ensembleType": "line", - "processingStage": "post-stack", - } - ) + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyDimensionality": "3D", "ensembleType": "line", "processingStage": "post-stack"} diff --git a/src/mdio/schemas/v1/templates/seismic_3d_prestack_cdp.py b/src/mdio/builder/templates/seismic_3d_prestack_cdp.py similarity index 60% rename from src/mdio/schemas/v1/templates/seismic_3d_prestack_cdp.py rename to src/mdio/builder/templates/seismic_3d_prestack_cdp.py index b35e047d1..40346004d 100644 --- a/src/mdio/schemas/v1/templates/seismic_3d_prestack_cdp.py +++ b/src/mdio/builder/templates/seismic_3d_prestack_cdp.py @@ -1,7 +1,8 @@ """Seismic3DPreStackCDPTemplate MDIO v1 dataset templates.""" -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate +from typing import Any + +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate class Seismic3DPreStackCDPTemplate(AbstractDatasetTemplate): @@ -19,11 +20,5 @@ def __init__(self, domain: str): def _name(self) -> str: return f"PreStackCdpGathers3D{self._trace_domain.capitalize()}" - def _load_dataset_attributes(self) -> UserAttributes: - return UserAttributes( - attributes={ - "surveyDimensionality": "3D", - "ensembleType": "cdp", - "processingStage": "pre-stack", - } - ) + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyDimensionality": "3D", "ensembleType": "cdp", "processingStage": "pre-stack"} diff --git a/src/mdio/schemas/v1/templates/seismic_3d_prestack_coca.py b/src/mdio/builder/templates/seismic_3d_prestack_coca.py similarity index 69% rename from src/mdio/schemas/v1/templates/seismic_3d_prestack_coca.py rename to src/mdio/builder/templates/seismic_3d_prestack_coca.py index 808b849d1..0514ddfdb 100644 --- a/src/mdio/schemas/v1/templates/seismic_3d_prestack_coca.py +++ b/src/mdio/builder/templates/seismic_3d_prestack_coca.py @@ -1,9 +1,11 @@ """Seismic3DPreStackCocaTemplate MDIO v1 dataset templates.""" -from mdio.schemas.dtype import ScalarType -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate -from mdio.schemas.v1.units import AllUnits +from typing import Any + +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.units import AngleUnitModel +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate class Seismic3DPreStackCocaTemplate(AbstractDatasetTemplate): @@ -21,14 +23,8 @@ def __init__(self, domain: str): def _name(self) -> str: return f"PreStackCocaGathers3D{self._trace_domain.capitalize()}" - def _load_dataset_attributes(self) -> UserAttributes: - return UserAttributes( - attributes={ - "surveyDimensionality": "3D", - "ensembleType": "cdp_coca", - "processingStage": "pre-stack", - } - ) + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyDimensionality": "3D", "ensembleType": "cdp_coca", "processingStage": "pre-stack"} def _add_coordinates(self) -> None: # Add dimension coordinates @@ -46,14 +42,14 @@ def _add_coordinates(self) -> None: "offset", dimensions=("offset",), data_type=ScalarType.INT32, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) - angle_unit = AllUnits(units_v1={"angle": "deg"}) + angle_unit = AngleUnitModel(angle="deg") 
self._builder.add_coordinate( "azimuth", dimensions=("azimuth",), data_type=ScalarType.FLOAT32, - metadata_info=[angle_unit], + metadata=CoordinateMetadata(units_v1=angle_unit), ) self._builder.add_coordinate( self.trace_domain, @@ -66,11 +62,11 @@ def _add_coordinates(self) -> None: "cdp_x", dimensions=("inline", "crossline"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) self._builder.add_coordinate( "cdp_y", dimensions=("inline", "crossline"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) diff --git a/src/mdio/schemas/v1/templates/seismic_3d_prestack_shot.py b/src/mdio/builder/templates/seismic_3d_prestack_shot.py similarity index 64% rename from src/mdio/schemas/v1/templates/seismic_3d_prestack_shot.py rename to src/mdio/builder/templates/seismic_3d_prestack_shot.py index ff02377b4..778134bbb 100644 --- a/src/mdio/schemas/v1/templates/seismic_3d_prestack_shot.py +++ b/src/mdio/builder/templates/seismic_3d_prestack_shot.py @@ -1,9 +1,10 @@ """Seismic3DPreStackShotTemplate MDIO v1 dataset templates.""" -from mdio.schemas.dtype import ScalarType -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate -from mdio.schemas.v1.units import AllUnits +from typing import Any + +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate class Seismic3DPreStackShotTemplate(AbstractDatasetTemplate): @@ -21,53 +22,41 @@ def __init__(self, domain: str): def _name(self) -> str: return f"PreStackShotGathers3D{self._trace_domain.capitalize()}" - def _load_dataset_attributes(self) -> UserAttributes: - return UserAttributes( - attributes={ - "surveyDimensionality": "3D", - "ensembleType": "shot_point", - "processingStage": "pre-stack", - } - ) + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyDimensionality": "3D", "ensembleType": "shot_point", "processingStage": "pre-stack"} def _add_coordinates(self) -> None: # Add dimension coordinates for name in self._dim_names: - self._builder.add_coordinate( - name, - dimensions=(name,), - data_type=ScalarType.INT32, - metadata_info=None, - ) + self._builder.add_coordinate(name, dimensions=(name,), data_type=ScalarType.INT32) # Add non-dimension coordinates self._builder.add_coordinate( "gun", dimensions=("shot_point", "cable", "channel"), data_type=ScalarType.UINT8, - metadata_info=[AllUnits(units_v1=None)], ) self._builder.add_coordinate( "source_coord_x", dimensions=("shot_point", "cable", "channel"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) self._builder.add_coordinate( "source_coord_y", dimensions=("shot_point", "cable", "channel"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) self._builder.add_coordinate( "group_coord_x", dimensions=("shot_point", "cable", "channel"), data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) self._builder.add_coordinate( "group_coord_y", dimensions=("shot_point", "cable", "channel"), 
data_type=ScalarType.FLOAT64, - metadata_info=[self._horizontal_coord_unit], + metadata=CoordinateMetadata(units_v1=self._horizontal_coord_unit), ) diff --git a/src/mdio/schemas/v1/dataset_serializer.py b/src/mdio/builder/xarray_builder.py similarity index 92% rename from src/mdio/schemas/v1/dataset_serializer.py rename to src/mdio/builder/xarray_builder.py index 0df24a019..20bc20621 100644 --- a/src/mdio/schemas/v1/dataset_serializer.py +++ b/src/mdio/builder/xarray_builder.py @@ -18,17 +18,16 @@ except ImportError: zfpy_ZFPY = None # noqa: N816 +from mdio.builder.schemas.compressors import ZFP as mdio_ZFP # noqa: N811 +from mdio.builder.schemas.compressors import Blosc as mdio_Blosc +from mdio.builder.schemas.dimension import NamedDimension +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.variable import Coordinate +from mdio.builder.schemas.v1.variable import Variable from mdio.constants import ZarrFormat from mdio.constants import fill_value_map -from mdio.schemas.compressors import ZFP as mdio_ZFP # noqa: N811 -from mdio.schemas.compressors import Blosc as mdio_Blosc -from mdio.schemas.dimension import NamedDimension -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredType -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.dataset_builder import _to_dictionary -from mdio.schemas.v1.variable import Coordinate -from mdio.schemas.v1.variable import Variable def _get_all_named_dimensions(dataset: Dataset) -> dict[str, NamedDimension]: @@ -212,13 +211,8 @@ def to_xarray_dataset(mdio_ds: Dataset) -> xr_Dataset: # noqa: PLR0912 # Add array attributes if v.metadata is not None: - meta_dict = _to_dictionary(v.metadata) - # Exclude chunk_grid - del meta_dict["chunkGrid"] - # Remove empty attributes - meta_dict = {k: v for k, v in meta_dict.items() if v is not None} - # Add metadata to the data array attributes - data_array.attrs.update(meta_dict) + metadata_dict = v.metadata.model_dump(exclude_none=True, exclude={"chunkGrid"}) + data_array.attrs.update(metadata_dict) if v.long_name: data_array.attrs["long_name"] = v.long_name diff --git a/src/mdio/constants.py b/src/mdio/constants.py index 783623281..f2ddb65c3 100644 --- a/src/mdio/constants.py +++ b/src/mdio/constants.py @@ -4,7 +4,7 @@ import numpy as np -from mdio.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import ScalarType class ZarrFormat(IntEnum): diff --git a/src/mdio/converters/__init__.py b/src/mdio/converters/__init__.py index 0c3ebe922..fd88595ff 100644 --- a/src/mdio/converters/__init__.py +++ b/src/mdio/converters/__init__.py @@ -1,6 +1,6 @@ """MDIO Data conversion API.""" -from .mdio import mdio_to_segy -from .segy import segy_to_mdio +from mdio.converters.mdio import mdio_to_segy +from mdio.converters.segy import segy_to_mdio __all__ = ["mdio_to_segy", "segy_to_mdio"] diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py index 84393b264..628179326 100644 --- a/src/mdio/converters/segy.py +++ b/src/mdio/converters/segy.py @@ -14,16 +14,15 @@ from mdio.api.io import _normalize_path from mdio.api.io import to_mdio +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.xarray_builder import to_xarray_dataset from mdio.constants import UINT32_MAX from mdio.converters.exceptions import EnvironmentFormatError from 
mdio.converters.exceptions import GridTraceCountError from mdio.converters.exceptions import GridTraceSparsityError from mdio.converters.type_converter import to_structured_type from mdio.core.grid import Grid -from mdio.schemas.v1.dataset_serializer import to_xarray_dataset -from mdio.schemas.v1.units import AllUnits -from mdio.schemas.v1.units import LengthUnitEnum -from mdio.schemas.v1.units import LengthUnitModel from mdio.segy import blocked_io from mdio.segy.utilities import get_grid_plan @@ -36,9 +35,9 @@ from upath import UPath from xarray import Dataset as xr_Dataset + from mdio.builder.schemas import Dataset + from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate from mdio.core.dimension import Dimension - from mdio.schemas.v1.dataset import Dataset - from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate logger = logging.getLogger(__name__) @@ -240,23 +239,19 @@ def populate_non_dim_coordinates( return dataset, drop_vars_delayed -def _get_horizontal_coordinate_unit(segy_headers: list[Dimension]) -> AllUnits | None: +def _get_horizontal_coordinate_unit(segy_headers: list[Dimension]) -> LengthUnitModel | None: """Get the coordinate unit from the SEG-Y headers.""" name = TraceHeaderFieldsRev0.COORDINATE_UNIT.name.upper() unit_hdr = next((c for c in segy_headers if c.name.upper() == name), None) if unit_hdr is None or len(unit_hdr.coords) == 0: - # If the coordinate unit header is not found or empty, return None - # This is a common case for SEG-Y files, where the coordinate unit is not specified return None if segy_MeasurementSystem(unit_hdr.coords[0]) == segy_MeasurementSystem.METERS: - # If the coordinate unit is in meters, return "m" - return AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) + unit = LengthUnitEnum.METER if segy_MeasurementSystem(unit_hdr.coords[0]) == segy_MeasurementSystem.FEET: - # If the coordinate unit is in feet, return "ft" - return AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.FOOT)) - err = f"Unsupported coordinate unit value: {unit_hdr.value[0]} in SEG-Y file." 
- raise ValueError(err) + unit = LengthUnitEnum.FOOT + + return LengthUnitModel(length=unit) def _populate_coordinates( diff --git a/src/mdio/converters/type_converter.py b/src/mdio/converters/type_converter.py index 20ce63d2e..427abdc7d 100644 --- a/src/mdio/converters/type_converter.py +++ b/src/mdio/converters/type_converter.py @@ -2,9 +2,9 @@ from numpy import dtype as np_dtype -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredField -from mdio.schemas.dtype import StructuredType +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredField +from mdio.builder.schemas.dtype import StructuredType def to_scalar_type(data_type: np_dtype) -> ScalarType: diff --git a/src/mdio/core/dimension.py b/src/mdio/core/dimension.py index 9aafbed35..b07928d18 100644 --- a/src/mdio/core/dimension.py +++ b/src/mdio/core/dimension.py @@ -4,7 +4,6 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -from typing import Any import numpy as np @@ -45,15 +44,6 @@ def size(self) -> int: """Size of the dimension.""" return len(self.coords) - def to_dict(self) -> dict[str, Any]: - """Convert dimension to dictionary.""" - return {"name": self.name, "coords": self.coords.tolist()} - - @classmethod - def from_dict(cls, other: dict[str, Any]) -> Dimension: - """Make dimension from dictionary.""" - return Dimension(**other) - def __len__(self) -> int: """Length magic.""" return self.size diff --git a/src/mdio/core/exceptions.py b/src/mdio/core/exceptions.py deleted file mode 100644 index 4c2de95ae..000000000 --- a/src/mdio/core/exceptions.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Core exceptions for MDIO.""" - -from mdio.exceptions import MDIOError - - -class MDIOAlreadyExistsError(MDIOError): - """Raised when MDIO file already exists.""" - - -class MDIONotFoundError(MDIOError): - """Raised when MDIO file doesn't exist.""" diff --git a/src/mdio/exceptions.py b/src/mdio/exceptions.py index 832913e54..908a05c04 100644 --- a/src/mdio/exceptions.py +++ b/src/mdio/exceptions.py @@ -51,3 +51,11 @@ def __init__(self, message: str, name: str = None, expected: str = None): class InvalidMDIOError(MDIOError): """Raised when an invalid MDIO file is encountered.""" + + +class MDIOAlreadyExistsError(MDIOError): + """Raised when MDIO file already exists.""" + + +class MDIONotFoundError(MDIOError): + """Raised when MDIO file doesn't exist.""" diff --git a/src/mdio/schemas/__init__.py b/src/mdio/schemas/__init__.py deleted file mode 100644 index 8d9d8b86e..000000000 --- a/src/mdio/schemas/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -"""MDIO schemas for different data types.""" - -from mdio.schemas.compressors import ZFP -from mdio.schemas.compressors import Blosc -from mdio.schemas.dimension import NamedDimension -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredField -from mdio.schemas.dtype import StructuredType - -__all__ = [ - "Blosc", - "ZFP", - "NamedDimension", - "ScalarType", - "StructuredField", - "StructuredType", -] diff --git a/src/mdio/schemas/builder.py b/src/mdio/schemas/builder.py deleted file mode 100644 index 0e5977def..000000000 --- a/src/mdio/schemas/builder.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Schema builders.""" - -from __future__ import annotations - -from typing import Any - -from mdio.schemas import NamedDimension -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.dataset import DatasetMetadata -from mdio.schemas.v1.variable import Variable -from 
mdio.schemas.v1.variable import VariableMetadata - - -class VariableBuilder: - """Dataset builder.""" - - def __init__(self) -> None: - self.name = None - self.long_name = None - self.dtype = None - self.chunks = None - self.dims = None - self.coords = None - self.compressor = None - self.meta_dict = None - - def set_name(self, name: str) -> VariableBuilder: - """Set variable name.""" - self.name = name - return self - - def set_long_name(self, long_name: str) -> VariableBuilder: - """Add long, descriptive name to the variable.""" - self.long_name = long_name - return self - - def set_compressor(self, compressor: dict[str, Any]) -> VariableBuilder: - """Add long, descriptive name to the variable.""" - self.compressor = compressor - return self - - def add_dimension(self, *dimensions: str | dict[str, int]) -> VariableBuilder: - """Add a dimension to the dataset.""" - if self.dims is None: - self.dims = [] - - if isinstance(dimensions[0], str): - dims = list(dimensions) - elif isinstance(dimensions[0], dict): - dims = [NamedDimension(name=name, size=size) for dim in dimensions for name, size in dim.items()] - else: - raise NotImplementedError - - self.dims.extend(dims) - return self - - def add_coordinate(self, *names: str) -> VariableBuilder: - """Add a coordinate to the variable.""" - if self.coords is None: - self.coords = [] - - self.coords.extend(names) - return self - - def set_format(self, format_: str | dict[str, str]) -> VariableBuilder: - """Set variable format.""" - if isinstance(format_, dict): - fields = [{"name": n, "format": f} for n, f in format_.items()] - format_ = {"fields": fields} - - self.dtype = format_ - return self - - def set_chunks(self, chunks: list[int]) -> VariableBuilder: - """Set variable chunks.""" - if self.meta_dict is None: - self.meta_dict = {} - - self.meta_dict["chunkGrid"] = {"configuration": {"chunkShape": chunks}} - return self - - def set_units(self, units: dict[str, str]) -> VariableBuilder: - """Set variable units.""" - if self.meta_dict is None: - self.meta_dict = {} - - self.meta_dict["unitsV1"] = units - return self - - def add_attribute(self, key: str, value: Any) -> VariableBuilder: # noqa: ANN401 - """Add a user attribute to the variable metadata.""" - if self.meta_dict is None: - self.meta_dict = {} - - self.meta_dict["attributes"] = {key: value} - return self - - def build(self) -> Variable: - """Build the dataset model.""" - if self.chunks is not None and len(self.chunks) != len(self.dims): - msg = "Variable chunks must have same number of dimensions." 
- raise ValueError(msg) - - var_kwargs = {} - - if self.meta_dict is not None: - var_kwargs["metadata"] = VariableMetadata.model_validate(self.meta_dict) - - return Variable( - name=self.name, - long_name=self.long_name, - data_type=self.dtype, - dimensions=self.dims, - coordinates=self.coords, - compressor=self.compressor, - **var_kwargs, - ) - - -class DatasetBuilder: - """Dataset builder.""" - - def __init__(self) -> None: - self.variables = [] - self.name = None - self.metadata = None - - def set_name(self, name: str) -> DatasetBuilder: - """Set dataset name.""" - self.name = name - return self - - def add_variable(self, variable: Variable) -> DatasetBuilder: - """Add a variable to the dataset.""" - self.variables.append(variable) - return self - - def add_variables(self, variables: list[Variable]) -> DatasetBuilder: - """Add multiple variables to the dataset.""" - [self.add_variable(variable) for variable in variables] - return self - - def set_metadata(self, metadata: DatasetMetadata) -> DatasetBuilder: - """Add a metadata to the dataset.""" - self.metadata = metadata - return self - - def build(self) -> Dataset: - """Build the dataset model.""" - return Dataset(variables=self.variables, metadata=self.metadata) diff --git a/src/mdio/schemas/core.py b/src/mdio/schemas/core.py deleted file mode 100644 index 34a09066a..000000000 --- a/src/mdio/schemas/core.py +++ /dev/null @@ -1,49 +0,0 @@ -"""This module implements the core components of the MDIO schemas.""" - -from __future__ import annotations - -from typing import Any -from typing import get_type_hints - -from pydantic import BaseModel -from pydantic import ConfigDict -from pydantic.alias_generators import to_camel - - -def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]: - """Extract Pydantic BaseModel fields. - - Args: - model: (Type) The model object for which the fields will be extracted. - - Returns: - A dictionary containing the fields of the model along with - their corresponding types and default values. - - Example: - >>> class MyModel(BaseModel): - ... name: str - ... age: int = 0 - ... 
- >>> model_fields(MyModel) - {'name': (str, ), 'age': (int, 0)} - """ - annotations = get_type_hints(model) - - fields = {} - for field_name, field in model.model_fields.items(): - fields[field_name] = (annotations[field_name], field) - - return fields - - -class StrictModel(BaseModel): - """A model with forbidden extras.""" - - model_config = ConfigDict(extra="forbid", populate_by_name=True) - - -class CamelCaseStrictModel(StrictModel): - """A model with forbidden extras and camel case aliases.""" - - model_config = ConfigDict(alias_generator=to_camel) diff --git a/src/mdio/schemas/metadata.py b/src/mdio/schemas/metadata.py deleted file mode 100644 index dc2d66767..000000000 --- a/src/mdio/schemas/metadata.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Metadata schemas and conventions.""" - -from typing import Any - -from pydantic import Field - -from mdio.schemas.chunk_grid import RectilinearChunkGrid -from mdio.schemas.chunk_grid import RegularChunkGrid -from mdio.schemas.core import CamelCaseStrictModel - - -class ChunkGridMetadata(CamelCaseStrictModel): - """Definition of chunk grid.""" - - chunk_grid: RegularChunkGrid | RectilinearChunkGrid | None = Field( - default=None, - description="Chunk grid specification for the array.", - ) - - -class VersionedMetadataConvention(CamelCaseStrictModel): - """Data model for versioned metadata convention.""" - - -class UserAttributes(CamelCaseStrictModel): - """User defined attributes as key/value pairs.""" - - attributes: dict[str, Any] | None = Field( - default=None, - description="User defined attributes as key/value pairs.", - ) diff --git a/src/mdio/segy/_workers.py b/src/mdio/segy/_workers.py index de3257e7e..306821fe3 100644 --- a/src/mdio/segy/_workers.py +++ b/src/mdio/segy/_workers.py @@ -11,7 +11,7 @@ from segy import SegyFile from mdio.api.io import to_mdio -from mdio.schemas import ScalarType +from mdio.builder.schemas.dtype import ScalarType if TYPE_CHECKING: from segy.arrays import HeaderArray @@ -24,10 +24,10 @@ from xarray import Variable from zarr.core.config import config as zarr_config +from mdio.builder.schemas.v1.stats import CenteredBinHistogram +from mdio.builder.schemas.v1.stats import SummaryStatistics +from mdio.builder.xarray_builder import _get_fill_value from mdio.constants import UINT32_MAX -from mdio.schemas.v1.dataset_serializer import _get_fill_value -from mdio.schemas.v1.stats import CenteredBinHistogram -from mdio.schemas.v1.stats import SummaryStatistics class SegyFileArguments(TypedDict): diff --git a/src/mdio/segy/blocked_io.py b/src/mdio/segy/blocked_io.py index fbd7531ae..27141474c 100644 --- a/src/mdio/segy/blocked_io.py +++ b/src/mdio/segy/blocked_io.py @@ -18,10 +18,10 @@ from zarr import open_group as zarr_open_group from mdio.api.io import _normalize_storage_options +from mdio.builder.schemas.v1.stats import CenteredBinHistogram +from mdio.builder.schemas.v1.stats import SummaryStatistics from mdio.constants import ZarrFormat from mdio.core.indexing import ChunkIterator -from mdio.schemas.v1.stats import CenteredBinHistogram -from mdio.schemas.v1.stats import SummaryStatistics from mdio.segy._workers import trace_worker from mdio.segy.creation import SegyPartRecord from mdio.segy.creation import concat_files diff --git a/src/mdio/segy/helpers_segy.py b/src/mdio/segy/helpers_segy.py index a1d59df5c..a629591e5 100644 --- a/src/mdio/segy/helpers_segy.py +++ b/src/mdio/segy/helpers_segy.py @@ -4,7 +4,7 @@ from zarr.errors import ContainsGroupError -from mdio.core.exceptions import MDIOAlreadyExistsError +from 
mdio.exceptions import MDIOAlreadyExistsError if TYPE_CHECKING: from zarr import Group diff --git a/src/mdio/segy/utilities.py b/src/mdio/segy/utilities.py index 96a85d828..150653821 100644 --- a/src/mdio/segy/utilities.py +++ b/src/mdio/segy/utilities.py @@ -19,7 +19,7 @@ from segy import SegyFile from segy.arrays import HeaderArray - from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate + from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate logger = logging.getLogger(__name__) diff --git a/tests/integration/test_segy_import_export.py b/tests/integration/test_segy_import_export.py index f4b480267..2b3f154ef 100644 --- a/tests/integration/test_segy_import_export.py +++ b/tests/integration/test_segy_import_export.py @@ -22,9 +22,9 @@ from mdio import mdio_to_segy from mdio.api.io import open_mdio +from mdio.builder.template_registry import TemplateRegistry from mdio.converters.exceptions import GridTraceSparsityError from mdio.converters.segy import segy_to_mdio -from mdio.schemas.v1.templates.template_registry import TemplateRegistry from mdio.segy.geometry import StreamerShotGeometryType if TYPE_CHECKING: @@ -270,10 +270,10 @@ def test_variable_metadata(self, zarr_tmp: Path) -> None: expected_attrs = { "count": 97354860, "sum": -8594.551666259766, - "sum_squares": 40571291.6875, + "sumSquares": 40571291.6875, "min": -8.375323295593262, "max": 0.0, - "histogram": {"counts": [], "bin_centers": []}, + "histogram": {"counts": [], "binCenters": []}, } actual_attrs_json = json.loads(ds["amplitude"].attrs["statsV1"]) assert actual_attrs_json == expected_attrs @@ -352,6 +352,8 @@ class TestExport: def test_3d_export(self, segy_input: Path, zarr_tmp: Path, segy_export_tmp: Path) -> None: """Test 3D export to IBM and IEEE.""" + rng = np.random.default_rng(seed=1234) + spec = custom_teapot_dome_segy_spec(keep_unaltered=True) mdio_to_segy(segy_spec=spec, input_path=zarr_tmp, output_path=segy_export_tmp) @@ -364,7 +366,7 @@ def test_3d_export(self, segy_input: Path, zarr_tmp: Path, segy_export_tmp: Path out_segy = SegyFile(segy_export_tmp, spec=spec) num_traces = in_segy.num_traces - random_indices = np.random.choice(num_traces, 100, replace=False) + random_indices = rng.choice(num_traces, 100, replace=False) in_traces = in_segy.trace[random_indices] out_traces = out_segy.trace[random_indices] diff --git a/tests/integration/test_segy_import_export_masked.py b/tests/integration/test_segy_import_export_masked.py index 9290dae80..a1536a6a6 100644 --- a/tests/integration/test_segy_import_export_masked.py +++ b/tests/integration/test_segy_import_export_masked.py @@ -24,8 +24,8 @@ from mdio import mdio_to_segy from mdio.api.io import open_mdio +from mdio.builder.template_registry import TemplateRegistry from mdio.converters.segy import segy_to_mdio -from mdio.schemas.v1.templates.template_registry import TemplateRegistry if TYPE_CHECKING: from collections.abc import Iterable @@ -230,6 +230,8 @@ def mock_nd_segy(path: str, grid_conf: GridConfig, segy_factory_conf: SegyFactor def generate_selection_mask(selection_conf: SelectionMaskConfig, grid_conf: GridConfig) -> NDArray: """Generate a boolean selection mask for a masked export test.""" + rng = np.random.default_rng(seed=1234) + spatial_shape = [dim.size for dim in grid_conf.dims] mask_dims = selection_conf.mask_num_dims mask_dim_shape = [dim.size for dim in grid_conf.dims[:mask_dims]] @@ -238,7 +240,7 @@ def generate_selection_mask(selection_conf: SelectionMaskConfig, grid_conf: Grid 
cut_axes = np.zeros(shape=mask_dim_shape, dtype="bool") cut_size = int((1 - selection_conf.remove_frac) * cut_axes.size) - rand_idx = np.random.choice(cut_axes.size, size=cut_size, replace=False) + rand_idx = rng.choice(cut_axes.size, size=cut_size, replace=False) rand_idx = np.unravel_index(rand_idx, mask_dim_shape) selection_mask[rand_idx] = True @@ -367,6 +369,8 @@ def test_export(self, test_conf: MaskedExportConfig, export_masked_path: Path) - NOTE: This test must be executed after the 'test_import' and 'test_ingested_mdio' successfully complete. """ + rng = np.random.default_rng(seed=1234) + grid_conf, segy_factory_conf, segy_to_mdio_conf, _ = test_conf segy_path = export_masked_path / f"{grid_conf.name}.sgy" @@ -383,7 +387,7 @@ def test_export(self, test_conf: MaskedExportConfig, export_masked_path: Path) - actual_sgy = SegyFile(segy_rt_path) num_traces = expected_sgy.num_traces - random_indices = np.random.choice(num_traces, 10, replace=False) + random_indices = rng.choice(num_traces, 10, replace=False) expected_traces = expected_sgy.trace[random_indices] actual_traces = actual_sgy.trace[random_indices] diff --git a/tests/unit/v1/core/test_indexing.py b/tests/unit/test_indexing.py similarity index 100% rename from tests/unit/v1/core/test_indexing.py rename to tests/unit/test_indexing.py diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py deleted file mode 100644 index 3aefd7e2d..000000000 --- a/tests/unit/test_schema.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Test the schema for the v1 dataset.""" - -from mdio.schemas.v1 import Dataset as V1Dataset - -TEST_SCHEMA = { - "metadata": { - "name": "test_dataset", - "api_version": "1.0.0", - "created_on": "2023-01-01T00:00:00Z", - }, - "variables": [ - { - "name": "actual_variable", - "data_type": "float32", - "dimensions": ["dim0", "dim1"], - "compressor": {"name": "blosc", "clevel": 3}, - "coordinates": ["coord"], - "metadata": { - "chunk_grid": { - "name": "regular", - "configuration": {"chunk_shape": [10, 20]}, - }, - }, - }, - { - "name": "coord", - "data_type": "float32", - "dimensions": ["dim0", "dim1"], - "metadata": { - "chunk_grid": { - "name": "regular", - "configuration": {"chunk_shape": [10, 20]}, - }, - "units_v1": {"length": "m"}, - }, - }, - { - "name": "dim0", - "data_type": "int32", - "dimensions": [{"name": "dim0", "size": 100}], - }, - { - "name": "dim1", - "data_type": "int32", - "dimensions": [{"name": "dim1", "size": 200}], - }, - ], -} - - -def test_dataset_schema_validation() -> None: - """Test that the dataset schema validates correctly.""" - V1Dataset.model_validate(TEST_SCHEMA) diff --git a/tests/unit/v1/converters/test_type_converter.py b/tests/unit/test_type_converter.py similarity index 93% rename from tests/unit/v1/converters/test_type_converter.py rename to tests/unit/test_type_converter.py index 903e9eb28..e7b07f52a 100644 --- a/tests/unit/v1/converters/test_type_converter.py +++ b/tests/unit/test_type_converter.py @@ -3,16 +3,16 @@ import numpy as np import pytest +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredField +from mdio.builder.schemas.dtype import StructuredType from mdio.converters.type_converter import to_numpy_dtype from mdio.converters.type_converter import to_scalar_type from mdio.converters.type_converter import to_structured_type -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredField -from mdio.schemas.dtype import StructuredType @pytest.fixture -def supported_scalar_types_map() -> 
tuple[ScalarType, str]: +def supported_scalar_types_map() -> tuple[tuple[ScalarType, str], ...]: """Supported scalar types and their numpy equivalents.""" return ( (ScalarType.INT8, "int8"), diff --git a/tests/unit/v1/helpers.py b/tests/unit/v1/helpers.py index 536594a93..d52dd76f4 100644 --- a/tests/unit/v1/helpers.py +++ b/tests/unit/v1/helpers.py @@ -2,29 +2,27 @@ from pathlib import Path -from mdio.schemas.chunk_grid import RegularChunkGrid -from mdio.schemas.chunk_grid import RegularChunkShape -from mdio.schemas.compressors import Blosc -from mdio.schemas.compressors import BloscCname -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredField -from mdio.schemas.dtype import StructuredType -from mdio.schemas.metadata import ChunkGridMetadata -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder -from mdio.schemas.v1.dataset_builder import _BuilderState -from mdio.schemas.v1.dataset_builder import _get_named_dimension -from mdio.schemas.v1.stats import CenteredBinHistogram -from mdio.schemas.v1.stats import StatisticsMetadata -from mdio.schemas.v1.stats import SummaryStatistics -from mdio.schemas.v1.units import AllUnits -from mdio.schemas.v1.units import LengthUnitEnum -from mdio.schemas.v1.units import LengthUnitModel -from mdio.schemas.v1.units import SpeedUnitEnum -from mdio.schemas.v1.units import SpeedUnitModel -from mdio.schemas.v1.variable import Coordinate -from mdio.schemas.v1.variable import Variable +from mdio.builder.dataset_builder import MDIODatasetBuilder +from mdio.builder.dataset_builder import _BuilderState +from mdio.builder.dataset_builder import _get_named_dimension +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.chunk_grid import RegularChunkShape +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredField +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.stats import CenteredBinHistogram +from mdio.builder.schemas.v1.stats import SummaryStatistics +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import SpeedUnitEnum +from mdio.builder.schemas.v1.units import SpeedUnitModel +from mdio.builder.schemas.v1.variable import Coordinate +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.schemas.v1.variable import Variable +from mdio.builder.schemas.v1.variable import VariableMetadata def validate_builder(builder: MDIODatasetBuilder, state: _BuilderState, n_dims: int, n_coords: int, n_var: int) -> None: @@ -157,82 +155,57 @@ def make_seismic_poststack_3d_acceptance_dataset(dataset_name: str) -> Dataset: """Create in-memory Seismic PostStack 3D Acceptance dataset.""" ds = MDIODatasetBuilder( dataset_name, - attributes=UserAttributes( - attributes={ - "textHeader": [ - "C01 .......................... ", - "C02 .......................... ", - "C03 .......................... ", - ], - "foo": "bar", - } - ), + attributes={ + "textHeader": [ + "C01 .......................... ", + "C02 .......................... ", + "C03 .......................... 
", + ], + "foo": "bar", + }, ) - # Add dimensions + # Add dimensions and dimension coordinates + units_meter = CoordinateMetadata(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) + ds.add_dimension("inline", 256) ds.add_dimension("crossline", 512) ds.add_dimension("depth", 384) ds.add_coordinate("inline", dimensions=("inline",), data_type=ScalarType.UINT32) ds.add_coordinate("crossline", dimensions=("crossline",), data_type=ScalarType.UINT32) - ds.add_coordinate( - "depth", - dimensions=("depth",), - data_type=ScalarType.UINT32, - metadata_info=[AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))], - ) - # Add coordinates - ds.add_coordinate( - "cdp_x", - dimensions=("inline", "crossline"), - data_type=ScalarType.FLOAT32, - metadata_info=[AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))], - ) - ds.add_coordinate( - "cdp_y", - dimensions=("inline", "crossline"), - data_type=ScalarType.FLOAT32, - metadata_info=[AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))], - ) + ds.add_coordinate("depth", dimensions=("depth",), data_type=ScalarType.UINT32, metadata=units_meter) + # Add regular coordinates + ds.add_coordinate("cdp_x", dimensions=("inline", "crossline"), data_type=ScalarType.FLOAT32, metadata=units_meter) + ds.add_coordinate("cdp_y", dimensions=("inline", "crossline"), data_type=ScalarType.FLOAT32, metadata=units_meter) + + chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=(128, 128, 128))) + common_metadata = VariableMetadata(chunk_grid=chunk_grid) # Add image variable + histogram = CenteredBinHistogram(bin_centers=[1, 2], counts=[10, 15]) + stats = SummaryStatistics(count=100, sum=1215.1, sum_squares=125.12, min=5.61, max=10.84, histogram=histogram) + image_metadata = common_metadata.model_copy(update={"stats_v1": stats, "attributes": {"fizz": "buzz"}}) ds.add_variable( name="image", dimensions=("inline", "crossline", "depth"), data_type=ScalarType.FLOAT32, compressor=Blosc(cname=BloscCname.zstd), # also default in zarr3 coordinates=("cdp_x", "cdp_y"), - metadata_info=[ - ChunkGridMetadata( - chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=(128, 128, 128))) - ), - StatisticsMetadata( - stats_v1=SummaryStatistics( - count=100, - sum=1215.1, - sum_squares=125.12, - min=5.61, - max=10.84, - histogram=CenteredBinHistogram(bin_centers=[1, 2], counts=[10, 15]), - ) - ), - UserAttributes(attributes={"fizz": "buzz"}), - ], + metadata=image_metadata, ) # Add velocity variable + speed_unit = SpeedUnitModel(speed=SpeedUnitEnum.METER_PER_SECOND) + velocity_metadata = common_metadata.model_copy(update={"units_v1": speed_unit}) ds.add_variable( name="velocity", dimensions=("inline", "crossline", "depth"), data_type=ScalarType.FLOAT16, coordinates=("cdp_x", "cdp_y"), - metadata_info=[ - ChunkGridMetadata( - chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=(128, 128, 128))) - ), - AllUnits(units_v1=SpeedUnitModel(speed=SpeedUnitEnum.METER_PER_SECOND)), - ], + metadata=velocity_metadata, ) # Add inline-optimized image variable + fast_il_chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=(4, 512, 512))) + fast_inline_metadata = common_metadata.model_copy(update={"chunk_grid": fast_il_chunk_grid}) ds.add_variable( name="image_inline", long_name="inline optimized version of 3d_stack", @@ -240,11 +213,11 @@ def make_seismic_poststack_3d_acceptance_dataset(dataset_name: str) -> Dataset: data_type=ScalarType.FLOAT32, compressor=Blosc(cname=BloscCname.zstd), # also 
default in zarr3 coordinates=("cdp_x", "cdp_y"), - metadata_info=[ - ChunkGridMetadata(chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=(4, 512, 512)))) - ], + metadata=fast_inline_metadata, ) # Add headers variable with structured dtype + header_chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=(128, 128))) + header_metadata = VariableMetadata(chunk_grid=header_chunk_grid) ds.add_variable( name="image_headers", dimensions=("inline", "crossline"), @@ -257,8 +230,6 @@ def make_seismic_poststack_3d_acceptance_dataset(dataset_name: str) -> Dataset: StructuredField(name="some_scalar", format=ScalarType.FLOAT16), ] ), - metadata_info=[ - ChunkGridMetadata(chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=(128, 128)))) - ], + metadata=header_metadata, ) return ds.build() diff --git a/tests/unit/v1/templates/conftest.py b/tests/unit/v1/templates/conftest.py index 1a3e5e097..5406074c0 100644 --- a/tests/unit/v1/templates/conftest.py +++ b/tests/unit/v1/templates/conftest.py @@ -3,9 +3,9 @@ # conftest.py import pytest -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredField -from mdio.schemas.dtype import StructuredType +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredField +from mdio.builder.schemas.dtype import StructuredType @pytest.fixture(scope="session") diff --git a/tests/unit/v1/templates/test_seismic_2d_poststack.py b/tests/unit/v1/templates/test_seismic_2d_poststack.py index d5aa4e7ff..668451af0 100644 --- a/tests/unit/v1/templates/test_seismic_2d_poststack.py +++ b/tests/unit/v1/templates/test_seismic_2d_poststack.py @@ -2,19 +2,18 @@ from tests.unit.v1.helpers import validate_variable -from mdio.schemas.chunk_grid import RegularChunkGrid -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredType -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.templates.seismic_2d_poststack import Seismic2DPostStackTemplate -from mdio.schemas.v1.units import AllUnits -from mdio.schemas.v1.units import LengthUnitEnum -from mdio.schemas.v1.units import LengthUnitModel -from mdio.schemas.v1.units import TimeUnitEnum -from mdio.schemas.v1.units import TimeUnitModel +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import TimeUnitEnum +from mdio.builder.schemas.v1.units import TimeUnitModel +from mdio.builder.templates.seismic_2d_poststack import Seismic2DPostStackTemplate -_UNIT_METER = AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) -_UNIT_SECOND = AllUnits(units_v1=TimeUnitModel(time=TimeUnitEnum.SECOND)) +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) +UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: @@ -61,7 +60,7 @@ def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur coords=["cdp_x"], dtype=ScalarType.FLOAT64, ) - assert cdp_x.metadata.units_v1.length == LengthUnitEnum.METER + assert cdp_x.metadata.units_v1 == UNITS_METER cdp_y = validate_variable( dataset, @@ -70,7 +69,7 @@ def 
_validate_coordinates_headers_trace_mask(dataset: Dataset, headers: Structur coords=["cdp_y"], dtype=ScalarType.FLOAT64, ) - assert cdp_y.metadata.units_v1.length == LengthUnitEnum.METER + assert cdp_y.metadata.units_v1 == UNITS_METER class TestSeismic2DPostStackTemplate: @@ -94,7 +93,7 @@ def test_configuration_depth(self) -> None: # Verify dataset attributes attrs = t._load_dataset_attributes() - assert attrs.attributes == { + assert attrs == { "surveyDimensionality": "2D", "ensembleType": "line", "processingStage": "post-stack", @@ -120,7 +119,7 @@ def test_configuration_time(self) -> None: # Verify dataset attributes attrs = t._load_dataset_attributes() - assert attrs.attributes == { + assert attrs == { "surveyDimensionality": "2D", "ensembleType": "line", "processingStage": "post-stack", @@ -146,7 +145,7 @@ def test_build_dataset_depth(self, structured_headers: StructuredType) -> None: dataset = t.build_dataset( "Seismic 2D Depth Line 001", sizes=(2048, 4096), - horizontal_coord_unit=_UNIT_METER, + horizontal_coord_unit=UNITS_METER, headers=structured_headers, ) @@ -176,8 +175,8 @@ def test_build_dataset_time(self, structured_headers: StructuredType) -> None: dataset = t.build_dataset( "Seismic 2D Time Line 001", - sizes=[2048, 4096], - horizontal_coord_unit=_UNIT_METER, + sizes=(2048, 4096), + horizontal_coord_unit=UNITS_METER, headers=structured_headers, ) diff --git a/tests/unit/v1/templates/test_seismic_3d_poststack.py b/tests/unit/v1/templates/test_seismic_3d_poststack.py index d4c314fed..2094f5d34 100644 --- a/tests/unit/v1/templates/test_seismic_3d_poststack.py +++ b/tests/unit/v1/templates/test_seismic_3d_poststack.py @@ -2,21 +2,20 @@ from tests.unit.v1.helpers import validate_variable -from mdio.schemas.chunk_grid import RegularChunkGrid -from mdio.schemas.compressors import Blosc -from mdio.schemas.compressors import BloscCname -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredType -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.templates.seismic_3d_poststack import Seismic3DPostStackTemplate -from mdio.schemas.v1.units import AllUnits -from mdio.schemas.v1.units import LengthUnitEnum -from mdio.schemas.v1.units import LengthUnitModel -from mdio.schemas.v1.units import TimeUnitEnum -from mdio.schemas.v1.units import TimeUnitModel - -_UNIT_METER = AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) -_UNIT_SECOND = AllUnits(units_v1=TimeUnitModel(time=TimeUnitEnum.SECOND)) +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import TimeUnitEnum +from mdio.builder.schemas.v1.units import TimeUnitModel +from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate + +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) +UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: @@ -111,7 +110,7 @@ def test_configuration_depth(self) -> None: # Verify dataset attributes attrs = t._load_dataset_attributes() - assert attrs.attributes == { + assert attrs == { 
"surveyDimensionality": "3D", "ensembleType": "line", "processingStage": "post-stack", @@ -134,7 +133,7 @@ def test_configuration_time(self) -> None: assert t._dim_sizes == () assert t._horizontal_coord_unit is None - assert t._load_dataset_attributes().attributes == { + assert t._load_dataset_attributes() == { "surveyDimensionality": "3D", "ensembleType": "line", "processingStage": "post-stack", @@ -162,7 +161,7 @@ def test_build_dataset_depth(self, structured_headers: StructuredType) -> None: dataset = t.build_dataset( "Seismic 3D", sizes=(256, 512, 1024), - horizontal_coord_unit=_UNIT_METER, + horizontal_coord_unit=UNITS_METER, headers=structured_headers, ) @@ -195,7 +194,7 @@ def test_build_dataset_time(self, structured_headers: StructuredType) -> None: dataset = t.build_dataset( "Seismic 3D", sizes=(256, 512, 1024), - horizontal_coord_unit=_UNIT_METER, + horizontal_coord_unit=UNITS_METER, headers=structured_headers, ) diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_cdp.py b/tests/unit/v1/templates/test_seismic_3d_prestack_cdp.py index 067977b2f..8f1e1ea77 100644 --- a/tests/unit/v1/templates/test_seismic_3d_prestack_cdp.py +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_cdp.py @@ -2,21 +2,20 @@ from tests.unit.v1.helpers import validate_variable -from mdio.schemas.chunk_grid import RegularChunkGrid -from mdio.schemas.compressors import Blosc -from mdio.schemas.compressors import BloscCname -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredType -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.templates.seismic_3d_prestack_cdp import Seismic3DPreStackCDPTemplate -from mdio.schemas.v1.units import AllUnits -from mdio.schemas.v1.units import LengthUnitEnum -from mdio.schemas.v1.units import LengthUnitModel -from mdio.schemas.v1.units import TimeUnitEnum -from mdio.schemas.v1.units import TimeUnitModel - -_UNIT_METER = AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) -_UNIT_SECOND = AllUnits(units_v1=TimeUnitModel(time=TimeUnitEnum.SECOND)) +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import TimeUnitEnum +from mdio.builder.schemas.v1.units import TimeUnitModel +from mdio.builder.templates.seismic_3d_prestack_cdp import Seismic3DPreStackCDPTemplate + +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) +UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) def validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: @@ -120,7 +119,7 @@ def test_configuration_depth(self) -> None: # Verify prestack CDP attributes attrs = t._load_dataset_attributes() - assert attrs.attributes == { + assert attrs == { "surveyDimensionality": "3D", "ensembleType": "cdp", "processingStage": "pre-stack", @@ -145,7 +144,7 @@ def test_configuration_time(self) -> None: # Verify prestack CDP attributes attrs = t._load_dataset_attributes() - assert attrs.attributes == { + assert attrs == { "surveyDimensionality": "3D", "ensembleType": "cdp", "processingStage": "pre-stack", @@ -173,7 +172,7 @@ def test_build_dataset_depth(self, structured_headers: 
StructuredType) -> None: dataset = t.build_dataset( "North Sea 3D Prestack Depth", sizes=(512, 768, 36, 1536), - horizontal_coord_unit=_UNIT_METER, + horizontal_coord_unit=UNITS_METER, headers=structured_headers, ) @@ -206,7 +205,7 @@ def test_build_dataset_time(self, structured_headers: StructuredType) -> None: dataset = t.build_dataset( "Santos Basin 3D Prestack", sizes=(512, 768, 36, 1536), - horizontal_coord_unit=_UNIT_METER, + horizontal_coord_unit=UNITS_METER, headers=structured_headers, ) diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_coca.py b/tests/unit/v1/templates/test_seismic_3d_prestack_coca.py index bfaa14108..c3576ef62 100644 --- a/tests/unit/v1/templates/test_seismic_3d_prestack_coca.py +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_coca.py @@ -2,22 +2,21 @@ from tests.unit.v1.helpers import validate_variable -from mdio.schemas.chunk_grid import RegularChunkGrid -from mdio.schemas.compressors import Blosc -from mdio.schemas.compressors import BloscCname -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredType -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.templates.seismic_3d_prestack_coca import Seismic3DPreStackCocaTemplate -from mdio.schemas.v1.units import AllUnits -from mdio.schemas.v1.units import AngleUnitEnum -from mdio.schemas.v1.units import LengthUnitEnum -from mdio.schemas.v1.units import LengthUnitModel -from mdio.schemas.v1.units import TimeUnitEnum -from mdio.schemas.v1.units import TimeUnitModel - -_UNIT_METER = AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) -_UNIT_SECOND = AllUnits(units_v1=TimeUnitModel(time=TimeUnitEnum.SECOND)) +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import AngleUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import TimeUnitEnum +from mdio.builder.schemas.v1.units import TimeUnitModel +from mdio.builder.templates.seismic_3d_prestack_coca import Seismic3DPreStackCocaTemplate + +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) +UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: @@ -129,7 +128,7 @@ def test_configuration_time(self) -> None: # Verify dataset attributes attrs = t._load_dataset_attributes() - assert attrs.attributes == { + assert attrs == { "surveyDimensionality": "3D", "ensembleType": "cdp_coca", "processingStage": "pre-stack", @@ -143,7 +142,7 @@ def test_build_dataset_time(self, structured_headers: StructuredType) -> None: dataset = t.build_dataset( "Permian Basin 3D CDP Coca Gathers", sizes=(256, 256, 100, 6, 2048), - horizontal_coord_unit=_UNIT_METER, + horizontal_coord_unit=UNITS_METER, headers=structured_headers, ) diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_shot.py b/tests/unit/v1/templates/test_seismic_3d_prestack_shot.py index 4f26546a3..0948a1976 100644 --- a/tests/unit/v1/templates/test_seismic_3d_prestack_shot.py +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_shot.py @@ -2,21 +2,17 @@ from tests.unit.v1.helpers import validate_variable -from 
mdio.schemas.chunk_grid import RegularChunkGrid -from mdio.schemas.compressors import Blosc -from mdio.schemas.compressors import BloscCname -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredType -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.templates.seismic_3d_prestack_shot import Seismic3DPreStackShotTemplate -from mdio.schemas.v1.units import AllUnits -from mdio.schemas.v1.units import LengthUnitEnum -from mdio.schemas.v1.units import LengthUnitModel -from mdio.schemas.v1.units import TimeUnitEnum -from mdio.schemas.v1.units import TimeUnitModel - -_UNIT_METER = AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) -_UNIT_SECOND = AllUnits(units_v1=TimeUnitModel(time=TimeUnitEnum.SECOND)) +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.templates.seismic_3d_prestack_shot import Seismic3DPreStackShotTemplate + +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: @@ -146,7 +142,7 @@ def test_configuration_depth(self) -> None: # Verify prestack shot attributes attrs = t._load_dataset_attributes() - assert attrs.attributes == { + assert attrs == { "surveyDimensionality": "3D", "ensembleType": "shot_point", "processingStage": "pre-stack", @@ -171,7 +167,7 @@ def test_configuration_time(self) -> None: # Verify prestack shot attributes attrs = t._load_dataset_attributes() - assert attrs.attributes == { + assert attrs == { "surveyDimensionality": "3D", "ensembleType": "shot_point", "processingStage": "pre-stack", @@ -199,7 +195,7 @@ def test_build_dataset_depth(self, structured_headers: StructuredType) -> None: dataset = t.build_dataset( "Gulf of Mexico 3D Shot Depth", sizes=(256, 512, 24, 2048), - horizontal_coord_unit=_UNIT_METER, + horizontal_coord_unit=UNITS_METER, headers=structured_headers, ) @@ -232,7 +228,7 @@ def test_build_dataset_time(self, structured_headers: StructuredType) -> None: dataset = t.build_dataset( "North Sea 3D Shot Time", sizes=(256, 512, 24, 2048), - horizontal_coord_unit=_UNIT_METER, + horizontal_coord_unit=UNITS_METER, headers=structured_headers, ) diff --git a/tests/unit/v1/templates/test_seismic_templates.py b/tests/unit/v1/templates/test_seismic_templates.py index 2fdc6433d..3cdfeea14 100644 --- a/tests/unit/v1/templates/test_seismic_templates.py +++ b/tests/unit/v1/templates/test_seismic_templates.py @@ -2,15 +2,15 @@ # Import all concrete template classes from tests.unit.v1.helpers import validate_variable -from tests.unit.v1.templates.test_seismic_2d_poststack import _UNIT_METER +from tests.unit.v1.templates.test_seismic_2d_poststack import UNITS_METER -from mdio.schemas.dtype import ScalarType -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate -from mdio.schemas.v1.templates.seismic_2d_poststack import Seismic2DPostStackTemplate -from mdio.schemas.v1.templates.seismic_3d_poststack import Seismic3DPostStackTemplate -from mdio.schemas.v1.templates.seismic_3d_prestack_cdp import Seismic3DPreStackCDPTemplate -from 
mdio.schemas.v1.templates.seismic_3d_prestack_shot import Seismic3DPreStackShotTemplate -from mdio.schemas.v1.templates.template_registry import TemplateRegistry +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.template_registry import TemplateRegistry +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate +from mdio.builder.templates.seismic_2d_poststack import Seismic2DPostStackTemplate +from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate +from mdio.builder.templates.seismic_3d_prestack_cdp import Seismic3DPreStackCDPTemplate +from mdio.builder.templates.seismic_3d_prestack_shot import Seismic3DPreStackShotTemplate class TestSeismicTemplates: @@ -36,7 +36,7 @@ def _name(self) -> str: assert t.name == "Velocity2DDepth" assert t.default_variable_name == "velocity" - dataset = t.build_dataset("Velocity 2D Depth Line 001", sizes=[2048, 4096], horizontal_coord_unit=_UNIT_METER) + dataset = t.build_dataset("Velocity 2D Depth Line 001", sizes=(2048, 4096), horizontal_coord_unit=UNITS_METER) # Verify velocity variable validate_variable( diff --git a/tests/unit/v1/templates/test_template_registry.py b/tests/unit/v1/templates/test_template_registry.py index 015df7e64..b1eea1b2a 100644 --- a/tests/unit/v1/templates/test_template_registry.py +++ b/tests/unit/v1/templates/test_template_registry.py @@ -5,13 +5,13 @@ import pytest -from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate -from mdio.schemas.v1.templates.template_registry import TemplateRegistry -from mdio.schemas.v1.templates.template_registry import get_template -from mdio.schemas.v1.templates.template_registry import get_template_registry -from mdio.schemas.v1.templates.template_registry import is_template_registered -from mdio.schemas.v1.templates.template_registry import list_templates -from mdio.schemas.v1.templates.template_registry import register_template +from mdio.builder.template_registry import TemplateRegistry +from mdio.builder.template_registry import get_template +from mdio.builder.template_registry import get_template_registry +from mdio.builder.template_registry import is_template_registered +from mdio.builder.template_registry import list_templates +from mdio.builder.template_registry import register_template +from mdio.builder.templates.abstract_dataset_template import AbstractDatasetTemplate EXPECTED_DEFAULT_TEMPLATE_NAMES = [ "PostStack2DTime", diff --git a/tests/unit/v1/test_dataset_builder_add_coordinate.py b/tests/unit/v1/test_dataset_builder_add_coordinate.py index 153687885..f5d3190d0 100644 --- a/tests/unit/v1/test_dataset_builder_add_coordinate.py +++ b/tests/unit/v1/test_dataset_builder_add_coordinate.py @@ -3,15 +3,14 @@ import pytest from zarr.codecs import BloscCname -from mdio.schemas.compressors import Blosc -from mdio.schemas.dtype import ScalarType -from mdio.schemas.metadata import UserAttributes -from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder -from mdio.schemas.v1.dataset_builder import _BuilderState -from mdio.schemas.v1.units import AllUnits -from mdio.schemas.v1.units import LengthUnitEnum -from mdio.schemas.v1.units import LengthUnitModel -from mdio.schemas.v1.variable import VariableMetadata +from mdio.builder.dataset_builder import MDIODatasetBuilder +from mdio.builder.dataset_builder import _BuilderState +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.units import LengthUnitEnum 
+from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.schemas.v1.variable import VariableMetadata from .helpers import validate_builder from .helpers import validate_coordinate @@ -25,7 +24,7 @@ def test_add_coordinate() -> None: msg = "Must add at least one dimension before adding coordinates" with pytest.raises(ValueError, match=msg): - builder.add_coordinate("cdp", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32) + builder.add_coordinate("cdp", dimensions=("inline", "crossline"), data_type=ScalarType.FLOAT32) builder.add_dimension("inline", 100) builder.add_dimension("crossline", 200) @@ -33,21 +32,21 @@ def test_add_coordinate() -> None: # Validate required parameters bad_name = None with pytest.raises(ValueError, match="'name' must be a non-empty string"): - builder.add_coordinate(bad_name, dimensions=["speed"], data_type=ScalarType.FLOAT32) + builder.add_coordinate(bad_name, dimensions=("speed",), data_type=ScalarType.FLOAT32) with pytest.raises(ValueError, match="'name' must be a non-empty string"): - builder.add_coordinate("", dimensions=["speed"], data_type=ScalarType.FLOAT32) + builder.add_coordinate("", dimensions=("speed",), data_type=ScalarType.FLOAT32) with pytest.raises(ValueError, match="'dimensions' must be a non-empty list"): builder.add_coordinate("cdp_x", dimensions=None, data_type=ScalarType.FLOAT32) with pytest.raises(ValueError, match="'dimensions' must be a non-empty list"): - builder.add_coordinate("cdp_x", dimensions=[], data_type=ScalarType.FLOAT32) + builder.add_coordinate("cdp_x", dimensions=(), data_type=ScalarType.FLOAT32) # Add a variable using non-existent dimensions msg = "Pre-existing dimension named 'xline' is not found" with pytest.raises(ValueError, match=msg): - builder.add_coordinate("bad_cdp-x", dimensions=["inline", "xline"], data_type=ScalarType.FLOAT32) + builder.add_coordinate("bad_cdp-x", dimensions=("inline", "xline"), data_type=ScalarType.FLOAT32) # Validate state transition - builder.add_coordinate("cdp_x", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32) + builder.add_coordinate("cdp_x", dimensions=("inline", "crossline"), data_type=ScalarType.FLOAT32) validate_builder(builder, _BuilderState.HAS_COORDINATES, n_dims=2, n_coords=1, n_var=1) validate_variable( builder, @@ -60,7 +59,7 @@ def test_add_coordinate() -> None: # Adding coordinate with the same name twice msg = "Adding coordinate with the same name twice is not allowed" with pytest.raises(ValueError, match=msg): - builder.add_coordinate("cdp_x", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32) + builder.add_coordinate("cdp_x", dimensions=("inline", "crossline"), data_type=ScalarType.FLOAT32) def test_add_coordinate_with_defaults() -> None: @@ -70,7 +69,7 @@ def test_add_coordinate_with_defaults() -> None: builder.add_dimension("crossline", 200) # Add coordinate using defaults - builder.add_coordinate("cdp", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32) + builder.add_coordinate("cdp", dimensions=("inline", "crossline"), data_type=ScalarType.FLOAT32) validate_builder(builder, _BuilderState.HAS_COORDINATES, n_dims=2, n_coords=1, n_var=1) validate_coordinate(builder, name="cdp", dims=[("inline", 100), ("crossline", 200)], dtype=ScalarType.FLOAT32) v = validate_variable( @@ -92,16 +91,17 @@ def test_coordinate_with_full_parameters() -> None: builder.add_dimension("crossline", 200) # Add coordinate with all metadata + metadata = 
CoordinateMetadata(
+        units_v1=LengthUnitModel(length=LengthUnitEnum.FOOT),
+        attributes={"MGA": 51, "UnitSystem": "Imperial"},
+    )
     builder.add_coordinate(
         "cdp",
         long_name="Common Depth Point",
-        dimensions=["inline", "crossline"],
+        dimensions=("inline", "crossline"),
         data_type=ScalarType.FLOAT16,
         compressor=Blosc(cname=BloscCname.zstd),
-        metadata_info=[
-            AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.FOOT)),
-            UserAttributes(attributes={"MGA": 51, "UnitSystem": "Imperial"}),
-        ],
+        metadata=metadata,
     )
     validate_builder(builder, _BuilderState.HAS_COORDINATES, n_dims=2, n_coords=1, n_var=1)
     c = validate_coordinate(builder, name="cdp", dims=[("inline", 100), ("crossline", 200)], dtype=ScalarType.FLOAT16)
diff --git a/tests/unit/v1/test_dataset_builder_add_dimension.py b/tests/unit/v1/test_dataset_builder_add_dimension.py
index 112e7c9cf..eca60d3b1 100644
--- a/tests/unit/v1/test_dataset_builder_add_dimension.py
+++ b/tests/unit/v1/test_dataset_builder_add_dimension.py
@@ -2,9 +2,9 @@
 
 import pytest
 
-from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder
-from mdio.schemas.v1.dataset_builder import _BuilderState
-from mdio.schemas.v1.dataset_builder import _get_named_dimension
+from mdio.builder.dataset_builder import MDIODatasetBuilder
+from mdio.builder.dataset_builder import _BuilderState
+from mdio.builder.dataset_builder import _get_named_dimension
 
 from .helpers import validate_builder
diff --git a/tests/unit/v1/test_dataset_builder_add_variable.py b/tests/unit/v1/test_dataset_builder_add_variable.py
index c896e1ccb..359ad29f7 100644
--- a/tests/unit/v1/test_dataset_builder_add_variable.py
+++ b/tests/unit/v1/test_dataset_builder_add_variable.py
@@ -2,22 +2,19 @@
 
 import pytest
 
-from mdio.schemas.chunk_grid import RegularChunkGrid
-from mdio.schemas.chunk_grid import RegularChunkShape
-from mdio.schemas.compressors import Blosc
-from mdio.schemas.compressors import BloscCname
-from mdio.schemas.dtype import ScalarType
-from mdio.schemas.metadata import ChunkGridMetadata
-from mdio.schemas.metadata import UserAttributes
-from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder
-from mdio.schemas.v1.dataset_builder import _BuilderState
-from mdio.schemas.v1.stats import CenteredBinHistogram
-from mdio.schemas.v1.stats import StatisticsMetadata
-from mdio.schemas.v1.stats import SummaryStatistics
-from mdio.schemas.v1.units import AllUnits
-from mdio.schemas.v1.units import LengthUnitEnum
-from mdio.schemas.v1.units import LengthUnitModel
-from mdio.schemas.v1.variable import VariableMetadata
+from mdio.builder.dataset_builder import MDIODatasetBuilder
+from mdio.builder.dataset_builder import _BuilderState
+from mdio.builder.schemas.chunk_grid import RegularChunkGrid
+from mdio.builder.schemas.chunk_grid import RegularChunkShape
+from mdio.builder.schemas.compressors import Blosc
+from mdio.builder.schemas.compressors import BloscCname
+from mdio.builder.schemas.dtype import ScalarType
+from mdio.builder.schemas.v1.stats import CenteredBinHistogram
+from mdio.builder.schemas.v1.stats import SummaryStatistics
+from mdio.builder.schemas.v1.units import LengthUnitEnum
+from mdio.builder.schemas.v1.units import LengthUnitModel
+from mdio.builder.schemas.v1.variable import CoordinateMetadata
+from mdio.builder.schemas.v1.variable import VariableMetadata
 
 from .helpers import validate_builder
 from .helpers import validate_variable
@@ -137,14 +134,10 @@ def test_add_variable_with_defaults() -> None:
     builder.add_dimension("crossline", 200)
     builder.add_dimension("depth", 300)
     # Add dimension coordinates
+    depth_metadata = CoordinateMetadata(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))
     builder.add_coordinate("inline", dimensions=("inline",), data_type=ScalarType.UINT32)
     builder.add_coordinate("crossline", dimensions=("crossline",), data_type=ScalarType.UINT32)
-    builder.add_coordinate(
-        "depth",
-        dimensions=("depth",),
-        data_type=ScalarType.UINT32,
-        metadata_info=[AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))],
-    )
+    builder.add_coordinate("depth", dimensions=("depth",), data_type=ScalarType.UINT32, metadata=depth_metadata)
 
     # Add data variable using defaults
     builder.add_variable("ampl", dimensions=("inline", "crossline", "depth"), data_type=ScalarType.FLOAT32)
@@ -178,6 +171,14 @@ def test_add_variable_full_parameters() -> None:
     builder.add_coordinate("cdp_y", dimensions=("inline", "crossline"), data_type=ScalarType.FLOAT64)
 
     # Add data variable with full parameters
+    histogram = CenteredBinHistogram(bin_centers=[1, 2], counts=[10, 15])
+    stats = SummaryStatistics(count=100, sum=1215.1, sum_squares=125.12, min=5.61, max=10.84, histogram=histogram)
+    metadata = VariableMetadata(
+        units_v1=LengthUnitModel(length=LengthUnitEnum.FOOT),
+        attributes={"MGA": 51, "UnitSystem": "Imperial"},
+        chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=(20,))),
+        stats_v1=stats,
+    )
     builder.add_variable(
         "ampl",
         long_name="Amplitude (dimensionless)",
@@ -185,21 +186,7 @@
         data_type=ScalarType.FLOAT32,
         compressor=Blosc(cname=BloscCname.zstd),
         coordinates=("inline", "crossline", "depth", "cdp_x", "cdp_y"),
-        metadata_info=[
-            AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.FOOT)),
-            UserAttributes(attributes={"MGA": 51, "UnitSystem": "Imperial"}),
-            ChunkGridMetadata(chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=[20]))),
-            StatisticsMetadata(
-                stats_v1=SummaryStatistics(
-                    count=100,
-                    sum=1215.1,
-                    sumSquares=125.12,
-                    min=5.61,
-                    max=10.84,
-                    histogram=CenteredBinHistogram(binCenters=[1, 2], counts=[10, 15]),
-                )
-            ),
-        ],
+        metadata=metadata,
     )
     validate_builder(builder, _BuilderState.HAS_VARIABLES, n_dims=3, n_coords=5, n_var=6)
     v = validate_variable(
diff --git a/tests/unit/v1/test_dataset_builder_build.py b/tests/unit/v1/test_dataset_builder_build.py
index 06ef2d620..090d7d546 100644
--- a/tests/unit/v1/test_dataset_builder_build.py
+++ b/tests/unit/v1/test_dataset_builder_build.py
@@ -1,13 +1,13 @@
 """Tests the schema v1 dataset_builder.build() public API."""
 
-from mdio.schemas.compressors import BloscCname
-from mdio.schemas.dtype import ScalarType
-from mdio.schemas.dtype import StructuredField
-from mdio.schemas.dtype import StructuredType
-from mdio.schemas.v1.dataset import Dataset
-from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder
-from mdio.schemas.v1.units import LengthUnitEnum
-from mdio.schemas.v1.units import SpeedUnitEnum
+from mdio.builder.dataset_builder import MDIODatasetBuilder
+from mdio.builder.schemas.compressors import BloscCname
+from mdio.builder.schemas.dtype import ScalarType
+from mdio.builder.schemas.dtype import StructuredField
+from mdio.builder.schemas.dtype import StructuredType
+from mdio.builder.schemas.v1.dataset import Dataset
+from mdio.builder.schemas.v1.units import LengthUnitEnum
+from mdio.builder.schemas.v1.units import SpeedUnitEnum
 
 from .helpers import make_seismic_poststack_3d_acceptance_dataset
 from .helpers import validate_variable
diff --git a/tests/unit/v1/test_dataset_builder_helpers.py b/tests/unit/v1/test_dataset_builder_helpers.py
index 2b35fa841..f1c42d1fd 100644
--- a/tests/unit/v1/test_dataset_builder_helpers.py
+++ b/tests/unit/v1/test_dataset_builder_helpers.py
@@ -1,15 +1,9 @@
 """Tests the schema v1 dataset_builder internal methods."""
 
-from datetime import UTC
-from datetime import datetime
-
 import pytest
-from pydantic import Field
 
-from mdio.schemas.core import StrictModel
-from mdio.schemas.dimension import NamedDimension
-from mdio.schemas.v1.dataset_builder import _get_named_dimension
-from mdio.schemas.v1.dataset_builder import _to_dictionary
+from mdio.builder.dataset_builder import _get_named_dimension
+from mdio.builder.schemas.dimension import NamedDimension
 
 
 def test__get_named_dimension() -> None:
@@ -27,60 +21,3 @@ def test__get_named_dimension() -> None:
         _get_named_dimension(dimensions, 42)
     with pytest.raises(ValueError, match="Dimension 'inline' found but size 2 does not match expected size 200"):
         _get_named_dimension(dimensions, "inline", size=200)
-
-
-def test__to_dictionary() -> None:
-    """Test converting a dictionary, list or pydantic BaseModel to a dictionary."""
-    # Validate inputs
-    with pytest.raises(TypeError, match="Expected BaseModel, dict or list, got datetime"):
-        _to_dictionary(datetime.now(UTC))
-
-    # Convert None to None
-    result = _to_dictionary(None)
-    assert result is None
-
-    # Validate conversion of a Pydantic BaseModel
-    class SomeModel(StrictModel):
-        count: int = Field(default=None, description="Samples count")
-        samples: list[float] = Field(default_factory=list, description="Samples.")
-        created: datetime = Field(default_factory=datetime.now, description="Creation time with TZ info.")
-
-    md = SomeModel(count=3, samples=[1.0, 2.0, 3.0], created=datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC))
-    result = _to_dictionary(md)
-    assert isinstance(result, dict)
-    assert result == {"count": 3, "created": "2023-10-01T12:00:00Z", "samples": [1.0, 2.0, 3.0]}
-
-    # Validate conversion of a dictionary
-    dct = {
-        "count": 3,
-        "samples": [1.0, 2.0, 3.0],
-        "created": datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC),
-    }
-    result = _to_dictionary(dct)
-    assert isinstance(result, dict)
-    assert result == {
-        "count": 3,
-        "samples": [1.0, 2.0, 3.0],
-        "created": datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC),
-    }
-
-    # Validate conversion of a dictionary
-    lst = [
-        None,
-        SomeModel(count=3, samples=[1.0, 2.0, 3.0], created=datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC)),
-        {
-            "count2": 3,
-            "samples2": [1.0, 2.0, 3.0],
-            "created2": datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC),
-        },
-    ]
-    result = _to_dictionary(lst)
-    assert isinstance(result, dict)
-    assert result == {
-        "count": 3,
-        "samples": [1.0, 2.0, 3.0],
-        "created": "2023-10-01T12:00:00Z",
-        "count2": 3,
-        "samples2": [1.0, 2.0, 3.0],
-        "created2": datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC),
-    }
diff --git a/tests/unit/v1/test_dataset_serializer.py b/tests/unit/v1/test_dataset_serializer.py
index 3d1c2ded6..79d5ce40c 100644
--- a/tests/unit/v1/test_dataset_serializer.py
+++ b/tests/unit/v1/test_dataset_serializer.py
@@ -9,28 +9,27 @@
 from zarr.codecs import BloscCodec
 
 from mdio import to_mdio
+from mdio.builder.dataset_builder import MDIODatasetBuilder
+from mdio.builder.schemas.chunk_grid import RegularChunkGrid
+from mdio.builder.schemas.chunk_grid import RegularChunkShape
+from mdio.builder.schemas.dimension import NamedDimension
+from mdio.builder.schemas.dtype import ScalarType
+from mdio.builder.schemas.dtype import StructuredField
+from mdio.builder.schemas.dtype import StructuredType
+from mdio.builder.schemas.v1.dataset import Dataset
+from mdio.builder.schemas.v1.dataset import DatasetMetadata
+from mdio.builder.schemas.v1.variable import Coordinate
+from mdio.builder.schemas.v1.variable import Variable
+from mdio.builder.schemas.v1.variable import VariableMetadata
+from mdio.builder.xarray_builder import _convert_compressor
+from mdio.builder.xarray_builder import _get_all_named_dimensions
+from mdio.builder.xarray_builder import _get_coord_names
+from mdio.builder.xarray_builder import _get_dimension_names
+from mdio.builder.xarray_builder import _get_fill_value
+from mdio.builder.xarray_builder import _get_zarr_chunks
+from mdio.builder.xarray_builder import _get_zarr_shape
+from mdio.builder.xarray_builder import to_xarray_dataset
 from mdio.constants import fill_value_map
-from mdio.schemas.chunk_grid import RegularChunkGrid
-from mdio.schemas.chunk_grid import RegularChunkShape
-from mdio.schemas.dimension import NamedDimension
-from mdio.schemas.dtype import ScalarType
-from mdio.schemas.dtype import StructuredField
-from mdio.schemas.dtype import StructuredType
-from mdio.schemas.metadata import ChunkGridMetadata
-from mdio.schemas.v1.dataset import Dataset
-from mdio.schemas.v1.dataset import DatasetInfo
-from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder
-from mdio.schemas.v1.dataset_builder import _to_dictionary
-from mdio.schemas.v1.dataset_serializer import _convert_compressor
-from mdio.schemas.v1.dataset_serializer import _get_all_named_dimensions
-from mdio.schemas.v1.dataset_serializer import _get_coord_names
-from mdio.schemas.v1.dataset_serializer import _get_dimension_names
-from mdio.schemas.v1.dataset_serializer import _get_fill_value
-from mdio.schemas.v1.dataset_serializer import _get_zarr_chunks
-from mdio.schemas.v1.dataset_serializer import _get_zarr_shape
-from mdio.schemas.v1.dataset_serializer import to_xarray_dataset
-from mdio.schemas.v1.variable import Coordinate
-from mdio.schemas.v1.variable import Variable
 
 from .helpers import make_seismic_poststack_3d_acceptance_dataset
 from .helpers import output_path
@@ -44,11 +43,11 @@
     HAS_ZFPY = False
 
 
-from mdio.schemas.compressors import ZFP as MDIO_ZFP
-from mdio.schemas.compressors import Blosc as mdio_Blosc
-from mdio.schemas.compressors import BloscCname
-from mdio.schemas.compressors import BloscShuffle
-from mdio.schemas.compressors import ZFPMode as mdio_ZFPMode
+from mdio.builder.schemas.compressors import ZFP as MDIO_ZFP
+from mdio.builder.schemas.compressors import Blosc as mdio_Blosc
+from mdio.builder.schemas.compressors import BloscCname
+from mdio.builder.schemas.compressors import BloscShuffle
+from mdio.builder.schemas.compressors import ZFPMode as mdio_ZFPMode
 
 
 def test_get_all_named_dimensions() -> None:
@@ -65,9 +64,7 @@
     v3 = Variable(name="unresolved_dims", data_type=ScalarType.FLOAT32, dimensions=["x", "y", "z"])
     ds = Dataset(
         variables=[v1, v2, v3],
-        metadata=_to_dictionary(
-            [DatasetInfo(name="test_dataset", api_version="1.0.0", created_on="2023-10-01T00:00:00Z")]
-        ),
+        metadata=DatasetMetadata(name="test_dataset", api_version="1.0.0", created_on="2023-10-01T00:00:00Z"),
     )
 
     all_dims = _get_all_named_dimensions(ds)
@@ -141,9 +138,7 @@ def test_get_zarr_shape() -> None:
     v2 = Variable(name="str var", data_type=ScalarType.FLOAT32, dimensions=["inline", "crossline", "depth"])
     Dataset(
         variables=[v1, v2],
-        metadata=_to_dictionary(
-            [DatasetInfo(name="test_dataset", api_version="1.0.0", created_on="2023-10-01T00:00:00Z")]
-        ),
+        metadata=DatasetMetadata(name="test_dataset", api_version="1.0.0", created_on="2023-10-01T00:00:00Z"),
     )
 
     assert _get_zarr_shape(v1, all_named_dims) == (100, 200, 300)
@@ -154,17 +149,12 @@ def test_get_zarr_chunks() -> None:
     """Test for _get_zarr_chunks function."""
     d1 = NamedDimension(name="inline", size=100)
     d2 = NamedDimension(name="crossline", size=200)
-    d3 = NamedDimension(name="depth", size=300)
+    d3 = NamedDimension(name="crossline", size=300)
 
     # Test 1: Variable with chunk defined in metadata
-    v = Variable(
-        name="seismic 3d var",
-        data_type=ScalarType.FLOAT32,
-        dimensions=[d1, d2, d3],
-        metadata=_to_dictionary(
-            ChunkGridMetadata(chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=[10, 20, 30])))
-        ),
-    )
+    chunk_grid = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=(10, 20, 30)))
+    metadata = VariableMetadata(chunk_grid=chunk_grid)
+    v = Variable(name="seismic 3d var", data_type=ScalarType.FLOAT32, dimensions=[d1, d2], metadata=metadata)
     assert _get_zarr_chunks(v, all_named_dims=[d1, d2, d3]) == (10, 20, 30)
 
     # Test 2: Variable with no chunks defined
diff --git a/uv.lock b/uv.lock
index 27f05a006..6ee410644 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1164,11 +1164,11 @@ wheels = [
 
 [[package]]
 name = "identify"
-version = "2.6.13"
+version = "2.6.14"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/82/ca/ffbabe3635bb839aa36b3a893c91a9b0d368cb4d8073e03a12896970af82/identify-2.6.13.tar.gz", hash = "sha256:da8d6c828e773620e13bfa86ea601c5a5310ba4bcd65edf378198b56a1f9fb32", size = 99243, upload-time = "2025-08-09T19:35:00.6Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/c4/62963f25a678f6a050fb0505a65e9e726996171e6dbe1547f79619eefb15/identify-2.6.14.tar.gz", hash = "sha256:663494103b4f717cb26921c52f8751363dc89db64364cd836a9bf1535f53cd6a", size = 99283, upload-time = "2025-09-06T19:30:52.938Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e7/ce/461b60a3ee109518c055953729bf9ed089a04db895d47e95444071dcdef2/identify-2.6.13-py2.py3-none-any.whl", hash = "sha256:60381139b3ae39447482ecc406944190f690d4a2997f2584062089848361b33b", size = 99153, upload-time = "2025-08-09T19:34:59.1Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/ae/2ad30f4652712c82f1c23423d79136fbce338932ad166d70c1efb86a5998/identify-2.6.14-py2.py3-none-any.whl", hash = "sha256:11a073da82212c6646b1f39bb20d4483bfb9543bd5566fec60053c4bb309bf2e", size = 99172, upload-time = "2025-09-06T19:30:51.759Z" },
 ]
 
 [[package]]
@@ -1680,7 +1680,6 @@ dependencies = [
     { name = "pint" },
     { name = "psutil" },
    { name = "pydantic" },
-    { name = "pydantic-settings" },
     { name = "rich" },
     { name = "segy" },
     { name = "tqdm" },
@@ -1729,24 +1728,23 @@ docs = [
 
 [package.metadata]
 requires-dist = [
-    { name = "adlfs", marker = "extra == 'cloud'", specifier = ">=2024.12.0" },
-    { name = "bokeh", marker = "extra == 'distributed'", specifier = ">=3.7.3,<4.0.0" },
+    { name = "adlfs", marker = "extra == 'cloud'", specifier = ">=2025.8.0" },
+    { name = "bokeh", marker = "extra == 'distributed'", specifier = ">=3.8.0,<4.0.0" },
     { name = "click", specifier = ">=8.2.1,<9.0.0" },
     { name = "click-params", specifier = ">=0.5.0,<0.6.0" },
     { name = "dask", specifier = ">=2025.7.0" },
     { name = "distributed", marker = "extra == 'distributed'", specifier = ">=2025.7.0" },
-    { name = "fsspec", specifier = ">=2025.7.0" },
-    { name = "gcsfs", marker = "extra == 'cloud'", specifier = ">=2025.7.0" },
-    { name = "pint", specifier = ">=0.24.4,<0.25" },
+    { name = "fsspec", specifier = ">=2025.9.0" },
+    { name = "gcsfs", marker = "extra == 'cloud'", specifier = ">=2025.9.0" },
+    { name = "pint", specifier = ">=0.25.0,<1.0.0" },
     { name = "psutil", specifier = ">=7.0.0,<8.0.0" },
     { name = "pydantic", specifier = ">=2.11.7,<3.0.0" },
-    { name = "pydantic-settings", specifier = ">=2.10.1,<3.0.0" },
     { name = "rich", specifier = ">=14.1.0,<15.0.0" },
-    { name = "s3fs", marker = "extra == 'cloud'", specifier = ">=2025.7.0" },
+    { name = "s3fs", marker = "extra == 'cloud'", specifier = ">=2025.9.0" },
     { name = "segy", specifier = ">=0.4.2,<0.5.0" },
     { name = "tqdm", specifier = ">=4.67.1,<5.0.0" },
     { name = "universal-pathlib", specifier = ">=0.2.6" },
-    { name = "xarray", specifier = ">=2025.7.1" },
+    { name = "xarray", specifier = ">=2025.9.0" },
     { name = "zarr", specifier = ">=3.1.2,<4.0.0" },
     { name = "zfpy", marker = "extra == 'lossy'", specifier = ">=1.0.1,<2.0.0" },
 ]
@@ -1754,23 +1752,23 @@ provides-extras = ["cloud", "distributed", "lossy"]
 
 [package.metadata.requires-dev]
 dev = [
-    { name = "coverage", extras = ["toml"], specifier = ">=7.9.1,<8" },
-    { name = "mypy", specifier = ">=1.16.1,<2" },
-    { name = "pre-commit", specifier = ">=4.2.0,<5" },
-    { name = "pre-commit-hooks", specifier = ">=5.0.0,<6" },
+    { name = "coverage", extras = ["toml"], specifier = ">=7.10.6,<8" },
+    { name = "mypy", specifier = ">=1.17.1,<2" },
+    { name = "pre-commit", specifier = ">=4.3.0,<5" },
+    { name = "pre-commit-hooks", specifier = ">=6.0.0,<7" },
     { name = "pygments", specifier = ">=2.19.2,<3" },
-    { name = "pytest", specifier = ">=8.4.1,<9" },
+    { name = "pytest", specifier = ">=8.4.2,<9" },
     { name = "pytest-dependency", specifier = ">=0.6.0,<0.7" },
-    { name = "ruff", specifier = ">=0.12.1" },
+    { name = "ruff", specifier = ">=0.12.2" },
     { name = "typeguard", specifier = ">=4.4.4,<5" },
     { name = "xdoctest", extras = ["colors"], specifier = ">=1.2.0,<2" },
 ]
 docs = [
-    { name = "furo", specifier = ">=2024.8.6" },
+    { name = "furo", specifier = ">=2025.7.19" },
     { name = "linkify-it-py", specifier = ">=2.0.3" },
-    { name = "myst-nb", specifier = ">=1.2.0" },
+    { name = "myst-nb", specifier = ">=1.3.0" },
     { name = "sphinx", specifier = ">=8.2.3,<9" },
-    { name = "sphinx-autobuild", specifier = ">=2024.10.3" },
+    { name = "sphinx-autobuild", specifier = ">=2025.8.25" },
     { name = "sphinx-click", specifier = ">=6.0.0,<7" },
     { name = "sphinx-copybutton", specifier = ">=0.5.2,<0.6" },
     { name = "sphinx-design", specifier = ">=0.6.1,<0.7" },
@@ -2185,7 +2183,7 @@ wheels = [
 
 [[package]]
 name = "pint"
-version = "0.24.4"
+version = "0.25"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "flexcache" },
@@ -2193,9 +2191,9 @@ dependencies = [
     { name = "platformdirs" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/20/bb/52b15ddf7b7706ed591134a895dbf6e41c8348171fb635e655e0a4bbb0ea/pint-0.24.4.tar.gz", hash = "sha256:35275439b574837a6cd3020a5a4a73645eb125ce4152a73a2f126bf164b91b80", size = 342225, upload-time = "2024-11-07T16:29:46.061Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/5a/cb/e6ffaf3d019e8501b1264dac529bf829ac2f1fe1d488cfcf67f1fccadacf/pint-0.25.tar.gz", hash = "sha256:22911a30d682ee0540d656571c19a7b1806ce00b2be88a16f67218108b7b8cc2", size = 253010, upload-time = "2025-08-15T19:49:12.72Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b7/16/bd2f5904557265882108dc2e04f18abc05ab0c2b7082ae9430091daf1d5c/Pint-0.24.4-py3-none-any.whl", hash = "sha256:aa54926c8772159fcf65f82cc0d34de6768c151b32ad1deb0331291c38fe7659", size = 302029, upload-time = "2024-11-07T16:29:43.976Z" },
+    { url = "https://files.pythonhosted.org/packages/76/cc/c528311d798e22ec884b816e8aa2989e0f1f28cdc8e5969e2be5f10bce85/pint-0.25-py3-none-any.whl", hash = "sha256:cc20ae3dff010b9bbea41fb80c2de008f683cc83512cea73633d55aead80aa1e", size = 305462, upload-time = "2025-08-15T19:49:11.083Z" },
 ]
 
 [[package]]
@@ -2234,14 +2232,14 @@ wheels = [
 
 [[package]]
 name = "pre-commit-hooks"
-version = "5.0.0"
+version = "6.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "ruamel-yaml" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ac/7d/3299241a753c738d114600c360d754550b28c285281dc6a5132c4ccfae65/pre_commit_hooks-5.0.0.tar.gz", hash = "sha256:10626959a9eaf602fbfc22bc61b6e75801436f82326bfcee82bb1f2fc4bc646e", size = 29747, upload-time = "2024-10-05T18:43:11.225Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/36/4d/93e63e48f8fd16d6c1e4cef5dabadcade4d1325c7fd6f29f075a4d2284f3/pre_commit_hooks-6.0.0.tar.gz", hash = "sha256:76d8370c006f5026cdd638a397a678d26dda735a3c88137e05885a020f824034", size = 28293, upload-time = "2025-08-09T19:25:04.6Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/1e/29/db1d855a661c02dbde5cab3057969133fcc62e7a0c6393e48fe9d0e81679/pre_commit_hooks-5.0.0-py2.py3-none-any.whl", hash = "sha256:8d71cfb582c5c314a5498d94e0104b6567a8b93fb35903ea845c491f4e290a7a", size = 41245, upload-time = "2024-10-05T18:43:09.901Z" },
+    { url = "https://files.pythonhosted.org/packages/12/46/eba9be9daa403fa94854ce16a458c29df9a01c6c047931c3d8be6016cd9a/pre_commit_hooks-6.0.0-py2.py3-none-any.whl", hash = "sha256:76161b76d321d2f8ee2a8e0b84c30ee8443e01376121fd1c90851e33e3bd7ee2", size = 41338, upload-time = "2025-08-09T19:25:03.513Z" },
 ]
 
 [[package]]