From 99a702b21482c33b4cb72143da48ef2fdaf10ddd Mon Sep 17 00:00:00 2001 From: Dmitriy Repin Date: Sat, 9 Aug 2025 04:51:54 +0000 Subject: [PATCH 01/13] Fix integration import tests --- tests/integration/test_segy_import_export_masked.py | 4 +--- tests/integration/testing_helpers.py | 10 ++++++++-- tests/integration/v1/test_segy_to_mdio_v1.py | 9 ++------- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_segy_import_export_masked.py b/tests/integration/test_segy_import_export_masked.py index 3e3320bc1..3baebabaa 100644 --- a/tests/integration/test_segy_import_export_masked.py +++ b/tests/integration/test_segy_import_export_masked.py @@ -316,9 +316,7 @@ def test_ingested_mdio(self, test_conf: MaskedExportConfig, export_masked_path: assert expected.start == actual_dim.values[0] live_mask = ds["trace_mask"].values - - expected_sizes = [d.size for d in expected_dims] - num_traces = np.prod(expected_sizes) + num_traces = np.prod(live_mask.shape) # Ensure live mask is full np.testing.assert_equal(live_mask.ravel(), True) diff --git a/tests/integration/testing_helpers.py b/tests/integration/testing_helpers.py index 0a33fbd69..68d47535b 100644 --- a/tests/integration/testing_helpers.py +++ b/tests/integration/testing_helpers.py @@ -1,12 +1,18 @@ """This module provides testing helpers for integration testing.""" -from collections.abc import Callable +from __future__ import annotations + +from typing import TYPE_CHECKING import numpy as np -import xarray as xr from segy.schema import HeaderField from segy.schema import SegySpec +if TYPE_CHECKING: + from collections.abc import Callable + + import xarray as xr + def customize_segy_specs( segy_spec: SegySpec, diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py index 58e324b4b..eee1fc82b 100644 --- a/tests/integration/v1/test_segy_to_mdio_v1.py +++ b/tests/integration/v1/test_segy_to_mdio_v1.py @@ -45,7 +45,7 @@ def _validate_variable( # noqa PLR0913 arr = dataset[name] assert shape == arr.shape assert set(dims) == set(arr.dims) - assert data_type == arr.dtype + # assert data_type == arr.dtype actual_values = actual_func(arr) assert np.array_equal(expected_values, actual_values) @@ -102,12 +102,7 @@ def test_segy_to_mdio_v1__f3() -> None: _validate_variable(ds, "cdp_y", (23, 18), ["inline", "crossline"], np.float64, expected, _get_actual_value) # Tests "headers" variable - # NOTE: segy_sec.trace.header.dtype includes offsets. - # Let's ignore them assuming there is no overlaps and gaps - dtype_names = segy_sec.trace.header.names - dtype_formats = segy_sec.trace.header.formats - dtype_conf = {"names": dtype_names, "formats": dtype_formats} - data_type = np.dtype(dtype_conf) + data_type = segy_sec.trace.header.dtype expected = np.array( [ [6201972, 6202222, 6202472], From 03251c0a50cb5921be317e8bc1541dd6a3c589ce Mon Sep 17 00:00:00 2001 From: Dmitriy Repin Date: Mon, 11 Aug 2025 06:05:20 +0000 Subject: [PATCH 02/13] mask_and_scale=False --- src/mdio/core/grid.py | 1 - tests/integration/v1/test_segy_to_mdio_v1.py | 10 +++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mdio/core/grid.py b/src/mdio/core/grid.py index ec66ee471..2436c9448 100644 --- a/src/mdio/core/grid.py +++ b/src/mdio/core/grid.py @@ -148,7 +148,6 @@ def build_map(self, index_headers: HeaderArray) -> None: # Assign trace indices trace_indices = np.arange(start, end, dtype=np.uint64) - self.map.vindex[live_dim_indices] = trace_indices self.live_mask.vindex[live_dim_indices] = True diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py index eee1fc82b..6e5becbb7 100644 --- a/tests/integration/v1/test_segy_to_mdio_v1.py +++ b/tests/integration/v1/test_segy_to_mdio_v1.py @@ -45,7 +45,7 @@ def _validate_variable( # noqa PLR0913 arr = dataset[name] assert shape == arr.shape assert set(dims) == set(arr.dims) - # assert data_type == arr.dtype + assert data_type == arr.dtype actual_values = actual_func(arr) assert np.array_equal(expected_values, actual_values) @@ -102,7 +102,12 @@ def test_segy_to_mdio_v1__f3() -> None: _validate_variable(ds, "cdp_y", (23, 18), ["inline", "crossline"], np.float64, expected, _get_actual_value) # Tests "headers" variable - data_type = segy_sec.trace.header.dtype + # NOTE: segy_sec.trace.header.dtype includes offsets. + # Let's ignore them assuming there is no overlaps and gaps + dtype_names = segy_sec.trace.header.names + dtype_formats = segy_sec.trace.header.formats + dtype_conf = {"names": dtype_names, "formats": dtype_formats} + data_type = np.dtype(dtype_conf) expected = np.array( [ [6201972, 6202222, 6202472], @@ -111,7 +116,6 @@ def test_segy_to_mdio_v1__f3() -> None: ], dtype=np.int32, ) - def get_actual_headers(arr: xr.DataArray) -> np.ndarray: cdp_x_headers = arr.values["cdp_x"] return cdp_x_headers[_slice_three_values(arr.shape, values_from_start=True)] From c40be9950387297cfa4a23873ff0d9d08ba7e6de Mon Sep 17 00:00:00 2001 From: Dmitriy Repin Date: Mon, 11 Aug 2025 14:22:17 +0000 Subject: [PATCH 03/13] pre-commit --- tests/integration/v1/test_segy_to_mdio_v1.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py index 6e5becbb7..58e324b4b 100644 --- a/tests/integration/v1/test_segy_to_mdio_v1.py +++ b/tests/integration/v1/test_segy_to_mdio_v1.py @@ -116,6 +116,7 @@ def test_segy_to_mdio_v1__f3() -> None: ], dtype=np.int32, ) + def get_actual_headers(arr: xr.DataArray) -> np.ndarray: cdp_x_headers = arr.values["cdp_x"] return cdp_x_headers[_slice_three_values(arr.shape, values_from_start=True)] From b7632efd76572faac751bf21819fe380caa8fdbe Mon Sep 17 00:00:00 2001 From: Dmitriy Repin Date: Mon, 11 Aug 2025 21:59:21 +0000 Subject: [PATCH 04/13] PR Review issues --- tests/integration/test_segy_import_export_masked.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_segy_import_export_masked.py b/tests/integration/test_segy_import_export_masked.py index 3baebabaa..3e3320bc1 100644 --- a/tests/integration/test_segy_import_export_masked.py +++ b/tests/integration/test_segy_import_export_masked.py @@ -316,7 +316,9 @@ def test_ingested_mdio(self, test_conf: MaskedExportConfig, export_masked_path: assert expected.start == actual_dim.values[0] live_mask = ds["trace_mask"].values - num_traces = np.prod(live_mask.shape) + + expected_sizes = [d.size for d in expected_dims] + num_traces = np.prod(expected_sizes) # Ensure live mask is full np.testing.assert_equal(live_mask.ravel(), True) From 9d0d40ba47fcc46c82ac41fe5916b59287cf7f55 Mon Sep 17 00:00:00 2001 From: Dmitriy Repin Date: Mon, 11 Aug 2025 22:03:00 +0000 Subject: [PATCH 05/13] serialize-text-and-binary-headers --- src/mdio/converters/segy.py | 26 +++++++ tests/integration/testing_data.py | 82 ++++++++++++++++++++ tests/integration/v1/test_segy_to_mdio_v1.py | 19 +++-- 3 files changed, 120 insertions(+), 7 deletions(-) create mode 100644 tests/integration/testing_data.py diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py index 62c02fb5c..d6211c204 100644 --- a/src/mdio/converters/segy.py +++ b/src/mdio/converters/segy.py @@ -280,6 +280,30 @@ def _populate_coordinates( return dataset, drop_vars_delayed +def _add_text_binary_headers(dataset: xr_Dataset, segy_file: SegyFile) -> None: + text_header = segy_file.text_header.splitlines() + # Validate: + # text_header this should be a 40-items array of strings with width of 80 characters. + item_count = 40 + if len(text_header) != item_count: + err = f"Invalid text header count: expected {item_count}, got {len(text_header)}" + raise ValueError(err) + char_count = 80 + for i, line in enumerate(text_header): + if len(line) != char_count: + err = f"Invalid text header {i} line length: expected {char_count}, got {len(line)}" + raise ValueError(err) + ext_text_header = segy_file.ext_text_header + + # If using SegyFile.ext_text_header this should be a minimum of 40 elements and must + # capture all textual information (ensure text_header is a subset of ext_text_header). + if ext_text_header is not None: + for ext_hdr in ext_text_header: + text_header.append(ext_hdr.splitlines()) + dataset.metadata.attributes["text_header"] = text_header + dataset.metadata.attributes["binary_header"] = segy_file.binary_header.to_dict() + + def segy_to_mdio( segy_spec: SegySpec, mdio_template: AbstractDatasetTemplate, @@ -324,6 +348,8 @@ def segy_to_mdio( name=mdio_template.name, sizes=shape, horizontal_coord_unit=horizontal_unit, headers=headers ) + _add_text_binary_headers(dataset=mdio_ds, segy_file=segy_file) + xr_dataset: xr_Dataset = to_xarray_dataset(mdio_ds=mdio_ds) xr_dataset, drop_vars_delayed = _populate_coordinates( diff --git a/tests/integration/testing_data.py b/tests/integration/testing_data.py new file mode 100644 index 000000000..31bcf04ba --- /dev/null +++ b/tests/integration/testing_data.py @@ -0,0 +1,82 @@ +"""Integration tests data +""" +from typing import Any + + +def text_header_f3() -> list[str]: + return [ + "C 1 Cropped F3 2-byte integer data set ", + "C 2 This file is a cropped copy of the F3 block in the Dutch North Sea ", + "C 3 This copy was obtained from ", + "C 4 https://www.opendtect.org/osr/Main/NetherlandsOffshoreF3BlockComplete4GB ", + "C 5 and was created by inclusively extracting: ", + "C 6 inlines: 111 .. 133 ", + "C 7 crosslines: 875 .. 892 ", + "C 8 samples: 0 .. 300 ", + "C 9 ", + "C10 This file is cropped and modified with the intention of unit testing segyio ", + "C11 ", + "C12 ", + "C13 ", + "C14 ", + "C15 ", + "C16 ", + "C17 ", + "C18 ", + "C19 ", + "C20 ", + "C21 ", + "C22 ", + "C23 ", + "C24 ", + "C25 ", + "C26 ", + "C27 ", + "C28 ", + "C29 ", + "C30 ", + "C31 ", + "C32 ", + "C33 ", + "C34 ", + "C35 ", + "C36 ", + "C37 ", + "C38 ", + "C39 ", + "C40 "] + +def binary_header_f3() -> dict[str, Any]: + return { + "job_id": 1, + "line_num": 0, + "reel_num": 0, + "data_traces_per_ensemble": 0, + "aux_traces_per_ensemble": 0, + "sample_interval": 4000, + "orig_sample_interval": 0, + "samples_per_trace": 75, + "orig_samples_per_trace": 0, + "data_sample_format": 3, + "ensemble_fold": 0, + "trace_sorting_code": 4, + "vertical_sum_code": 0, + "sweep_freq_start": 0, + "sweep_freq_end": 0, + "sweep_length": 0, + "sweep_type_code": 0, + "sweep_trace_num": 0, + "sweep_taper_start": 0, + "sweep_taper_end": 0, + "taper_type_code": 0, + "correlated_data_code": 0, + "binary_gain_code": 0, + "amp_recovery_code": 0, + "measurement_system_code": 1, + "impulse_polarity_code": 0, + "vibratory_polarity_code": 0, + "fixed_length_trace_flag": 1, + "num_extended_text_headers": 0, + "segy_revision_major": 1, + "segy_revision_minor": 0 + } \ No newline at end of file diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py index 58e324b4b..d87138cf5 100644 --- a/tests/integration/v1/test_segy_to_mdio_v1.py +++ b/tests/integration/v1/test_segy_to_mdio_v1.py @@ -8,6 +8,9 @@ import xarray as xr import zarr from segy.standards import get_segy_standard +from tests.integration.testing_data import binary_header_f3 +from tests.integration.testing_data import text_header_f3 +from tests.integration.testing_helpers import f3_segy_path from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding from mdio.converters.segy import segy_to_mdio @@ -50,20 +53,16 @@ def _validate_variable( # noqa PLR0913 assert np.array_equal(expected_values, actual_values) -def test_segy_to_mdio_v1__f3() -> None: +def test_tiny_3d_import_v1() -> None: """Test the SEG-Y to MDIO conversion for the f3 equinor/segyio dataset.""" - # The f3 dataset comes from - # equinor/segyio (https://github.com/equinor/segyio) project (GNU LGPL license) - # wget https://github.com/equinor/segyio/blob/main/test-data/f3.sgy - - pref_path = "/DATA/equinor-segyio/f3.sgy" + pref_path = f3_segy_path() mdio_path = f"{pref_path}_mdio_v1" segy_sec = get_segy_standard(1.0) segy_to_mdio( segy_spec=segy_sec, mdio_template=TemplateRegistry().get("PostStack3DTime"), - input_location=StorageLocation(pref_path), + input_location=StorageLocation(str(pref_path)), output_location=StorageLocation(mdio_path), overwrite=True, ) @@ -149,6 +148,12 @@ def get_actual_amplitudes(arr: xr.DataArray) -> np.ndarray: get_actual_amplitudes, ) + # Validate text header + assert ds.attrs["attributes"]["text_header"] == text_header_f3() + + # Validate binary header + assert ds.attrs["attributes"]["binary_header"] == binary_header_f3() + @pytest.mark.skip(reason="Bug reproducer for the issue 582") def test_bug_reproducer_structured_xr_to_zar() -> None: From 88c28657a8325818db2e8b6289acac772efcde64 Mon Sep 17 00:00:00 2001 From: Altay Sansal Date: Tue, 12 Aug 2025 13:32:17 -0500 Subject: [PATCH 06/13] remove dev test data --- tests/integration/testing_data.py | 82 ------ tests/integration/v1/test_segy_to_mdio_v1.py | 258 ------------------- 2 files changed, 340 deletions(-) delete mode 100644 tests/integration/testing_data.py delete mode 100644 tests/integration/v1/test_segy_to_mdio_v1.py diff --git a/tests/integration/testing_data.py b/tests/integration/testing_data.py deleted file mode 100644 index 31bcf04ba..000000000 --- a/tests/integration/testing_data.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Integration tests data -""" -from typing import Any - - -def text_header_f3() -> list[str]: - return [ - "C 1 Cropped F3 2-byte integer data set ", - "C 2 This file is a cropped copy of the F3 block in the Dutch North Sea ", - "C 3 This copy was obtained from ", - "C 4 https://www.opendtect.org/osr/Main/NetherlandsOffshoreF3BlockComplete4GB ", - "C 5 and was created by inclusively extracting: ", - "C 6 inlines: 111 .. 133 ", - "C 7 crosslines: 875 .. 892 ", - "C 8 samples: 0 .. 300 ", - "C 9 ", - "C10 This file is cropped and modified with the intention of unit testing segyio ", - "C11 ", - "C12 ", - "C13 ", - "C14 ", - "C15 ", - "C16 ", - "C17 ", - "C18 ", - "C19 ", - "C20 ", - "C21 ", - "C22 ", - "C23 ", - "C24 ", - "C25 ", - "C26 ", - "C27 ", - "C28 ", - "C29 ", - "C30 ", - "C31 ", - "C32 ", - "C33 ", - "C34 ", - "C35 ", - "C36 ", - "C37 ", - "C38 ", - "C39 ", - "C40 "] - -def binary_header_f3() -> dict[str, Any]: - return { - "job_id": 1, - "line_num": 0, - "reel_num": 0, - "data_traces_per_ensemble": 0, - "aux_traces_per_ensemble": 0, - "sample_interval": 4000, - "orig_sample_interval": 0, - "samples_per_trace": 75, - "orig_samples_per_trace": 0, - "data_sample_format": 3, - "ensemble_fold": 0, - "trace_sorting_code": 4, - "vertical_sum_code": 0, - "sweep_freq_start": 0, - "sweep_freq_end": 0, - "sweep_length": 0, - "sweep_type_code": 0, - "sweep_trace_num": 0, - "sweep_taper_start": 0, - "sweep_taper_end": 0, - "taper_type_code": 0, - "correlated_data_code": 0, - "binary_gain_code": 0, - "amp_recovery_code": 0, - "measurement_system_code": 1, - "impulse_polarity_code": 0, - "vibratory_polarity_code": 0, - "fixed_length_trace_flag": 1, - "num_extended_text_headers": 0, - "segy_revision_major": 1, - "segy_revision_minor": 0 - } \ No newline at end of file diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py deleted file mode 100644 index d87138cf5..000000000 --- a/tests/integration/v1/test_segy_to_mdio_v1.py +++ /dev/null @@ -1,258 +0,0 @@ -"""End to end testing for SEG-Y to MDIO conversion v1.""" - -from __future__ import annotations - -import numcodecs -import numpy as np -import pytest -import xarray as xr -import zarr -from segy.standards import get_segy_standard -from tests.integration.testing_data import binary_header_f3 -from tests.integration.testing_data import text_header_f3 -from tests.integration.testing_helpers import f3_segy_path -from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding - -from mdio.converters.segy import segy_to_mdio -from mdio.converters.type_converter import to_numpy_dtype -from mdio.core.storage_location import StorageLocation -from mdio.schemas.dtype import ScalarType -from mdio.schemas.dtype import StructuredField -from mdio.schemas.dtype import StructuredType -from mdio.schemas.v1.templates.template_registry import TemplateRegistry - - -def _slice_three_values(dims: tuple[int], values_from_start: bool) -> tuple[slice, ...]: - if values_from_start: - slices = tuple([slice(0, 3) for _ in range(len(dims))]) - else: - slices = tuple([slice(-3, None) for _ in range(len(dims))]) - return slices - - -def _get_actual_value(arr: xr.DataArray) -> np.ndarray: - return arr.values[_slice_three_values(arr.shape, values_from_start=True)] - - -def _validate_variable( # noqa PLR0913 - dataset: xr.Dataset, - name: str, - shape: list[int], - dims: list[str], - data_type: np.dtype, # noqa ARG001 - # expected_values: range | None, - # actual_func: Callable, - expected_values: np.ndarray, - actual_func: callable[[xr.DataArray], np.ndarray], -) -> None: - arr = dataset[name] - assert shape == arr.shape - assert set(dims) == set(arr.dims) - assert data_type == arr.dtype - actual_values = actual_func(arr) - assert np.array_equal(expected_values, actual_values) - - -def test_tiny_3d_import_v1() -> None: - """Test the SEG-Y to MDIO conversion for the f3 equinor/segyio dataset.""" - pref_path = f3_segy_path() - mdio_path = f"{pref_path}_mdio_v1" - - segy_sec = get_segy_standard(1.0) - segy_to_mdio( - segy_spec=segy_sec, - mdio_template=TemplateRegistry().get("PostStack3DTime"), - input_location=StorageLocation(str(pref_path)), - output_location=StorageLocation(mdio_path), - overwrite=True, - ) - - # Load Xarray dataset from the MDIO file - # NOTE: If mask_and_scale is not set, - # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(mdio_path, engine="zarr", mask_and_scale=False) - - # The template uses data_type=ScalarType.INT32 for dimensional coordinates - # Tests "inline" variable - expected = np.array([111, 112, 113]) - _validate_variable(ds, "inline", (23,), ["inline"], np.int32, expected, _get_actual_value) - - # Tests "crossline" variable - expected = np.array([875, 876, 877]) - _validate_variable(ds, "crossline", (18,), ["crossline"], np.int32, expected, _get_actual_value) - - # Tests "time" variable - expected = np.array([0, 4, 8]) - _validate_variable(ds, "time", (75,), ["time"], np.int32, expected, _get_actual_value) - - # The template uses data_type=ScalarType.FLOAT64 for non-dimensional coordinates - # Tests "cdp_x" variable - expected = np.array([[6201972, 6202222, 6202472], [6201965, 6202215, 6202465], [6201958, 6202208, 6202458]]) - _validate_variable(ds, "cdp_x", (23, 18), ["inline", "crossline"], np.float64, expected, _get_actual_value) - - # Tests "cdp_y" variable - expected = np.array( - [ - [60742329, 60742336, 60742343], - [60742579, 60742586, 60742593], - [60742828, 60742835, 60742842], - ] - ) - _validate_variable(ds, "cdp_y", (23, 18), ["inline", "crossline"], np.float64, expected, _get_actual_value) - - # Tests "headers" variable - # NOTE: segy_sec.trace.header.dtype includes offsets. - # Let's ignore them assuming there is no overlaps and gaps - dtype_names = segy_sec.trace.header.names - dtype_formats = segy_sec.trace.header.formats - dtype_conf = {"names": dtype_names, "formats": dtype_formats} - data_type = np.dtype(dtype_conf) - expected = np.array( - [ - [6201972, 6202222, 6202472], - [6201965, 6202215, 6202465], - [6201958, 6202208, 6202458], - ], - dtype=np.int32, - ) - - def get_actual_headers(arr: xr.DataArray) -> np.ndarray: - cdp_x_headers = arr.values["cdp_x"] - return cdp_x_headers[_slice_three_values(arr.shape, values_from_start=True)] - - _validate_variable(ds, "headers", (23, 18), ["inline", "crossline"], data_type, expected, get_actual_headers) - - # Tests "trace_mask" variable - expected = np.array([[True, True, True], [True, True, True], [True, True, True]]) - _validate_variable(ds, "trace_mask", (23, 18), ["inline", "crossline"], np.bool, expected, _get_actual_value) - - # Tests "amplitude" variable - expected = np.array( - [ - [[487.0, -1104.0, -1456.0], [-129.0, -1728.0, 445.0], [-1443.0, 741.0, 1458.0]], - [[2464.0, 3220.0, 1362.0], [686.0, 530.0, -282.0], [3599.0, 2486.0, 433.0]], - [[4018.0, 5159.0, 2087.0], [-81.0, -3039.0, -1850.0], [2898.0, 1060.0, -121.0]], - ] - ) - - def get_actual_amplitudes(arr: xr.DataArray) -> np.ndarray: - return arr.values[_slice_three_values(arr.shape, values_from_start=False)] - - _validate_variable( - ds, - "amplitude", - (23, 18, 75), - ["inline", "crossline", "time"], - np.float32, - expected, - get_actual_amplitudes, - ) - - # Validate text header - assert ds.attrs["attributes"]["text_header"] == text_header_f3() - - # Validate binary header - assert ds.attrs["attributes"]["binary_header"] == binary_header_f3() - - -@pytest.mark.skip(reason="Bug reproducer for the issue 582") -def test_bug_reproducer_structured_xr_to_zar() -> None: - """Bug reproducer for the issue https://github.com/TGSAI/mdio-python/issues/582. - - Will be removed in the when the bug is fixed - """ - shape = (4, 4, 2) - dim_names = ["inline", "crossline", "depth"] - chunks = (2, 2, 2) - # Pretend that we created a pydantic model from a template - structured_type = StructuredType( - fields=[ - StructuredField(name="cdp_x", format=ScalarType.INT32), - StructuredField(name="cdp_y", format=ScalarType.INT32), - StructuredField(name="elevation", format=ScalarType.FLOAT16), - StructuredField(name="some_scalar", format=ScalarType.FLOAT16), - ] - ) - - xr_dataset = xr.Dataset() - - # Add traces to the dataset, shape = (4, 4, 2) of floats - traces_zarr = zarr.zeros(shape=shape, dtype=np.float32, zarr_format=2) - traces_xr = xr.DataArray(traces_zarr, dims=dim_names) - traces_xr.encoding = { - "_FillValue": np.nan, - "chunks": chunks, - "chunk_key_encoding": V2ChunkKeyEncoding(separator="/").to_dict(), - "compressor": numcodecs.Blosc(cname="zstd", clevel=5, shuffle=1, blocksize=0), - } - xr_dataset["traces"] = traces_xr - - # Add headers to the dataset, shape = (4, 4) of structured type - data_type = to_numpy_dtype(structured_type) - - # Validate the conversion - assert data_type == np.dtype([("cdp_x", " trace_worker - - not_null = np.array( - [ - [True, False, False, False], - [False, True, False, False], - [False, False, True, False], - [False, False, False, True], - ] - ) - hdr = (11, 22, -33.0, 44.0) - headers = np.array([hdr, hdr, hdr, hdr], dtype=data_type) - trace = np.array([[100.0, 200.0], [300.0, 400.0], [500.0, 600.0], [700.0, 800.0]], dtype=np.float32) - - # Here is one iteration of it: - ds_to_write = xr_dataset[["traces", "headers"]] - # We do not have any coords to reset - # ds_to_write = ds_to_write.reset_coords() - - ds_to_write["headers"].data[not_null] = headers - ds_to_write["headers"].data[~not_null] = 0 - ds_to_write["traces"].data[not_null] = trace - - region = { - "inline": slice(0, 2, None), - "crossline": slice(0, 2, None), - "depth": slice(0, 2, None), - } - - sub_dataset = ds_to_write.isel(region) - sub_dataset.to_zarr( - store="/tmp/reproducer_xr.zarr", # noqa: S108 - region=region, - mode="r+", - write_empty_chunks=False, - zarr_format=2, - ) From 3a9e67291bcb65989fa450b6ec391f67ff6510e2 Mon Sep 17 00:00:00 2001 From: Altay Sansal Date: Tue, 12 Aug 2025 13:38:19 -0500 Subject: [PATCH 07/13] add back whitespace --- src/mdio/core/grid.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mdio/core/grid.py b/src/mdio/core/grid.py index 2436c9448..ec66ee471 100644 --- a/src/mdio/core/grid.py +++ b/src/mdio/core/grid.py @@ -148,6 +148,7 @@ def build_map(self, index_headers: HeaderArray) -> None: # Assign trace indices trace_indices = np.arange(start, end, dtype=np.uint64) + self.map.vindex[live_dim_indices] = trace_indices self.live_mask.vindex[live_dim_indices] = True From 28d07b42583b014dab6ad2a7276127c8dfc7f5a1 Mon Sep 17 00:00:00 2001 From: Altay Sansal Date: Tue, 12 Aug 2025 13:39:10 -0500 Subject: [PATCH 08/13] revert import changes --- tests/integration/testing_helpers.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/integration/testing_helpers.py b/tests/integration/testing_helpers.py index 68d47535b..0a33fbd69 100644 --- a/tests/integration/testing_helpers.py +++ b/tests/integration/testing_helpers.py @@ -1,18 +1,12 @@ """This module provides testing helpers for integration testing.""" -from __future__ import annotations - -from typing import TYPE_CHECKING +from collections.abc import Callable import numpy as np +import xarray as xr from segy.schema import HeaderField from segy.schema import SegySpec -if TYPE_CHECKING: - from collections.abc import Callable - - import xarray as xr - def customize_segy_specs( segy_spec: SegySpec, From 9a74f15724a231dd84548ad69679cf73b0831f63 Mon Sep 17 00:00:00 2001 From: Altay Sansal Date: Tue, 12 Aug 2025 13:48:55 -0500 Subject: [PATCH 09/13] fix attribute initialization in `_add_text_binary_headers` --- src/mdio/converters/segy.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py index d6211c204..8756ddd09 100644 --- a/src/mdio/converters/segy.py +++ b/src/mdio/converters/segy.py @@ -280,7 +280,7 @@ def _populate_coordinates( return dataset, drop_vars_delayed -def _add_text_binary_headers(dataset: xr_Dataset, segy_file: SegyFile) -> None: +def _add_text_binary_headers(dataset: Dataset, segy_file: SegyFile) -> None: text_header = segy_file.text_header.splitlines() # Validate: # text_header this should be a 40-items array of strings with width of 80 characters. @@ -300,8 +300,18 @@ def _add_text_binary_headers(dataset: xr_Dataset, segy_file: SegyFile) -> None: if ext_text_header is not None: for ext_hdr in ext_text_header: text_header.append(ext_hdr.splitlines()) - dataset.metadata.attributes["text_header"] = text_header - dataset.metadata.attributes["binary_header"] = segy_file.binary_header.to_dict() + + # Handle case where it may not have any metadata yet + if dataset.metadata.attributes is None: + dataset.attrs["attributes"] = {} + + # Update the attributes with the text and binary headers. + dataset.metadata.attributes.update( + { + "textHeader": text_header, + "binaryHeader": segy_file.binary_header.to_dict(), + } + ) def segy_to_mdio( From c6714cb229f24315501e874df0c8916af8161fa6 Mon Sep 17 00:00:00 2001 From: Dmitriy Repin Date: Tue, 12 Aug 2025 19:31:51 +0000 Subject: [PATCH 10/13] Add tests --- tests/conftest.py | 11 ++- tests/integration/test_segy_import_export.py | 44 ++++++----- tests/integration/testing_data.py | 82 ++++++++++++++++++++ 3 files changed, 111 insertions(+), 26 deletions(-) create mode 100644 tests/integration/testing_data.py diff --git a/tests/conftest.py b/tests/conftest.py index 6f4f2d1f3..b4a9804e2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,15 +3,11 @@ from __future__ import annotations import warnings -from typing import TYPE_CHECKING +from pathlib import Path # noqa TC003 from urllib.request import urlretrieve import pytest -if TYPE_CHECKING: - from pathlib import Path - - # Suppress Dask's chunk balancing warning warnings.filterwarnings( "ignore", @@ -46,7 +42,10 @@ def segy_input(segy_input_uri: str, tmp_path_factory: pytest.TempPathFactory) -> @pytest.fixture(scope="module") def zarr_tmp(tmp_path_factory: pytest.TempPathFactory) -> Path: """Make a temp file for the output MDIO.""" - return tmp_path_factory.mktemp(r"mdio") + path = tmp_path_factory.mktemp(r"mdio") + # For debugging purposes to use a fixed path, uncomment the following: + # path = Path("./TMP/zarr_tmp") + return path # noqa RET504 @pytest.fixture(scope="module") diff --git a/tests/integration/test_segy_import_export.py b/tests/integration/test_segy_import_export.py index 40e10b489..50352d7c3 100644 --- a/tests/integration/test_segy_import_export.py +++ b/tests/integration/test_segy_import_export.py @@ -13,6 +13,8 @@ import xarray as xr from segy import SegyFile from segy.standards import get_segy_standard +from tests.integration.testing_data import binary_header_teapot_dome +from tests.integration.testing_data import text_header_teapot_dome from tests.integration.testing_helpers import customize_segy_specs from tests.integration.testing_helpers import get_inline_header_values from tests.integration.testing_helpers import get_values @@ -266,8 +268,8 @@ def test_3d_import( segy_to_mdio( segy_spec=segy_spec, mdio_template=TemplateRegistry().get("PostStack3DTime"), - input_location=StorageLocation(segy_input.__str__()), - output_location=StorageLocation(zarr_tmp.__str__()), + input_location=StorageLocation(str(segy_input)), + output_location=StorageLocation(str(zarr_tmp)), overwrite=True, ) @@ -278,11 +280,9 @@ class TestReader: def test_meta_dataset_read(self, zarr_tmp: Path) -> None: """Metadata reading tests.""" - path = zarr_tmp.__str__() - # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) expected_attrs = { "apiVersion": "1.0.0a1", "createdOn": "2025-08-06 16:21:54.747880+00:00", @@ -297,13 +297,25 @@ def test_meta_dataset_read(self, zarr_tmp: Path) -> None: else: assert actual_attrs_json[key] == value + attributes = ds.attrs["attributes"] + assert attributes is not None + + # Validate attributes provided by the template + assert attributes["surveyDimensionality"] == "3D" + assert attributes["ensembleType"] == "line" + assert attributes["processingStage"] == "post-stack" + + # Validate text header + assert attributes["textHeader"] == text_header_teapot_dome() + + # Validate binary header + assert attributes["binaryHeader"] == binary_header_teapot_dome() + def test_meta_variable_read(self, zarr_tmp: Path) -> None: """Metadata reading tests.""" - path = zarr_tmp.__str__() # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" - ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) expected_attrs = { "count": 97354860, "sum": -8594.551666259766, @@ -318,11 +330,9 @@ def test_meta_variable_read(self, zarr_tmp: Path) -> None: def test_grid(self, zarr_tmp: Path) -> None: """Test validating MDIO variables.""" # Load Xarray dataset from the MDIO file - path = zarr_tmp.__str__() - # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) # Note: in order to create the dataset we used the Time template, so the # sample dimension is called "time" @@ -366,22 +376,18 @@ def test_grid(self, zarr_tmp: Path) -> None: def test_inline(self, zarr_tmp: Path) -> None: """Read and compare every 75 inlines' mean and std. dev.""" - path = zarr_tmp.__str__() - # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) inlines = ds["amplitude"][::75, :, :] mean, std = inlines.mean(), inlines.std() npt.assert_allclose([mean, std], [1.0555277e-04, 6.0027051e-01]) def test_crossline(self, zarr_tmp: Path) -> None: """Read and compare every 75 crosslines' mean and std. dev.""" - path = zarr_tmp.__str__() - # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) xlines = ds["amplitude"][:, ::75, :] mean, std = xlines.mean(), xlines.std() @@ -389,11 +395,9 @@ def test_crossline(self, zarr_tmp: Path) -> None: def test_zslice(self, zarr_tmp: Path) -> None: """Read and compare every 225 z-slices' mean and std. dev.""" - path = zarr_tmp.__str__() - # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) slices = ds["amplitude"][:, :, ::225] mean, std = slices.mean(), slices.std() npt.assert_allclose([mean, std], [0.005236923, 0.61279935]) diff --git a/tests/integration/testing_data.py b/tests/integration/testing_data.py new file mode 100644 index 000000000..4aeb6ef14 --- /dev/null +++ b/tests/integration/testing_data.py @@ -0,0 +1,82 @@ +"""Integration tests data""" + +from typing import Any + +def text_header_teapot_dome() -> list[str]: + return [ + "C 1 CLIENT: ROCKY MOUNTAIN OILFIELD TESTING CENTER ", + "C 2 PROJECT: NAVAL PETROLEUM RESERVE #3 (TEAPOT DOME); NATRONA COUNTY, WYOMING ", + "C 3 LINE: 3D ", + "C 4 ", + "C 5 THIS IS THE FILTERED POST STACK MIGRATION ", + "C 6 ", + "C 7 INLINE 1, XLINE 1: X COORDINATE: 788937 Y COORDINATE: 938845 ", + "C 8 INLINE 1, XLINE 188: X COORDINATE: 809501 Y COORDINATE: 939333 ", + "C 9 INLINE 188, XLINE 1: X COORDINATE: 788039 Y COORDINATE: 976674 ", + "C10 INLINE NUMBER: MIN: 1 MAX: 345 TOTAL: 345 ", + "C11 CROSSLINE NUMBER: MIN: 1 MAX: 188 TOTAL: 188 ", + "C12 TOTAL NUMBER OF CDPS: 64860 BIN DIMENSION: 110' X 110' ", + "C13 ", + "C14 ", + "C15 ", + "C16 ", + "C17 ", + "C18 ", + "C19 GENERAL SEGY INFORMATION ", + "C20 RECORD LENGHT (MS): 3000 ", + "C21 SAMPLE RATE (MS): 2.0 ", + "C22 DATA FORMAT: 4 BYTE IBM FLOATING POINT ", + "C23 BYTES 13- 16: CROSSLINE NUMBER (TRACE) ", + "C24 BYTES 17- 20: INLINE NUMBER (LINE) ", + "C25 BYTES 81- 84: CDP_X COORD ", + "C26 BYTES 85- 88: CDP_Y COORD ", + "C27 BYTES 181-184: INLINE NUMBER (LINE) ", + "C28 BYTES 185-188: CROSSLINE NUMBER (TRACE) ", + "C29 BYTES 189-192: CDP_X COORD ", + "C30 BYTES 193-196: CDP_Y COORD ", + "C31 ", + "C32 ", + "C33 ", + "C34 ", + "C35 ", + "C36 Processed by: Excel Geophysical Services, Inc. ", + "C37 8301 East Prentice Ave. Ste. 402 ", + "C38 Englewood, Colorado 80111 ", + "C39 (voice) 303.694.9629 (fax) 303.771.1646 ", + "C40 END EBCDIC " + ] + +def binary_header_teapot_dome() -> dict[str, Any]: + return { + "job_id": 9999, + "line_num": 9999, + "reel_num": 1, + "data_traces_per_ensemble": 188, + "aux_traces_per_ensemble": 0, + "sample_interval": 2000, + "orig_sample_interval": 0, + "samples_per_trace": 1501, + "orig_samples_per_trace": 1501, + "data_sample_format": 1, + "ensemble_fold": 57, + "trace_sorting_code": 4, + "vertical_sum_code": 1, + "sweep_freq_start": 0, + "sweep_freq_end": 0, + "sweep_length": 0, + "sweep_type_code": 0, + "sweep_trace_num": 0, + "sweep_taper_start": 0, + "sweep_taper_end": 0, + "taper_type_code": 0, + "correlated_data_code": 2, + "binary_gain_code": 1, + "amp_recovery_code": 4, + "measurement_system_code": 2, + "impulse_polarity_code": 1, + "vibratory_polarity_code": 0, + "fixed_length_trace_flag": 0, + "num_extended_text_headers": 0, + "segy_revision_major": 0, + "segy_revision_minor": 0 + } \ No newline at end of file From 024bed71b38564b23a14cef648661dc5518e2573 Mon Sep 17 00:00:00 2001 From: Altay Sansal Date: Tue, 12 Aug 2025 15:04:06 -0500 Subject: [PATCH 11/13] refactor: improve type annotations and docstrings in test utilities --- tests/conftest.py | 7 +-- tests/integration/testing_data.py | 74 ++++++++++++++++--------------- 2 files changed, 42 insertions(+), 39 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b4a9804e2..e884cc84d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,11 +3,14 @@ from __future__ import annotations import warnings -from pathlib import Path # noqa TC003 +from typing import TYPE_CHECKING from urllib.request import urlretrieve import pytest +if TYPE_CHECKING: + from pathlib import Path + # Suppress Dask's chunk balancing warning warnings.filterwarnings( "ignore", @@ -43,8 +46,6 @@ def segy_input(segy_input_uri: str, tmp_path_factory: pytest.TempPathFactory) -> def zarr_tmp(tmp_path_factory: pytest.TempPathFactory) -> Path: """Make a temp file for the output MDIO.""" path = tmp_path_factory.mktemp(r"mdio") - # For debugging purposes to use a fixed path, uncomment the following: - # path = Path("./TMP/zarr_tmp") return path # noqa RET504 diff --git a/tests/integration/testing_data.py b/tests/integration/testing_data.py index 4aeb6ef14..3696a7cf1 100644 --- a/tests/integration/testing_data.py +++ b/tests/integration/testing_data.py @@ -1,8 +1,8 @@ -"""Integration tests data""" +"""Integration tests data for teapot dome SEG-Y.""" -from typing import Any def text_header_teapot_dome() -> list[str]: + """Return the teapot dome expected text header.""" return [ "C 1 CLIENT: ROCKY MOUNTAIN OILFIELD TESTING CENTER ", "C 2 PROJECT: NAVAL PETROLEUM RESERVE #3 (TEAPOT DOME); NATRONA COUNTY, WYOMING ", @@ -43,40 +43,42 @@ def text_header_teapot_dome() -> list[str]: "C37 8301 East Prentice Ave. Ste. 402 ", "C38 Englewood, Colorado 80111 ", "C39 (voice) 303.694.9629 (fax) 303.771.1646 ", - "C40 END EBCDIC " + "C40 END EBCDIC ", ] -def binary_header_teapot_dome() -> dict[str, Any]: + +def binary_header_teapot_dome() -> dict[str, int]: + """Return the teapot dome expected binary header.""" return { - "job_id": 9999, - "line_num": 9999, - "reel_num": 1, - "data_traces_per_ensemble": 188, - "aux_traces_per_ensemble": 0, - "sample_interval": 2000, - "orig_sample_interval": 0, - "samples_per_trace": 1501, - "orig_samples_per_trace": 1501, - "data_sample_format": 1, - "ensemble_fold": 57, - "trace_sorting_code": 4, - "vertical_sum_code": 1, - "sweep_freq_start": 0, - "sweep_freq_end": 0, - "sweep_length": 0, - "sweep_type_code": 0, - "sweep_trace_num": 0, - "sweep_taper_start": 0, - "sweep_taper_end": 0, - "taper_type_code": 0, - "correlated_data_code": 2, - "binary_gain_code": 1, - "amp_recovery_code": 4, - "measurement_system_code": 2, - "impulse_polarity_code": 1, - "vibratory_polarity_code": 0, - "fixed_length_trace_flag": 0, - "num_extended_text_headers": 0, - "segy_revision_major": 0, - "segy_revision_minor": 0 - } \ No newline at end of file + "job_id": 9999, + "line_num": 9999, + "reel_num": 1, + "data_traces_per_ensemble": 188, + "aux_traces_per_ensemble": 0, + "sample_interval": 2000, + "orig_sample_interval": 0, + "samples_per_trace": 1501, + "orig_samples_per_trace": 1501, + "data_sample_format": 1, + "ensemble_fold": 57, + "trace_sorting_code": 4, + "vertical_sum_code": 1, + "sweep_freq_start": 0, + "sweep_freq_end": 0, + "sweep_length": 0, + "sweep_type_code": 0, + "sweep_trace_num": 0, + "sweep_taper_start": 0, + "sweep_taper_end": 0, + "taper_type_code": 0, + "correlated_data_code": 2, + "binary_gain_code": 1, + "amp_recovery_code": 4, + "measurement_system_code": 2, + "impulse_polarity_code": 1, + "vibratory_polarity_code": 0, + "fixed_length_trace_flag": 0, + "num_extended_text_headers": 0, + "segy_revision_major": 0, + "segy_revision_minor": 0, + } From 3bb2d4094a1217aba640211fff3b62af15ba43de Mon Sep 17 00:00:00 2001 From: Altay Sansal Date: Tue, 12 Aug 2025 15:05:34 -0500 Subject: [PATCH 12/13] fix formatting --- tests/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index e884cc84d..6f4f2d1f3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from pathlib import Path + # Suppress Dask's chunk balancing warning warnings.filterwarnings( "ignore", @@ -45,8 +46,7 @@ def segy_input(segy_input_uri: str, tmp_path_factory: pytest.TempPathFactory) -> @pytest.fixture(scope="module") def zarr_tmp(tmp_path_factory: pytest.TempPathFactory) -> Path: """Make a temp file for the output MDIO.""" - path = tmp_path_factory.mktemp(r"mdio") - return path # noqa RET504 + return tmp_path_factory.mktemp(r"mdio") @pytest.fixture(scope="module") From e04bb7c2da46e08db3fd3a804de4331ac8771ce9 Mon Sep 17 00:00:00 2001 From: Altay Sansal Date: Tue, 12 Aug 2025 15:47:52 -0500 Subject: [PATCH 13/13] remove redundant `str()` casting in `xr.open_dataset` calls --- tests/integration/test_segy_import_export.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_segy_import_export.py b/tests/integration/test_segy_import_export.py index 50352d7c3..e80028df1 100644 --- a/tests/integration/test_segy_import_export.py +++ b/tests/integration/test_segy_import_export.py @@ -282,7 +282,7 @@ def test_meta_dataset_read(self, zarr_tmp: Path) -> None: """Metadata reading tests.""" # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False) expected_attrs = { "apiVersion": "1.0.0a1", "createdOn": "2025-08-06 16:21:54.747880+00:00", @@ -315,7 +315,7 @@ def test_meta_variable_read(self, zarr_tmp: Path) -> None: """Metadata reading tests.""" # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False) expected_attrs = { "count": 97354860, "sum": -8594.551666259766, @@ -332,7 +332,7 @@ def test_grid(self, zarr_tmp: Path) -> None: # Load Xarray dataset from the MDIO file # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False) # Note: in order to create the dataset we used the Time template, so the # sample dimension is called "time" @@ -378,7 +378,7 @@ def test_inline(self, zarr_tmp: Path) -> None: """Read and compare every 75 inlines' mean and std. dev.""" # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False) inlines = ds["amplitude"][::75, :, :] mean, std = inlines.mean(), inlines.std() npt.assert_allclose([mean, std], [1.0555277e-04, 6.0027051e-01]) @@ -387,7 +387,7 @@ def test_crossline(self, zarr_tmp: Path) -> None: """Read and compare every 75 crosslines' mean and std. dev.""" # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False) xlines = ds["amplitude"][:, ::75, :] mean, std = xlines.mean(), xlines.std() @@ -397,7 +397,7 @@ def test_zslice(self, zarr_tmp: Path) -> None: """Read and compare every 225 z-slices' mean and std. dev.""" # NOTE: If mask_and_scale is not set, # Xarray will convert int to float and replace _FillValue with NaN - ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False) + ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False) slices = ds["amplitude"][:, :, ::225] mean, std = slices.mean(), slices.std() npt.assert_allclose([mean, std], [0.005236923, 0.61279935])