From 99a702b21482c33b4cb72143da48ef2fdaf10ddd Mon Sep 17 00:00:00 2001
From: Dmitriy Repin <drepin@hotmail.com>
Date: Sat, 9 Aug 2025 04:51:54 +0000
Subject: [PATCH 01/13] Fix integration import tests

---
 tests/integration/test_segy_import_export_masked.py |  4 +---
 tests/integration/testing_helpers.py                | 10 ++++++++--
 tests/integration/v1/test_segy_to_mdio_v1.py        |  9 ++-------
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/tests/integration/test_segy_import_export_masked.py b/tests/integration/test_segy_import_export_masked.py
index 3e3320bc1..3baebabaa 100644
--- a/tests/integration/test_segy_import_export_masked.py
+++ b/tests/integration/test_segy_import_export_masked.py
@@ -316,9 +316,7 @@ def test_ingested_mdio(self, test_conf: MaskedExportConfig, export_masked_path:
             assert expected.start == actual_dim.values[0]
 
         live_mask = ds["trace_mask"].values
-
-        expected_sizes = [d.size for d in expected_dims]
-        num_traces = np.prod(expected_sizes)
+        num_traces = np.prod(live_mask.shape)
 
         # Ensure live mask is full
         np.testing.assert_equal(live_mask.ravel(), True)
diff --git a/tests/integration/testing_helpers.py b/tests/integration/testing_helpers.py
index 0a33fbd69..68d47535b 100644
--- a/tests/integration/testing_helpers.py
+++ b/tests/integration/testing_helpers.py
@@ -1,12 +1,18 @@
 """This module provides testing helpers for integration testing."""
 
-from collections.abc import Callable
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
 
 import numpy as np
-import xarray as xr
 from segy.schema import HeaderField
 from segy.schema import SegySpec
 
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    import xarray as xr
+
 
 def customize_segy_specs(
     segy_spec: SegySpec,
diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py
index 58e324b4b..eee1fc82b 100644
--- a/tests/integration/v1/test_segy_to_mdio_v1.py
+++ b/tests/integration/v1/test_segy_to_mdio_v1.py
@@ -45,7 +45,7 @@ def _validate_variable(  # noqa PLR0913
     arr = dataset[name]
     assert shape == arr.shape
     assert set(dims) == set(arr.dims)
-    assert data_type == arr.dtype
+    # assert data_type == arr.dtype
     actual_values = actual_func(arr)
     assert np.array_equal(expected_values, actual_values)
 
@@ -102,12 +102,7 @@ def test_segy_to_mdio_v1__f3() -> None:
     _validate_variable(ds, "cdp_y", (23, 18), ["inline", "crossline"], np.float64, expected, _get_actual_value)
 
     # Tests "headers" variable
-    # NOTE: segy_sec.trace.header.dtype includes offsets.
-    # Let's ignore them assuming there is no overlaps and gaps
-    dtype_names = segy_sec.trace.header.names
-    dtype_formats = segy_sec.trace.header.formats
-    dtype_conf = {"names": dtype_names, "formats": dtype_formats}
-    data_type = np.dtype(dtype_conf)
+    data_type = segy_sec.trace.header.dtype
     expected = np.array(
         [
             [6201972, 6202222, 6202472],

From 03251c0a50cb5921be317e8bc1541dd6a3c589ce Mon Sep 17 00:00:00 2001
From: Dmitriy Repin <drepin@hotmail.com>
Date: Mon, 11 Aug 2025 06:05:20 +0000
Subject: [PATCH 02/13] mask_and_scale=False

---
 src/mdio/core/grid.py                        |  1 -
 tests/integration/v1/test_segy_to_mdio_v1.py | 10 +++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/mdio/core/grid.py b/src/mdio/core/grid.py
index ec66ee471..2436c9448 100644
--- a/src/mdio/core/grid.py
+++ b/src/mdio/core/grid.py
@@ -148,7 +148,6 @@ def build_map(self, index_headers: HeaderArray) -> None:
 
             # Assign trace indices
             trace_indices = np.arange(start, end, dtype=np.uint64)
-
             self.map.vindex[live_dim_indices] = trace_indices
             self.live_mask.vindex[live_dim_indices] = True
 
diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py
index eee1fc82b..6e5becbb7 100644
--- a/tests/integration/v1/test_segy_to_mdio_v1.py
+++ b/tests/integration/v1/test_segy_to_mdio_v1.py
@@ -45,7 +45,7 @@ def _validate_variable(  # noqa PLR0913
     arr = dataset[name]
     assert shape == arr.shape
     assert set(dims) == set(arr.dims)
-    # assert data_type == arr.dtype
+    assert data_type == arr.dtype
     actual_values = actual_func(arr)
     assert np.array_equal(expected_values, actual_values)
 
@@ -102,7 +102,12 @@ def test_segy_to_mdio_v1__f3() -> None:
     _validate_variable(ds, "cdp_y", (23, 18), ["inline", "crossline"], np.float64, expected, _get_actual_value)
 
     # Tests "headers" variable
-    data_type = segy_sec.trace.header.dtype
+    # NOTE: segy_sec.trace.header.dtype includes offsets.
+    # Let's ignore them assuming there is no overlaps and gaps
+    dtype_names = segy_sec.trace.header.names
+    dtype_formats = segy_sec.trace.header.formats
+    dtype_conf = {"names": dtype_names, "formats": dtype_formats}
+    data_type = np.dtype(dtype_conf)
     expected = np.array(
         [
             [6201972, 6202222, 6202472],
@@ -111,7 +116,6 @@ def test_segy_to_mdio_v1__f3() -> None:
         ],
         dtype=np.int32,
     )
-
     def get_actual_headers(arr: xr.DataArray) -> np.ndarray:
         cdp_x_headers = arr.values["cdp_x"]
         return cdp_x_headers[_slice_three_values(arr.shape, values_from_start=True)]

From c40be9950387297cfa4a23873ff0d9d08ba7e6de Mon Sep 17 00:00:00 2001
From: Dmitriy Repin <drepin@hotmail.com>
Date: Mon, 11 Aug 2025 14:22:17 +0000
Subject: [PATCH 03/13] pre-commit

---
 tests/integration/v1/test_segy_to_mdio_v1.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py
index 6e5becbb7..58e324b4b 100644
--- a/tests/integration/v1/test_segy_to_mdio_v1.py
+++ b/tests/integration/v1/test_segy_to_mdio_v1.py
@@ -116,6 +116,7 @@ def test_segy_to_mdio_v1__f3() -> None:
         ],
         dtype=np.int32,
     )
+
     def get_actual_headers(arr: xr.DataArray) -> np.ndarray:
         cdp_x_headers = arr.values["cdp_x"]
         return cdp_x_headers[_slice_three_values(arr.shape, values_from_start=True)]

From b7632efd76572faac751bf21819fe380caa8fdbe Mon Sep 17 00:00:00 2001
From: Dmitriy Repin <drepin@hotmail.com>
Date: Mon, 11 Aug 2025 21:59:21 +0000
Subject: [PATCH 04/13] PR Review issues

---
 tests/integration/test_segy_import_export_masked.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_segy_import_export_masked.py b/tests/integration/test_segy_import_export_masked.py
index 3baebabaa..3e3320bc1 100644
--- a/tests/integration/test_segy_import_export_masked.py
+++ b/tests/integration/test_segy_import_export_masked.py
@@ -316,7 +316,9 @@ def test_ingested_mdio(self, test_conf: MaskedExportConfig, export_masked_path:
             assert expected.start == actual_dim.values[0]
 
         live_mask = ds["trace_mask"].values
-        num_traces = np.prod(live_mask.shape)
+
+        expected_sizes = [d.size for d in expected_dims]
+        num_traces = np.prod(expected_sizes)
 
         # Ensure live mask is full
         np.testing.assert_equal(live_mask.ravel(), True)

From 9d0d40ba47fcc46c82ac41fe5916b59287cf7f55 Mon Sep 17 00:00:00 2001
From: Dmitriy Repin <drepin@hotmail.com>
Date: Mon, 11 Aug 2025 22:03:00 +0000
Subject: [PATCH 05/13] serialize-text-and-binary-headers

---
 src/mdio/converters/segy.py                  | 26 +++++++
 tests/integration/testing_data.py            | 82 ++++++++++++++++++++
 tests/integration/v1/test_segy_to_mdio_v1.py | 19 +++--
 3 files changed, 120 insertions(+), 7 deletions(-)
 create mode 100644 tests/integration/testing_data.py

diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py
index 62c02fb5c..d6211c204 100644
--- a/src/mdio/converters/segy.py
+++ b/src/mdio/converters/segy.py
@@ -280,6 +280,30 @@ def _populate_coordinates(
     return dataset, drop_vars_delayed
 
 
+def _add_text_binary_headers(dataset: xr_Dataset, segy_file: SegyFile) -> None:
+    text_header = segy_file.text_header.splitlines()
+    # Validate:
+    # text_header this should be a 40-items array of strings with width of 80 characters.
+    item_count = 40
+    if len(text_header) != item_count:
+        err = f"Invalid text header count: expected {item_count}, got {len(text_header)}"
+        raise ValueError(err)
+    char_count = 80
+    for i, line in enumerate(text_header):
+        if len(line) != char_count:
+            err = f"Invalid text header {i} line length: expected {char_count}, got {len(line)}"
+            raise ValueError(err)
+    ext_text_header = segy_file.ext_text_header
+
+    # If using SegyFile.ext_text_header this should be a minimum of 40 elements and must
+    # capture all textual information (ensure text_header is a subset of ext_text_header).
+    if ext_text_header is not None:
+        for ext_hdr in ext_text_header:
+            text_header.append(ext_hdr.splitlines())
+    dataset.metadata.attributes["text_header"] = text_header
+    dataset.metadata.attributes["binary_header"] = segy_file.binary_header.to_dict()
+
+
 def segy_to_mdio(
     segy_spec: SegySpec,
     mdio_template: AbstractDatasetTemplate,
@@ -324,6 +348,8 @@ def segy_to_mdio(
         name=mdio_template.name, sizes=shape, horizontal_coord_unit=horizontal_unit, headers=headers
     )
 
+    _add_text_binary_headers(dataset=mdio_ds, segy_file=segy_file)
+
     xr_dataset: xr_Dataset = to_xarray_dataset(mdio_ds=mdio_ds)
 
     xr_dataset, drop_vars_delayed = _populate_coordinates(
diff --git a/tests/integration/testing_data.py b/tests/integration/testing_data.py
new file mode 100644
index 000000000..31bcf04ba
--- /dev/null
+++ b/tests/integration/testing_data.py
@@ -0,0 +1,82 @@
+"""Integration tests data
+"""
+from typing import Any
+
+
+def text_header_f3() -> list[str]:
+    return [
+        "C 1 Cropped F3 2-byte integer data set                                          ",
+        "C 2 This file is a cropped copy of the F3 block in the Dutch North Sea          ",
+        "C 3 This copy was obtained from                                                 ",
+        "C 4 https://www.opendtect.org/osr/Main/NetherlandsOffshoreF3BlockComplete4GB    ",
+        "C 5 and was created by inclusively extracting:                                  ",
+        "C 6     inlines:    111 .. 133                                                  ",
+        "C 7     crosslines: 875 .. 892                                                  ",
+        "C 8     samples:      0 .. 300                                                  ",
+        "C 9                                                                             ",
+        "C10 This file is cropped and modified with the intention of unit testing segyio ",
+        "C11                                                                             ",
+        "C12                                                                             ",
+        "C13                                                                             ",
+        "C14                                                                             ",
+        "C15                                                                             ",
+        "C16                                                                             ",
+        "C17                                                                             ",
+        "C18                                                                             ",
+        "C19                                                                             ",
+        "C20                                                                             ",
+        "C21                                                                             ",
+        "C22                                                                             ",
+        "C23                                                                             ",
+        "C24                                                                             ",
+        "C25                                                                             ",
+        "C26                                                                             ",
+        "C27                                                                             ",
+        "C28                                                                             ",
+        "C29                                                                             ",
+        "C30                                                                             ",
+        "C31                                                                             ",
+        "C32                                                                             ",
+        "C33                                                                             ",
+        "C34                                                                             ",
+        "C35                                                                             ",
+        "C36                                                                             ",
+        "C37                                                                             ",
+        "C38                                                                             ",
+        "C39                                                                             ",
+        "C40                                                                             "]
+
+def binary_header_f3() -> dict[str, Any]:
+    return {
+        "job_id": 1,
+        "line_num": 0,
+        "reel_num": 0,
+        "data_traces_per_ensemble": 0,
+        "aux_traces_per_ensemble": 0,
+        "sample_interval": 4000,
+        "orig_sample_interval": 0,
+        "samples_per_trace": 75,
+        "orig_samples_per_trace": 0,
+        "data_sample_format": 3,
+        "ensemble_fold": 0,
+        "trace_sorting_code": 4,
+        "vertical_sum_code": 0,
+        "sweep_freq_start": 0,
+        "sweep_freq_end": 0,
+        "sweep_length": 0,
+        "sweep_type_code": 0,
+        "sweep_trace_num": 0,
+        "sweep_taper_start": 0,
+        "sweep_taper_end": 0,
+        "taper_type_code": 0,
+        "correlated_data_code": 0,
+        "binary_gain_code": 0,
+        "amp_recovery_code": 0,
+        "measurement_system_code": 1,
+        "impulse_polarity_code": 0,
+        "vibratory_polarity_code": 0,
+        "fixed_length_trace_flag": 1,
+        "num_extended_text_headers": 0,
+        "segy_revision_major": 1,
+        "segy_revision_minor": 0
+    }
\ No newline at end of file
diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py
index 58e324b4b..d87138cf5 100644
--- a/tests/integration/v1/test_segy_to_mdio_v1.py
+++ b/tests/integration/v1/test_segy_to_mdio_v1.py
@@ -8,6 +8,9 @@
 import xarray as xr
 import zarr
 from segy.standards import get_segy_standard
+from tests.integration.testing_data import binary_header_f3
+from tests.integration.testing_data import text_header_f3
+from tests.integration.testing_helpers import f3_segy_path
 from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding
 
 from mdio.converters.segy import segy_to_mdio
@@ -50,20 +53,16 @@ def _validate_variable(  # noqa PLR0913
     assert np.array_equal(expected_values, actual_values)
 
 
-def test_segy_to_mdio_v1__f3() -> None:
+def test_tiny_3d_import_v1() -> None:
     """Test the SEG-Y to MDIO conversion for the f3 equinor/segyio dataset."""
-    # The f3 dataset comes from
-    # equinor/segyio (https://github.com/equinor/segyio) project (GNU LGPL license)
-    # wget https://github.com/equinor/segyio/blob/main/test-data/f3.sgy
-
-    pref_path = "/DATA/equinor-segyio/f3.sgy"
+    pref_path = f3_segy_path()
     mdio_path = f"{pref_path}_mdio_v1"
 
     segy_sec = get_segy_standard(1.0)
     segy_to_mdio(
         segy_spec=segy_sec,
         mdio_template=TemplateRegistry().get("PostStack3DTime"),
-        input_location=StorageLocation(pref_path),
+        input_location=StorageLocation(str(pref_path)),
         output_location=StorageLocation(mdio_path),
         overwrite=True,
     )
@@ -149,6 +148,12 @@ def get_actual_amplitudes(arr: xr.DataArray) -> np.ndarray:
         get_actual_amplitudes,
     )
 
+    # Validate text header
+    assert ds.attrs["attributes"]["text_header"] == text_header_f3()
+
+    # Validate binary header
+    assert ds.attrs["attributes"]["binary_header"] == binary_header_f3()
+
 
 @pytest.mark.skip(reason="Bug reproducer for the issue 582")
 def test_bug_reproducer_structured_xr_to_zar() -> None:

From 88c28657a8325818db2e8b6289acac772efcde64 Mon Sep 17 00:00:00 2001
From: Altay Sansal <tasansal@users.noreply.github.com>
Date: Tue, 12 Aug 2025 13:32:17 -0500
Subject: [PATCH 06/13] remove dev test data

---
 tests/integration/testing_data.py            |  82 ------
 tests/integration/v1/test_segy_to_mdio_v1.py | 258 -------------------
 2 files changed, 340 deletions(-)
 delete mode 100644 tests/integration/testing_data.py
 delete mode 100644 tests/integration/v1/test_segy_to_mdio_v1.py

diff --git a/tests/integration/testing_data.py b/tests/integration/testing_data.py
deleted file mode 100644
index 31bcf04ba..000000000
--- a/tests/integration/testing_data.py
+++ /dev/null
@@ -1,82 +0,0 @@
-"""Integration tests data
-"""
-from typing import Any
-
-
-def text_header_f3() -> list[str]:
-    return [
-        "C 1 Cropped F3 2-byte integer data set                                          ",
-        "C 2 This file is a cropped copy of the F3 block in the Dutch North Sea          ",
-        "C 3 This copy was obtained from                                                 ",
-        "C 4 https://www.opendtect.org/osr/Main/NetherlandsOffshoreF3BlockComplete4GB    ",
-        "C 5 and was created by inclusively extracting:                                  ",
-        "C 6     inlines:    111 .. 133                                                  ",
-        "C 7     crosslines: 875 .. 892                                                  ",
-        "C 8     samples:      0 .. 300                                                  ",
-        "C 9                                                                             ",
-        "C10 This file is cropped and modified with the intention of unit testing segyio ",
-        "C11                                                                             ",
-        "C12                                                                             ",
-        "C13                                                                             ",
-        "C14                                                                             ",
-        "C15                                                                             ",
-        "C16                                                                             ",
-        "C17                                                                             ",
-        "C18                                                                             ",
-        "C19                                                                             ",
-        "C20                                                                             ",
-        "C21                                                                             ",
-        "C22                                                                             ",
-        "C23                                                                             ",
-        "C24                                                                             ",
-        "C25                                                                             ",
-        "C26                                                                             ",
-        "C27                                                                             ",
-        "C28                                                                             ",
-        "C29                                                                             ",
-        "C30                                                                             ",
-        "C31                                                                             ",
-        "C32                                                                             ",
-        "C33                                                                             ",
-        "C34                                                                             ",
-        "C35                                                                             ",
-        "C36                                                                             ",
-        "C37                                                                             ",
-        "C38                                                                             ",
-        "C39                                                                             ",
-        "C40                                                                             "]
-
-def binary_header_f3() -> dict[str, Any]:
-    return {
-        "job_id": 1,
-        "line_num": 0,
-        "reel_num": 0,
-        "data_traces_per_ensemble": 0,
-        "aux_traces_per_ensemble": 0,
-        "sample_interval": 4000,
-        "orig_sample_interval": 0,
-        "samples_per_trace": 75,
-        "orig_samples_per_trace": 0,
-        "data_sample_format": 3,
-        "ensemble_fold": 0,
-        "trace_sorting_code": 4,
-        "vertical_sum_code": 0,
-        "sweep_freq_start": 0,
-        "sweep_freq_end": 0,
-        "sweep_length": 0,
-        "sweep_type_code": 0,
-        "sweep_trace_num": 0,
-        "sweep_taper_start": 0,
-        "sweep_taper_end": 0,
-        "taper_type_code": 0,
-        "correlated_data_code": 0,
-        "binary_gain_code": 0,
-        "amp_recovery_code": 0,
-        "measurement_system_code": 1,
-        "impulse_polarity_code": 0,
-        "vibratory_polarity_code": 0,
-        "fixed_length_trace_flag": 1,
-        "num_extended_text_headers": 0,
-        "segy_revision_major": 1,
-        "segy_revision_minor": 0
-    }
\ No newline at end of file
diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py
deleted file mode 100644
index d87138cf5..000000000
--- a/tests/integration/v1/test_segy_to_mdio_v1.py
+++ /dev/null
@@ -1,258 +0,0 @@
-"""End to end testing for SEG-Y to MDIO conversion v1."""
-
-from __future__ import annotations
-
-import numcodecs
-import numpy as np
-import pytest
-import xarray as xr
-import zarr
-from segy.standards import get_segy_standard
-from tests.integration.testing_data import binary_header_f3
-from tests.integration.testing_data import text_header_f3
-from tests.integration.testing_helpers import f3_segy_path
-from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding
-
-from mdio.converters.segy import segy_to_mdio
-from mdio.converters.type_converter import to_numpy_dtype
-from mdio.core.storage_location import StorageLocation
-from mdio.schemas.dtype import ScalarType
-from mdio.schemas.dtype import StructuredField
-from mdio.schemas.dtype import StructuredType
-from mdio.schemas.v1.templates.template_registry import TemplateRegistry
-
-
-def _slice_three_values(dims: tuple[int], values_from_start: bool) -> tuple[slice, ...]:
-    if values_from_start:
-        slices = tuple([slice(0, 3) for _ in range(len(dims))])
-    else:
-        slices = tuple([slice(-3, None) for _ in range(len(dims))])
-    return slices
-
-
-def _get_actual_value(arr: xr.DataArray) -> np.ndarray:
-    return arr.values[_slice_three_values(arr.shape, values_from_start=True)]
-
-
-def _validate_variable(  # noqa PLR0913
-    dataset: xr.Dataset,
-    name: str,
-    shape: list[int],
-    dims: list[str],
-    data_type: np.dtype,  # noqa ARG001
-    # expected_values: range | None,
-    # actual_func: Callable,
-    expected_values: np.ndarray,
-    actual_func: callable[[xr.DataArray], np.ndarray],
-) -> None:
-    arr = dataset[name]
-    assert shape == arr.shape
-    assert set(dims) == set(arr.dims)
-    assert data_type == arr.dtype
-    actual_values = actual_func(arr)
-    assert np.array_equal(expected_values, actual_values)
-
-
-def test_tiny_3d_import_v1() -> None:
-    """Test the SEG-Y to MDIO conversion for the f3 equinor/segyio dataset."""
-    pref_path = f3_segy_path()
-    mdio_path = f"{pref_path}_mdio_v1"
-
-    segy_sec = get_segy_standard(1.0)
-    segy_to_mdio(
-        segy_spec=segy_sec,
-        mdio_template=TemplateRegistry().get("PostStack3DTime"),
-        input_location=StorageLocation(str(pref_path)),
-        output_location=StorageLocation(mdio_path),
-        overwrite=True,
-    )
-
-    # Load Xarray dataset from the MDIO file
-    # NOTE: If mask_and_scale is not set,
-    # Xarray will convert int to float and replace _FillValue with NaN
-    ds = xr.open_dataset(mdio_path, engine="zarr", mask_and_scale=False)
-
-    # The template uses data_type=ScalarType.INT32 for dimensional coordinates
-    # Tests "inline" variable
-    expected = np.array([111, 112, 113])
-    _validate_variable(ds, "inline", (23,), ["inline"], np.int32, expected, _get_actual_value)
-
-    # Tests "crossline" variable
-    expected = np.array([875, 876, 877])
-    _validate_variable(ds, "crossline", (18,), ["crossline"], np.int32, expected, _get_actual_value)
-
-    # Tests "time" variable
-    expected = np.array([0, 4, 8])
-    _validate_variable(ds, "time", (75,), ["time"], np.int32, expected, _get_actual_value)
-
-    # The template uses data_type=ScalarType.FLOAT64 for non-dimensional coordinates
-    # Tests "cdp_x" variable
-    expected = np.array([[6201972, 6202222, 6202472], [6201965, 6202215, 6202465], [6201958, 6202208, 6202458]])
-    _validate_variable(ds, "cdp_x", (23, 18), ["inline", "crossline"], np.float64, expected, _get_actual_value)
-
-    # Tests "cdp_y" variable
-    expected = np.array(
-        [
-            [60742329, 60742336, 60742343],
-            [60742579, 60742586, 60742593],
-            [60742828, 60742835, 60742842],
-        ]
-    )
-    _validate_variable(ds, "cdp_y", (23, 18), ["inline", "crossline"], np.float64, expected, _get_actual_value)
-
-    # Tests "headers" variable
-    # NOTE: segy_sec.trace.header.dtype includes offsets.
-    # Let's ignore them assuming there is no overlaps and gaps
-    dtype_names = segy_sec.trace.header.names
-    dtype_formats = segy_sec.trace.header.formats
-    dtype_conf = {"names": dtype_names, "formats": dtype_formats}
-    data_type = np.dtype(dtype_conf)
-    expected = np.array(
-        [
-            [6201972, 6202222, 6202472],
-            [6201965, 6202215, 6202465],
-            [6201958, 6202208, 6202458],
-        ],
-        dtype=np.int32,
-    )
-
-    def get_actual_headers(arr: xr.DataArray) -> np.ndarray:
-        cdp_x_headers = arr.values["cdp_x"]
-        return cdp_x_headers[_slice_three_values(arr.shape, values_from_start=True)]
-
-    _validate_variable(ds, "headers", (23, 18), ["inline", "crossline"], data_type, expected, get_actual_headers)
-
-    # Tests "trace_mask" variable
-    expected = np.array([[True, True, True], [True, True, True], [True, True, True]])
-    _validate_variable(ds, "trace_mask", (23, 18), ["inline", "crossline"], np.bool, expected, _get_actual_value)
-
-    # Tests "amplitude" variable
-    expected = np.array(
-        [
-            [[487.0, -1104.0, -1456.0], [-129.0, -1728.0, 445.0], [-1443.0, 741.0, 1458.0]],
-            [[2464.0, 3220.0, 1362.0], [686.0, 530.0, -282.0], [3599.0, 2486.0, 433.0]],
-            [[4018.0, 5159.0, 2087.0], [-81.0, -3039.0, -1850.0], [2898.0, 1060.0, -121.0]],
-        ]
-    )
-
-    def get_actual_amplitudes(arr: xr.DataArray) -> np.ndarray:
-        return arr.values[_slice_three_values(arr.shape, values_from_start=False)]
-
-    _validate_variable(
-        ds,
-        "amplitude",
-        (23, 18, 75),
-        ["inline", "crossline", "time"],
-        np.float32,
-        expected,
-        get_actual_amplitudes,
-    )
-
-    # Validate text header
-    assert ds.attrs["attributes"]["text_header"] == text_header_f3()
-
-    # Validate binary header
-    assert ds.attrs["attributes"]["binary_header"] == binary_header_f3()
-
-
-@pytest.mark.skip(reason="Bug reproducer for the issue 582")
-def test_bug_reproducer_structured_xr_to_zar() -> None:
-    """Bug reproducer for the issue https://github.com/TGSAI/mdio-python/issues/582.
-
-    Will be removed in the when the bug is fixed
-    """
-    shape = (4, 4, 2)
-    dim_names = ["inline", "crossline", "depth"]
-    chunks = (2, 2, 2)
-    # Pretend that we created a pydantic model from a template
-    structured_type = StructuredType(
-        fields=[
-            StructuredField(name="cdp_x", format=ScalarType.INT32),
-            StructuredField(name="cdp_y", format=ScalarType.INT32),
-            StructuredField(name="elevation", format=ScalarType.FLOAT16),
-            StructuredField(name="some_scalar", format=ScalarType.FLOAT16),
-        ]
-    )
-
-    xr_dataset = xr.Dataset()
-
-    # Add traces to the dataset, shape = (4, 4, 2) of floats
-    traces_zarr = zarr.zeros(shape=shape, dtype=np.float32, zarr_format=2)
-    traces_xr = xr.DataArray(traces_zarr, dims=dim_names)
-    traces_xr.encoding = {
-        "_FillValue": np.nan,
-        "chunks": chunks,
-        "chunk_key_encoding": V2ChunkKeyEncoding(separator="/").to_dict(),
-        "compressor": numcodecs.Blosc(cname="zstd", clevel=5, shuffle=1, blocksize=0),
-    }
-    xr_dataset["traces"] = traces_xr
-
-    # Add headers to the dataset, shape = (4, 4) of structured type
-    data_type = to_numpy_dtype(structured_type)
-
-    # Validate the conversion
-    assert data_type == np.dtype([("cdp_x", "<i4"), ("cdp_y", "<i4"), ("elevation", "<f2"), ("some_scalar", "<f2")])
-    fill_value = np.zeros((), dtype=data_type)
-    headers_zarr = zarr.zeros(shape=shape[:-1], dtype=data_type, zarr_format=2)
-    headers_xr = xr.DataArray(headers_zarr, dims=dim_names[:-1])
-    headers_xr.encoding = {
-        "_FillValue": fill_value,
-        "chunks": chunks[:-1],
-        "chunk_key_encoding": V2ChunkKeyEncoding(separator="/").to_dict(),
-        "compressor": numcodecs.Blosc(cname="zstd", clevel=5, shuffle=1, blocksize=0),
-    }
-    xr_dataset["headers"] = headers_xr
-
-    # See _populate_dims_coords_and_write_to_zarr()
-    # The compute=True because we would also write to Zarr the coord values here
-    xr_dataset.to_zarr(
-        store="/tmp/reproducer_xr.zarr",  # noqa: S108
-        mode="w",
-        write_empty_chunks=False,
-        zarr_format=2,
-        compute=True,
-    )
-
-    # In _populate_trace_mask_and_write_to_zarr
-    # We do another write of "trace_mask" to the same Zarr store and remove it
-    # from the dataset
-
-    # ----------------------------------------------
-    # Now will will do parallel write of the data and the headers
-    # see blocked_io.to_zarr -> trace_worker
-
-    not_null = np.array(
-        [
-            [True, False, False, False],
-            [False, True, False, False],
-            [False, False, True, False],
-            [False, False, False, True],
-        ]
-    )
-    hdr = (11, 22, -33.0, 44.0)
-    headers = np.array([hdr, hdr, hdr, hdr], dtype=data_type)
-    trace = np.array([[100.0, 200.0], [300.0, 400.0], [500.0, 600.0], [700.0, 800.0]], dtype=np.float32)
-
-    # Here is one iteration of it:
-    ds_to_write = xr_dataset[["traces", "headers"]]
-    # We do not have any coords to reset
-    # ds_to_write = ds_to_write.reset_coords()
-
-    ds_to_write["headers"].data[not_null] = headers
-    ds_to_write["headers"].data[~not_null] = 0
-    ds_to_write["traces"].data[not_null] = trace
-
-    region = {
-        "inline": slice(0, 2, None),
-        "crossline": slice(0, 2, None),
-        "depth": slice(0, 2, None),
-    }
-
-    sub_dataset = ds_to_write.isel(region)
-    sub_dataset.to_zarr(
-        store="/tmp/reproducer_xr.zarr",  # noqa: S108
-        region=region,
-        mode="r+",
-        write_empty_chunks=False,
-        zarr_format=2,
-    )

From 3a9e67291bcb65989fa450b6ec391f67ff6510e2 Mon Sep 17 00:00:00 2001
From: Altay Sansal <tasansal@users.noreply.github.com>
Date: Tue, 12 Aug 2025 13:38:19 -0500
Subject: [PATCH 07/13] add back whitespace

---
 src/mdio/core/grid.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mdio/core/grid.py b/src/mdio/core/grid.py
index 2436c9448..ec66ee471 100644
--- a/src/mdio/core/grid.py
+++ b/src/mdio/core/grid.py
@@ -148,6 +148,7 @@ def build_map(self, index_headers: HeaderArray) -> None:
 
             # Assign trace indices
             trace_indices = np.arange(start, end, dtype=np.uint64)
+
             self.map.vindex[live_dim_indices] = trace_indices
             self.live_mask.vindex[live_dim_indices] = True
 

From 28d07b42583b014dab6ad2a7276127c8dfc7f5a1 Mon Sep 17 00:00:00 2001
From: Altay Sansal <tasansal@users.noreply.github.com>
Date: Tue, 12 Aug 2025 13:39:10 -0500
Subject: [PATCH 08/13] revert import changes

---
 tests/integration/testing_helpers.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/tests/integration/testing_helpers.py b/tests/integration/testing_helpers.py
index 68d47535b..0a33fbd69 100644
--- a/tests/integration/testing_helpers.py
+++ b/tests/integration/testing_helpers.py
@@ -1,18 +1,12 @@
 """This module provides testing helpers for integration testing."""
 
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
+from collections.abc import Callable
 
 import numpy as np
+import xarray as xr
 from segy.schema import HeaderField
 from segy.schema import SegySpec
 
-if TYPE_CHECKING:
-    from collections.abc import Callable
-
-    import xarray as xr
-
 
 def customize_segy_specs(
     segy_spec: SegySpec,

From 9a74f15724a231dd84548ad69679cf73b0831f63 Mon Sep 17 00:00:00 2001
From: Altay Sansal <tasansal@users.noreply.github.com>
Date: Tue, 12 Aug 2025 13:48:55 -0500
Subject: [PATCH 09/13] fix attribute initialization in
 `_add_text_binary_headers`

---
 src/mdio/converters/segy.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py
index d6211c204..8756ddd09 100644
--- a/src/mdio/converters/segy.py
+++ b/src/mdio/converters/segy.py
@@ -280,7 +280,7 @@ def _populate_coordinates(
     return dataset, drop_vars_delayed
 
 
-def _add_text_binary_headers(dataset: xr_Dataset, segy_file: SegyFile) -> None:
+def _add_text_binary_headers(dataset: Dataset, segy_file: SegyFile) -> None:
     text_header = segy_file.text_header.splitlines()
     # Validate:
     # text_header this should be a 40-items array of strings with width of 80 characters.
@@ -300,8 +300,18 @@ def _add_text_binary_headers(dataset: xr_Dataset, segy_file: SegyFile) -> None:
     if ext_text_header is not None:
         for ext_hdr in ext_text_header:
             text_header.append(ext_hdr.splitlines())
-    dataset.metadata.attributes["text_header"] = text_header
-    dataset.metadata.attributes["binary_header"] = segy_file.binary_header.to_dict()
+
+    # Handle the case where the dataset has no attributes yet
+    if dataset.metadata.attributes is None:
+        dataset.attrs["attributes"] = {}
+
+    # Update the attributes with the text and binary headers.
+    dataset.metadata.attributes.update(
+        {
+            "textHeader": text_header,
+            "binaryHeader": segy_file.binary_header.to_dict(),
+        }
+    )
 
 
 def segy_to_mdio(

From c6714cb229f24315501e874df0c8916af8161fa6 Mon Sep 17 00:00:00 2001
From: Dmitriy Repin <drepin@hotmail.com>
Date: Tue, 12 Aug 2025 19:31:51 +0000
Subject: [PATCH 10/13] Add tests

---
 tests/conftest.py                            | 11 ++-
 tests/integration/test_segy_import_export.py | 44 ++++++-----
 tests/integration/testing_data.py            | 82 ++++++++++++++++++++
 3 files changed, 111 insertions(+), 26 deletions(-)
 create mode 100644 tests/integration/testing_data.py

diff --git a/tests/conftest.py b/tests/conftest.py
index 6f4f2d1f3..b4a9804e2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,15 +3,11 @@
 from __future__ import annotations
 
 import warnings
-from typing import TYPE_CHECKING
+from pathlib import Path  # noqa TC003
 from urllib.request import urlretrieve
 
 import pytest
 
-if TYPE_CHECKING:
-    from pathlib import Path
-
-
 # Suppress Dask's chunk balancing warning
 warnings.filterwarnings(
     "ignore",
@@ -46,7 +42,10 @@ def segy_input(segy_input_uri: str, tmp_path_factory: pytest.TempPathFactory) ->
 @pytest.fixture(scope="module")
 def zarr_tmp(tmp_path_factory: pytest.TempPathFactory) -> Path:
     """Make a temp file for the output MDIO."""
-    return tmp_path_factory.mktemp(r"mdio")
+    path = tmp_path_factory.mktemp(r"mdio")
+    # For debugging purposes to use a fixed path, uncomment the following:
+    # path = Path("./TMP/zarr_tmp")
+    return path  # noqa RET504
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/integration/test_segy_import_export.py b/tests/integration/test_segy_import_export.py
index 40e10b489..50352d7c3 100644
--- a/tests/integration/test_segy_import_export.py
+++ b/tests/integration/test_segy_import_export.py
@@ -13,6 +13,8 @@
 import xarray as xr
 from segy import SegyFile
 from segy.standards import get_segy_standard
+from tests.integration.testing_data import binary_header_teapot_dome
+from tests.integration.testing_data import text_header_teapot_dome
 from tests.integration.testing_helpers import customize_segy_specs
 from tests.integration.testing_helpers import get_inline_header_values
 from tests.integration.testing_helpers import get_values
@@ -266,8 +268,8 @@ def test_3d_import(
     segy_to_mdio(
         segy_spec=segy_spec,
         mdio_template=TemplateRegistry().get("PostStack3DTime"),
-        input_location=StorageLocation(segy_input.__str__()),
-        output_location=StorageLocation(zarr_tmp.__str__()),
+        input_location=StorageLocation(str(segy_input)),
+        output_location=StorageLocation(str(zarr_tmp)),
         overwrite=True,
     )
 
@@ -278,11 +280,9 @@ class TestReader:
 
     def test_meta_dataset_read(self, zarr_tmp: Path) -> None:
         """Metadata reading tests."""
-        path = zarr_tmp.__str__()
-        # path = "/tmp/pytest-of-vscode/my-mdio/mdio0"
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
         expected_attrs = {
             "apiVersion": "1.0.0a1",
             "createdOn": "2025-08-06 16:21:54.747880+00:00",
@@ -297,13 +297,25 @@ def test_meta_dataset_read(self, zarr_tmp: Path) -> None:
             else:
                 assert actual_attrs_json[key] == value
 
+        attributes = ds.attrs["attributes"]
+        assert attributes is not None
+
+        # Validate attributes provided by the template
+        assert attributes["surveyDimensionality"] == "3D"
+        assert attributes["ensembleType"] == "line"
+        assert attributes["processingStage"] == "post-stack"
+
+        # Validate text header
+        assert attributes["textHeader"] == text_header_teapot_dome()
+
+        # Validate binary header
+        assert attributes["binaryHeader"] == binary_header_teapot_dome()
+
     def test_meta_variable_read(self, zarr_tmp: Path) -> None:
         """Metadata reading tests."""
-        path = zarr_tmp.__str__()
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        # path = "/tmp/pytest-of-vscode/my-mdio/mdio0"
-        ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
         expected_attrs = {
             "count": 97354860,
             "sum": -8594.551666259766,
@@ -318,11 +330,9 @@ def test_meta_variable_read(self, zarr_tmp: Path) -> None:
     def test_grid(self, zarr_tmp: Path) -> None:
         """Test validating MDIO variables."""
         # Load Xarray dataset from the MDIO file
-        path = zarr_tmp.__str__()
-        # path = "/tmp/pytest-of-vscode/my-mdio/mdio0"
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
 
         # Note: in order to create the dataset we used the Time template, so the
         # sample dimension is called "time"
@@ -366,22 +376,18 @@ def test_grid(self, zarr_tmp: Path) -> None:
 
     def test_inline(self, zarr_tmp: Path) -> None:
         """Read and compare every 75 inlines' mean and std. dev."""
-        path = zarr_tmp.__str__()
-        # path = "/tmp/pytest-of-vscode/my-mdio/mdio0"
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
         inlines = ds["amplitude"][::75, :, :]
         mean, std = inlines.mean(), inlines.std()
         npt.assert_allclose([mean, std], [1.0555277e-04, 6.0027051e-01])
 
     def test_crossline(self, zarr_tmp: Path) -> None:
         """Read and compare every 75 crosslines' mean and std. dev."""
-        path = zarr_tmp.__str__()
-        # path = "/tmp/pytest-of-vscode/my-mdio/mdio0"
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
         xlines = ds["amplitude"][:, ::75, :]
         mean, std = xlines.mean(), xlines.std()
 
@@ -389,11 +395,9 @@ def test_crossline(self, zarr_tmp: Path) -> None:
 
     def test_zslice(self, zarr_tmp: Path) -> None:
         """Read and compare every 225 z-slices' mean and std. dev."""
-        path = zarr_tmp.__str__()
-        # path = "/tmp/pytest-of-vscode/my-mdio/mdio0"
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(path, engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
         slices = ds["amplitude"][:, :, ::225]
         mean, std = slices.mean(), slices.std()
         npt.assert_allclose([mean, std], [0.005236923, 0.61279935])
diff --git a/tests/integration/testing_data.py b/tests/integration/testing_data.py
new file mode 100644
index 000000000..4aeb6ef14
--- /dev/null
+++ b/tests/integration/testing_data.py
@@ -0,0 +1,82 @@
+"""Integration tests data"""
+
+from typing import Any
+
+def text_header_teapot_dome() -> list[str]:
+    return [
+        "C 1 CLIENT: ROCKY MOUNTAIN OILFIELD TESTING CENTER                              ",
+        "C 2 PROJECT: NAVAL PETROLEUM RESERVE #3 (TEAPOT DOME); NATRONA COUNTY, WYOMING  ",
+        "C 3 LINE: 3D                                                                    ",
+        "C 4                                                                             ",
+        "C 5 THIS IS THE FILTERED POST STACK MIGRATION                                   ",
+        "C 6                                                                             ",
+        "C 7 INLINE 1, XLINE 1:   X COORDINATE: 788937  Y COORDINATE: 938845             ",
+        "C 8 INLINE 1, XLINE 188: X COORDINATE: 809501  Y COORDINATE: 939333             ",
+        "C 9 INLINE 188, XLINE 1: X COORDINATE: 788039  Y COORDINATE: 976674             ",
+        "C10 INLINE NUMBER:    MIN: 1  MAX: 345  TOTAL: 345                              ",
+        "C11 CROSSLINE NUMBER: MIN: 1  MAX: 188  TOTAL: 188                              ",
+        "C12 TOTAL NUMBER OF CDPS: 64860   BIN DIMENSION: 110' X 110'                    ",
+        "C13                                                                             ",
+        "C14                                                                             ",
+        "C15                                                                             ",
+        "C16                                                                             ",
+        "C17                                                                             ",
+        "C18                                                                             ",
+        "C19 GENERAL SEGY INFORMATION                                                    ",
+        "C20 RECORD LENGHT (MS): 3000                                                    ",
+        "C21 SAMPLE RATE (MS): 2.0                                                       ",
+        "C22 DATA FORMAT: 4 BYTE IBM FLOATING POINT                                      ",
+        "C23 BYTES  13- 16: CROSSLINE NUMBER (TRACE)                                     ",
+        "C24 BYTES  17- 20: INLINE NUMBER (LINE)                                         ",
+        "C25 BYTES  81- 84: CDP_X COORD                                                  ",
+        "C26 BYTES  85- 88: CDP_Y COORD                                                  ",
+        "C27 BYTES 181-184: INLINE NUMBER (LINE)                                         ",
+        "C28 BYTES 185-188: CROSSLINE NUMBER (TRACE)                                     ",
+        "C29 BYTES 189-192: CDP_X COORD                                                  ",
+        "C30 BYTES 193-196: CDP_Y COORD                                                  ",
+        "C31                                                                             ",
+        "C32                                                                             ",
+        "C33                                                                             ",
+        "C34                                                                             ",
+        "C35                                                                             ",
+        "C36 Processed by: Excel Geophysical Services, Inc.                              ",
+        "C37               8301 East Prentice Ave. Ste. 402                              ",
+        "C38               Englewood, Colorado 80111                                     ",
+        "C39               (voice) 303.694.9629 (fax) 303.771.1646                       ",
+        "C40 END EBCDIC                                                                  "
+    ]
+
+def binary_header_teapot_dome()  -> dict[str, Any]:
+    return {
+          "job_id": 9999,
+          "line_num": 9999,
+          "reel_num": 1,
+          "data_traces_per_ensemble": 188,
+          "aux_traces_per_ensemble": 0,
+          "sample_interval": 2000,
+          "orig_sample_interval": 0,
+          "samples_per_trace": 1501,
+          "orig_samples_per_trace": 1501,
+          "data_sample_format": 1,
+          "ensemble_fold": 57,
+          "trace_sorting_code": 4,
+          "vertical_sum_code": 1,
+          "sweep_freq_start": 0,
+          "sweep_freq_end": 0,
+          "sweep_length": 0,
+          "sweep_type_code": 0,
+          "sweep_trace_num": 0,
+          "sweep_taper_start": 0,
+          "sweep_taper_end": 0,
+          "taper_type_code": 0,
+          "correlated_data_code": 2,
+          "binary_gain_code": 1,
+          "amp_recovery_code": 4,
+          "measurement_system_code": 2,
+          "impulse_polarity_code": 1,
+          "vibratory_polarity_code": 0,
+          "fixed_length_trace_flag": 0,
+          "num_extended_text_headers": 0,
+          "segy_revision_major": 0,
+          "segy_revision_minor": 0
+        }
\ No newline at end of file

From 024bed71b38564b23a14cef648661dc5518e2573 Mon Sep 17 00:00:00 2001
From: Altay Sansal <tasansal@users.noreply.github.com>
Date: Tue, 12 Aug 2025 15:04:06 -0500
Subject: [PATCH 11/13] refactor: improve type annotations and docstrings in
 test utilities

---
 tests/conftest.py                 |  7 +--
 tests/integration/testing_data.py | 74 ++++++++++++++++---------------
 2 files changed, 42 insertions(+), 39 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index b4a9804e2..e884cc84d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,11 +3,14 @@
 from __future__ import annotations
 
 import warnings
-from pathlib import Path  # noqa TC003
+from typing import TYPE_CHECKING
 from urllib.request import urlretrieve
 
 import pytest
 
+if TYPE_CHECKING:
+    from pathlib import Path
+
 # Suppress Dask's chunk balancing warning
 warnings.filterwarnings(
     "ignore",
@@ -43,8 +46,6 @@ def segy_input(segy_input_uri: str, tmp_path_factory: pytest.TempPathFactory) ->
 def zarr_tmp(tmp_path_factory: pytest.TempPathFactory) -> Path:
     """Make a temp file for the output MDIO."""
     path = tmp_path_factory.mktemp(r"mdio")
-    # For debugging purposes to use a fixed path, uncomment the following:
-    # path = Path("./TMP/zarr_tmp")
     return path  # noqa RET504
 
 
diff --git a/tests/integration/testing_data.py b/tests/integration/testing_data.py
index 4aeb6ef14..3696a7cf1 100644
--- a/tests/integration/testing_data.py
+++ b/tests/integration/testing_data.py
@@ -1,8 +1,8 @@
-"""Integration tests data"""
+"""Integration tests data for teapot dome SEG-Y."""
 
-from typing import Any
 
 def text_header_teapot_dome() -> list[str]:
+    """Return the teapot dome expected text header."""
     return [
         "C 1 CLIENT: ROCKY MOUNTAIN OILFIELD TESTING CENTER                              ",
         "C 2 PROJECT: NAVAL PETROLEUM RESERVE #3 (TEAPOT DOME); NATRONA COUNTY, WYOMING  ",
@@ -43,40 +43,42 @@ def text_header_teapot_dome() -> list[str]:
         "C37               8301 East Prentice Ave. Ste. 402                              ",
         "C38               Englewood, Colorado 80111                                     ",
         "C39               (voice) 303.694.9629 (fax) 303.771.1646                       ",
-        "C40 END EBCDIC                                                                  "
+        "C40 END EBCDIC                                                                  ",
     ]
 
-def binary_header_teapot_dome()  -> dict[str, Any]:
+
+def binary_header_teapot_dome() -> dict[str, int]:
+    """Return the teapot dome expected binary header."""
     return {
-          "job_id": 9999,
-          "line_num": 9999,
-          "reel_num": 1,
-          "data_traces_per_ensemble": 188,
-          "aux_traces_per_ensemble": 0,
-          "sample_interval": 2000,
-          "orig_sample_interval": 0,
-          "samples_per_trace": 1501,
-          "orig_samples_per_trace": 1501,
-          "data_sample_format": 1,
-          "ensemble_fold": 57,
-          "trace_sorting_code": 4,
-          "vertical_sum_code": 1,
-          "sweep_freq_start": 0,
-          "sweep_freq_end": 0,
-          "sweep_length": 0,
-          "sweep_type_code": 0,
-          "sweep_trace_num": 0,
-          "sweep_taper_start": 0,
-          "sweep_taper_end": 0,
-          "taper_type_code": 0,
-          "correlated_data_code": 2,
-          "binary_gain_code": 1,
-          "amp_recovery_code": 4,
-          "measurement_system_code": 2,
-          "impulse_polarity_code": 1,
-          "vibratory_polarity_code": 0,
-          "fixed_length_trace_flag": 0,
-          "num_extended_text_headers": 0,
-          "segy_revision_major": 0,
-          "segy_revision_minor": 0
-        }
\ No newline at end of file
+        "job_id": 9999,
+        "line_num": 9999,
+        "reel_num": 1,
+        "data_traces_per_ensemble": 188,
+        "aux_traces_per_ensemble": 0,
+        "sample_interval": 2000,
+        "orig_sample_interval": 0,
+        "samples_per_trace": 1501,
+        "orig_samples_per_trace": 1501,
+        "data_sample_format": 1,
+        "ensemble_fold": 57,
+        "trace_sorting_code": 4,
+        "vertical_sum_code": 1,
+        "sweep_freq_start": 0,
+        "sweep_freq_end": 0,
+        "sweep_length": 0,
+        "sweep_type_code": 0,
+        "sweep_trace_num": 0,
+        "sweep_taper_start": 0,
+        "sweep_taper_end": 0,
+        "taper_type_code": 0,
+        "correlated_data_code": 2,
+        "binary_gain_code": 1,
+        "amp_recovery_code": 4,
+        "measurement_system_code": 2,
+        "impulse_polarity_code": 1,
+        "vibratory_polarity_code": 0,
+        "fixed_length_trace_flag": 0,
+        "num_extended_text_headers": 0,
+        "segy_revision_major": 0,
+        "segy_revision_minor": 0,
+    }

From 3bb2d4094a1217aba640211fff3b62af15ba43de Mon Sep 17 00:00:00 2001
From: Altay Sansal <tasansal@users.noreply.github.com>
Date: Tue, 12 Aug 2025 15:05:34 -0500
Subject: [PATCH 12/13] fix formatting

---
 tests/conftest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index e884cc84d..6f4f2d1f3 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -11,6 +11,7 @@
 if TYPE_CHECKING:
     from pathlib import Path
 
+
 # Suppress Dask's chunk balancing warning
 warnings.filterwarnings(
     "ignore",
@@ -45,8 +46,7 @@ def segy_input(segy_input_uri: str, tmp_path_factory: pytest.TempPathFactory) ->
 @pytest.fixture(scope="module")
 def zarr_tmp(tmp_path_factory: pytest.TempPathFactory) -> Path:
     """Make a temp file for the output MDIO."""
-    path = tmp_path_factory.mktemp(r"mdio")
-    return path  # noqa RET504
+    return tmp_path_factory.mktemp(r"mdio")
 
 
 @pytest.fixture(scope="module")

From e04bb7c2da46e08db3fd3a804de4331ac8771ce9 Mon Sep 17 00:00:00 2001
From: Altay Sansal <tasansal@users.noreply.github.com>
Date: Tue, 12 Aug 2025 15:47:52 -0500
Subject: [PATCH 13/13] remove redundant `str()` casting in `xr.open_dataset`
 calls

---
 tests/integration/test_segy_import_export.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/integration/test_segy_import_export.py b/tests/integration/test_segy_import_export.py
index 50352d7c3..e80028df1 100644
--- a/tests/integration/test_segy_import_export.py
+++ b/tests/integration/test_segy_import_export.py
@@ -282,7 +282,7 @@ def test_meta_dataset_read(self, zarr_tmp: Path) -> None:
         """Metadata reading tests."""
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
         expected_attrs = {
             "apiVersion": "1.0.0a1",
             "createdOn": "2025-08-06 16:21:54.747880+00:00",
@@ -315,7 +315,7 @@ def test_meta_variable_read(self, zarr_tmp: Path) -> None:
         """Metadata reading tests."""
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
         expected_attrs = {
             "count": 97354860,
             "sum": -8594.551666259766,
@@ -332,7 +332,7 @@ def test_grid(self, zarr_tmp: Path) -> None:
         # Load Xarray dataset from the MDIO file
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
 
         # Note: in order to create the dataset we used the Time template, so the
         # sample dimension is called "time"
@@ -378,7 +378,7 @@ def test_inline(self, zarr_tmp: Path) -> None:
         """Read and compare every 75 inlines' mean and std. dev."""
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
         inlines = ds["amplitude"][::75, :, :]
         mean, std = inlines.mean(), inlines.std()
         npt.assert_allclose([mean, std], [1.0555277e-04, 6.0027051e-01])
@@ -387,7 +387,7 @@ def test_crossline(self, zarr_tmp: Path) -> None:
         """Read and compare every 75 crosslines' mean and std. dev."""
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
         xlines = ds["amplitude"][:, ::75, :]
         mean, std = xlines.mean(), xlines.std()
 
@@ -397,7 +397,7 @@ def test_zslice(self, zarr_tmp: Path) -> None:
         """Read and compare every 225 z-slices' mean and std. dev."""
         # NOTE: If mask_and_scale is not set,
         # Xarray will convert int to float and replace _FillValue with NaN
-        ds = xr.open_dataset(str(zarr_tmp), engine="zarr", mask_and_scale=False)
+        ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
         slices = ds["amplitude"][:, :, ::225]
         mean, std = slices.mean(), slices.std()
         npt.assert_allclose([mean, std], [0.005236923, 0.61279935])