diff --git a/autotest/test_binaryfile.py b/autotest/test_binaryfile.py index 1dff66332e..b8c7df7514 100644 --- a/autotest/test_binaryfile.py +++ b/autotest/test_binaryfile.py @@ -1,6 +1,7 @@ from itertools import repeat import numpy as np +import pandas as pd import pytest from matplotlib import pyplot as plt from matplotlib.axes import Axes @@ -14,6 +15,7 @@ CellBudgetFile, HeadFile, HeadUFile, + UcnFile, Util2d, ) from flopy.utils.binaryfile import ( @@ -71,6 +73,116 @@ def test_deprecated_binaryread_struct(example_data_path): assert res == 20 +def test_headfile_build_index(example_data_path): + # test low-level BinaryLayerFile._build_index() method + pth = example_data_path / "freyberg_multilayer_transient" / "freyberg.hds" + with HeadFile(pth) as hds: + pass + assert hds.nrow == 40 + assert hds.ncol == 20 + assert hds.nlay == 3 + assert not hasattr(hds, "nper") + assert hds.totalbytes == 10_676_004 + assert len(hds.recordarray) == 3291 + assert type(hds.recordarray) == np.ndarray + assert hds.recordarray.dtype == np.dtype( + [ + ("kstp", "i4"), + ("kper", "i4"), + ("pertim", "f4"), + ("totim", "f4"), + ("text", "S16"), + ("ncol", "i4"), + ("nrow", "i4"), + ("ilay", "i4"), + ] + ) + # check first and last recorddict + list_recordarray = hds.recordarray.tolist() + assert list_recordarray[0] == ( + (1, 1, 1.0, 1.0, b" HEAD", 20, 40, 1) + ) + assert list_recordarray[-1] == ( + (1, 1097, 1.0, 1097.0, b" HEAD", 20, 40, 3) + ) + assert hds.times == list((np.arange(1097) + 1).astype(np.float32)) + assert hds.kstpkper == [(1, kper + 1) for kper in range(1097)] + np.testing.assert_array_equal(hds.iposarray, np.arange(3291) * 3244 + 44) + assert hds.iposarray.dtype == np.int64 + # check first and last row of data frame + pd.testing.assert_frame_equal( + hds.headers.iloc[[0, -1]], + pd.DataFrame( + { + "kstp": np.array([1, 1], np.int32), + "kper": np.array([1, 1097], np.int32), + "pertim": np.array([1.0, 1.0], np.float32), + "totim": np.array([1.0, 1097.0], np.float32), + "text": ["HEAD", "HEAD"], + "ncol": np.array([20, 20], np.int32), + "nrow": np.array([40, 40], np.int32), + "ilay": np.array([1, 3], np.int32), + }, + index=[44, 10672804], + ), + ) + + +def test_concentration_build_index(example_data_path): + # test low-level BinaryLayerFile._build_index() method with UCN file + pth = example_data_path / "mt3d_test/mf2005mt3d/P07/MT3D001.UCN" + with UcnFile(pth) as ucn: + pass + assert ucn.nrow == 15 + assert ucn.ncol == 21 + assert ucn.nlay == 8 + assert not hasattr(ucn, "nper") + assert ucn.totalbytes == 10_432 + assert len(ucn.recordarray) == 8 + assert type(ucn.recordarray) == np.ndarray + assert ucn.recordarray.dtype == np.dtype( + [ + ("ntrans", "i4"), + ("kstp", "i4"), + ("kper", "i4"), + ("totim", "f4"), + ("text", "S16"), + ("ncol", "i4"), + ("nrow", "i4"), + ("ilay", "i4"), + ] + ) + # check first and last recorddict + list_recordarray = ucn.recordarray.tolist() + assert list_recordarray[0] == ( + (29, 1, 1, 100.0, b"CONCENTRATION ", 21, 15, 1) + ) + assert list_recordarray[-1] == ( + (29, 1, 1, 100.0, b"CONCENTRATION ", 21, 15, 8) + ) + assert ucn.times == [np.float32(100.0)] + assert ucn.kstpkper == [(1, 1)] + np.testing.assert_array_equal(ucn.iposarray, np.arange(8) * 1304 + 44) + assert ucn.iposarray.dtype == np.int64 + # check first and last row of data frame + pd.testing.assert_frame_equal( + ucn.headers.iloc[[0, -1]], + pd.DataFrame( + { + "ntrans": np.array([29, 29], np.int32), + "kstp": np.array([1, 1], np.int32), + "kper": np.array([1, 1], np.int32), + "totim": np.array([100.0, 100.0], np.float32), + "text": ["CONCENTRATION", "CONCENTRATION"], + "ncol": np.array([21, 21], np.int32), + "nrow": np.array([15, 15], np.int32), + "ilay": np.array([1, 8], np.int32), + }, + index=[44, 9172], + ), + ) + + def test_binaryfile_writeread(function_tmpdir, nwt_model_path): model = "Pr3_MFNWT_lower.nam" ml = flopy.modflow.Modflow.load( diff --git a/autotest/test_cellbudgetfile.py b/autotest/test_cellbudgetfile.py index 175df6037b..b7ff7c3474 100644 --- a/autotest/test_cellbudgetfile.py +++ b/autotest/test_cellbudgetfile.py @@ -1,11 +1,288 @@ import os import numpy as np +import pandas as pd import pytest from flopy.mf6.modflow.mfsimulation import MFSimulation from flopy.utils.binaryfile import CellBudgetFile +# test low-level CellBudgetFile._build_index() method + + +def test_cellbudgetfile_build_index_classic(example_data_path): + """Test reading "classic" budget file, without "COMPACT BUDGET" option.""" + pth = example_data_path / "mt3d_test/mf2kmt3d/mnw/t5.cbc" + with CellBudgetFile(pth) as cbc: + pass + assert cbc.nrow == 101 + assert cbc.ncol == 101 + assert cbc.nlay == 3 + assert cbc.nper == 1 + assert cbc.totalbytes == 122_448 + assert len(cbc.recordarray) == 1 + assert type(cbc.recordarray) == np.ndarray + assert cbc.recordarray.dtype == np.dtype( + [ + ("kstp", "i4"), + ("kper", "i4"), + ("text", "S16"), + ("ncol", "i4"), + ("nrow", "i4"), + ("nlay", "i4"), + ("imeth", "i4"), + ("delt", "f4"), + ("pertim", "f4"), + ("totim", "f4"), + ("modelnam", "S16"), + ("paknam", "S16"), + ("modelnam2", "S16"), + ("paknam2", "S16"), + ] + ) + assert len(cbc.recorddict) == 1 + list_recorddict = list(cbc.recorddict.items()) + # fmt: off + assert list_recorddict == [( + (1, 1, b" MNW", 101, 101, 3, 0, 0.0, 0.0, -1.0, b"", b"", b"", b""), + 36) + ] + # fmt: on + assert cbc.times == [] + assert cbc.kstpkper == [(1, 1)] + np.testing.assert_array_equal(cbc.iposheader, np.array([0])) + assert cbc.iposheader.dtype == np.int64 + np.testing.assert_array_equal(cbc.iposarray, np.array([36])) + assert cbc.iposarray.dtype == np.int64 + assert cbc.textlist == [b" MNW"] + assert cbc.imethlist == [0] + assert cbc.paknamlist_from == [b""] + assert cbc.paknamlist_to == [b""] + pd.testing.assert_frame_equal( + cbc.headers, + pd.DataFrame( + { + "kstp": np.array([1], np.int32), + "kper": np.array([1], np.int32), + "text": ["MNW"], + "ncol": np.array([101], np.int32), + "nrow": np.array([101], np.int32), + "nlay": np.array([3], np.int32), + }, + index=[36], + ), + ) + + +def test_cellbudgetfile_build_index_compact(example_data_path): + """Test reading mfntw budget file, with "COMPACT BUDGET" option.""" + pth = example_data_path / "freyberg_multilayer_transient" / "freyberg.cbc" + with CellBudgetFile(pth) as cbc: + pass + assert cbc.nrow == 40 + assert cbc.ncol == 20 + assert cbc.nlay == 3 + assert cbc.nper == 1097 + assert cbc.totalbytes == 42_658_384 + assert len(cbc.recordarray) == 5483 + assert type(cbc.recordarray) == np.ndarray + assert cbc.recordarray.dtype == np.dtype( + [ + ("kstp", "i4"), + ("kper", "i4"), + ("text", "S16"), + ("ncol", "i4"), + ("nrow", "i4"), + ("nlay", "i4"), + ("imeth", "i4"), + ("delt", "f4"), + ("pertim", "f4"), + ("totim", "f4"), + ("modelnam", "S16"), + ("paknam", "S16"), + ("modelnam2", "S16"), + ("paknam2", "S16"), + ] + ) + assert len(cbc.recorddict) == 5483 + # check first and last recorddict + list_recorddict = list(cbc.recorddict.items()) + # fmt: off + assert list_recorddict[0] == ( + (1, 1, b" CONSTANT HEAD", 20, 40, -3, 2, 1.0, 1.0, 1.0, b"", b"", b"", b""), + 52, + ) + assert list_recorddict[-1] == ( + (1, 1097, b"FLOW LOWER FACE ", 20, 40, -3, 1, 1.0, 1.0, 1097.0, b"", b"", b"", b""), + 42648784, + ) + # fmt: on + assert cbc.times == list((np.arange(1097) + 1).astype(np.float32)) + assert cbc.kstpkper == [(1, kper + 1) for kper in range(1097)] + # fmt: off + expected_iposheader = np.cumsum([0] + + ([296] + [9652] * 4) * 1095 + + [296] + [9652] * 3 + + [296] + [9652] * 2) + # fmt: on + np.testing.assert_array_equal(cbc.iposheader, expected_iposheader) + assert cbc.iposheader.dtype == np.int64 + np.testing.assert_array_equal(cbc.iposarray, expected_iposheader + 52) + assert cbc.iposarray.dtype == np.int64 + assert cbc.textlist == [ + b" CONSTANT HEAD", + b"FLOW RIGHT FACE ", + b"FLOW FRONT FACE ", + b"FLOW LOWER FACE ", + b" STORAGE", + ] + assert cbc.imethlist == [2, 1, 1, 1, 1] + assert cbc.paknamlist_from == [b""] + assert cbc.paknamlist_to == [b""] + # check first and last row of data frame + pd.testing.assert_frame_equal( + cbc.headers.iloc[[0, -1]], + pd.DataFrame( + { + "kstp": np.array([1, 1], np.int32), + "kper": np.array([1, 1097], np.int32), + "text": ["CONSTANT HEAD", "FLOW LOWER FACE"], + "ncol": np.array([20, 20], np.int32), + "nrow": np.array([40, 40], np.int32), + "nlay": np.array([-3, -3], np.int32), + "imeth": np.array([2, 1], np.int32), + "delt": np.array([1.0, 1.0], np.float32), + "pertim": np.array([1.0, 1.0], np.float32), + "totim": np.array([1.0, 1097.0], np.float32), + }, + index=[52, 42648784], + ), + ) + + +def test_cellbudgetfile_build_index_mf6(example_data_path): + cbb_file = ( + example_data_path + / "mf6" + / "test005_advgw_tidal" + / "expected_output" + / "AdvGW_tidal.cbc" + ) + with CellBudgetFile(cbb_file) as cbb: + pass + assert cbb.nrow == 15 + assert cbb.ncol == 10 + assert cbb.nlay == 3 + assert cbb.nper == 4 + assert cbb.totalbytes == 13_416_552 + assert len(cbb.recordarray) == 3610 + assert type(cbb.recordarray) == np.ndarray + assert cbb.recordarray.dtype == np.dtype( + [ + ("kstp", "i4"), + ("kper", "i4"), + ("text", "S16"), + ("ncol", "i4"), + ("nrow", "i4"), + ("nlay", "i4"), + ("imeth", "i4"), + ("delt", "f8"), + ("pertim", "f8"), + ("totim", "f8"), + ("modelnam", "S16"), + ("paknam", "S16"), + ("modelnam2", "S16"), + ("paknam2", "S16"), + ] + ) + assert len(cbb.recorddict) == 3610 + # check first and last recorddict + list_recorddict = list(cbb.recorddict.items()) + # fmt: off + assert list_recorddict[0] == ( + (1, 1, b" STO-SS", 10, 15, -3, 1, + 1.0, 1.0, 1.0, + b"", b"", b"", b""), + 64, + ) + assert list_recorddict[-1] == ( + (120, 4, b" EVT", 10, 15, -3, 6, + 0.08333333333333333, 10.000000000000002, 30.99999999999983, + b"GWF_1 ", b"GWF_1 ", b"GWF_1 ", b"EVT "), + 13414144, + ) + # fmt: on + assert isinstance(cbb.times, list) + np.testing.assert_allclose(cbb.times, np.linspace(1.0, 31, 361)) + # fmt: off + assert cbb.kstpkper == ( + [(1, 1)] + + [(kstp + 1, 2) for kstp in range(120)] + + [(kstp + 1, 3) for kstp in range(120)] + + [(kstp + 1, 4) for kstp in range(120)] + ) + # fmt: on + # this file has a complex structure, so just look at unique ipos spacings + assert set(np.diff(cbb.iposheader)) == ( + {184, 264, 304, 384, 456, 616, 632, 1448, 2168, 2536, 3664, 21664} + ) + assert cbb.iposheader[0] == 0 + assert cbb.iposheader.dtype == np.int64 + assert set(np.diff(cbb.iposarray)) == ( + {184, 264, 304, 384, 456, 616, 632, 1448, 2168, 2472, 3664, 21728} + ) + assert cbb.iposarray[0] == 64 + assert cbb.iposarray.dtype == np.int64 + # variable size headers depending on imeth + header_sizes = np.full(3610, 64) + header_sizes[cbb.recordarray["imeth"] == 6] = 128 + np.testing.assert_array_equal(cbb.iposheader + header_sizes, cbb.iposarray) + assert cbb.textlist == [ + b" STO-SS", + b" STO-SY", + b" FLOW-JA-FACE", + b" WEL", + b" RIV", + b" GHB", + b" RCH", + b" EVT", + ] + assert cbb.imethlist == [1, 1, 1, 6, 6, 6, 6, 6] + assert cbb.paknamlist_from == [b"", b"GWF_1 "] + assert cbb.paknamlist_to == [ + b"", + b"WEL ", + b"RIV ", + b"GHB-TIDAL ", + b"RCH-ZONE_1 ", + b"RCH-ZONE_2 ", + b"RCH-ZONE_3 ", + b"EVT ", + ] + # check first and last row of data frame + pd.testing.assert_frame_equal( + cbb.headers.iloc[[0, -1]], + pd.DataFrame( + { + "kstp": np.array([1, 120], np.int32), + "kper": np.array([1, 4], np.int32), + "text": ["STO-SS", "EVT"], + "ncol": np.array([10, 10], np.int32), + "nrow": np.array([15, 15], np.int32), + "nlay": np.array([-3, -3], np.int32), + "imeth": np.array([1, 6], np.int32), + "delt": [1.0, 0.08333333333333333], + "pertim": [1.0, 10.0], + "totim": [1.0, 31.0], + "modelnam": ["", "GWF_1"], + "paknam": ["", "GWF_1"], + "modelnam2": ["", "GWF_1"], + "paknam2": ["", "EVT"], + }, + index=[64, 13414144], + ), + ) + @pytest.fixture def zonbud_model_path(example_data_path): diff --git a/autotest/test_formattedfile.py b/autotest/test_formattedfile.py index 84cd88b5ee..f8ad286147 100644 --- a/autotest/test_formattedfile.py +++ b/autotest/test_formattedfile.py @@ -1,5 +1,6 @@ import matplotlib.pyplot as plt import numpy as np +import pandas as pd import pytest from matplotlib.axes import Axes @@ -11,6 +12,58 @@ def freyberg_model_path(example_data_path): return example_data_path / "freyberg" +def test_headfile_build_index(example_data_path): + # test low-level FormattedLayerFile._build_index() method + pth = example_data_path / "mf2005_test" / "test1tr.githds" + with FormattedHeadFile(pth) as hds: + pass + assert hds.nrow == 15 + assert hds.ncol == 10 + assert hds.nlay == 1 + assert not hasattr(hds, "nper") + assert hds.totalbytes == 1613 + assert len(hds.recordarray) == 1 + assert type(hds.recordarray) == np.ndarray + assert hds.recordarray.dtype == np.dtype( + [ + ("kstp", "i4"), + ("kper", "i4"), + ("pertim", "f4"), + ("totim", "f4"), + ("text", "S16"), + ("ncol", "i4"), + ("nrow", "i4"), + ("ilay", "i4"), + ] + ) + flt32time = np.float32(1577880000.0) + assert hds.recordarray.tolist() == [ + (50, 1, float(flt32time), float(flt32time), b"HEAD", 10, 15, 1) + ] + assert hds.times == [flt32time] + assert hds.kstpkper == [(50, 1)] + np.testing.assert_array_equal(hds.iposarray, [98]) + assert hds.iposarray.dtype == np.int64 + pd.testing.assert_frame_equal( + hds.headers, + pd.DataFrame( + [ + { + "kstp": np.int32(50), + "kper": np.int32(1), + "pertim": flt32time, + "totim": flt32time, + "text": "HEAD", + "ncol": np.int32(10), + "nrow": np.int32(15), + "ilay": np.int32(1), + } + ], + index=[98], + ), + ) + + def test_formattedfile_reference(example_data_path): h = FormattedHeadFile(example_data_path / "mf2005_test" / "test1tr.githds") assert isinstance(h, FormattedHeadFile) diff --git a/flopy/utils/binaryfile.py b/flopy/utils/binaryfile.py index a7d57c4e48..2bc604b9e7 100644 --- a/flopy/utils/binaryfile.py +++ b/flopy/utils/binaryfile.py @@ -15,6 +15,7 @@ from typing import List, Optional, Union import numpy as np +import pandas as pd from ..utils.datafile import Header, LayerFile from .gridutil import get_lni @@ -457,12 +458,6 @@ def __init__( ): super().__init__(filename, precision, verbose, kwargs) - def __enter__(self): - return self - - def __exit__(self, *exc): - self.close() - def _build_index(self): """ Build the recordarray and iposarray, which maps the header information @@ -508,9 +503,15 @@ def _build_index(self): # self.recordarray contains a recordarray of all the headers. self.recordarray = np.array(self.recordarray, dtype=self.header_dtype) - self.iposarray = np.array(self.iposarray) + self.iposarray = np.array(self.iposarray, dtype=np.int64) self.nlay = np.max(self.recordarray["ilay"]) + # provide headers as a pandas frame + self.headers = pd.DataFrame(self.recordarray, index=self.iposarray) + self.headers["text"] = ( + self.headers["text"].str.decode("ascii", "strict").str.strip() + ) + def get_databytes(self, header): """ @@ -1303,6 +1304,28 @@ def _build_index(self): self.iposarray = np.array(self.iposarray, dtype=np.int64) self.nper = self.recordarray["kper"].max() + # provide headers as a pandas frame + self.headers = pd.DataFrame(self.recordarray, index=self.iposarray) + # remove irrelevant columns + cols = self.headers.columns.to_list() + unique_imeth = self.headers["imeth"].unique() + if unique_imeth.max() == 0: + drop_cols = cols[cols.index("imeth") :] + elif 6 not in unique_imeth: + drop_cols = cols[cols.index("modelnam") :] + else: + drop_cols = [] + if drop_cols: + self.headers.drop(columns=drop_cols, inplace=True) + for name in self.headers.columns: + dtype = self.header_dtype[name] + if np.issubdtype(dtype, bytes): # convert to str + self.headers[name] = ( + self.headers[name] + .str.decode("ascii", "strict") + .str.strip() + ) + def _skip_record(self, header): """ Skip over this record, not counting header and header2. diff --git a/flopy/utils/datafile.py b/flopy/utils/datafile.py index ae6263cc77..9d3c186cbe 100644 --- a/flopy/utils/datafile.py +++ b/flopy/utils/datafile.py @@ -221,6 +221,12 @@ def __init__( angrot=0.0, ) + def __enter__(self): + return self + + def __exit__(self, *exc): + self.close() + def to_shapefile( self, filename: Union[str, os.PathLike], diff --git a/flopy/utils/formattedfile.py b/flopy/utils/formattedfile.py index 29de5d5fad..5fc256554a 100644 --- a/flopy/utils/formattedfile.py +++ b/flopy/utils/formattedfile.py @@ -7,6 +7,7 @@ """ import numpy as np +import pandas as pd from ..utils.datafile import Header, LayerFile @@ -153,9 +154,15 @@ def _build_index(self): # self.recordarray contains a recordarray of all the headers. self.recordarray = np.array(self.recordarray, self.header.get_dtype()) - self.iposarray = np.array(self.iposarray) + self.iposarray = np.array(self.iposarray, dtype=np.int64) self.nlay = np.max(self.recordarray["ilay"]) + # provide headers as a pandas frame + self.headers = pd.DataFrame(self.recordarray, index=self.iposarray) + self.headers["text"] = self.headers["text"].str.decode( + "ascii", "strict" + ) + def _store_record(self, header, ipos): """ Store file header information in various formats for quick retrieval