From df655eb29513954606297a1b117609f6accd6e8b Mon Sep 17 00:00:00 2001 From: spaulins-usgs Date: Tue, 5 Nov 2019 07:32:22 -0800 Subject: [PATCH 1/2] fix(#707): Special case optimized loading code for mflist data structure expanded to include more cases. Also optimized data verification code and added option to not verify data on load. --- flopy/mf6/data/mfdatastorage.py | 15 ++-- flopy/mf6/data/mffileaccess.py | 134 +++++++++++++++++++++--------- flopy/mf6/data/mfstructure.py | 8 +- flopy/mf6/modflow/mfsimulation.py | 7 +- 4 files changed, 120 insertions(+), 44 deletions(-) diff --git a/flopy/mf6/data/mfdatastorage.py b/flopy/mf6/data/mfdatastorage.py index e3353263f2..cd1eed72a6 100644 --- a/flopy/mf6/data/mfdatastorage.py +++ b/flopy/mf6/data/mfdatastorage.py @@ -890,7 +890,8 @@ def store_internal(self, data, layer=None, const=False, multiplier=None, self.layer_storage.first_item().data_storage_type = \ DataStorageType.internal_array if data is None or isinstance(data, np.recarray): - self._verify_list(data) + if self._simulation_data.verify_data: + self._verify_list(data) self.layer_storage.first_item().internal_data = data else: if data is None: @@ -977,7 +978,6 @@ def _build_recarray(self, data, key, autofill): # add placeholders to data so it agrees with # expected dimensions of recarray self._add_placeholders(data) - self._verify_list(data) try: new_data = np.rec.array(data, self._recarray_type_list) @@ -1006,7 +1006,8 @@ def _build_recarray(self, data, key, autofill): inspect.stack()[0][3], type_, value_, traceback_, message, self._simulation_data.debug) - self._verify_list(new_data) + if self._simulation_data.verify_data: + self._verify_list(new_data) return new_data def _resolve_multitype_fields(self, data): @@ -1510,6 +1511,8 @@ def _tupleize_data(data): def _verify_list(self, data): if data is not None: + model_grid = None + cellid_size = None for data_line in data: data_line_len = len(data_line) for index in range(0, min(data_line_len, @@ -1519,8 +1522,10 @@ def _verify_list(self, data): is not None and data_line[index] is not None: # this is a cell id. verify that it contains the # correct number of integers - model_grid = self.data_dimensions.get_model_grid() - cellid_size = model_grid.get_num_spatial_coordinates() + if cellid_size is None: + model_grid = self.data_dimensions.get_model_grid() + cellid_size = model_grid.\ + get_num_spatial_coordinates() if len(data_line[index]) != cellid_size: message = 'Cellid "{}" contains {} integer(s). ' \ 'Expected a cellid containing {} ' \ diff --git a/flopy/mf6/data/mffileaccess.py b/flopy/mf6/data/mffileaccess.py index 1aca8008c7..6ec6491497 100644 --- a/flopy/mf6/data/mffileaccess.py +++ b/flopy/mf6/data/mffileaccess.py @@ -826,15 +826,21 @@ def read_list_data_from_file(self, file_handle, storage, current_key, self._temp_dict = {} self._last_line_info = [] store_data = False - simple_line = False struct = self.structure + self.simple_line = \ + len(self._data_dimensions.package_dim.get_tsnames()) == 0 and \ + not struct.is_mname + for data_item in struct.data_item_structures: + if data_item.optional and data_item.name != 'boundname' and \ + data_item.name != 'aux': + self.simple_line = False if current_line is None: current_line = file_handle.readline() arr_line = PyListUtil.split_data_line(current_line) line_num = 0 try: - simple_line, data_line = self._load_list_line( + data_line = self._load_list_line( storage, arr_line, line_num, data_loaded, True, current_key=current_key, data_line=data_line)[1:] line_num += 1 @@ -901,6 +907,9 @@ def read_list_data_from_file(self, file_handle, storage, current_key, # loop until end of block line = ' ' + optional_line_info = [] + line_info_processed = False + data_structs = struct.data_item_structures while line != '': line = file_handle.readline() arr_line = PyListUtil.split_data_line(line) @@ -936,7 +945,20 @@ def read_list_data_from_file(self, file_handle, storage, current_key, True) storage.data_dimensions.unlock() return data_rec - if simple_line and struct.num_optional == 0: + if self.simple_line: + line_len = len(self._last_line_info) + if struct.num_optional > 0 and not line_info_processed: + line_info_processed = True + for index, data_item in \ + enumerate(struct.data_item_structures): + if index < line_len: + if data_item.optional: + self._last_line_info = \ + self._last_line_info[:index] + line_len = len(self._last_line_info) + optional_line_info.append(data_item) + else: + optional_line_info.append(data_item) if MFComment.is_comment(arr_line, True): arr_line.insert(0, '\n') @@ -946,18 +968,14 @@ def read_list_data_from_file(self, file_handle, storage, current_key, self._data_line = () cellid_index = 0 cellid_tuple = () + data_index = 0 for index, entry in enumerate(self._last_line_info): for sub_entry in entry: if sub_entry[1] is not None: - data_structs = struct.data_item_structures if sub_entry[2] > 0: # is a cellid - cell_num = convert_data( - arr_line[sub_entry[0]], - self._data_dimensions, - sub_entry[1], - data_structs[index]) - cellid_tuple += (cell_num - 1,) + cellid_tuple += \ + (int(arr_line[sub_entry[0]]) - 1,) # increment index cellid_index += 1 if cellid_index == sub_entry[2]: @@ -974,13 +992,46 @@ def read_list_data_from_file(self, file_handle, storage, current_key, data_structs[index]),) else: self._data_line += (None,) - data_loaded.append(self._data_line) + data_index = sub_entry[0] + arr_line_len = len(arr_line) + if arr_line_len > data_index + 1: + # more data on the end of the line. see if it can + # be loaded as optional data + data_index += 1 + for data_item in struct.data_item_structures[ + len(self._last_line_info):]: + if arr_line_len <= data_index: + break + if arr_line[data_index][0] == '#': + break + elif data_item.name == 'aux': + data_index, self._data_line, \ + more_data_expected = \ + self._process_aux( + storage, arr_line, arr_line_len, + data_item, data_index, None, + current_key, self._data_line, False) + elif data_item.name == 'boundnames' and \ + self._data_dimensions.package_dim.\ + boundnames(): + self._data_line += (convert_data( + arr_line[data_index], + self._data_dimensions, + data_item.type, + data_item),) + if arr_line_len > data_index + 1: + # FEATURE: Keep number of white space characters used in + # comments section + storage.comments[line_num] = MFComment( + ' '.join(arr_line[data_index + 1:]), struct.path, + self._simulation_data, line_num) + data_loaded.append(self._data_line) else: try: data_line = self._load_list_line( storage, arr_line, line_num, data_loaded, False, - current_key=current_key, data_line=data_line)[2] + current_key=current_key, data_line=data_line)[1] except Exception as ex: comment = 'Unable to process line {} of data list: ' \ '"{}"'.format(line_num + 1, line) @@ -1011,7 +1062,6 @@ def _load_list_line(self, storage, arr_line, line_num, data_loaded, data_item_ks = None struct = self.structure org_data_line = data_line - simple_line = True # only initialize if we are at the start of a new line if data_index_start == 0: data_set = struct @@ -1020,7 +1070,7 @@ def _load_list_line(self, storage, arr_line, line_num, data_loaded, # determine if at end of block if arr_line and arr_line[0][:3].upper() == 'END': self.enabled = True - return 0, simple_line, data_line + return 0, data_line data_index = data_index_start arr_line_len = len(arr_line) if MFComment.is_comment(arr_line, True) and data_index_start == 0: @@ -1045,16 +1095,16 @@ def _load_list_line(self, storage, arr_line, line_num, data_loaded, not storage.in_model: if data_item.type == DatumType.keyword: data_index += 1 - simple_line = False + self.simple_line = False elif data_item.type == DatumType.record: # this is a record within a record, recurse into # _load_line to load it - data_index, simple_line, data_line = \ + data_index, data_line = \ self._load_list_line( storage, arr_line, line_num, data_loaded, build_type_list, current_key, data_index, data_item, False, data_line=data_line) - simple_line = False + self.simple_line = False elif data_item.name != 'boundname' or \ self._data_dimensions.package_dim.boundnames(): if data_item.optional and data == '#': @@ -1106,7 +1156,7 @@ def _load_list_line(self, storage, arr_line, line_num, data_loaded, data = arr_line[data_index] repeat_count += 1 if data_item.type == DatumType.keystring: - simple_line = False + self.simple_line = False if repeat_count <= 1: # only process the # keyword on the first repeat find # data item associated with correct @@ -1265,14 +1315,14 @@ def _load_list_line(self, storage, arr_line, line_num, data_loaded, # keep reading data until eoln more_data_expected = \ (data_index < arr_line_len) - simple_line = simple_line and \ + self.simple_line = self.simple_line and \ not unknown_repeats and \ - len(data_item.shape) == 0 + (len(data_item.shape) == 0 or + data_item.is_cellid) var_index += 1 # populate unused optional variables with None type for data_item in data_set.data_item_structures[var_index:]: - simple_line = False if data_item.name == 'aux': data_line = self._process_aux( storage, arr_line, arr_line_len, data_item, data_index, @@ -1294,10 +1344,11 @@ def _load_list_line(self, storage, arr_line, line_num, data_loaded, ' '.join(arr_line[data_index+1:]), struct.path, self._simulation_data, line_num) data_loaded.append(data_line) - return data_index, simple_line, data_line + return data_index, data_line - def _process_aux(self, storage, arr_line, arr_line_len, data_item, data_index, - var_index, current_key, data_line): + def _process_aux(self, storage, arr_line, arr_line_len, data_item, + data_index, var_index, current_key, data_line, + add_to_last_line=True): aux_var_names = self._data_dimensions.package_dim.get_aux_variables() more_data_expected = False if aux_var_names is not None: @@ -1308,23 +1359,25 @@ def _process_aux(self, storage, arr_line, arr_line_len, data_item, data_index, data_index, more_data_expected, data_line = \ self._append_data_list( storage, data_item, None, 0, data_index, - var_index, 1, current_key, data_line)[0:3] + var_index, 1, current_key, data_line, + add_to_last_line)[0:3] else: # read in aux variables data_index, more_data_expected, data_line = \ self._append_data_list( storage, data_item, arr_line, arr_line_len, data_index, var_index, 0, current_key, - data_line)[0:3] + data_line, add_to_last_line)[0:3] return data_index, data_line, more_data_expected def _append_data_list(self, storage, data_item, arr_line, arr_line_len, data_index, var_index, repeat_count, current_key, - data_line): + data_line, add_to_last_line=True): # append to a 2-D list which will later be converted to a numpy # rec array struct = self.structure - self._last_line_info.append([]) + if add_to_last_line: + self._last_line_info.append([]) if data_item.is_cellid or (data_item.possible_cellid and storage._validate_cellid( arr_line, data_index)): @@ -1352,8 +1405,10 @@ def _append_data_list(self, storage, data_item, arr_line, arr_line_len, # special case where cellid is 'none', store as tuple of # 'none's cellid_tuple = ('none',) * cellid_size - self._last_line_info[-1].append([data_index, DatumType.string, - cellid_size]) + if add_to_last_line: + self._last_line_info[-1].append([data_index, + data_item.type, + cellid_size]) new_index = data_index + 1 else: # handle regular cellid @@ -1397,8 +1452,10 @@ def _append_data_list(self, storage, data_item, arr_line, arr_line_len, self._data_dimensions, data_item.type) cellid_tuple = cellid_tuple + (int(data_converted) - 1,) - self._last_line_info[-1].append([index, DatumType.integer, - cellid_size]) + if add_to_last_line: + self._last_line_info[-1].append([index, + data_item.type, + cellid_size]) new_index = data_index + cellid_size data_line = data_line + (cellid_tuple,) if data_item.shape is not None and len(data_item.shape) > 0 and \ @@ -1414,7 +1471,8 @@ def _append_data_list(self, storage, data_item, arr_line, arr_line_len, else: if arr_line is None: data_converted = None - self._last_line_info[-1].append([data_index, None, 0]) + if add_to_last_line: + self._last_line_info[-1].append([data_index, None, 0]) else: if arr_line[data_index].lower() in \ self._data_dimensions.package_dim.get_tsnames(): @@ -1423,15 +1481,17 @@ def _append_data_list(self, storage, data_item, arr_line, arr_line_len, # override recarray data type to support writing # string values storage.override_data_type(var_index, object) - self._last_line_info[-1].append([data_index, - DatumType.string, 0]) + if add_to_last_line: + self._last_line_info[-1].append([data_index, + DatumType.string, 0]) else: data_converted = convert_data(arr_line[data_index], self._data_dimensions, data_item.type, data_item) - self._last_line_info[-1].append([data_index, - data_item.type, 0]) + if add_to_last_line: + self._last_line_info[-1].append([data_index, + data_item.type, 0]) data_line = data_line + (data_converted,) more_data_expected, unknown_repeats = \ storage.resolve_shape_list( diff --git a/flopy/mf6/data/mfstructure.py b/flopy/mf6/data/mfstructure.py index fbffd56b27..46cc66f898 100644 --- a/flopy/mf6/data/mfstructure.py +++ b/flopy/mf6/data/mfstructure.py @@ -1209,7 +1209,6 @@ def __init__(self, data_item, model_data, package_type, dfn_list): self.name_length = len(self.name) self.is_aux = data_item.is_aux self.is_boundname = data_item.is_boundname - self.is_mname = data_item.is_mname self.name_list = data_item.name_list self.python_name = data_item.python_name self.longname = data_item.longname @@ -1246,6 +1245,13 @@ def __init__(self, data_item, model_data, package_type, dfn_list): self.expected_data_items[data_item.name] = len( self.expected_data_items) + @property + def is_mname(self): + for item in self.data_item_structures: + if item.is_mname: + return True + return False + def get_item(self, item_name): for item in self.data_item_structures: if item.name.lower() == item_name.lower(): diff --git a/flopy/mf6/modflow/mfsimulation.py b/flopy/mf6/modflow/mfsimulation.py index d7ad1fb81c..696f322556 100644 --- a/flopy/mf6/modflow/mfsimulation.py +++ b/flopy/mf6/modflow/mfsimulation.py @@ -173,6 +173,7 @@ def __init__(self, path): self.fast_write = True self.comments_on = False self.auto_set_sizes = True + self.verify_data = True self.debug = False self.verbose = True self.verbosity_level = VerbosityLevel.normal @@ -460,7 +461,8 @@ def model_names(self): @classmethod def load(cls, sim_name='modflowsim', version='mf6', exe_name='mf6.exe', - sim_ws='.', strict=True, verbosity_level=1, load_only=None): + sim_ws='.', strict=True, verbosity_level=1, load_only=None, + verify_data=True): """ Load an existing model. @@ -491,6 +493,8 @@ def load(cls, sim_name='modflowsim', version='mf6', exe_name='mf6.exe', setting. subpackages, like time series and observations, will also load regardless of this setting. example list: ['ic', 'maw', 'npf', 'oc', 'ims', 'gwf6-gwf6'] + verify_data : bool + verify data when it is loaded. this can slow down loading Returns ------- @@ -503,6 +507,7 @@ def load(cls, sim_name='modflowsim', version='mf6', exe_name='mf6.exe', # initialize instance = cls(sim_name, version, exe_name, sim_ws, verbosity_level) verbosity_level = instance.simulation_data.verbosity_level + instance.simulation_data.verify_data = verify_data if verbosity_level.value >= VerbosityLevel.normal.value: print('loading simulation...') From 1326a3bcb6bfe254bea3648166092cf7d115aee4 Mon Sep 17 00:00:00 2001 From: spaulins-usgs Date: Tue, 5 Nov 2019 07:56:38 -0800 Subject: [PATCH 2/2] fix(formatting) --- flopy/mf6/data/mffileaccess.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/flopy/mf6/data/mffileaccess.py b/flopy/mf6/data/mffileaccess.py index 6ec6491497..ecebc82571 100644 --- a/flopy/mf6/data/mffileaccess.py +++ b/flopy/mf6/data/mffileaccess.py @@ -1005,12 +1005,12 @@ def read_list_data_from_file(self, file_handle, storage, current_key, if arr_line[data_index][0] == '#': break elif data_item.name == 'aux': - data_index, self._data_line, \ - more_data_expected = \ + data_index, self._data_line = \ self._process_aux( storage, arr_line, arr_line_len, data_item, data_index, None, - current_key, self._data_line, False) + current_key, self._data_line, + False)[0:2] elif data_item.name == 'boundnames' and \ self._data_dimensions.package_dim.\ boundnames(): @@ -1020,8 +1020,8 @@ def read_list_data_from_file(self, file_handle, storage, current_key, data_item.type, data_item),) if arr_line_len > data_index + 1: - # FEATURE: Keep number of white space characters used in - # comments section + # FEATURE: Keep number of white space characters used + # in comments section storage.comments[line_num] = MFComment( ' '.join(arr_line[data_index + 1:]), struct.path, self._simulation_data, line_num)