Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hed/tools/remodeling/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def parse_operations(operation_list):

@staticmethod
def prep_data(df):
""" Replace all n/a entries in the data frame by np.NaN for processing.
""" Make a copy and replace all n/a entries in the data frame by np.NaN for processing.

Parameters:
df (DataFrame) - The DataFrame to be processed.
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/remodeling/operations/base_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):

"""

return df
return df.copy()

@staticmethod
def _check_list_type(param_value, param_type):
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/remodeling/operations/factor_column_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
dispatcher (Dispatcher): Manages the operation I/O.
df (DataFrame): The DataFrame to be remodeled.
name (str): Unique identifier for the dataframe -- often the original file path.
sidecar (Sidecar or file-like): Only needed for HED operations.
sidecar (Sidecar or file-like): Not needed for this operation.

Returns:
DataFrame: A new DataFrame with the factor columns appended.
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/remodeling/operations/merge_consecutive_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
dispatcher (Dispatcher): Manages the operation I/O.
df (DataFrame): The DataFrame to be remodeled.
name (str): Unique identifier for the dataframe -- often the original file path.
sidecar (Sidecar or file-like): Only needed for HED operations.
sidecar (Sidecar or file-like): Not needed for this operation.

Returns:
DataFrame: A new DataFrame after processing.
Expand Down
13 changes: 7 additions & 6 deletions hed/tools/remodeling/operations/remap_columns_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
dispatcher (Dispatcher): Manages the operation I/O.
df (DataFrame): The DataFrame to be remodeled.
name (str): Unique identifier for the dataframe -- often the original file path.
sidecar (Sidecar or file-like): Only needed for HED operations.
sidecar (Sidecar or file-like): Not needed for this operation.

Returns:
DataFrame: A new DataFrame after processing.
Expand All @@ -109,12 +109,13 @@ def do_op(self, dispatcher, df, name, sidecar=None):
- If ignore_missing is false and source values from the data are not in the map.

"""
df[self.source_columns] = df[self.source_columns].replace(np.NaN, 'n/a')
df1 = df.copy()
df1[self.source_columns] = df1[self.source_columns].replace(np.NaN, 'n/a')
for column in self.integer_sources:
int_mask = df[column] != 'n/a'
df.loc[int_mask, column] = df.loc[int_mask, column].astype(int)
df[self.source_columns] = df[self.source_columns].astype(str)
df_new, missing = self.key_map.remap(df)
int_mask = df1[column] != 'n/a'
df1.loc[int_mask, column] = df1.loc[int_mask, column].astype(int)
df1[self.source_columns] = df1[self.source_columns].astype(str)
df_new, missing = self.key_map.remap(df1)
if missing and not self.ignore_missing:
raise ValueError("MapSourceValueMissing",
f"{name}: Ignore missing is false, but source values [{missing}] are in data but not map")
Expand Down
9 changes: 5 additions & 4 deletions hed/tools/remodeling/operations/remove_columns_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
dispatcher (Dispatcher): Manages the operation I/O.
df (DataFrame): The DataFrame to be remodeled.
name (str): Unique identifier for the dataframe -- often the original file path.
sidecar (Sidecar or file-like): Only needed for HED operations.
sidecar (Sidecar or file-like): Not needed for this operation.

Returns:
DataFrame: A new DataFrame after processing.
Expand All @@ -58,10 +58,11 @@ def do_op(self, dispatcher, df, name, sidecar=None):
- If ignore_missing is False and a column not in the data is to be removed.

"""

df_new = df.copy()
try:
return df.drop(self.column_names, axis=1, errors=self.error_handling)
return df_new.drop(self.column_names, axis=1, errors=self.error_handling)
except KeyError:
raise KeyError("MissingColumnCannotBeRemoved",
f"{name}: Ignore missing is False but a column in {str(self.column_names)} is "
f"not in the data columns [{str(df.columns)}]")
f"not in the data columns [{str(df_new.columns)}]")
return df_new
12 changes: 6 additions & 6 deletions hed/tools/remodeling/operations/remove_rows_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,15 @@ def do_op(self, dispatcher, df, name, sidecar=None):
dispatcher (Dispatcher): Manages the operation I/O.
df (DataFrame): The DataFrame to be remodeled.
name (str): Unique identifier for the dataframe -- often the original file path.
sidecar (Sidecar or file-like): Only needed for HED operations.
sidecar (Sidecar or file-like): Not needed for this operation.

Returns:
DataFrame: A new DataFrame after processing.

"""

if self.column_name not in df.columns:
return df
df_new = df.copy()
if self.column_name not in df_new.columns:
return df_new
for value in self.remove_values:
df = df.loc[df[self.column_name] != value, :]
return df
df_new = df_new.loc[df_new[self.column_name] != value, :]
return df_new
6 changes: 3 additions & 3 deletions hed/tools/remodeling/operations/rename_columns_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
dispatcher (Dispatcher): Manages the operation I/O.
df (DataFrame): The DataFrame to be remodeled.
name (str): Unique identifier for the dataframe -- often the original file path.
sidecar (Sidecar or file-like): Only needed for HED operations.
sidecar (Sidecar or file-like): Not needed for this operation.

Returns:
DataFrame: A new DataFrame after processing.
Expand All @@ -58,9 +58,9 @@ def do_op(self, dispatcher, df, name, sidecar=None):
- When ignore_missing is false and column_mapping has columns not in the data.

"""

df_new = df.copy()
try:
return df.rename(columns=self.column_mapping, errors=self.error_handling)
return df_new.rename(columns=self.column_mapping, errors=self.error_handling)
except KeyError:
raise KeyError("MappedColumnsMissingFromData",
f"{name}: ignore_missing is False, mapping columns [{self.column_mapping}]"
Expand Down
14 changes: 7 additions & 7 deletions hed/tools/remodeling/operations/reorder_columns_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
dispatcher (Dispatcher): Manages the operation I/O.
df (DataFrame): The DataFrame to be remodeled.
name (str): Unique identifier for the dataframe -- often the original file path.
sidecar (Sidecar or file-like): Only needed for HED operations.
sidecar (Sidecar or file-like): Not needed for this operation.

Returns:
DataFrame: A new DataFrame after processing.
Expand All @@ -57,17 +57,17 @@ def do_op(self, dispatcher, df, name, sidecar=None):
- When ignore_missing is false and column_order has columns not in the data.

"""

current_columns = list(df.columns)
missing_columns = set(self.column_order).difference(set(df.columns))
df_new = df.copy()
current_columns = list(df_new.columns)
missing_columns = set(self.column_order).difference(set(df_new.columns))
ordered = self.column_order
if missing_columns and not self.ignore_missing:
raise ValueError("MissingReorderedColumns",
f"{str(missing_columns)} are not in dataframe columns "
f" [{str(df.columns)}] and not ignored.")
f" [{str(df_new.columns)}] and not ignored.")
elif missing_columns:
ordered = [elem for elem in self.column_order if elem not in list(missing_columns)]
if self.keep_others:
ordered += [elem for elem in current_columns if elem not in ordered]
df = df.loc[:, ordered]
return df
df_new = df_new.loc[:, ordered]
return df_new
2 changes: 1 addition & 1 deletion hed/tools/remodeling/operations/split_rows_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
dispatcher (Dispatcher): Manages the operation I/O.
df (DataFrame): The DataFrame to be remodeled.
name (str): Unique identifier for the dataframe -- often the original file path.
sidecar (Sidecar or file-like): Only needed for HED operations.
sidecar (Sidecar or file-like): Not needed for this operation.

Returns:
DataFrame: A new DataFrame after processing.
Expand Down
12 changes: 6 additions & 6 deletions hed/tools/remodeling/operations/summarize_column_names_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,28 +49,28 @@ def __init__(self, parameters):
self.append_timecode = parameters.get('append_timecode', False)

def do_op(self, dispatcher, df, name, sidecar=None):
""" Create factor columns corresponding to values in a specified column.
""" Create a column name summary for df.

Parameters:
dispatcher (Dispatcher): Manages the operation I/O.
df (DataFrame): The DataFrame to be remodeled.
name (str): Unique identifier for the dataframe -- often the original file path.
sidecar (Sidecar or file-like): Only needed for HED operations.
sidecar (Sidecar or file-like): Not needed for this operation.

Returns:
DataFrame: A new DataFrame with the factor columns appended.
DataFrame: A copy of df.

Side-effect:
Updates the relevant summary.

"""

df_new = df.copy()
summary = dispatcher.summary_dicts.get(self.summary_name, None)
if not summary:
summary = ColumnNameSummary(self)
dispatcher.summary_dicts[self.summary_name] = summary
summary.update_summary({"name": name, "column_names": list(df.columns)})
return df
summary.update_summary({"name": name, "column_names": list(df_new.columns)})
return df_new


class ColumnNameSummary(BaseSummary):
Expand Down
13 changes: 7 additions & 6 deletions hed/tools/remodeling/operations/summarize_column_values_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,28 +65,29 @@ def __init__(self, parameters):
self.values_per_line = parameters.get('values_per_line', self.VALUES_PER_LINE)

def do_op(self, dispatcher, df, name, sidecar=None):
""" Create factor columns corresponding to values in a specified column.
""" Create a summary of the column values in df.

Parameters:
dispatcher (Dispatcher): Manages the operation I/O.
df (DataFrame): The DataFrame to be remodeled.
name (str): Unique identifier for the dataframe -- often the original file path.
sidecar (Sidecar or file-like): Only needed for HED operations.
sidecar (Sidecar or file-like): Not needed for this operation.

Returns:
DataFrame: A new DataFrame with the factor columns appended.
DataFrame: A copy of df.

Side-effect:
Updates the relevant summary.

"""


df_new = df.copy()
summary = dispatcher.summary_dicts.get(self.summary_name, None)
if not summary:
summary = ColumnValueSummary(self)
dispatcher.summary_dicts[self.summary_name] = summary
summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name})
return df
summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name})
return df_new


class ColumnValueSummary(BaseSummary):
Expand Down
7 changes: 4 additions & 3 deletions hed/tools/remodeling/operations/summarize_definitions_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,18 @@ def do_op(self, dispatcher, df, name, sidecar=None):
sidecar (Sidecar or file-like): Only needed for HED operations.

Returns:
DataFrame: the same datafarme
DataFrame: a copy of df

Side-effect:
Updates the relevant summary.

"""
df_new = df.copy()
summary = dispatcher.summary_dicts.setdefault(self.summary_name,
DefinitionSummary(self, dispatcher.hed_schema))
summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name, 'sidecar': sidecar,
summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name, 'sidecar': sidecar,
'schema': dispatcher.hed_schema})
return df
return df_new


class DefinitionSummary(BaseSummary):
Expand Down
11 changes: 6 additions & 5 deletions hed/tools/remodeling/operations/summarize_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def __init__(self, parameters):
self.expand_context = parameters.get('expand_context', False)

def do_op(self, dispatcher, df, name, sidecar=None):
""" Create factor columns corresponding to values in a specified column.
""" Summarize the HED tags present in the dataset.

Parameters:
dispatcher (Dispatcher): Manages the operation I/O.
Expand All @@ -72,19 +72,20 @@ def do_op(self, dispatcher, df, name, sidecar=None):
sidecar (Sidecar or file-like): Only needed for HED operations.

Returns:
DataFrame: A new DataFrame with the factor columns appended.
DataFrame: A copy of df.

Side-effect:
Updates the relevant summary.

"""
df_new = df.copy()
summary = dispatcher.summary_dicts.get(self.summary_name, None)
if not summary:
summary = HedTagSummary(self)
dispatcher.summary_dicts[self.summary_name] = summary
summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name,
summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name,
'schema': dispatcher.hed_schema, 'sidecar': sidecar})
return df
return df_new


class HedTagSummary(BaseSummary):
Expand All @@ -100,7 +101,7 @@ def update_summary(self, new_info):
Parameters:
new_info (dict): A dictionary with the parameters needed to update a summary.

Notes:
Notes:
- The summary needs a "name" str, a "schema", a "df", and a "Sidecar".

"""
Expand Down
7 changes: 4 additions & 3 deletions hed/tools/remodeling/operations/summarize_hed_type_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,19 +67,20 @@ def do_op(self, dispatcher, df, name, sidecar=None):
sidecar (Sidecar or file-like): Usually required unless event file has a HED column.

Returns:
DataFrame: Input DataFrame, unchanged.
DataFrame: A copy of df

Side-effect:
Updates the relevant summary.

"""
df_new = df.copy()
summary = dispatcher.summary_dicts.get(self.summary_name, None)
if not summary:
summary = HedTypeSummary(self)
dispatcher.summary_dicts[self.summary_name] = summary
summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name,
summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name,
'schema': dispatcher.hed_schema, 'sidecar': sidecar})
return df
return df_new


class HedTypeSummary(BaseSummary):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class SummarizeHedValidationOp(BaseOp):
"operation": "summarize_hed_validation",
"required_parameters": {
"summary_name": str,
"summary_filename": str
"summary_filename": str
},
"optional_parameters": {
"append_timecode": bool,
Expand Down Expand Up @@ -64,19 +64,20 @@ def do_op(self, dispatcher, df, name, sidecar=None):
sidecar (Sidecar or file-like): Usually needed unless only HED tags in HED column of event file.

Returns:
DataFrame: Input DataFrame, unchanged.
DataFrame: A copy of df

Side-effect:
Updates the relevant summary.

"""
df_new = df.copy()
summary = dispatcher.summary_dicts.get(self.summary_name, None)
if not summary:
summary = HedValidationSummary(self)
dispatcher.summary_dicts[self.summary_name] = summary
summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name,
summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name,
'schema': dispatcher.hed_schema, 'sidecar': sidecar})
return df
return df_new


class HedValidationSummary(BaseSummary):
Expand Down
Loading