From dae19f9e8fe5e5be617da24f0d0ff33095a4faf4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Sep 2025 19:20:23 +0000 Subject: [PATCH 1/4] Initial plan From ecdf3335cbe48fdd31fabf9e6a67d6b5e6b2f9b4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Sep 2025 19:26:59 +0000 Subject: [PATCH 2/4] Implement number_rows_op and number_groups_op functionality Co-authored-by: VisLab <1189050+VisLab@users.noreply.github.com> --- .../remodeling/operations/number_groups_op.py | 61 +++++++++++++++++-- .../remodeling/operations/number_rows_op.py | 34 +++++++---- 2 files changed, 77 insertions(+), 18 deletions(-) diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index 2fc7a8b31..23059369f 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -1,13 +1,26 @@ -""" Implementation in progress. """ +""" Number groups of rows in a dataframe based on start and stop markers. """ +import numpy as np from hed.tools.remodeling.operations.base_op import BaseOp -# TODO: This class is under development - - class NumberGroupsOp(BaseOp): - """ Implementation in progress. """ + """ Number groups of rows in a dataframe based on start and stop markers. + + Required remodeling parameters: + - **number_column_name** (*str*): The name of the column to add with the group numbers. + - **source_column** (*str*): The column to check for start and stop markers. + - **start** (*dict*): Specification for start markers. + - **values** (*list*): List of values that mark the start of a group. + - **inclusion** (*str*): Either "include" or "exclude" to specify whether the start marker row should be included in the group. + - **stop** (*dict*): Specification for stop markers. + - **values** (*list*): List of values that mark the end of a group. + - **inclusion** (*str*): Either "include" or "exclude" to specify whether the stop marker row should be included in the group. + + Optional remodeling parameters: + - **overwrite** (*bool*): If true, overwrite an existing column with the same name. + + """ NAME = "number_groups" PARAMS = { @@ -83,7 +96,7 @@ def __init__(self, parameters): self.overwrite = parameters.get('overwrite', False) def do_op(self, dispatcher, df, name, sidecar=None): - """ Add numbers to groups of events in dataframe. + """ Add numbers to groups of rows in the events dataframe. Parameters: dispatcher (Dispatcher): Manages the operation I/O. @@ -124,6 +137,42 @@ def do_op(self, dispatcher, df, name, sidecar=None): f"Start value(s) {missing} does not exist in {self.source_column} of event file {name}") df_new = df.copy() + df_new[self.number_column_name] = np.nan + + # Track current group number and whether we're inside a group + current_group = 0 + in_group = False + + for idx in range(len(df_new)): + value = df_new.iloc[idx][self.source_column] + + # Check if this is a start marker + if value in self.start['values']: + if not in_group: # Start a new group only if not already in one + current_group += 1 + in_group = True + if self.start['inclusion'] == 'include': + df_new.at[idx, self.number_column_name] = current_group + # If already in a group and this is a start marker: + # - If inclusion is 'exclude', it acts as both end and start + elif self.start['inclusion'] == 'exclude': + # This marker ends the previous group and starts a new one + current_group += 1 + # Don't assign the number to this row (it's excluded) + continue + + # Check if this is a stop marker + if value in self.stop['values']: + if in_group: + if self.stop['inclusion'] == 'include': + df_new.at[idx, self.number_column_name] = current_group + in_group = False + continue + + # Regular row - if in group, assign current group number + if in_group: + df_new.at[idx, self.number_column_name] = current_group + return df_new @staticmethod diff --git a/hed/tools/remodeling/operations/number_rows_op.py b/hed/tools/remodeling/operations/number_rows_op.py index e122be27e..ef3d572f6 100644 --- a/hed/tools/remodeling/operations/number_rows_op.py +++ b/hed/tools/remodeling/operations/number_rows_op.py @@ -1,12 +1,22 @@ -""" Implementation in progress. """ +""" Number rows in a dataframe based on optional criteria. """ +import numpy as np from hed.tools.remodeling.operations.base_op import BaseOp -# TODO: This class is under development - class NumberRowsOp(BaseOp): - """ Implementation in progress. """ + """ Number rows in a dataframe based on optional criteria. + + Required remodeling parameters: + - **number_column_name** (*str*): The name of the column to add with the row numbers. + + Optional remodeling parameters: + - **overwrite** (*bool*): If true, overwrite an existing column with the same name. + - **match_value** (*dict*): If provided, only number rows where the specified column matches the specified value. + - **column** (*str*): The column name to match. + - **value** (*str* or *number*): The value to match. + + """ NAME = "number_rows" PARAMS = { @@ -51,7 +61,7 @@ def __init__(self, parameters): self.match_value = parameters.get('match_value', False) def do_op(self, dispatcher, df, name, sidecar=None): - """ Add numbers events dataframe. + """ Add numbers to rows in the events dataframe. Parameters: dispatcher (Dispatcher): Manages operation I/O. @@ -78,13 +88,13 @@ def do_op(self, dispatcher, df, name, sidecar=None): f"{self.match_value['column']}.", "") df_new = df.copy() - # df_new[self.number_column_name] = np.nan - # if self.match_value: - # filter = df[self.match_value['column']] == self.match_value['value'] - # numbers = [*range(1, sum(filter)+1)] - # df_new.loc[filter, self.number_column_name] = numbers - # else: - # df_new[self.number_column_name] = df_new.index + 1 + df_new[self.number_column_name] = np.nan + if self.match_value: + filter = df[self.match_value['column']] == self.match_value['value'] + numbers = [*range(1, sum(filter)+1)] + df_new.loc[filter, self.number_column_name] = numbers + else: + df_new[self.number_column_name] = df_new.index + 1 return df_new From 8138b9bde11af2a6efaa97d85cf12e6d5295e63f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Sep 2025 19:30:59 +0000 Subject: [PATCH 3/4] Uncomment test assertions and fix number_groups_op source column handling Co-authored-by: VisLab <1189050+VisLab@users.noreply.github.com> --- .../remodeling/operations/number_groups_op.py | 3 +- .../operations/test_number_groups.py | 106 +++++++++--------- .../operations/test_number_rows_op.py | 42 +++---- 3 files changed, 80 insertions(+), 71 deletions(-) diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index 23059369f..0421e4b05 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -144,7 +144,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): in_group = False for idx in range(len(df_new)): - value = df_new.iloc[idx][self.source_column] + # Use the original df to read source values in case we're overwriting the source column + value = df.iloc[idx][self.source_column] # Check if this is a start marker if value in self.start['values']: diff --git a/tests/tools/remodeling/operations/test_number_groups.py b/tests/tools/remodeling/operations/test_number_groups.py index 6aa4cad9f..092c5d618 100644 --- a/tests/tools/remodeling/operations/test_number_groups.py +++ b/tests/tools/remodeling/operations/test_number_groups.py @@ -1,5 +1,7 @@ from copy import deepcopy import json +import numpy as np +import pandas as pd import unittest from hed.tools.remodeling.operations.number_groups_op import NumberGroupsOp @@ -151,54 +153,58 @@ def tearDownClass(cls): # test working def test_number_groups_new_column(self): - pass # Test when new column name is given with overwrite unspecified (=False) - # parms = json.loads(self.json_parms) - # op = NumberGroupsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_check = pd.DataFrame(self.numbered_data, columns=self.numbered_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - # - # self.assertTrue(list(df_new.columns) == list(self.numbered_columns), - # "numbered_events should have the expected columns") - # self.assertTrue(len(df_new) == len(df_test), - # "numbered_events should have same length as original dataframe") - # self.assertTrue(np.nanmax(df_new["number"]) == 5.0, - # "max value in numbered_events should match the number of groups") - # - # # fill na to match postprocessing dispatcher - # df_new = df_new.fillna('n/a') - # self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - # "numbered_events should not differ from check") - # - # # Test that df has not been changed by the op - # self.assertTrue(list(df.columns) == list(df_test.columns), - # "number_rows should not change the input df columns") - # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), - # "number_rows should not change the input df values") - # - # def test_existing_column_overwrite_true(self): - # # Test when existing column name is given with overwrite True - # parms = json.loads(self.json_overwrite_true_parms) - # op = NumberGroupsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_check = pd.DataFrame(self.overwritten_data, columns=self.existing_sample_columns) - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - # - # self.assertTrue(list(df_new.columns) == list(self.existing_sample_columns), - # "numbered_events should have the same columns as original dataframe in case of overwrite") - # self.assertTrue(len(df_new) == len(df_test), - # "numbered_events should have same length as original dataframe") - # self.assertTrue(np.nanmax(df_new["number"]) == 5.0, - # "max value in numbered_events should match the number of groups") - # df_new = df_new.fillna('n/a') - # self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - # "numbered_events should not differ from check") - # - # # Test that df has not been changed by the op - # self.assertTrue(list(df.columns) == list(df_test.columns), - # "split_rows should not change the input df columns") - # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), - # "split_rows should not change the input df values") + parms = json.loads(self.json_parms) + op = NumberGroupsOp(parms) + df = pd.DataFrame(self.sample_data, columns=self.sample_columns) + df_check = pd.DataFrame(self.numbered_data, columns=self.numbered_columns) + df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) + df_new = op.do_op(self.dispatcher, df_test, self.file_name) + + self.assertTrue(list(df_new.columns) == list(self.numbered_columns), + "numbered_events should have the expected columns") + self.assertTrue(len(df_new) == len(df_test), + "numbered_events should have same length as original dataframe") + self.assertTrue(np.nanmax(df_new["number"]) == 5.0, + "max value in numbered_events should match the number of groups") + + # fill na to match postprocessing dispatcher + df_new = df_new.fillna('n/a') + self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), + "numbered_events should not differ from check") + + # Test that df has not been changed by the op + self.assertTrue(list(df.columns) == list(df_test.columns), + "number_rows should not change the input df columns") + self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), + "number_rows should not change the input df values") + + def test_existing_column_overwrite_true(self): + # Test when existing column name is given with overwrite True + parms = json.loads(self.json_overwrite_true_parms) + op = NumberGroupsOp(parms) + df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) + df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) + df_check = pd.DataFrame(self.overwritten_data, columns=self.existing_sample_columns) + df_new = op.do_op(self.dispatcher, df_test, self.file_name) + + self.assertTrue(list(df_new.columns) == list(self.existing_sample_columns), + "numbered_events should have the same columns as original dataframe in case of overwrite") + self.assertTrue(len(df_new) == len(df_test), + "numbered_events should have same length as original dataframe") + self.assertTrue(np.nanmax(df_new["number"]) == 5.0, + "max value in numbered_events should match the number of groups") + df_new = df_new.fillna('n/a') + self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), + "numbered_events should not differ from check") + + # Test that df has not been changed by the op + self.assertTrue(list(df.columns) == list(df_test.columns), + "split_rows should not change the input df columns") + self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), + "split_rows should not change the input df values") + + +if __name__ == '__main__': + unittest.main() + diff --git a/tests/tools/remodeling/operations/test_number_rows_op.py b/tests/tools/remodeling/operations/test_number_rows_op.py index 26cf50acc..6c446e872 100644 --- a/tests/tools/remodeling/operations/test_number_rows_op.py +++ b/tests/tools/remodeling/operations/test_number_rows_op.py @@ -1,4 +1,6 @@ import json +import numpy as np +import pandas as pd import unittest from hed.tools.remodeling.operations.number_rows_op import NumberRowsOp @@ -178,26 +180,26 @@ def test_number_rows_new_column(self): parms = json.loads(self.json_parms) op = NumberRowsOp(parms) self.assertIsInstance(op, NumberRowsOp) - # df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_check = pd.DataFrame(self.numbered_data, columns=self.numbered_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - # df_new = df_new.fillna('n/a') - - # self.assertTrue(list(df_new.columns) == list(df_check.columns), - # "numbered_events should have the expected columns") - # self.assertTrue(len(df_new) == len(df_test), - # "numbered_events should have same length as original dataframe") - # self.assertTrue(all([i + 1 == value for (i, value) in enumerate(df_new[parms['number_column_name']])]), - # "event should be numbered consecutively from 1 to length of the dataframe") - # self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - # "numbered_events should not differ from check") - - # # Test that df has not been changed by the op - # self.assertTrue(list(df.columns) == list(df_test.columns), - # "number_rows should not change the input df columns") - # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), - # "number_rows should not change the input df values") + df = pd.DataFrame(self.sample_data, columns=self.sample_columns) + df_check = pd.DataFrame(self.numbered_data, columns=self.numbered_columns) + df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) + df_new = op.do_op(self.dispatcher, df_test, self.file_name) + df_new = df_new.fillna('n/a') + + self.assertTrue(list(df_new.columns) == list(df_check.columns), + "numbered_events should have the expected columns") + self.assertTrue(len(df_new) == len(df_test), + "numbered_events should have same length as original dataframe") + self.assertTrue(all([i + 1 == value for (i, value) in enumerate(df_new[parms['number_column_name']])]), + "event should be numbered consecutively from 1 to length of the dataframe") + self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), + "numbered_events should not differ from check") + + # Test that df has not been changed by the op + self.assertTrue(list(df.columns) == list(df_test.columns), + "number_rows should not change the input df columns") + self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), + "number_rows should not change the input df values") if __name__ == '__main__': From c205231873e8826928446c0bda91797a43ef4b93 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Sep 2025 19:33:27 +0000 Subject: [PATCH 4/4] Fix flake8 linting issues in number_rows_op and number_groups_op Co-authored-by: VisLab <1189050+VisLab@users.noreply.github.com> --- .../remodeling/operations/number_groups_op.py | 92 ++++++++++++------- .../remodeling/operations/number_rows_op.py | 47 ++++++---- 2 files changed, 90 insertions(+), 49 deletions(-) diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index 0421e4b05..6c8bfb0b5 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -5,20 +5,30 @@ class NumberGroupsOp(BaseOp): - """ Number groups of rows in a dataframe based on start and stop markers. + """ Number groups of rows in a dataframe based on start and stop + markers. Required remodeling parameters: - - **number_column_name** (*str*): The name of the column to add with the group numbers. - - **source_column** (*str*): The column to check for start and stop markers. + - **number_column_name** (*str*): The name of the column to add + with the group numbers. + - **source_column** (*str*): The column to check for start and + stop markers. - **start** (*dict*): Specification for start markers. - - **values** (*list*): List of values that mark the start of a group. - - **inclusion** (*str*): Either "include" or "exclude" to specify whether the start marker row should be included in the group. + - **values** (*list*): List of values that mark the start of + a group. + - **inclusion** (*str*): Either "include" or "exclude" to + specify whether the start marker row should be included in + the group. - **stop** (*dict*): Specification for stop markers. - - **values** (*list*): List of values that mark the end of a group. - - **inclusion** (*str*): Either "include" or "exclude" to specify whether the stop marker row should be included in the group. + - **values** (*list*): List of values that mark the end of + a group. + - **inclusion** (*str*): Either "include" or "exclude" to + specify whether the stop marker row should be included in + the group. Optional remodeling parameters: - - **overwrite** (*bool*): If true, overwrite an existing column with the same name. + - **overwrite** (*bool*): If true, overwrite an existing column + with the same name. """ NAME = "number_groups" @@ -101,82 +111,100 @@ def do_op(self, dispatcher, df, name, sidecar=None): Parameters: dispatcher (Dispatcher): Manages the operation I/O. df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. + name (str): Unique identifier for the dataframe -- often the + original file path. sidecar (Sidecar or file-like): Only needed for HED operations. Returns: Dataframe: A new dataframe after processing. """ - # check if number_column_name exists and if so, check overwrite setting + # check if number_column_name exists and if so, check overwrite + # setting if self.number_column_name in df.columns: if self.overwrite is False: - raise ValueError("ExistingNumberColumn", - f"Column {self.number_column_name} already exists in event file.", "") + raise ValueError( + "ExistingNumberColumn", + f"Column {self.number_column_name} already exists " + f"in event file.", "") # check if source_column exists if self.source_column not in df.columns: - raise ValueError("MissingSourceColumn", - f"Column {self.source_column} does not exist in event file {name}.", "") + raise ValueError( + "MissingSourceColumn", + f"Column {self.source_column} does not exist in event " + f"file {name}.", "") - # check if all elements in value lists start and stop exist in the source_column + # check if all elements in value lists start and stop exist in + # the source_column missing = [] for element in self.start['values']: if element not in df[self.source_column].tolist(): missing.append(element) if len(missing) > 0: - raise ValueError("MissingValue", - f"Start value(s) {missing} does not exist in {self.source_column} of event file {name}") + raise ValueError( + "MissingValue", + f"Start value(s) {missing} does not exist in " + f"{self.source_column} of event file {name}") missing = [] for element in self.stop['values']: if element not in df[self.source_column].tolist(): missing.append(element) if len(missing) > 0: - raise ValueError("MissingValue", - f"Start value(s) {missing} does not exist in {self.source_column} of event file {name}") + raise ValueError( + "MissingValue", + f"Start value(s) {missing} does not exist in " + f"{self.source_column} of event file {name}") df_new = df.copy() df_new[self.number_column_name] = np.nan - + # Track current group number and whether we're inside a group current_group = 0 in_group = False - + for idx in range(len(df_new)): - # Use the original df to read source values in case we're overwriting the source column + # Use the original df to read source values in case we're + # overwriting the source column value = df.iloc[idx][self.source_column] - + # Check if this is a start marker if value in self.start['values']: - if not in_group: # Start a new group only if not already in one + if not in_group: # Start a new group only if not already + # in one current_group += 1 in_group = True if self.start['inclusion'] == 'include': - df_new.at[idx, self.number_column_name] = current_group + df_new.at[idx, self.number_column_name] = \ + current_group # If already in a group and this is a start marker: - # - If inclusion is 'exclude', it acts as both end and start + # - If inclusion is 'exclude', it acts as both end and + # start elif self.start['inclusion'] == 'exclude': - # This marker ends the previous group and starts a new one + # This marker ends the previous group and starts a + # new one current_group += 1 # Don't assign the number to this row (it's excluded) continue - + # Check if this is a stop marker if value in self.stop['values']: if in_group: if self.stop['inclusion'] == 'include': - df_new.at[idx, self.number_column_name] = current_group + df_new.at[idx, self.number_column_name] = \ + current_group in_group = False continue - + # Regular row - if in group, assign current group number if in_group: df_new.at[idx, self.number_column_name] = current_group - + return df_new @staticmethod def validate_input_data(parameters): - """ Additional validation required of operation parameters not performed by JSON schema validator. """ + """ Additional validation required of operation parameters not + performed by JSON schema validator. """ return [] diff --git a/hed/tools/remodeling/operations/number_rows_op.py b/hed/tools/remodeling/operations/number_rows_op.py index ef3d572f6..fb327a68c 100644 --- a/hed/tools/remodeling/operations/number_rows_op.py +++ b/hed/tools/remodeling/operations/number_rows_op.py @@ -8,11 +8,14 @@ class NumberRowsOp(BaseOp): """ Number rows in a dataframe based on optional criteria. Required remodeling parameters: - - **number_column_name** (*str*): The name of the column to add with the row numbers. + - **number_column_name** (*str*): The name of the column to add + with the row numbers. Optional remodeling parameters: - - **overwrite** (*bool*): If true, overwrite an existing column with the same name. - - **match_value** (*dict*): If provided, only number rows where the specified column matches the specified value. + - **overwrite** (*bool*): If true, overwrite an existing column + with the same name. + - **match_value** (*dict*): If provided, only number rows where + the specified column matches the specified value. - **column** (*str*): The column name to match. - **value** (*str* or *number*): The value to match. @@ -66,8 +69,9 @@ def do_op(self, dispatcher, df, name, sidecar=None): Parameters: dispatcher (Dispatcher): Manages operation I/O. df (DataFrame): - The DataFrame to be remodeled. - name (str): - Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Only needed for HED operations. + name (str): - Unique identifier for the dataframe -- often + the original file path. + sidecar (Sidecar or file-like): Only needed for HED operations. Returns: Dataframe: A new dataframe after processing. @@ -75,24 +79,32 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ if self.number_column_name in df.columns: if self.overwrite is False: - raise ValueError("ExistingNumberColumn", - f"Column {self.number_column_name} already exists in event file.", "") + raise ValueError( + "ExistingNumberColumn", + f"Column {self.number_column_name} already exists " + f"in event file.", "") if self.match_value: if self.match_value['column'] not in df.columns: - raise ValueError("MissingMatchColumn", - f"Column {self.match_value['column']} does not exist in event file.", "") - if self.match_value['value'] not in df[self.match_value['column']].tolist(): - raise ValueError("MissingMatchValue", - f"Value {self.match_value['value']} does not exist in event file column" - f"{self.match_value['column']}.", "") + raise ValueError( + "MissingMatchColumn", + f"Column {self.match_value['column']} does not " + f"exist in event file.", "") + if self.match_value['value'] not in \ + df[self.match_value['column']].tolist(): + raise ValueError( + "MissingMatchValue", + f"Value {self.match_value['value']} does not exist " + f"in event file column " + f"{self.match_value['column']}.", "") df_new = df.copy() df_new[self.number_column_name] = np.nan if self.match_value: - filter = df[self.match_value['column']] == self.match_value['value'] - numbers = [*range(1, sum(filter)+1)] - df_new.loc[filter, self.number_column_name] = numbers + filter_mask = \ + df[self.match_value['column']] == self.match_value['value'] + numbers = [*range(1, sum(filter_mask)+1)] + df_new.loc[filter_mask, self.number_column_name] = numbers else: df_new[self.number_column_name] = df_new.index + 1 @@ -100,5 +112,6 @@ def do_op(self, dispatcher, df, name, sidecar=None): @staticmethod def validate_input_data(parameters): - """ Additional validation required of operation parameters not performed by JSON schema validator. """ + """ Additional validation required of operation parameters not + performed by JSON schema validator. """ return []