From 744a237a9fdda1e316ba2ef97d2a178fca003eca Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Wed, 12 Jun 2024 15:23:35 -0500
Subject: [PATCH 1/3] optimize KeyMap:_remap

---
 hed/tools/analysis/key_map.py | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py
index 39a0ccf19..62b9bd25f 100644
--- a/hed/tools/analysis/key_map.py
+++ b/hed/tools/analysis/key_map.py
@@ -2,6 +2,7 @@
 
 
 import pandas as pd
+import numpy as np
 from hed.errors.exceptions import HedFileError
 from hed.tools.util import data_util
 
@@ -128,27 +129,32 @@ def remap(self, data):
         return df_new, missing_indices
 
     def _remap(self, df):
-        """ Utility method that iterates through dataframes to do the remapping.
+        """ Utility method that does the remapping
 
         Parameters:
             df (DataFrame):    DataFrame in which to perform the mapping.
 
         Returns:
             list:  The row numbers that had no correspondence in the mapping.
-
         """
+        key_series = df.apply(lambda row: data_util.get_row_hash(row, self.key_cols), axis=1)
+
+        # Add a column containing the mapped index for each row
+        map_series = pd.Series(self.map_dict)
+        key_values = key_series.map(map_series)
+        merged_df = df.assign(key_value=key_values.values)
+
+        # Add new columns with the updated values
+        remapped_df = pd.merge(merged_df, self.col_map, left_on='key_value', right_index=True,
+                               suffixes=('', '_new'), how='left').fillna("n/a")
+
+        # Override the original columns with our newly calculated ones
+        for col in self.target_cols:
+            df[col] = remapped_df[col + '_new']
+
+        # Finally calculate missing indices
+        missing_indices = key_series.index[key_values.isna()].tolist()
 
-        missing_indices = []
-        for index, row in df.iterrows():
-            key = data_util.get_row_hash(row, self.key_cols)
-            key_value = self.map_dict.get(key, None)
-            if key_value is not None:
-                result = self.col_map.iloc[key_value]
-                row[self.target_cols] = result[self.target_cols].values
-                new_index = df.index.get_loc(index)  # In case index and location don't agree.
-                df.iloc[new_index] = row
-            else:
-                missing_indices.append(index)
         return missing_indices
 
     def resort(self):

From 51c54a22946bf17030a2bc2c68c68b820bfe2fc6 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Wed, 12 Jun 2024 15:24:21 -0500
Subject: [PATCH 2/3] Remove extraneous import

---
 hed/tools/analysis/key_map.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py
index 62b9bd25f..160279942 100644
--- a/hed/tools/analysis/key_map.py
+++ b/hed/tools/analysis/key_map.py
@@ -2,7 +2,6 @@
 
 
 import pandas as pd
-import numpy as np
 from hed.errors.exceptions import HedFileError
 from hed.tools.util import data_util
 

From 69e627594429662dc42ec95e0b757acb44a0f359 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Wed, 12 Jun 2024 16:32:27 -0500
Subject: [PATCH 3/3] Add some more clarifying comments

---
 hed/tools/analysis/key_map.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py
index 160279942..21dc7137b 100644
--- a/hed/tools/analysis/key_map.py
+++ b/hed/tools/analysis/key_map.py
@@ -1,6 +1,5 @@
 """ A map of column value keys into new column values. """
 
-
 import pandas as pd
 from hed.errors.exceptions import HedFileError
 from hed.tools.util import data_util
@@ -18,6 +17,7 @@ class KeyMap:
     The remapping does not support other types of columns.
 
     """
+
     def __init__(self, key_cols, target_cols=None, name=''):
         """ Information for remapping columns of tabular files.
 
@@ -137,13 +137,17 @@ def _remap(self, df):
             list:  The row numbers that had no correspondence in the mapping.
         """
         key_series = df.apply(lambda row: data_util.get_row_hash(row, self.key_cols), axis=1)
+        # key_series now contains index_label: hash for each row in the dataframe
 
         # Add a column containing the mapped index for each row
-        map_series = pd.Series(self.map_dict)
-        key_values = key_series.map(map_series)
+        map_series = pd.Series(self.map_dict)  # map_series is hash:map_dict_index for each entry in map_dict
+        key_values = key_series.map(map_series)  # key_values is df_row_number:map_dict_index
+        # e.g. a key_values entry of 0:79 means row 0 of df maps to index 79 of col_map
+
+        # This adds the map_dict_index values to merged_df as a new column "key_value"
         merged_df = df.assign(key_value=key_values.values)
 
-        # Add new columns with the updated values
+        # Left-join col_map onto merged_df, matching key_value against col_map's index; overlapping columns get "_new"
         remapped_df = pd.merge(merged_df, self.col_map, left_on='key_value', right_index=True,
                                suffixes=('', '_new'), how='left').fillna("n/a")