From 44e4a5db1b19a03d748f4d2dcc4a1e59cc1b680a Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Wed, 19 Nov 2025 09:59:05 -0800
Subject: [PATCH 01/11] add return type hints

---
 openfecli/commands/gather.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index 65da35fe0..80098351c 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -254,7 +254,7 @@ def _generate_bad_legs_error_message(bad_legs: list[tuple[set[str], tuple[str]]]
     return msg
 
 
-def _get_ddgs(legs: dict, allow_partial=False) -> None:
+def _get_ddgs(legs: dict, allow_partial=False) -> list[tuple]:
     import numpy as np
 
     from openfe.protocols.openmm_rfe.equil_rfe_methods import (
@@ -319,7 +319,7 @@ def _get_ddgs(legs: dict, allow_partial=False) -> None:
     return DDGs
 
 
-def _generate_ddg(legs: dict, allow_partial: bool) -> None:
+def _generate_ddg(legs: dict, allow_partial: bool) -> pd.DataFrame:
     """Compute and write out DDG values for the given legs.
 
     Parameters
@@ -348,7 +348,7 @@ def _generate_ddg(legs: dict, allow_partial: bool) -> None:
     return df
 
 
-def _generate_raw(legs: dict, allow_partial=True) -> None:
+def _generate_raw(legs: dict, allow_partial=True) -> pd.DataFrame:
     """
     Write out all legs found and their DG values, or indicate that they have failed.
 
@@ -394,7 +394,7 @@ def _check_legs_have_sufficient_repeats(legs):
                 sys.exit(1)
 
 
-def _generate_dg_mle(legs: dict, allow_partial: bool) -> None:
+def _generate_dg_mle(legs: dict, allow_partial: bool) -> pd.DataFrame:
     """Compute and write out DG values for the given legs.
 
     Parameters

From 7f70c4d8953eb943af6453957f66aedd598d1074 Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Wed, 19 Nov 2025 11:31:55 -0800
Subject: [PATCH 02/11] first pass at using pd dataframes

---
 openfecli/commands/gather.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index 80098351c..fdedc75e1 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -254,7 +254,7 @@ def _generate_bad_legs_error_message(bad_legs: list[tuple[set[str], tuple[str]]]
     return msg
 
 
-def _get_ddgs(legs: dict, allow_partial=False) -> list[tuple]:
+def _get_ddgs(legs: dict, allow_partial=False) -> pd.DataFrame:
     import numpy as np
 
     from openfe.protocols.openmm_rfe.equil_rfe_methods import (
@@ -263,7 +263,7 @@ def _get_ddgs(legs: dict, allow_partial=False) -> list[tuple]:
 
     # TODO: if there's a failed edge but other valid results in a leg, ddgs will be computed
     # only fails if there are no valid results
-    DDGs = []
+    data = []
     bad_legs = []
     for ligpair, vals in sorted(legs.items()):
         leg_types = set(vals)
@@ -299,9 +299,9 @@ def _get_ddgs(legs: dict, allow_partial=False) -> list[tuple]:
 
         if not do_rbfe and not do_rhfe:
             bad_legs.append((*ligpair, leg_types))
-            DDGs.append((*ligpair, None, None, None, None))
+            data.append((*ligpair, None, None, None, None))
         else:
-            DDGs.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc))
+            data.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc))
 
     if bad_legs:
         err_msg = _generate_bad_legs_error_message(bad_legs)
@@ -316,7 +316,11 @@ def _get_ddgs(legs: dict, allow_partial=False) -> list[tuple]:
             )
             click.secho(err_msg, err=True, fg="red")
             sys.exit(1)
-    return DDGs
+    df_ddg = pd.DataFrame(
+        data,
+        columns=["ligand_i", "ligand_j", "DDG_bind", "bind_unc", "DDG_hyd", "hyd_unc"],
+    )
+    return df_ddg
 
 
 def _generate_ddg(legs: dict, allow_partial: bool) -> pd.DataFrame:
@@ -332,14 +336,15 @@ def _generate_ddg(legs: dict, allow_partial: bool) -> pd.DataFrame:
     """
     DDGs = _get_ddgs(legs, allow_partial=allow_partial)
     data = []
-    for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs:
-        if DDGbind is not None:
+    for _, row in DDGs.iterrows():
+        ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc = row.to_list()
+        if not pd.isna(DDGbind):
             DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, bind_unc)
             data.append((ligA, ligB, DDGbind, bind_unc))
-        if DDGhyd is not None:
+        if not pd.isna(DDGhyd):
             DDGhyd, hyd_unc = format_estimate_uncertainty(DDGhyd, hyd_unc)
             data.append((ligA, ligB, DDGhyd, hyd_unc))
-        elif DDGbind is None and DDGhyd is None:
+        elif pd.isna(DDGbind) and pd.isna(DDGhyd):
             data.append((ligA, ligB, FAIL_STR, FAIL_STR))
     df = pd.DataFrame(
         data,
@@ -419,12 +424,13 @@ def _generate_dg_mle(legs: dict, allow_partial: bool) -> pd.DataFrame:
     g = nx.DiGraph()
     nm_to_idx = {}
     DDGbind_count = 0
-    for ligA, ligB, DDGbind, bind_unc, _, _ in DDGs:
+    for _, row in DDGs.iterrows():
+        ligA, ligB, DDGbind, bind_unc, _, _ = row.to_list()
         for lig in (ligA, ligB):
             if lig not in expected_ligs:
                 expected_ligs.append(lig)
 
-        if DDGbind is None or DDGbind == FAIL_STR:
+        if pd.isna(DDGbind) or DDGbind == FAIL_STR:
             continue
         DDGbind_count += 1
 

From 79383e25dcf2f621e4b42fc48d479a0b8e0e3ebe Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Wed, 19 Nov 2025 12:42:39 -0800
Subject: [PATCH 03/11] add helper function

---
 openfecli/commands/gather.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index fdedc75e1..77b35b41b 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -80,6 +80,40 @@ def format_estimate_uncertainty(
     return est_str, unc_str
 
 
+def format_df_with_precision(
+    df: pd.DataFrame, est_col_name: str, unc_col_name: str, precision: int = 1
+) -> pd.DataFrame:
+    """
+    Formats the columns `est_col_name` and `unc_col_name` as strings reported to the given precision.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        _description_
+    est_col_name : str
+        _description_
+    unc_col_name : str
+        _description_
+    precision : int, optional
+        _description_, by default 1
+
+    Returns
+    -------
+    pd.DataFrame
+        _description_
+    """
+    df_out = df.copy()  # we don't want to modify the original df
+    df_out[["DG (kcal/mol)", "uncertainty (kcal/mol)"]] = df_out.apply(
+        lambda row: format_estimate_uncertainty(
+            est=row[est_col_name], unc=row[unc_col_name], unc_prec=precision
+        ),
+        axis=1,
+        result_type="expand",
+    )
+
+    return df_out
+
+
 def is_results_json(fpath: os.PathLike | str) -> bool:
     """Sanity check that file is a result json before we try to deserialize"""
     return "estimate" in open(fpath, "r").read(20)

From 961d7bccec4d515f42365661d9b31fa00b8ef7c0 Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Wed, 19 Nov 2025 16:29:10 -0800
Subject: [PATCH 04/11] it works but is ugly

---
 openfecli/commands/gather.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index 77b35b41b..32e6160c7 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -102,15 +102,24 @@ def format_df_with_precision(
     pd.DataFrame
         _description_
     """
+
+    def format_results_entries(est, unc, unc_prec):
+        if isinstance(est, float) and isinstance(unc, float):
+            return format_estimate_uncertainty(est=est, unc=unc, unc_prec=unc_prec)
+        else:
+            return est, unc
+
     df_out = df.copy()  # we don't want to modify the original df
-    df_out[["DG (kcal/mol)", "uncertainty (kcal/mol)"]] = df_out.apply(
-        lambda row: format_estimate_uncertainty(
+
+    # only_floats =  df_out[est_col_name].apply(lambda x: isinstance(x,float))
+
+    df_out[[est_col_name, unc_col_name]] = df_out.apply(
+        lambda row: format_results_entries(
             est=row[est_col_name], unc=row[unc_col_name], unc_prec=precision
         ),
         axis=1,
         result_type="expand",
     )
-
     return df_out
 
 
@@ -373,10 +382,8 @@ def _generate_ddg(legs: dict, allow_partial: bool) -> pd.DataFrame:
     for _, row in DDGs.iterrows():
         ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc = row.to_list()
         if not pd.isna(DDGbind):
-            DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, bind_unc)
             data.append((ligA, ligB, DDGbind, bind_unc))
         if not pd.isna(DDGhyd):
-            DDGhyd, hyd_unc = format_estimate_uncertainty(DDGhyd, hyd_unc)
             data.append((ligA, ligB, DDGhyd, hyd_unc))
         elif pd.isna(DDGbind) and pd.isna(DDGhyd):
             data.append((ligA, ligB, FAIL_STR, FAIL_STR))
@@ -384,7 +391,8 @@ def _generate_ddg(legs: dict, allow_partial: bool) -> pd.DataFrame:
         data,
         columns=["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)", "uncertainty (kcal/mol)"],
     )
-    return df
+    df_out = format_df_with_precision(df, "DDG(i->j) (kcal/mol)", "uncertainty (kcal/mol)")
+    return df_out
 
 
 def _generate_raw(legs: dict, allow_partial=True) -> pd.DataFrame:
@@ -406,7 +414,7 @@ def _generate_raw(legs: dict, allow_partial=True) -> pd.DataFrame:
                     if m is None:
                         m, u = FAIL_STR, FAIL_STR
                     else:
-                        m, u = format_estimate_uncertainty(m.m, u.m)
+                        m, u = (m.m, u.m)
                     data.append((simtype, ligpair[0], ligpair[1], m, u))
 
     df = pd.DataFrame(
@@ -419,7 +427,9 @@ def _generate_raw(legs: dict, allow_partial=True) -> pd.DataFrame:
             "MBAR uncertainty (kcal/mol)",
         ],
     )
-    return df
+    df_out = format_df_with_precision(df, "DG(i->j) (kcal/mol)", "MBAR uncertainty (kcal/mol)")
+
+    return df_out
 
 
 def _check_legs_have_sufficient_repeats(legs):
@@ -516,7 +526,6 @@ def _generate_dg_mle(legs: dict, allow_partial: bool) -> pd.DataFrame:
 
     data = []
     for ligA, DG, unc_DG in MLEs:
-        DG, unc_DG = format_estimate_uncertainty(DG, unc_DG)
         data.append({"ligand": ligA, "DG(MLE) (kcal/mol)": DG, "uncertainty (kcal/mol)": unc_DG})
         expected_ligs.remove(ligA)
 
@@ -524,7 +533,9 @@ def _generate_dg_mle(legs: dict, allow_partial: bool) -> pd.DataFrame:
         data.append({"ligand": ligA, "DG(MLE) (kcal/mol)": FAIL_STR, "uncertainty (kcal/mol)": FAIL_STR})  # fmt: skip
 
     df = pd.DataFrame(data)
-    return df
+    df_out = format_df_with_precision(df, "DG(i->j) (kcal/mol)", "MBAR uncertainty (kcal/mol)")
+
+    return df_out
 
 
 def _collect_result_jsons(results: List[os.PathLike | str]) -> List[pathlib.Path]:

From fa812ccbcc235097123492b3800c899d224c3104 Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Wed, 19 Nov 2025 16:34:41 -0800
Subject: [PATCH 05/11] fix copy pasta

---
 openfecli/commands/gather.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index 32e6160c7..8e85faa31 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -533,7 +533,7 @@ def _generate_dg_mle(legs: dict, allow_partial: bool) -> pd.DataFrame:
         data.append({"ligand": ligA, "DG(MLE) (kcal/mol)": FAIL_STR, "uncertainty (kcal/mol)": FAIL_STR})  # fmt: skip
 
     df = pd.DataFrame(data)
-    df_out = format_df_with_precision(df, "DG(i->j) (kcal/mol)", "MBAR uncertainty (kcal/mol)")
+    df_out = format_df_with_precision(df, "DG(MLE) (kcal/mol)", "uncertainty (kcal/mol)")
 
     return df_out
 

From a717611142263cb975a6be2fd0438e74f7819016 Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Wed, 19 Nov 2025 17:50:43 -0800
Subject: [PATCH 06/11] do it better with pandas

---
 openfecli/commands/gather.py | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index 8e85faa31..8b0cd178d 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -81,7 +81,7 @@ def format_estimate_uncertainty(
 
 
 def format_df_with_precision(
-    df: pd.DataFrame, est_col_name: str, unc_col_name: str, precision: int = 1
+    df: pd.DataFrame, est_col_name: str, unc_col_name: str, unc_prec: int = 1
 ) -> pd.DataFrame:
     """
     Formats the columns `est_col_name` and `unc_col_name` as strings reported to the given precision.
@@ -103,24 +103,19 @@ def format_df_with_precision(
         _description_
     """
 
-    def format_results_entries(est, unc, unc_prec):
-        if isinstance(est, float) and isinstance(unc, float):
-            return format_estimate_uncertainty(est=est, unc=unc, unc_prec=unc_prec)
-        else:
-            return est, unc
-
-    df_out = df.copy()  # we don't want to modify the original df
-
-    # only_floats =  df_out[est_col_name].apply(lambda x: isinstance(x,float))
-
-    df_out[[est_col_name, unc_col_name]] = df_out.apply(
-        lambda row: format_results_entries(
-            est=row[est_col_name], unc=row[unc_col_name], unc_prec=precision
-        ),
+    float_mask = df[est_col_name].apply(lambda x: isinstance(x, float))
+    df_floats_formatted = df[float_mask].apply(
+        lambda row: format_estimate_uncertainty(row[est_col_name], row[unc_col_name], unc_prec),
         axis=1,
         result_type="expand",
     )
-    return df_out
+    df[[est_col_name, unc_col_name]] = df[[est_col_name, unc_col_name]].astype(str)
+    if df_floats_formatted.empty:
+        pass
+    else:
+        df.loc[float_mask, [est_col_name, unc_col_name]] = df_floats_formatted.values
+
+    return df
 
 
 def is_results_json(fpath: os.PathLike | str) -> bool:

From 75b0f239d4262c64cd94cee6a71a3edcb2a57f77 Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Wed, 19 Nov 2025 17:52:56 -0800
Subject: [PATCH 07/11] docstrings

---
 openfecli/commands/gather.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index 8b0cd178d..a5e93f7e0 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -102,14 +102,18 @@ def format_df_with_precision(
     pd.DataFrame
         _description_
     """
-
+    # we only want to round/format the floats (not any error strings getting passed through)
     float_mask = df[est_col_name].apply(lambda x: isinstance(x, float))
     df_floats_formatted = df[float_mask].apply(
         lambda row: format_estimate_uncertainty(row[est_col_name], row[unc_col_name], unc_prec),
         axis=1,
         result_type="expand",
     )
+
+    # explicitly cast to string to make pandas happy
     df[[est_col_name, unc_col_name]] = df[[est_col_name, unc_col_name]].astype(str)
+
+    # if there are no floats, assigning an empty array will break things
     if df_floats_formatted.empty:
         pass
     else:

From cafa5ef61d0f5e3a84af77e99ed3a222cb9b3bfa Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Thu, 20 Nov 2025 09:34:32 -0800
Subject: [PATCH 08/11] var names

---
 openfecli/commands/gather.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index a5e93f7e0..adbd44da0 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -305,7 +305,7 @@ def _get_ddgs(legs: dict, allow_partial=False) -> pd.DataFrame:
 
     # TODO: if there's a failed edge but other valid results in a leg, ddgs will be computed
     # only fails if there are no valid results
-    data = []
+    DDGs = []
     bad_legs = []
     for ligpair, vals in sorted(legs.items()):
         leg_types = set(vals)
@@ -341,9 +341,9 @@ def _get_ddgs(legs: dict, allow_partial=False) -> pd.DataFrame:
 
         if not do_rbfe and not do_rhfe:
             bad_legs.append((*ligpair, leg_types))
-            data.append((*ligpair, None, None, None, None))
+            DDGs.append((*ligpair, None, None, None, None))
         else:
-            data.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc))
+            DDGs.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc))
 
     if bad_legs:
         err_msg = _generate_bad_legs_error_message(bad_legs)
@@ -359,7 +359,7 @@ def _get_ddgs(legs: dict, allow_partial=False) -> pd.DataFrame:
             click.secho(err_msg, err=True, fg="red")
             sys.exit(1)
     df_ddg = pd.DataFrame(
-        data,
+        DDGs,
         columns=["ligand_i", "ligand_j", "DDG_bind", "bind_unc", "DDG_hyd", "hyd_unc"],
     )
     return df_ddg

From c498b8f62f9d5c14885ac2a607fd01e7e97ce99b Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Fri, 21 Nov 2025 10:46:25 -0800
Subject: [PATCH 09/11] add docstring

---
 openfecli/commands/gather.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index adbd44da0..af01cb4c5 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -84,23 +84,32 @@ def format_df_with_precision(
     df: pd.DataFrame, est_col_name: str, unc_col_name: str, unc_prec: int = 1
 ) -> pd.DataFrame:
     """
-    Formats the columns `est_col_name` and `unc_col_name` as strings reported to the given precision.
+    Returns a new DataFrame with the columns `est_col_name` and `unc_col_name` formatted as strings reported to `unc_prec` precision.
+
+    The uncertainty column will be rounded to `unc_prec` precision, then the estimate column will be reported to the same precision.
 
     Parameters
     ----------
     df : pd.DataFrame
-        _description_
+        DataFrame to format
     est_col_name : str
-        _description_
+        Name of the column containing estimates to format.
     unc_col_name : str
-        _description_
-    precision : int, optional
-        _description_, by default 1
+        Name of the column containing uncertainties to format.
+
+    unc_prec : int, optional
+        Precision to round the uncertainty column to, by default 1.
 
     Returns
     -------
     pd.DataFrame
-        _description_
+        DataFrame with formatted uncertainty and estimate columns.
+
+    Example
+    -------
+
+
+
     """
     # we only want to round/format the floats (not any error strings getting passed through)
     float_mask = df[est_col_name].apply(lambda x: isinstance(x, float))

From d791fabb201ba8ca1b815bdbab8389b8f02c60f9 Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Fri, 21 Nov 2025 10:56:29 -0800
Subject: [PATCH 10/11] add example to docstring

---
 openfecli/commands/gather.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index af01cb4c5..ca78c33b5 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -87,6 +87,7 @@ def format_df_with_precision(
     Returns a new DataFrame with the columns `est_col_name` and `unc_col_name` formatted as strings reported to `unc_prec` precision.
 
     The uncertainty column will be rounded to `unc_prec` precision, then the estimate column will be reported to the same precision.
+    Any entries that are not floats (such as strings indicating errors) will not be modified.
 
     Parameters
     ----------
@@ -107,8 +108,29 @@ def format_df_with_precision(
 
     Example
     -------
-
-
+    >>> df
+            ligand_i    ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
+        0  lig_ejm_31  lig_ejm_42                Error                  Error
+        1  lig_ejm_31  lig_ejm_46                -0.89                   0.06
+        2  lig_ejm_31  lig_ejm_47                  0.0                    0.1
+        3  lig_ejm_31  lig_ejm_48                 0.61                   0.09
+        4  lig_ejm_31  lig_ejm_50                 1.00                   0.04
+        5  lig_ejm_42  lig_ejm_43                  1.4                    0.2
+        6  lig_ejm_46  lig_jmc_23                 0.29                   0.09
+        7  lig_ejm_46  lig_jmc_27                 -0.1                    0.1
+        8  lig_ejm_46  lig_jmc_28                Error                  Error
+    >>> df_formatted = format_df_with_precision(df, "DG(i->j) (kcal/mol)", "uncertainty (kcal/mol)")
+    >>> df_formatted
+            ligand_i    ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
+        0  lig_ejm_31  lig_ejm_42                Error                  Error
+        1  lig_ejm_31  lig_ejm_46                -0.89                   0.06
+        2  lig_ejm_31  lig_ejm_47                  0.0                    0.1
+        3  lig_ejm_31  lig_ejm_48                 0.61                   0.09
+        4  lig_ejm_31  lig_ejm_50                 1.00                   0.04
+        5  lig_ejm_42  lig_ejm_43                  1.4                    0.2
+        6  lig_ejm_46  lig_jmc_23                 0.29                   0.09
+        7  lig_ejm_46  lig_jmc_27                 -0.1                    0.1
+        8  lig_ejm_46  lig_jmc_28                Error                  Error
 
     """
     # we only want to round/format the floats (not any error strings getting passed through)

From b5a79c69e8b48fb9f83c49810e5af90927741f59 Mon Sep 17 00:00:00 2001
From: Alyssa Travitz <alyssa.travitz@omsf.io>
Date: Fri, 21 Nov 2025 12:16:39 -0800
Subject: [PATCH 11/11] make string check more rigorous

---
 openfecli/commands/gather.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index ca78c33b5..489367cb0 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -111,15 +111,15 @@ def format_df_with_precision(
     >>> df
             ligand_i    ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
         0  lig_ejm_31  lig_ejm_42                Error                  Error
-        1  lig_ejm_31  lig_ejm_46                -0.89                   0.06
-        2  lig_ejm_31  lig_ejm_47                  0.0                    0.1
-        3  lig_ejm_31  lig_ejm_48                 0.61                   0.09
-        4  lig_ejm_31  lig_ejm_50                 1.00                   0.04
-        5  lig_ejm_42  lig_ejm_43                  1.4                    0.2
-        6  lig_ejm_46  lig_jmc_23                 0.29                   0.09
-        7  lig_ejm_46  lig_jmc_27                 -0.1                    0.1
+        1  lig_ejm_31  lig_ejm_46            -0.891077               0.064825
+        2  lig_ejm_31  lig_ejm_47             0.023341               0.145625
+        3  lig_ejm_31  lig_ejm_48             0.614103               0.088704
+        4  lig_ejm_31  lig_ejm_50             0.999904               0.044457
+        5  lig_ejm_42  lig_ejm_43             1.354348               0.156009
+        6  lig_ejm_46  lig_jmc_23             0.294761               0.086632
+        7  lig_ejm_46  lig_jmc_27            -0.101737               0.100997
         8  lig_ejm_46  lig_jmc_28                Error                  Error
-    >>> df_formatted = format_df_with_precision(df, "DG(i->j) (kcal/mol)", "uncertainty (kcal/mol)")
+    >>> df_formatted = format_df_with_precision(df, "DDG(i->j) (kcal/mol)", "uncertainty (kcal/mol)")
     >>> df_formatted
             ligand_i    ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
         0  lig_ejm_31  lig_ejm_42                Error                  Error
@@ -133,9 +133,15 @@ def format_df_with_precision(
         8  lig_ejm_46  lig_jmc_28                Error                  Error
 
     """
-    # we only want to round/format the floats (not any error strings getting passed through)
-    float_mask = df[est_col_name].apply(lambda x: isinstance(x, float))
-    df_floats_formatted = df[float_mask].apply(
+
+    # find all entries in both columns that contain strings:
+    df_is_string = df[[est_col_name, unc_col_name]].applymap(lambda x: isinstance(x, str))
+
+    # if either the estimate or uncertainty entries are strings, don't format
+    no_strings_mask = ~(df_is_string[est_col_name] | df_is_string[unc_col_name])
+
+    # skip rows that contain strings and only round and format numerical values
+    df_floats_formatted = df[no_strings_mask].apply(
         lambda row: format_estimate_uncertainty(row[est_col_name], row[unc_col_name], unc_prec),
         axis=1,
         result_type="expand",
@@ -148,7 +154,7 @@ def format_df_with_precision(
     if df_floats_formatted.empty:
         pass
     else:
-        df.loc[float_mask, [est_col_name, unc_col_name]] = df_floats_formatted.values
+        df.loc[no_strings_mask, [est_col_name, unc_col_name]] = df_floats_formatted.values
 
     return df