From 44e4a5db1b19a03d748f4d2dcc4a1e59cc1b680a Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Wed, 19 Nov 2025 09:59:05 -0800 Subject: [PATCH 01/11] add return type hints --- openfecli/commands/gather.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 65da35fe0..80098351c 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -254,7 +254,7 @@ def _generate_bad_legs_error_message(bad_legs: list[tuple[set[str], tuple[str]]] return msg -def _get_ddgs(legs: dict, allow_partial=False) -> None: +def _get_ddgs(legs: dict, allow_partial=False) -> list[tuple]: import numpy as np from openfe.protocols.openmm_rfe.equil_rfe_methods import ( @@ -319,7 +319,7 @@ def _get_ddgs(legs: dict, allow_partial=False) -> None: return DDGs -def _generate_ddg(legs: dict, allow_partial: bool) -> None: +def _generate_ddg(legs: dict, allow_partial: bool) -> pd.DataFrame: """Compute and write out DDG values for the given legs. Parameters @@ -348,7 +348,7 @@ def _generate_ddg(legs: dict, allow_partial: bool) -> None: return df -def _generate_raw(legs: dict, allow_partial=True) -> None: +def _generate_raw(legs: dict, allow_partial=True) -> pd.DataFrame: """ Write out all legs found and their DG values, or indicate that they have failed. @@ -394,7 +394,7 @@ def _check_legs_have_sufficient_repeats(legs): sys.exit(1) -def _generate_dg_mle(legs: dict, allow_partial: bool) -> None: +def _generate_dg_mle(legs: dict, allow_partial: bool) -> pd.DataFrame: """Compute and write out DG values for the given legs. 
Parameters From 7f70c4d8953eb943af6453957f66aedd598d1074 Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Wed, 19 Nov 2025 11:31:55 -0800 Subject: [PATCH 02/11] first pass at using pd dataframes --- openfecli/commands/gather.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 80098351c..fdedc75e1 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -254,7 +254,7 @@ def _generate_bad_legs_error_message(bad_legs: list[tuple[set[str], tuple[str]]] return msg -def _get_ddgs(legs: dict, allow_partial=False) -> list[tuple]: +def _get_ddgs(legs: dict, allow_partial=False) -> pd.DataFrame: import numpy as np from openfe.protocols.openmm_rfe.equil_rfe_methods import ( @@ -263,7 +263,7 @@ def _get_ddgs(legs: dict, allow_partial=False) -> list[tuple]: # TODO: if there's a failed edge but other valid results in a leg, ddgs will be computed # only fails if there are no valid results - DDGs = [] + data = [] bad_legs = [] for ligpair, vals in sorted(legs.items()): leg_types = set(vals) @@ -299,9 +299,9 @@ def _get_ddgs(legs: dict, allow_partial=False) -> list[tuple]: if not do_rbfe and not do_rhfe: bad_legs.append((*ligpair, leg_types)) - DDGs.append((*ligpair, None, None, None, None)) + data.append((*ligpair, None, None, None, None)) else: - DDGs.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc)) + data.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc)) if bad_legs: err_msg = _generate_bad_legs_error_message(bad_legs) @@ -316,7 +316,11 @@ def _get_ddgs(legs: dict, allow_partial=False) -> list[tuple]: ) click.secho(err_msg, err=True, fg="red") sys.exit(1) - return DDGs + df_ddg = pd.DataFrame( + data, + columns=["ligand_i", "ligand_j", "DDG_bind", "bind_unc", "DDG_hyd", "hyd_unc"], + ) + return df_ddg def _generate_ddg(legs: dict, allow_partial: bool) -> pd.DataFrame: @@ -332,14 +336,15 @@ def _generate_ddg(legs: dict, 
allow_partial: bool) -> pd.DataFrame: """ DDGs = _get_ddgs(legs, allow_partial=allow_partial) data = [] - for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs: - if DDGbind is not None: + for _, row in DDGs.iterrows(): + ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc = row.to_list() + if not pd.isna(DDGbind): DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, bind_unc) data.append((ligA, ligB, DDGbind, bind_unc)) - if DDGhyd is not None: + if not pd.isna(DDGhyd): DDGhyd, hyd_unc = format_estimate_uncertainty(DDGhyd, hyd_unc) data.append((ligA, ligB, DDGhyd, hyd_unc)) - elif DDGbind is None and DDGhyd is None: + elif pd.isna(DDGbind) and pd.isna(DDGhyd): data.append((ligA, ligB, FAIL_STR, FAIL_STR)) df = pd.DataFrame( data, @@ -419,12 +424,13 @@ def _generate_dg_mle(legs: dict, allow_partial: bool) -> pd.DataFrame: g = nx.DiGraph() nm_to_idx = {} DDGbind_count = 0 - for ligA, ligB, DDGbind, bind_unc, _, _ in DDGs: + for _, row in DDGs.iterrows(): + ligA, ligB, DDGbind, bind_unc, _, _ = row.to_list() for lig in (ligA, ligB): if lig not in expected_ligs: expected_ligs.append(lig) - if DDGbind is None or DDGbind == FAIL_STR: + if pd.isna(DDGbind) or DDGbind == FAIL_STR: continue DDGbind_count += 1 From 79383e25dcf2f621e4b42fc48d479a0b8e0e3ebe Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Wed, 19 Nov 2025 12:42:39 -0800 Subject: [PATCH 03/11] add helper function --- openfecli/commands/gather.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index fdedc75e1..77b35b41b 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -80,6 +80,40 @@ def format_estimate_uncertainty( return est_str, unc_str +def format_df_with_precision( + df: pd.DataFrame, est_col_name: str, unc_col_name: str, precision: int = 1 +) -> pd.DataFrame: + """ + Formats the columns `est_col_name` and `unc_col_name` as strings reported to the given precision. 
+ + Parameters + ---------- + df : pd.DataFrame + _description_ + est_col_name : str + _description_ + unc_col_name : str + _description_ + precision : int, optional + _description_, by default 1 + + Returns + ------- + pd.DataFrame + _description_ + """ + df_out = df.copy() # we don't want to modify the original df + df_out[["DG (kcal/mol)", "uncertainty (kcal/mol)"]] = df_out.apply( + lambda row: format_estimate_uncertainty( + est=row[est_col_name], unc=row[unc_col_name], unc_prec=precision + ), + axis=1, + result_type="expand", + ) + + return df_out + + def is_results_json(fpath: os.PathLike | str) -> bool: """Sanity check that file is a result json before we try to deserialize""" return "estimate" in open(fpath, "r").read(20) From 961d7bccec4d515f42365661d9b31fa00b8ef7c0 Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Wed, 19 Nov 2025 16:29:10 -0800 Subject: [PATCH 04/11] it works but is ugly --- openfecli/commands/gather.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 77b35b41b..32e6160c7 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -102,15 +102,24 @@ def format_df_with_precision( pd.DataFrame _description_ """ + + def format_results_entries(est, unc, unc_prec): + if isinstance(est, float) and isinstance(unc, float): + return format_estimate_uncertainty(est=est, unc=unc, unc_prec=unc_prec) + else: + return est, unc + df_out = df.copy() # we don't want to modify the original df - df_out[["DG (kcal/mol)", "uncertainty (kcal/mol)"]] = df_out.apply( - lambda row: format_estimate_uncertainty( + + # only_floats = df_out[est_col_name].apply(lambda x: isinstance(x,float)) + + df_out[[est_col_name, unc_col_name]] = df_out.apply( + lambda row: format_results_entries( est=row[est_col_name], unc=row[unc_col_name], unc_prec=precision ), axis=1, result_type="expand", ) - return df_out @@ -373,10 +382,8 @@ def 
_generate_ddg(legs: dict, allow_partial: bool) -> pd.DataFrame: for _, row in DDGs.iterrows(): ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc = row.to_list() if not pd.isna(DDGbind): - DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, bind_unc) data.append((ligA, ligB, DDGbind, bind_unc)) if not pd.isna(DDGhyd): - DDGhyd, hyd_unc = format_estimate_uncertainty(DDGhyd, hyd_unc) data.append((ligA, ligB, DDGhyd, hyd_unc)) elif pd.isna(DDGbind) and pd.isna(DDGhyd): data.append((ligA, ligB, FAIL_STR, FAIL_STR)) @@ -384,7 +391,8 @@ def _generate_ddg(legs: dict, allow_partial: bool) -> pd.DataFrame: data, columns=["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)", "uncertainty (kcal/mol)"], ) - return df + df_out = format_df_with_precision(df, "DDG(i->j) (kcal/mol)", "uncertainty (kcal/mol)") + return df_out def _generate_raw(legs: dict, allow_partial=True) -> pd.DataFrame: @@ -406,7 +414,7 @@ def _generate_raw(legs: dict, allow_partial=True) -> pd.DataFrame: if m is None: m, u = FAIL_STR, FAIL_STR else: - m, u = format_estimate_uncertainty(m.m, u.m) + m, u = (m.m, u.m) data.append((simtype, ligpair[0], ligpair[1], m, u)) df = pd.DataFrame( @@ -419,7 +427,9 @@ def _generate_raw(legs: dict, allow_partial=True) -> pd.DataFrame: "MBAR uncertainty (kcal/mol)", ], ) - return df + df_out = format_df_with_precision(df, "DG(i->j) (kcal/mol)", "MBAR uncertainty (kcal/mol)") + + return df_out def _check_legs_have_sufficient_repeats(legs): @@ -516,7 +526,6 @@ def _generate_dg_mle(legs: dict, allow_partial: bool) -> pd.DataFrame: data = [] for ligA, DG, unc_DG in MLEs: - DG, unc_DG = format_estimate_uncertainty(DG, unc_DG) data.append({"ligand": ligA, "DG(MLE) (kcal/mol)": DG, "uncertainty (kcal/mol)": unc_DG}) expected_ligs.remove(ligA) @@ -524,7 +533,9 @@ def _generate_dg_mle(legs: dict, allow_partial: bool) -> pd.DataFrame: data.append({"ligand": ligA, "DG(MLE) (kcal/mol)": FAIL_STR, "uncertainty (kcal/mol)": FAIL_STR}) # fmt: skip df = pd.DataFrame(data) - return df + df_out = 
format_df_with_precision(df, "DG(i->j) (kcal/mol)", "MBAR uncertainty (kcal/mol)") + + return df_out def _collect_result_jsons(results: List[os.PathLike | str]) -> List[pathlib.Path]: From fa812ccbcc235097123492b3800c899d224c3104 Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Wed, 19 Nov 2025 16:34:41 -0800 Subject: [PATCH 05/11] fix copy pasta --- openfecli/commands/gather.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 32e6160c7..8e85faa31 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -533,7 +533,7 @@ def _generate_dg_mle(legs: dict, allow_partial: bool) -> pd.DataFrame: data.append({"ligand": ligA, "DG(MLE) (kcal/mol)": FAIL_STR, "uncertainty (kcal/mol)": FAIL_STR}) # fmt: skip df = pd.DataFrame(data) - df_out = format_df_with_precision(df, "DG(i->j) (kcal/mol)", "MBAR uncertainty (kcal/mol)") + df_out = format_df_with_precision(df, "DG(MLE) (kcal/mol)", "uncertainty (kcal/mol)") return df_out From a717611142263cb975a6be2fd0438e74f7819016 Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Wed, 19 Nov 2025 17:50:43 -0800 Subject: [PATCH 06/11] do it better with pandas --- openfecli/commands/gather.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 8e85faa31..8b0cd178d 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -81,7 +81,7 @@ def format_estimate_uncertainty( def format_df_with_precision( - df: pd.DataFrame, est_col_name: str, unc_col_name: str, precision: int = 1 + df: pd.DataFrame, est_col_name: str, unc_col_name: str, unc_prec: int = 1 ) -> pd.DataFrame: """ Formats the columns `est_col_name` and `unc_col_name` as strings reported to the given precision. 
@@ -103,24 +103,19 @@ def format_df_with_precision( _description_ """ - def format_results_entries(est, unc, unc_prec): - if isinstance(est, float) and isinstance(unc, float): - return format_estimate_uncertainty(est=est, unc=unc, unc_prec=unc_prec) - else: - return est, unc - - df_out = df.copy() # we don't want to modify the original df - - # only_floats = df_out[est_col_name].apply(lambda x: isinstance(x,float)) - - df_out[[est_col_name, unc_col_name]] = df_out.apply( - lambda row: format_results_entries( - est=row[est_col_name], unc=row[unc_col_name], unc_prec=precision - ), + float_mask = df[est_col_name].apply(lambda x: isinstance(x, float)) + df_floats_formatted = df[float_mask].apply( + lambda row: format_estimate_uncertainty(row[est_col_name], row[unc_col_name], unc_prec), axis=1, result_type="expand", ) - return df_out + df[[est_col_name, unc_col_name]] = df[[est_col_name, unc_col_name]].astype(str) + if df_floats_formatted.empty: + pass + else: + df.loc[float_mask, [est_col_name, unc_col_name]] = df_floats_formatted.values + + return df def is_results_json(fpath: os.PathLike | str) -> bool: From 75b0f239d4262c64cd94cee6a71a3edcb2a57f77 Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Wed, 19 Nov 2025 17:52:56 -0800 Subject: [PATCH 07/11] docstrings --- openfecli/commands/gather.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 8b0cd178d..a5e93f7e0 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -102,14 +102,18 @@ def format_df_with_precision( pd.DataFrame _description_ """ - + # we only want to round/format the floats (not any error strings getting passed through) float_mask = df[est_col_name].apply(lambda x: isinstance(x, float)) df_floats_formatted = df[float_mask].apply( lambda row: format_estimate_uncertainty(row[est_col_name], row[unc_col_name], unc_prec), axis=1, result_type="expand", ) + + # explicitly cast to string to make 
pandas happy df[[est_col_name, unc_col_name]] = df[[est_col_name, unc_col_name]].astype(str) + + # if there are no floats, assigning an empty array will break things if df_floats_formatted.empty: pass else: From cafa5ef61d0f5e3a84af77e99ed3a222cb9b3bfa Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Thu, 20 Nov 2025 09:34:32 -0800 Subject: [PATCH 08/11] var names --- openfecli/commands/gather.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index a5e93f7e0..adbd44da0 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -305,7 +305,7 @@ def _get_ddgs(legs: dict, allow_partial=False) -> pd.DataFrame: # TODO: if there's a failed edge but other valid results in a leg, ddgs will be computed # only fails if there are no valid results - data = [] + DDGs = [] bad_legs = [] for ligpair, vals in sorted(legs.items()): leg_types = set(vals) @@ -341,9 +341,9 @@ def _get_ddgs(legs: dict, allow_partial=False) -> pd.DataFrame: if not do_rbfe and not do_rhfe: bad_legs.append((*ligpair, leg_types)) - data.append((*ligpair, None, None, None, None)) + DDGs.append((*ligpair, None, None, None, None)) else: - data.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc)) + DDGs.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc)) if bad_legs: err_msg = _generate_bad_legs_error_message(bad_legs) @@ -359,7 +359,7 @@ def _get_ddgs(legs: dict, allow_partial=False) -> pd.DataFrame: click.secho(err_msg, err=True, fg="red") sys.exit(1) df_ddg = pd.DataFrame( - data, + DDGs, columns=["ligand_i", "ligand_j", "DDG_bind", "bind_unc", "DDG_hyd", "hyd_unc"], ) return df_ddg From c498b8f62f9d5c14885ac2a607fd01e7e97ce99b Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Fri, 21 Nov 2025 10:46:25 -0800 Subject: [PATCH 09/11] add docstring --- openfecli/commands/gather.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git 
a/openfecli/commands/gather.py b/openfecli/commands/gather.py index adbd44da0..af01cb4c5 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -84,23 +84,32 @@ def format_df_with_precision( df: pd.DataFrame, est_col_name: str, unc_col_name: str, unc_prec: int = 1 ) -> pd.DataFrame: """ - Formats the columns `est_col_name` and `unc_col_name` as strings reported to the given precision. + Returns a new DataFrame with the columns `est_col_name` and `unc_col_name` formatted as strings reported to `unc_prec` precision. + + The uncertainty column will be rounded to `unc_prec` precision, then the estimate column will be reported to the same precision. Parameters ---------- df : pd.DataFrame - _description_ + DataFrame to format est_col_name : str - _description_ + Name of the column containing estimates to format. unc_col_name : str - _description_ - precision : int, optional - _description_, by default 1 + Name of the column containing uncertainties to format. + + unc_prec : int, optional + Precision to round the uncertainty column to, by default 1. Returns ------- pd.DataFrame - _description_ + DataFrame with formatted uncertainty and estimate columns. 
+ + Example + ------- + + + """ # we only want to round/format the floats (not any error strings getting passed through) float_mask = df[est_col_name].apply(lambda x: isinstance(x, float)) From d791fabb201ba8ca1b815bdbab8389b8f02c60f9 Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Fri, 21 Nov 2025 10:56:29 -0800 Subject: [PATCH 10/11] add example to docstring --- openfecli/commands/gather.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index af01cb4c5..ca78c33b5 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -87,6 +87,7 @@ def format_df_with_precision( Returns a new DataFrame with the columns `est_col_name` and `unc_col_name` formatted as strings reported to `unc_prec` precision. The uncertainty column will be rounded to `unc_prec` precision, then the estimate column will be reported to the same precision. + Any entries that are not floats (such as strings indicating errors) will not be modified. 
Parameters ---------- @@ -107,8 +108,29 @@ def format_df_with_precision( Example ------- - - + >>> df + ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol) + 0 lig_ejm_31 lig_ejm_42 Error Error + 1 lig_ejm_31 lig_ejm_46 -0.89 0.06 + 2 lig_ejm_31 lig_ejm_47 0.0 0.1 + 3 lig_ejm_31 lig_ejm_48 0.61 0.09 + 4 lig_ejm_31 lig_ejm_50 1.00 0.04 + 5 lig_ejm_42 lig_ejm_43 1.4 0.2 + 6 lig_ejm_46 lig_jmc_23 0.29 0.09 + 7 lig_ejm_46 lig_jmc_27 -0.1 0.1 + 8 lig_ejm_46 lig_jmc_28 Error Error + >>> df_formatted = format_df_with_precision(df, "DG(i->j) (kcal/mol)", "uncertainty (kcal/mol)") + >>> df_formatted + ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol) + 0 lig_ejm_31 lig_ejm_42 Error Error + 1 lig_ejm_31 lig_ejm_46 -0.89 0.06 + 2 lig_ejm_31 lig_ejm_47 0.0 0.1 + 3 lig_ejm_31 lig_ejm_48 0.61 0.09 + 4 lig_ejm_31 lig_ejm_50 1.00 0.04 + 5 lig_ejm_42 lig_ejm_43 1.4 0.2 + 6 lig_ejm_46 lig_jmc_23 0.29 0.09 + 7 lig_ejm_46 lig_jmc_27 -0.1 0.1 + 8 lig_ejm_46 lig_jmc_28 Error Error """ # we only want to round/format the floats (not any error strings getting passed through) From b5a79c69e8b48fb9f83c49810e5af90927741f59 Mon Sep 17 00:00:00 2001 From: Alyssa Travitz Date: Fri, 21 Nov 2025 12:16:39 -0800 Subject: [PATCH 11/11] make string check more rigorous --- openfecli/commands/gather.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index ca78c33b5..489367cb0 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -111,15 +111,15 @@ def format_df_with_precision( >>> df ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol) 0 lig_ejm_31 lig_ejm_42 Error Error - 1 lig_ejm_31 lig_ejm_46 -0.89 0.06 - 2 lig_ejm_31 lig_ejm_47 0.0 0.1 - 3 lig_ejm_31 lig_ejm_48 0.61 0.09 - 4 lig_ejm_31 lig_ejm_50 1.00 0.04 - 5 lig_ejm_42 lig_ejm_43 1.4 0.2 - 6 lig_ejm_46 lig_jmc_23 0.29 0.09 - 7 lig_ejm_46 lig_jmc_27 -0.1 0.1 + 1 lig_ejm_31 
lig_ejm_46 -0.891077 0.064825 + 2 lig_ejm_31 lig_ejm_47 0.023341 0.145625 + 3 lig_ejm_31 lig_ejm_48 0.614103 0.088704 + 4 lig_ejm_31 lig_ejm_50 0.999904 0.044457 + 5 lig_ejm_42 lig_ejm_43 1.354348 0.156009 + 6 lig_ejm_46 lig_jmc_23 0.294761 0.086632 + 7 lig_ejm_46 lig_jmc_27 -0.101737 0.100997 8 lig_ejm_46 lig_jmc_28 Error Error - >>> df_formatted = format_df_with_precision(df, "DG(i->j) (kcal/mol)", "uncertainty (kcal/mol)") + >>> df_formatted = format_df_with_precision(df, "DDG(i->j) (kcal/mol)", "uncertainty (kcal/mol)") >>> df_formatted ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol) 0 lig_ejm_31 lig_ejm_42 Error Error @@ -133,9 +133,15 @@ def format_df_with_precision( 8 lig_ejm_46 lig_jmc_28 Error Error """ - # we only want to round/format the floats (not any error strings getting passed through) - float_mask = df[est_col_name].apply(lambda x: isinstance(x, float)) - df_floats_formatted = df[float_mask].apply( + + # find all entries in both columns that contain strings: + df_is_string = df[[est_col_name, unc_col_name]].applymap(lambda x: isinstance(x, str)) + + # if either the estimate or uncertainty entries are strings, don't format + no_strings_mask = ~(df_is_string[est_col_name] | df_is_string[unc_col_name]) + + # skip rows that contain strings and only round and format numerical values + df_floats_formatted = df[no_strings_mask].apply( lambda row: format_estimate_uncertainty(row[est_col_name], row[unc_col_name], unc_prec), axis=1, result_type="expand", @@ -148,7 +154,7 @@ def format_df_with_precision( if df_floats_formatted.empty: pass else: - df.loc[float_mask, [est_col_name, unc_col_name]] = df_floats_formatted.values + df.loc[no_strings_mask, [est_col_name, unc_col_name]] = df_floats_formatted.values return df