From 9d96b611b677fe2a8de96438f924791ccda7fd0d Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Mon, 18 Aug 2025 11:54:32 +0200
Subject: [PATCH 01/20] update POPC Amber14sb parameters

---
 src/QligFEP/FF/AMBER14sb.lib | 423 +++++++++++++++++++++++------------
 1 file changed, 286 insertions(+), 137 deletions(-)

diff --git a/src/QligFEP/FF/AMBER14sb.lib b/src/QligFEP/FF/AMBER14sb.lib
index 56ce8a2d..06dc6ffb 100644
--- a/src/QligFEP/FF/AMBER14sb.lib
+++ b/src/QligFEP/FF/AMBER14sb.lib
@@ -1,7 +1,6 @@
 *--------------------------------------------------------------------------------
 * Q-library: Amber ff14SB ported from AmberTools16 by M. Purg (October 2016)
-* by D. Araripe: N- & C- terminus amino acids, TPO
-* and by Wessel Porschen, POPC
+* by D. Araripe: N- & C- terminus amino acids, TPO, POPC (Lipid21)
 * v0.2
 *   Minor formatting changes
 * v0.1
@@ -3453,141 +3452,291 @@
         head N
         tail C
 *--------------------------------------------------------------------------------
-{POP} ! POPC values from Cordomí et al., 2012 https://doi.org/10.1021/ct200491c (Berger Lipid Parameters)
-    [info]
-        SYBYLtype  RESIDUE     !SYBYL substructure type
-    [atoms]
-         1 N4 	   LNL	      -0.5000
-         2 P8 	   LP          1.700
-         3 C12	   LC2	       0.400
-         4 O7 	   LOS	      -0.800
-         5 C13	   LH1	       0.300
-         6 O11 	   LOS	      -0.700
-         7 C32	   LC2	       0.50
-         8 O9 	   LOM	      -0.800
-         9 O10 	   LOM	      -0.800
-        10 C5	   LH2	       0.3000
-        11 C1	   LC3	       0.4000
-        12 C2	   LC3	       0.4000
-        13 C3	   LC3	       0.4000
-        14 C6	   LC2	       0.4000
-        15 C15	   LC	       0.7000
-        16 O14	   LOS	      -0.700
-        17 C17	   LP2	       0.0
-        18 O16	   LO	      -0.700
-        19 C18	   LP2         0.0    
-        20 C19	   LP2         0.0		  
-        21 C20	   LP2         0.0
-        22 C21	   LP2         0.0
-        23 C22     LP2         0.0
-        24 C23     LP2         0.0
-        25 C24	   LH1	       0.0
-        26 C34	   LC	       0.800
-        27 O33	   LOS	      -0.70
-        28 C36	   LP2	       0.0
-        29 O35	   LO	      -0.60
-        30 C37     LP2         0.0
-        31 C38     LP2         0.0
-        32 C39	   LP2         0.0
-        33 C40     LP2         0.0
-        34 C41     LP2         0.0
-        35 C42     LP2         0.0
-        36 C43     LP2         0.0
-        37 C25     LH1         0.0
-        38 C26     LP2         0.0
-        39 C27     LP2         0.0
-        40 C28     LP2         0.0
-        41 C29     LP2         0.0
-        42 C30     LP2         0.0
-        43 C31     LP2         0.0
-        44 CA1     LP2         0.0
-        45 CA2	   LP3	       0.0
-        46 C44	   LP2         0.0
-        47 C45	   LP2         0.0
-        48 C46	   LP2         0.0
-        49 C47	   LP2         0.0
-        50 C48	   LP2         0.0
-        51 C49	   LP2         0.0
-        52 C50     LP3         0.0                     
-    
-    [bonds]                !connect i -- j
-       N4   C1
-       N4   C2
-       N4   C3
-       N4   C5
-       C5   C6
-       C6   O7
-       P8   O7
-       P8   O11
-       P8   O9
-       P8   O10
-       C12  O11
-       C12  C13
-       C13  O14
-       C15  O14
-       C15  O16
-       C15  C17
-       C17  C18
-       C18  C19
-       C19  C20
-       C20  C21
-       C21  C22
-       C22  C23
-       C23  C24
-       C24  C25
-       C25 C26
-       C26 C27
-       C27 C28
-       C28 C29
-       C29 C30
-       C30 C31
-       C31 CA1
-       CA1 CA2
-       C13  C32
-       C32  O33
-       C34  O33
-       C34  O35
-       C34  C36
-       C36  C37
-       C37  C38
-       C38  C39
-       C39  C40
-       C40  C41
-       C41  C42
-       C42  C43
-       C43  C44
-       C44 C45
-       C45 C46
-       C46 C47
-       C47 C48
-       C48 C49
-       C49 C50
-
-    [connections]   !tail and head connections
-
-    [impropers]
-        O14  C15    O16   C17
-        O33  C34    O35   C36
-        O14  C13    C12   C32
-    [charge_groups] !charge groups, with switch atom first                  
-        C6  N4  C1  C2  C3  C5 P8  O7  O9  O10  O11
-        O14  C12  C13  C15  O16
-        C17  C18  C19        
-        C20  C21        
-        C22  C23        
-        C24  C25                       
-        C26  C27        
-        C28  C29        
-        C30  C31        
-        CA1  CA2                             
-        O33  C32  C34  O35
-        C36  C37  C38        
-        C39  C40        
-        C41  C42        
-        C43  C44
-        C45  C46               
-        C47  C48	
-        C49  C50
+{POPC}
+    [atoms]
+        ! Phosphocholine head group (PC residue atoms)
+        1  C11    cC             0.910403
+        2  O12    oC            -0.671566
+        3  O11    oS            -0.575489
+        4  C1     cA             0.196939
+        5  HR     hE             0.070469
+        6  HS     hE             0.070469
+        7  C2     cA             0.281242
+        8  HX     hE             0.063253
+        9  C3     cA             0.017459
+        10 HA     hE             0.095591
+        11 HB     hE             0.095591
+        12 O31    oT            -0.509480
+        13 P31    pA             1.339721
+        14 O32    oT            -0.508176
+        15 C31    cA             0.166801
+        16 H1A    hE             0.078534
+        17 H1B    hE             0.078534
+        18 C32    cA            -0.170824
+        19 H2A    hX             0.136415
+        20 H2B    hX             0.136415
+        21 N31    nA             0.245262
+        22 C33    cA            -0.338973
+        23 H3A    hX             0.172230
+        24 H3B    hX             0.172230
+        25 H3C    hX             0.172230
+        26 C34    cA            -0.338973
+        27 H4A    hX             0.172230
+        28 H4B    hX             0.172230
+        29 H4C    hX             0.172230
+        30 C35    cA            -0.338973
+        31 H5A    hX             0.172230
+        32 H5B    hX             0.172230
+        33 H5C    hX             0.172230
+        34 O33    oP            -0.875812
+        35 O34    oP            -0.875812
+        36 O21    oS            -0.552948
+        37 C21    cC             0.897613
+        38 O22    oC            -0.673755
+        ! Palmitic acid chain (PA residue atoms) - sn-1 position
+        39 C12    cD            -0.149259    ! Connection point to PC.C11 via O11
+        40 H2R    hL             0.047345
+        41 H2S    hL             0.047345
+        42 C13    cD            -0.004882
+        43 H3R    hL             0.019007
+        44 H3S    hL             0.019007
+        45 C14    cD            -0.030099
+        46 H4R    hL             0.020877
+        47 H4S    hL             0.020877
+        48 C15    cD            -0.029387
+        49 H5R    hL             0.016763
+        50 H5S    hL             0.016763
+        51 C16    cD            -0.024427
+        52 H6R    hL             0.013334
+        53 H6S    hL             0.013334
+        54 C17    cD            -0.019630
+        55 H7R    hL             0.011041
+        56 H7S    hL             0.011041
+        57 C18    cD            -0.015793
+        58 H8R    hL             0.009067
+        59 H8S    hL             0.009067
+        60 C19    cD            -0.030472
+        61 H9R    hL             0.013897
+        62 H9S    hL             0.013897
+        63 C110   cD            -0.028831
+        64 H10R   hL             0.014691
+        65 H10S   hL             0.014691
+        66 C111   cD            -0.025206
+        67 H11R   hL             0.014334
+        68 H11S   hL             0.014334
+        69 C112   cD            -0.027633
+        70 H12R   hL             0.011368
+        71 H12S   hL             0.011368
+        72 C113   cD            -0.033096
+        73 H13R   hL             0.014621
+        74 H13S   hL             0.014621
+        75 C114   cD            -0.020086
+        76 H14R   hL             0.015929
+        77 H14S   hL             0.015929
+        78 C115   cD             0.013975
+        79 H15R   hL             0.009292
+        80 H15S   hL             0.009292
+        81 C116   cD            -0.125447
+        82 H16R   hL             0.029047
+        83 H16S   hL             0.029047
+        84 H16T   hL             0.029047
+        ! Oleic acid chain (OL residue atoms) - sn-2 position  
+        85 C22    cD            -0.143418    ! Connection point to PC.C21 via O21 (renamed from C12)
+        86 H22R   hL             0.042861    ! Renamed from H2R
+        87 H22S   hL             0.042861    ! Renamed from H2S
+        88 C23    cD             0.001619    ! Renamed from C13
+        89 H23R   hL             0.019529    ! Renamed from H3R
+        90 H23S   hL             0.019529    ! Renamed from H3S
+        91 C24    cD            -0.022960    ! Renamed from C14
+        92 H24R   hL             0.018134    ! Renamed from H4R
+        93 H24S   hL             0.018134    ! Renamed from H4S
+        94 C25    cD            -0.016692    ! Renamed from C15
+        95 H25R   hL             0.008690    ! Renamed from H5R
+        96 H25S   hL             0.008690    ! Renamed from H5S
+        97 C26    cD            -0.023789    ! Renamed from C16
+        98 H26R   hL             0.009266    ! Renamed from H6R
+        99 H26S   hL             0.009266    ! Renamed from H6S
+        100 C27   cD            -0.017495    ! Renamed from C17
+        101 H27R  hL             0.019598    ! Renamed from H7R
+        102 H27S  hL             0.019598    ! Renamed from H7S
+        103 C28   cD             0.031775    ! Renamed from C18
+        104 H28R  hL             0.032685    ! Renamed from H8R
+        105 H28S  hL             0.032685    ! Renamed from H8S
+        106 C29   cB            -0.244228    ! Renamed from C19
+        107 H29R  hB             0.131512    ! Renamed from H9R
+        108 C210  cB            -0.239284    ! Renamed from C110
+        109 H20R  hB             0.126878    ! Renamed from H10R
+        110 C211  cD             0.029059    ! Renamed from C111
+        111 H21R  hL             0.033531    ! Renamed from H11R
+        112 H21S  hL             0.033531    ! Renamed from H11S
+        113 C212  cD            -0.026353    ! Renamed from C112
+        114 H22A  hL             0.019717    ! Renamed from H12R
+        115 H22B  hL             0.019717    ! Renamed from H12S
+        116 C213  cD            -0.018257    ! Renamed from C113
+        117 H23A  hL             0.013800    ! Renamed from H13R
+        118 H23B  hL             0.013800    ! Renamed from H13S
+        119 C214  cD            -0.024849    ! Renamed from C114
+        120 H24A  hL             0.008572    ! Renamed from H14R
+        121 H24B  hL             0.008572    ! Renamed from H14S
+        122 C215  cD            -0.023247    ! Renamed from C115
+        123 H25A  hL             0.010403    ! Renamed from H15R
+        124 H25B  hL             0.010403    ! Renamed from H15S
+        125 C216  cD            -0.012488    ! Renamed from C116
+        126 H26A  hL             0.013659    ! Renamed from H16R
+        127 H26B  hL             0.013659    ! Renamed from H16S
+        128 C217  cD             0.008684    ! Renamed from C117
+        129 H27A  hL             0.010026    ! Renamed from H17R
+        130 H27B  hL             0.010026    ! Renamed from H17S
+        131 C218  cD            -0.117290    ! Renamed from C118
+        132 H28A  hL             0.026627    ! Renamed from H18R
+        133 H28B  hL             0.026627    ! Renamed from H18S
+        134 H28C  hL             0.026627    ! Renamed from H18T
+    [bonds]
+        ! PC head group bonds
+        C11   O12
+        C11   O11
+        O11   C1
+        C1    HR
+        C1    HS
+        C1    C2
+        C2    HX
+        C2    C3
+        C2    O21
+        C3    HA
+        C3    HB
+        C3    O31
+        O31   P31
+        P31   O32
+        P31   O33
+        P31   O34
+        O32   C31
+        C31   H1A
+        C31   H1B
+        C31   C32
+        C32   H2A
+        C32   H2B
+        C32   N31
+        N31   C33
+        N31   C34
+        N31   C35
+        C33   H3A
+        C33   H3B
+        C33   H3C
+        C34   H4A
+        C34   H4B
+        C34   H4C
+        C35   H5A
+        C35   H5B
+        C35   H5C
+        O21   C21
+        C21   O22
+        ! Connection: PC.C11 connects to PA chain
+        C11   C12
+        ! PA chain bonds (sn-1)
+        C12   H2R
+        C12   H2S
+        C12   C13
+        C13   H3R
+        C13   H3S
+        C13   C14
+        C14   H4R
+        C14   H4S
+        C14   C15
+        C15   H5R
+        C15   H5S
+        C15   C16
+        C16   H6R
+        C16   H6S
+        C16   C17
+        C17   H7R
+        C17   H7S
+        C17   C18
+        C18   H8R
+        C18   H8S
+        C18   C19
+        C19   H9R
+        C19   H9S
+        C19   C110
+        C110  H10R
+        C110  H10S
+        C110  C111
+        C111  H11R
+        C111  H11S
+        C111  C112
+        C112  H12R
+        C112  H12S
+        C112  C113
+        C113  H13R
+        C113  H13S
+        C113  C114
+        C114  H14R
+        C114  H14S
+        C114  C115
+        C115  H15R
+        C115  H15S
+        C115  C116
+        C116  H16R
+        C116  H16S
+        C116  H16T
+        ! Connection: PC.C21 connects to OL chain
+        C21   C22
+        ! OL chain bonds (sn-2)
+        C22   H22R
+        C22   H22S
+        C22   C23
+        C23   H23R
+        C23   H23S
+        C23   C24
+        C24   H24R
+        C24   H24S
+        C24   C25
+        C25   H25R
+        C25   H25S
+        C25   C26
+        C26   H26R
+        C26   H26S
+        C26   C27
+        C27   H27R
+        C27   H27S
+        C27   C28
+        C28   H28R
+        C28   H28S
+        C28   C29
+        C29   H29R
+        C29   C210
+        C210  H20R
+        C210  C211
+        C211  H21R
+        C211  H21S
+        C211  C212
+        C212  H22A
+        C212  H22B
+        C212  C213
+        C213  H23A
+        C213  H23B
+        C213  C214
+        C214  H24A
+        C214  H24B
+        C214  C215
+        C215  H25A
+        C215  H25B
+        C215  C216
+        C216  H26A
+        C216  H26B
+        C216  C217
+        C217  H27A
+        C217  H27B
+        C217  C218
+        C218  H28A
+        C218  H28B
+        C218  H28C
+    [impropers]
+        ! Double bond impropers for oleic acid
+        C211  C210  C29   H20R
+        C210  C29   C28   H29R
+    [connections]
+        head
+        tail
 *--------------------------------------------------------------------------------
 {HOH} !TIP3P water
     [info]

From ece21fc47a4924a6b237043faf634078705da134 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Tue, 19 Aug 2025 17:17:41 +0200
Subject: [PATCH 02/20] change create_ddG_plot stylling

---
 src/QligFEP/analyze_FEP.py | 84 ++++++++++++++++++++++++++++++--------
 1 file changed, 66 insertions(+), 18 deletions(-)

diff --git a/src/QligFEP/analyze_FEP.py b/src/QligFEP/analyze_FEP.py
index 1bd656a5..0a803489 100644
--- a/src/QligFEP/analyze_FEP.py
+++ b/src/QligFEP/analyze_FEP.py
@@ -7,6 +7,8 @@
 from pathlib import Path
 from typing import Optional
 
+import matplotlib.cm as cm
+import matplotlib.colors as mcolors
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
@@ -510,10 +512,12 @@ def create_ddG_plot(
         output_path: str | None = None,
         target_name: str | None = None,
         savefig: bool = False,
+        font: str | None = None,
     ):
         """Creates the ddG plot for the FEP that has already been analyzed. The plot will
         show the experimental (X axis) vs mean predicted values (Y axis), with error bars
-        representing the standard error of the mean (SEM).
+        representing the standard error of the mean (SEM). Points are colored based on
+        their deviation from experimental values (blue = 0 deviation, red = 3+ kcal/mol).
 
         Args:
             reuslts_df: pd.DataFrame with the results from the FEP, output from `prepare_df`.
@@ -538,6 +542,13 @@ def create_ddG_plot(
             sem_values = sem_values[mask]
             exp_values = exp_values[mask]
 
+        # Calculate absolute deviations for coloring
+        deviations = np.abs(avg_values - exp_values)
+
+        # Create colormap from blue to red, with max at 3 kcal/mol
+        norm = mcolors.Normalize(vmin=0, vmax=4)
+        cmap = cm.RdBu_r
+
         ## CALCULATE STATISTICS
         def result_to_latex(res, latexify_each=False):  # TODO: move this out of this method?
             """Round cinnabar's output to one decimal case and return a LaTeX string."""
@@ -566,19 +577,39 @@ def result_to_latex(res, latexify_each=False):  # TODO: move this out of this me
             min_val = min(all_values) - margin
             max_val = max(all_values) + margin
 
-        fig, ax = plt.subplots()
+        fig, ax = plt.subplots(figsize=(7, 4.5))
 
-        plt.errorbar(
+        # Plot colored points with error bars
+        scatter = plt.errorbar(
             exp_values,
             avg_values,
-            yerr=sem_values,
             fmt="o",
-            color="black",
-            ecolor="black",
-            elinewidth=2.0,
-            capsize=0,
+            yerr=sem_values,
+            ecolor="gray",
+            elinewidth=1.5,
+            capsize=2,
             zorder=4,
+            linestyle="None",
+            markersize=8,
         )
+
+        # Remove the default markers and add colored ones
+        scatter[0].remove()
+
+        # Add the colored scatter points
+        scatter_points = plt.scatter(
+            exp_values,
+            avg_values,
+            c=deviations,
+            cmap=cmap,
+            norm=norm,
+            s=45,
+            zorder=5,
+            edgecolors="black",
+            linewidths=0.5,
+            alpha=0.8,
+        )
+
         plt.plot([min_val, max_val], [min_val, max_val], "k-", linewidth=1.5, zorder=3)  # Black identity line
 
         # Highlight predictions within 1 and 2 kcal/mol of the experimental affinity
@@ -587,7 +618,7 @@ def result_to_latex(res, latexify_each=False):  # TODO: move this out of this me
             [min_val - 1, max_val - 1],
             [min_val + 1, max_val + 1],
             color="darkgray",
-            alpha=0.5,
+            alpha=0.3,
             zorder=2,
         )
         ax.fill_between(
@@ -595,17 +626,27 @@ def result_to_latex(res, latexify_each=False):  # TODO: move this out of this me
             [min_val - 2, max_val - 2],
             [min_val + 2, max_val + 2],
             color="lightgray",
-            alpha=0.5,
+            alpha=0.3,
             zorder=1,
         )
 
+        # Add colorbar
+        cbar = plt.colorbar(scatter_points, ax=ax, shrink=0.40, aspect=10, anchor=(0.0, 0.85), pad=0.05)
+
+        cbar.set_label("|Deviation| (kcal/mol)", rotation=270, labelpad=20)
+        cbar.ax.tick_params(labelsize=10)
+        cbar.set_ticks([0, 1, 2, 3, 4])
+        cbar.set_ticklabels(["0", "1", "2", "3", "≥4"])
+        cbar.ax.tick_params(labelsize=10)
+
         # set labels, make it square and add legend
         plt.title(
-            f"{(target_name + ' - ' if target_name is not None else '')}"
-            rf"$\Delta\Delta Gbar$ plot ($N={len(exp_values)}$)"
+            f"{(target_name + ' ' if target_name is not None else '')}"
+            r"$\Delta\Delta \text{G}_{\text{BAR}}$ ($\mathrm{N}="
+            f"{len(exp_values)}$)"
         )
-        plt.xlabel("$\Delta\Delta G_{exp} [kcal/mol]$")  # noqa: W605
-        plt.ylabel("$\Delta\Delta G_{pred} [kcal/mol]$")  # noqa: W605
+        plt.xlabel("$\Delta\Delta G_{exp} (kcal/mol)$")  # noqa: W605
+        plt.ylabel("$\Delta\Delta G_{pred} (kcal/mol)$")  # noqa: W605
         plt.xlim(min_val, max_val)
         plt.ylim(min_val, max_val)
         ax.set_aspect("equal", adjustable="box")
@@ -613,7 +654,7 @@ def result_to_latex(res, latexify_each=False):  # TODO: move this out of this me
         # add statistics to the plot
         unit = r"\frac{kcal}{mol}"
         text_body = (
-            f"$\\tau = {stats_dict['KTAU']}$ (Kendall's $\\tau$)",
+            f"$\\tau = {stats_dict['KTAU']}$",
             f"RMSE = ${stats_dict['RMSE']}  {unit}$",
             f"MUE = ${stats_dict['MUE']}  {unit}$",
         )
@@ -633,12 +674,13 @@ def result_to_latex(res, latexify_each=False):  # TODO: move this out of this me
                 verticalalignment="bottom",
                 horizontalalignment="left",
                 transform=ax.transAxes,
+                fontproperties=font,
             )
 
         legend_elements = [
             Line2D([0], [0], color="k", linestyle="-", label="Identity line"),
-            Patch(facecolor="darkgray", alpha=0.5, label="Within 1 kcal/mol"),
-            Patch(facecolor="lightgray", alpha=0.5, label="Within 2 kcal/mol"),
+            Patch(facecolor="darkgray", alpha=0.3, label="Within 1 kcal/mol"),
+            Patch(facecolor="lightgray", alpha=0.3, label="Within 2 kcal/mol"),
         ]
 
         ax.legend(
@@ -648,6 +690,12 @@ def result_to_latex(res, latexify_each=False):  # TODO: move this out of this me
             borderaxespad=0,
             frameon=False,
         )
+        # Remove top and right spines using matplotlib
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
+        ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.5)
+        ax.set_axisbelow(True)
+
         if savefig:
             if output_path is None:
                 output_path = Path().cwd()
@@ -660,7 +708,7 @@ def result_to_latex(res, latexify_each=False):  # TODO: move this out of this me
             if output_path.isdir():
                 output_path = output_path / f"{target_name}_ddG_plot.png"
                 logger.info(f"Using default name to save the plot at {output_path}")
-            elif output_path.exits():
+            elif output_path.exists():  # Fixed typo: exits() -> exists()
                 logger.warning(f"File {output_path} already exists. Overwriting...")
             fig.savefig(output_path, dpi=300, bbox_inches="tight")
         return fig, ax

From 4d10c8d90b5cbbdfcfcc40ab7107a7817522378f Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Fri, 22 Aug 2025 14:58:07 +0200
Subject: [PATCH 03/20] update cog to use the correct logger and setup_logger

---
 src/QligFEP/CLI/cog_cli.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/QligFEP/CLI/cog_cli.py b/src/QligFEP/CLI/cog_cli.py
index 8b2e7520..b8071357 100644
--- a/src/QligFEP/CLI/cog_cli.py
+++ b/src/QligFEP/CLI/cog_cli.py
@@ -4,8 +4,7 @@
 import re
 from pathlib import Path
 
-from loguru import logger
-
+from ..logger import logger, setup_logger
 
 class MolecularCOG:
     """Class implemented to calculate the center of geometry of a molecule. It supports
@@ -107,10 +106,20 @@ def parse_arguments():
         help="For PDB files, specify the atom types to include (e.g., 'ATOM,HETATM').",
         type=str,
     )
+    parser.add_argument(
+        "-log",
+        "--log-level",
+        dest="log",
+        required=False,
+        default="info",
+        help="Set the log level for the logger. Defaults to `info`.",
+        choices=["trace", "debug", "info", "warning", "error", "critical"],
+    )
     return parser.parse_args()
 
 
 def main(args: argparse.Namespace) -> None:
+    setup_logger(args.log)
     cog = MolecularCOG(args.input)
     center = cog(args.include)
     logger.info(f"Center of geometry: {center}")

From 0f539d74812dc72c3e25f9e4c45276689913f9f2 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Fri, 22 Aug 2025 14:58:23 +0200
Subject: [PATCH 04/20] change potential pandas warning when writing dataframe

---
 src/QligFEP/pdb_utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/QligFEP/pdb_utils.py b/src/QligFEP/pdb_utils.py
index e96b9d12..23b1e9fc 100644
--- a/src/QligFEP/pdb_utils.py
+++ b/src/QligFEP/pdb_utils.py
@@ -420,13 +420,14 @@ def write_dataframe_to_pdb(df, output_file, header: Optional[str] = None):
         output_file: name of the output file (include .pdb extension).
         header: if desired, a header to be added to the PDB file. Defaults to None.
     """
+    df = df.copy()
     with open(output_file, "w") as file:
         if header is not None:
             file.write(f"{header}\n")
-        if df.temp_factor.dtype == "float64":
-            df.loc[:, "temp_factor"] = df.temp_factor.apply(lambda x: f"{x:.2f}")
-        if df.occupancy.dtype == "float64":
-            df.loc[:, "occupancy"] = df.occupancy.apply(lambda x: f"{x:.2f}")
+        if df["temp_factor"].dtype == "float64":
+            df["temp_factor"] = df["temp_factor"].apply(lambda x: f"{x:.2f}")
+        if df["occupancy"].dtype == "float64":
+            df["occupancy"] = df["occupancy"].apply(lambda x: f"{x:.2f}")
         for _, row in df.iterrows():
             pdb_line = (
                 f"{row['record_type']:<6}{row['atom_serial_number']:>5} "

From fd0d03b6330e77be09189aaf64fa248f35767c96 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Fri, 22 Aug 2025 16:26:12 +0200
Subject: [PATCH 05/20] update qprep_cli to neutralize out-of-sphere amino
 acids

---
 src/QligFEP/CLI/qprep_cli.py | 317 +++++++++++++++++++++++++++++++++++
 1 file changed, 317 insertions(+)

diff --git a/src/QligFEP/CLI/qprep_cli.py b/src/QligFEP/CLI/qprep_cli.py
index 021750d3..8e958ab0 100644
--- a/src/QligFEP/CLI/qprep_cli.py
+++ b/src/QligFEP/CLI/qprep_cli.py
@@ -46,6 +46,258 @@ class QprepAtomLibMissingError(Exception):
     pass
 
 
+class ProteinNeutralizer:
+    """Protein neutralizer for out-of-sphere charged residues in qprep workflow"""
+
+    def __init__(self, center_coords, radius=25.0, boundary_offset=3.0):
+        self.center = np.array(center_coords)
+        self.radius = float(radius)
+        self.boundary_offset = boundary_offset
+        self.rest_bound = self.radius - self.boundary_offset  # Neutralize residues OUTSIDE this boundary
+
+        # Define charged residues and their neutral forms + key atoms for distance calculation
+        # Format: 'charged': ['neutral', 'key_atom', charge]
+        self.charged_residues = {
+            "GLU": ["GLH", "CD", -1],  # Glutamic acid -> neutral glutamic acid
+            "ASP": ["ASH", "CG", -1],  # Aspartic acid -> neutral aspartic acid
+            "ARG": ["ARN", "CZ", +1],  # Arginine -> neutral arginine
+            "LYS": ["LYN", "NZ", +1],  # Lysine -> neutral lysine
+            "HIP": ["HID", "CG", +1],  # Histidine (protonated) -> histidine (delta protonated)
+        }
+
+        # Statistics tracking
+        self.stats = {
+            "total_charged_residues": 0,
+            "residues_outside_boundary": 0,
+            "residues_neutralized": 0,
+            "salt_bridges_neutralized": 0,
+            "original_total_charge": 0,
+            "final_total_charge": 0,
+            "modifications": {},
+            "remaining_outside_charged": [],
+        }
+
+    def neutralize_outside_residues(self, pdb_file, salt_bridge_cutoff=4.0):
+        """Find and neutralize charged residues outside the sphere boundary"""
+        logger.info(f"Neutralizing charged residues outside {self.rest_bound:.1f}Å boundary")
+
+        df = read_pdb_to_dataframe(pdb_file)
+        charged_residues_info = self._find_charged_residues(df)
+
+        if not charged_residues_info:
+            logger.info("No charged residues found in the PDB file")
+            return df, self.stats
+
+        self.stats["total_charged_residues"] = len(charged_residues_info)
+        logger.info(f"Found {len(charged_residues_info)} charged residues")
+
+        # Classify residues by distance to center
+        outside_residues, inside_residues = self._classify_residues_by_distance(charged_residues_info)
+        self.stats["residues_outside_boundary"] = len(outside_residues)
+
+        # Track original charge
+        self.stats["original_total_charge"] = sum(res["charge"] for res in charged_residues_info)
+
+        # Find salt bridges between outside and inside residues
+        salt_bridge_pairs = self._find_salt_bridges(outside_residues, inside_residues, salt_bridge_cutoff)
+        self.stats["salt_bridges_neutralized"] = len(salt_bridge_pairs)
+
+        # Neutralize residues outside boundary and their salt bridge partners
+        residues_to_modify = outside_residues + salt_bridge_pairs
+        self.stats["residues_neutralized"] = len(residues_to_modify)
+
+        # Modify the dataframe
+        modified_df = self._modify_residues(df, residues_to_modify)
+
+        # Track final charge and remaining outside charged residues
+        self.stats["final_total_charge"] = sum(
+            res["charge"] for res in inside_residues if res not in salt_bridge_pairs
+        )
+
+        # Check for remaining charged residues outside boundary
+        self._check_remaining_outside_charged(inside_residues, salt_bridge_pairs)
+
+        # Log statistics
+        self._log_neutralization_stats()
+
+        return modified_df, self.stats
+
+    def _find_charged_residues(self, df):
+        """Find all charged residues in the PDB"""
+        charged_residues_info = []
+
+        for res_name in self.charged_residues.keys():
+            residues = df[df["residue_name"] == res_name]
+
+            if len(residues) == 0:
+                continue
+
+            for (chain, res_num), group in residues.groupby(["chain_id", "residue_seq_number"]):
+                key_atom = self.charged_residues[res_name][1]
+                charge = self.charged_residues[res_name][2]
+                key_atom_row = group[group["atom_name"] == key_atom]  # atom for distance calculation
+
+                if len(key_atom_row) == 0:
+                    logger.warning(f"Key atom {key_atom} not found in {res_name} {chain}:{res_num}")
+                    continue
+
+                key_atom_coords = key_atom_row[["x", "y", "z"]].values[0]
+                distance = np.linalg.norm(key_atom_coords - self.center)
+
+                residues_info = {
+                    "residue_name": res_name,
+                    "chain_id": chain,
+                    "residue_seq_number": res_num,
+                    "key_atom": key_atom,
+                    "key_atom_coords": key_atom_coords,
+                    "distance": distance,
+                    "charge": charge,
+                    "neutral_form": self.charged_residues[res_name][0],
+                }
+                charged_residues_info.append(residues_info)
+
+        return charged_residues_info
+
+    def _classify_residues_by_distance(self, charged_residues_info):
+        """Classify residues as inside or outside the boundary"""
+        outside_residues = []
+        inside_residues = []
+
+        for res_info in charged_residues_info:
+            if res_info["distance"] > self.rest_bound:
+                outside_residues.append(res_info)
+                logger.debug(
+                    f"Residue {res_info['chain_id']}:{res_info['residue_seq_number']} ({res_info['residue_name']}) "
+                    f"outside boundary: {res_info['distance']:.2f}Å > {self.rest_bound:.1f}Å"
+                )
+            else:
+                inside_residues.append(res_info)
+                logger.debug(
+                    f"Residue {res_info['chain_id']}:{res_info['residue_seq_number']} ({res_info['residue_name']}) "
+                    f"inside boundary: {res_info['distance']:.2f}Å <= {self.rest_bound:.1f}Å"
+                )
+
+        return outside_residues, inside_residues
+
+    def _find_salt_bridges(self, outside_residues, inside_residues, cutoff):
+        """Find salt bridges between outside and inside residues"""
+        salt_bridge_partners = []
+
+        for outside_res in outside_residues:
+            for inside_res in inside_residues:
+                # Only consider oppositely charged residues
+                if outside_res["charge"] * inside_res["charge"] >= 0:
+                    continue
+
+                distance = np.linalg.norm(outside_res["key_atom_coords"] - inside_res["key_atom_coords"])
+
+                if distance <= cutoff:
+                    salt_bridge_partners.append(inside_res)
+                    logger.info(
+                        f"Salt bridge detected: {outside_res['chain_id']}:{outside_res['residue_seq_number']} "
+                        f"({outside_res['residue_name']}) <-> {inside_res['chain_id']}:{inside_res['residue_seq_number']} "
+                        f"({inside_res['residue_name']}) at {distance:.2f}Å - neutralizing both"
+                    )
+
+        return salt_bridge_partners
+
+    def _modify_residues(self, df, residues_to_modify):
+        """Modify residues to their neutral forms and remove appropriate atoms"""
+        modified_df = df.copy()
+
+        for res_info in residues_to_modify:
+            chain = res_info["chain_id"]
+            res_num = res_info["residue_seq_number"]
+            old_name = res_info["residue_name"]
+            new_name = res_info["neutral_form"]
+            distance = res_info["distance"]
+
+            # Change residue nameand remove atoms based on residue type
+            mask = (modified_df["chain_id"] == chain) & (modified_df["residue_seq_number"] == res_num)
+            modified_df.loc[mask, "residue_name"] = new_name
+            atoms_to_remove = self._get_atoms_to_remove(old_name, new_name)
+
+            for atom_name in atoms_to_remove:
+                atom_mask = mask & (modified_df["atom_name"] == atom_name)
+                indices_to_remove = modified_df[atom_mask].index
+                modified_df = modified_df.drop(indices_to_remove)
+
+            mod_key = f"{chain}:{res_num}"
+            self.stats["modifications"][mod_key] = {
+                "original": old_name,
+                "modified": new_name,
+                "distance": distance,
+                "atoms_removed": atoms_to_remove,
+            }
+
+            logger.info(
+                f"Neutralized {old_name} -> {new_name} at {chain}:{res_num} (distance: {distance:.2f}Å)"
+            )
+            if atoms_to_remove:
+                logger.debug(f"Removed atoms: {', '.join(atoms_to_remove)}")
+
+        return modified_df
+
+    def _get_atoms_to_remove(self, old_name, new_name):
+        """Get atoms to remove when converting charged to neutral form"""
+        atoms_to_remove = []
+
+        if old_name == "ASP" and new_name == "ASH":
+            # ASP -> ASH: No atoms removed, HD2 is added (handled by forcefield)
+            pass
+        elif old_name == "GLU" and new_name == "GLH":
+            # GLU -> GLH: No atoms removed, HE2 is added (handled by forcefield)
+            pass
+        elif old_name == "LYS" and new_name == "LYN":
+            # LYS -> LYN: Remove HZ1 hydrogen from NZ
+            atoms_to_remove = ["HZ1"]
+        elif old_name == "ARG" and new_name == "ARN":
+            # ARG -> ARN: Remove HH22 hydrogen from NH2
+            atoms_to_remove = ["HH22"]
+        elif old_name == "HIP" and new_name == "HID":
+            # HIP -> HID: Remove HE2 hydrogen from NE2
+            atoms_to_remove = ["HE2"]
+
+        return atoms_to_remove
+
+    def _check_remaining_outside_charged(self, inside_residues, salt_bridge_partners):
+        """Check for remaining charged residues outside the boundary"""
+        for res in inside_residues:
+            if res not in salt_bridge_partners and res["distance"] > self.rest_bound:
+                self.stats["remaining_outside_charged"].append(res)
+
+    def _log_neutralization_stats(self):
+        """Log neutralization statistics"""
+        stats = self.stats
+
+        logger.info(
+            f"Neutralization statistics:\n"
+            f"  Total charged residues found: {stats['total_charged_residues']}"
+            f"  Residues outside boundary ({self.rest_bound:.1f}Å): {stats['residues_outside_boundary']}"
+            f"  Salt bridge pairs neutralized: {stats['salt_bridges_neutralized']}"
+            f"  Total residues neutralized: {stats['residues_neutralized']}"
+            f"  Original total charge: {stats['original_total_charge']:+d}"
+            f"  Final total charge: {stats['final_total_charge']:+d}"
+        )
+
+        if stats["remaining_outside_charged"]:
+            for res in stats["remaining_outside_charged"]:
+                logger.warning(
+                    f"Charged residue {res['chain_id']}:{res['residue_seq_number']} ({res['residue_name']}) "
+                    f"remains outside boundary at {res['distance']:.2f}Å"
+                )
+
+        if stats["modifications"]:
+            logger.debug("Detailed modifications:")
+            for res_id, mod_info in stats["modifications"].items():
+                logger.debug(
+                    f"  {res_id}: {mod_info['original']} -> {mod_info['modified']} "
+                    f"(distance: {mod_info['distance']:.2f}Å)"
+                )
+                if mod_info["atoms_removed"]:
+                    logger.debug(f"    Atoms removed: {', '.join(mod_info['atoms_removed'])}")
+
+
 def qprep_error_check(qprep_out_path: Path, ff_name) -> None:
     """Check for errors in the qprep.out file and raise an exception if any are found.
 
@@ -187,6 +439,39 @@ def parse_arguments() -> argparse.Namespace:
             "pdb files containing the cofactors to be added."
         ),
     )
+    parser.add_argument(
+        "-skip-n",
+        "--skip-neutralization",
+        dest="skip_neutralization",
+        action="store_true",
+        help=(
+            "Skip neutralizing charged residues outside the spherical boundary. Highly advised against."
+            "This neutralization is crucial for maintaining the stability of the system and prevents "
+            "electrostatic interference from distant charges. Defaults to False."
+        ),
+    )
+    parser.add_argument(
+        "-nbo",
+        "--neutralize_boundary_offset",
+        dest="neutralize_boundary_offset",
+        type=float,
+        default=3.0,
+        help=(
+            "Distance offset from sphere radius to define neutralization boundary. "
+            "Residues outside (radius - offset) will be neutralized. Defaults to 3.0Å."
+        ),
+    )
+    parser.add_argument(
+        "-sbc",
+        "--salt_bridge_cutoff",
+        dest="salt_bridge_cutoff",
+        type=float,
+        default=4.0,
+        help=(
+            "Distance cutoff for detecting salt bridges between inside and outside "
+            "residues. Salt bridge partners will also be neutralized. Defaults to 4.0Å."
+        ),
+    )
     return parser.parse_args()
 
 
@@ -236,6 +521,23 @@ def main(args: Optional[argparse.Namespace] = None, **kwargs) -> None:
             protein_df.query("residue_name != 'HOH'"), Path(pdb_file).with_stem(f"{fname}_noHOH")
         )
 
+    neutralization_stats = None
+    if not args.skip_neutralization:
+        logger.info("Neutralizing charged residues outside spherical boundary")
+        center_coords = [float(coord) for coord in args.cog]
+        neutralizer = ProteinNeutralizer(center_coords, args.sphereradius, args.neutralize_boundary_offset)
+        # Neutralize the protein and get statistics
+        neutralized_df, neutralization_stats = neutralizer.neutralize_outside_residues(
+            pdb_file, args.salt_bridge_cutoff
+        )
+        # Write the neutralized protein file
+        neutralized_pdb_path = pdb_path.with_name(f"{pdb_path.stem}_neutralized.pdb")
+        write_dataframe_to_pdb(neutralized_df, neutralized_pdb_path)
+        # Update the pdb_file path to use the neutralized version
+        pdb_file = str(neutralized_pdb_path)
+        pdb_path = neutralized_pdb_path
+        logger.info(f"Neutralized protein saved as: {neutralized_pdb_path}")
+
     if args is not None:
         param_dict = {
             "pdb_file_path": pdb_file,
@@ -260,6 +562,21 @@ def main(args: Optional[argparse.Namespace] = None, **kwargs) -> None:
     qprep_error_check(qprep_out_path, args.FF)
     logger.info("qprep run finished. Check the output `qprep.out` for more information.")
 
+    # Log neutralization summary if performed
+    if neutralization_stats and not args.skip_neutralization:
+        logger.info(
+            "NEUTRALIZATION SUMMARY"
+            f"Total charged residues processed: {neutralization_stats['total_charged_residues']}"
+            f"Residues outside boundary: {neutralization_stats['residues_outside_boundary']}"
+            f"Salt bridge pairs neutralized: {neutralization_stats['salt_bridges_neutralized']}"
+            f"Total residues neutralized: {neutralization_stats['residues_neutralized']}"
+            f"Charge change: {neutralization_stats['original_total_charge']:+d} -> {neutralization_stats['final_total_charge']:+d}"
+        )
+        if neutralization_stats["remaining_outside_charged"]:
+            logger.warning(
+                f"Warning: {len(neutralization_stats['remaining_outside_charged'])} charged residues remain outside boundary"
+            )
+
     waterfile = Path(cwd / "water.pdb")
     # Write water file and deal with possible errors
     if not Path("complexnotexcluded.pdb").exists():

From 4f3df65d2dfe1ebba4a14481cd3f724369638760 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Fri, 22 Aug 2025 17:21:21 +0200
Subject: [PATCH 06/20] improve logging, reduce verbosity

---
 src/QligFEP/CLI/qprep_cli.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/QligFEP/CLI/qprep_cli.py b/src/QligFEP/CLI/qprep_cli.py
index 8e958ab0..48044a86 100644
--- a/src/QligFEP/CLI/qprep_cli.py
+++ b/src/QligFEP/CLI/qprep_cli.py
@@ -230,7 +230,7 @@ def _modify_residues(self, df, residues_to_modify):
                 "atoms_removed": atoms_to_remove,
             }
 
-            logger.info(
+            logger.debug(
                 f"Neutralized {old_name} -> {new_name} at {chain}:{res_num} (distance: {distance:.2f}Å)"
             )
             if atoms_to_remove:
@@ -565,12 +565,12 @@ def main(args: Optional[argparse.Namespace] = None, **kwargs) -> None:
     # Log neutralization summary if performed
     if neutralization_stats and not args.skip_neutralization:
         logger.info(
-            "NEUTRALIZATION SUMMARY"
-            f"Total charged residues processed: {neutralization_stats['total_charged_residues']}"
-            f"Residues outside boundary: {neutralization_stats['residues_outside_boundary']}"
-            f"Salt bridge pairs neutralized: {neutralization_stats['salt_bridges_neutralized']}"
-            f"Total residues neutralized: {neutralization_stats['residues_neutralized']}"
-            f"Charge change: {neutralization_stats['original_total_charge']:+d} -> {neutralization_stats['final_total_charge']:+d}"
+            "NEUTRALIZATION SUMMARY\n"
+            f"Total charged residues processed: {neutralization_stats['total_charged_residues']}\n"
+            f"Residues outside boundary: {neutralization_stats['residues_outside_boundary']}\n"
+            f"Salt bridge pairs neutralized: {neutralization_stats['salt_bridges_neutralized']}\n"
+            f"Total residues neutralized: {neutralization_stats['residues_neutralized']}\n"
+            f"Charge change: {neutralization_stats['original_total_charge']:+d} -> {neutralization_stats['final_total_charge']:+d}\n"
         )
         if neutralization_stats["remaining_outside_charged"]:
             logger.warning(

From 45a16122c80814edf417185e5c856cb0c4f7c5fd Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Sat, 23 Aug 2025 12:04:46 +0200
Subject: [PATCH 07/20] fix but where ASP was always converted to ASH

---
 src/QligFEP/CLI/pdb_to_amber.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/QligFEP/CLI/pdb_to_amber.py b/src/QligFEP/CLI/pdb_to_amber.py
index fc9c0d50..1b3f6f80 100644
--- a/src/QligFEP/CLI/pdb_to_amber.py
+++ b/src/QligFEP/CLI/pdb_to_amber.py
@@ -286,9 +286,8 @@ def asp_search(npdb):
     for i in range(len(npdb)):
         resname = npdb[i][0][17:21].rstrip()
         if resname == "ASP":
-            OD1_present = atom_is_present(npdb[i], "OD1")
-            OD2_present = atom_is_present(npdb[i], "OD2")
-            if all([OD1_present, OD2_present]):
+            HD2_present = atom_is_present(npdb[i], "HD2")
+            if HD2_present:
                 npdb[i] = [x.replace("ASP", "ASH") for x in npdb[i]]
     return npdb
 

From 8ffeed9a43b87870e97c952b29ec95c5130880c6 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Sat, 23 Aug 2025 12:40:15 +0200
Subject: [PATCH 08/20] update qprep-cli for more consistent output naming,
 depending on the processing steps

---
 src/QligFEP/CLI/qprep_cli.py | 79 +++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 32 deletions(-)

diff --git a/src/QligFEP/CLI/qprep_cli.py b/src/QligFEP/CLI/qprep_cli.py
index 48044a86..b98a27ac 100644
--- a/src/QligFEP/CLI/qprep_cli.py
+++ b/src/QligFEP/CLI/qprep_cli.py
@@ -82,10 +82,16 @@ def neutralize_outside_residues(self, pdb_file, salt_bridge_cutoff=4.0):
         logger.info(f"Neutralizing charged residues outside {self.rest_bound:.1f}Å boundary")
 
         df = read_pdb_to_dataframe(pdb_file)
+        return self.neutralize_outside_residues_dataframe(df, salt_bridge_cutoff)
+
+    def neutralize_outside_residues_dataframe(self, df, salt_bridge_cutoff=4.0):
+        """Find and neutralize charged residues outside the sphere boundary for a DataFrame"""
+        logger.info(f"Neutralizing charged residues outside {self.rest_bound:.1f}Å boundary")
+
         charged_residues_info = self._find_charged_residues(df)
 
         if not charged_residues_info:
-            logger.info("No charged residues found in the PDB file")
+            logger.info("No charged residues found in the PDB data")
             return df, self.stats
 
         self.stats["total_charged_residues"] = len(charged_residues_info)
@@ -126,7 +132,7 @@ def _find_charged_residues(self, df):
         """Find all charged residues in the PDB"""
         charged_residues_info = []
 
-        for res_name in self.charged_residues.keys():
+        for res_name in self.charged_residues:
             residues = df[df["residue_name"] == res_name]
 
             if len(residues) == 0:
@@ -496,47 +502,56 @@ def main(args: Optional[argparse.Namespace] = None, **kwargs) -> None:
     pdb_file = str(cwd / args.input_pdb_file)
     pdb_path = cwd / args.input_pdb_file
 
+    # Track processing steps for file naming
+    processing_steps = []
+    original_stem = pdb_path.stem
+
     ff_lib_path, ff_prm_path = get_force_field_paths(args.FF)
 
-    if args.cofactors:  # append cofactors to the protein if any are passed...
-        pdb_data = read_pdb_to_dataframe(pdb_file)
+    # Step 1: Remove crystal waters first (they will be replaced by sphere waters)
+    pdb_data = read_pdb_to_dataframe(pdb_file)
+    crystal_waters_df = pdb_data.query("residue_name == 'HOH'")
+    if not crystal_waters_df.empty:
+        logger.info(f"Removing {len(crystal_waters_df)} crystal water molecules")
+        pdb_data = pdb_data.query("residue_name != 'HOH'")
+        processing_steps.append("noHOH")
+
+    # Step 2: Add cofactors if provided
+    if args.cofactors:
+        logger.info(f"Adding {len(args.cofactors)} cofactor(s)")
         for cofactor in args.cofactors:
             pdb_data = append_pdb_to_another(pdb_data, cwd / cofactor, ignore_waters=True)
-        cofactor_path = pdb_path.with_name(f"{pdb_path.stem}_plus_cofactors.pdb")
-        write_dataframe_to_pdb(pdb_data, cofactor_path)
-        pdb_path = cofactor_path
-        pdb_file = str(pdb_path)
-
-    qprep_inp_path = cwd / "qprep.inp"
-    qprep_out_path = cwd / "qprep.out"
-
-    cysbonds = handle_cysbonds(args.cysbond, pdb_file, comment_out=True)
-
-    # write out without crystal waters - (will be in sphere)
-    protein_df = read_pdb_to_dataframe(pdb_file)
-    crystal_waters_df = protein_df.query("residue_name == 'HOH'")
-    if not crystal_waters_df.empty:
-        fname = args.input_pdb_file.split(".")[0]
-        write_dataframe_to_pdb(
-            protein_df.query("residue_name != 'HOH'"), Path(pdb_file).with_stem(f"{fname}_noHOH")
-        )
+        processing_steps.append("cofactors")
 
+    # Step 3: Neutralization
     neutralization_stats = None
     if not args.skip_neutralization:
         logger.info("Neutralizing charged residues outside spherical boundary")
         center_coords = [float(coord) for coord in args.cog]
         neutralizer = ProteinNeutralizer(center_coords, args.sphereradius, args.neutralize_boundary_offset)
         # Neutralize the protein and get statistics
-        neutralized_df, neutralization_stats = neutralizer.neutralize_outside_residues(
-            pdb_file, args.salt_bridge_cutoff
+        pdb_data, neutralization_stats = neutralizer.neutralize_outside_residues_dataframe(
+            pdb_data, args.salt_bridge_cutoff
         )
-        # Write the neutralized protein file
-        neutralized_pdb_path = pdb_path.with_name(f"{pdb_path.stem}_neutralized.pdb")
-        write_dataframe_to_pdb(neutralized_df, neutralized_pdb_path)
-        # Update the pdb_file path to use the neutralized version
-        pdb_file = str(neutralized_pdb_path)
-        pdb_path = neutralized_pdb_path
-        logger.info(f"Neutralized protein saved as: {neutralized_pdb_path}")
+        processing_steps.append("neutralized")
+        logger.info("Charged residues neutralized")
+
+    # Create final processed PDB file with descriptive name
+    if processing_steps:
+        processed_stem = f"{original_stem}_{'_'.join(processing_steps)}"
+        processed_pdb_path = pdb_path.with_name(f"{processed_stem}.pdb")
+    else:
+        processed_pdb_path = pdb_path
+
+    write_dataframe_to_pdb(pdb_data, processed_pdb_path)
+    pdb_file = str(processed_pdb_path)
+    pdb_path = processed_pdb_path
+    logger.info(f"Final processed protein saved as: {processed_pdb_path}")
+
+    qprep_inp_path = cwd / "qprep.inp"
+    qprep_out_path = cwd / "qprep.out"
+
+    cysbonds = handle_cysbonds(args.cysbond, pdb_file, comment_out=True)
 
     if args is not None:
         param_dict = {
@@ -606,8 +621,8 @@ def main(args: Optional[argparse.Namespace] = None, **kwargs) -> None:
     oxygen_subset = water_df.query('atom_name == "O"')
     euclidean_distances = oxygen_subset[["x", "y", "z"]].sub(cog).pow(2).sum(1).apply(np.sqrt)
     outside = np.where(euclidean_distances > args.sphereradius * 1.05)[0]  # we add a tolerance of 5%
-    outside_HOH_residues = oxygen_subset.iloc[outside].residue_seq_number.unique()  # noqa: F841
     if outside.shape[0] > 0:
+        outside_HOH_residues = oxygen_subset.iloc[outside].residue_seq_number.unique()
         logger.warning(f"Found {outside.shape[0]} water molecules outside the sphere radius.")
         logger.warning("Removing these water molecules from the water.pdb file.")
         todrop_idxs = water_df.query("residue_seq_number in @outside_HOH_residues").index

From 60f4c0a358e05bc1d89d344d920e73f851ffe29e Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Mon, 25 Aug 2025 23:28:25 +0200
Subject: [PATCH 09/20] capture all missing library errors from qprep

---
 src/QligFEP/CLI/qprep_cli.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/QligFEP/CLI/qprep_cli.py b/src/QligFEP/CLI/qprep_cli.py
index b98a27ac..0b7acc48 100644
--- a/src/QligFEP/CLI/qprep_cli.py
+++ b/src/QligFEP/CLI/qprep_cli.py
@@ -317,6 +317,7 @@ def qprep_error_check(qprep_out_path: Path, ff_name) -> None:
     error_pat = re.compile(r"ERROR\:\s", re.IGNORECASE)
     missing_lib_pat = re.compile(
         r">>> Atom ...?.? in residue no\.\s+\d+ not found in library entry for [A-Z]+"
+        r"|>>> Heavy atom ...?.? missing in residue\s+ [0-9]+"
     )
     outfile_lines = qprep_out_path.read_text().split("\n")
     error_lines = []

From 97de28c129f4aa0e74828d55c9101997884e6f20 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Mon, 25 Aug 2025 23:43:59 +0200
Subject: [PATCH 10/20] more informative qprep error when ligands aren't
 present

---
 src/QligFEP/qligfep.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/QligFEP/qligfep.py b/src/QligFEP/qligfep.py
index 6daa1197..ff77513b 100644
--- a/src/QligFEP/qligfep.py
+++ b/src/QligFEP/qligfep.py
@@ -10,7 +10,7 @@
 import pandas as pd
 from sklearn.neighbors import NearestNeighbors
 
-from .CLI.qprep_cli import qprep_error_check
+from .CLI.qprep_cli import QprepError, qprep_error_check
 from .CLI.utils import get_avail_restraint_methods, handle_cysbonds
 from .functions import COG, kT, overlapping_pairs, sigmoid
 from .IO import get_force_field_paths, replace, run_command
@@ -517,6 +517,13 @@ def set_restraints(self, writedir, restraint_method, strict_check: bool = True)
                 )
             subset_lig1 = pdb_df.query("residue_name == 'LIG'")
             subset_lig2 = pdb_df.query("residue_name == 'LID'")
+            logger.debug(f"lig1.shape: {subset_lig1.shape}")
+            logger.debug(f"lig2.shape: {subset_lig2.shape}")
+            if any([subset_lig1.shape[0] == 0, subset_lig2.shape[0] == 0]):
+                raise QprepError(
+                    "Something went wrong while preparing the protein edge. Please have a look at the "
+                    f"input files and at the qprep.out within {Path(writedir / 'qprep.inp')}"
+                )
             lig1_path = parent_write_dir / f"{self.lig1}.sdf"
             lig2_path = parent_write_dir / f"{self.lig2}.sdf"
             if not lig1_path.exists() or not lig2_path.exists():

From 712f9455e8ea64c7617780136f19383755d0ce1a Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Wed, 27 Aug 2025 12:08:17 +0200
Subject: [PATCH 11/20] isort

---
 src/QligFEP/CLI/cog_cli.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/QligFEP/CLI/cog_cli.py b/src/QligFEP/CLI/cog_cli.py
index b8071357..d9e258c8 100644
--- a/src/QligFEP/CLI/cog_cli.py
+++ b/src/QligFEP/CLI/cog_cli.py
@@ -6,6 +6,7 @@
 
 from ..logger import logger, setup_logger
 
+
 class MolecularCOG:
     """Class implemented to calculate the center of geometry of a molecule. It supports
     PDB and SDF file formats. The center of geometry is calculated as the average of all

From a477895f49b56a8d578d07a146575a194169ece5 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Wed, 27 Aug 2025 12:13:06 +0200
Subject: [PATCH 12/20] more informative error when generated topology doesn't
 contain the ligands

---
 src/QligFEP/qligfep.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/QligFEP/qligfep.py b/src/QligFEP/qligfep.py
index ff77513b..1ffc38a0 100644
--- a/src/QligFEP/qligfep.py
+++ b/src/QligFEP/qligfep.py
@@ -10,7 +10,7 @@
 import pandas as pd
 from sklearn.neighbors import NearestNeighbors
 
-from .CLI.qprep_cli import QprepError, qprep_error_check
+from .CLI.qprep_cli import qprep_error_check
 from .CLI.utils import get_avail_restraint_methods, handle_cysbonds
 from .functions import COG, kT, overlapping_pairs, sigmoid
 from .IO import get_force_field_paths, replace, run_command
@@ -515,15 +515,10 @@ def set_restraints(self, writedir, restraint_method, strict_check: bool = True)
                     f"System {self.system} not supported by this distance "
                     "restraint method. Please use 'protein' or 'water'."
                 )
-            subset_lig1 = pdb_df.query("residue_name == 'LIG'")
-            subset_lig2 = pdb_df.query("residue_name == 'LID'")
-            logger.debug(f"lig1.shape: {subset_lig1.shape}")
-            logger.debug(f"lig2.shape: {subset_lig2.shape}")
-            if any([subset_lig1.shape[0] == 0, subset_lig2.shape[0] == 0]):
-                raise QprepError(
-                    "Something went wrong while preparing the protein edge. Please have a look at the "
-                    f"input files and at the qprep.out within {Path(writedir / 'qprep.inp')}"
-                )
+            subset_lig1 = pdb_df.query("residue_name == 'LIG'").reset_index()
+            subset_lig2 = pdb_df.query("residue_name == 'LID'").reset_index()
+            logger.debug(f"Subset LIG shape: {subset_lig1.shape}")
+            logger.debug(f"Subset LID shape: {subset_lig2.shape}")
             lig1_path = parent_write_dir / f"{self.lig1}.sdf"
             lig2_path = parent_write_dir / f"{self.lig2}.sdf"
             if not lig1_path.exists() or not lig2_path.exists():
@@ -552,8 +547,17 @@ def set_restraints(self, writedir, restraint_method, strict_check: bool = True)
                     rdLig1 = rsetter.molA.to_rdkit()
                     rdLig2 = rsetter.molB.to_rdkit()
                     for AtomIdx_Lig1, AtomIdx_Lig2 in restraints.items():
-                        rowLig1 = subset_lig1.iloc[AtomIdx_Lig1]
-                        rowLig2 = subset_lig2.iloc[AtomIdx_Lig2]
+                        try:
+                            rowLig1 = subset_lig1.iloc[AtomIdx_Lig1]
+                            rowLig2 = subset_lig2.iloc[AtomIdx_Lig2]
+                        except IndexError:
+                            qprep_out_path = Path(writedir) / "qprep.out"
+                            logger.error(
+                                f"Index error for atom {AtomIdx_Lig1} in LIG or {AtomIdx_Lig2} in LID. "
+                                "It's likely coming from a qprep failure.\nPlease check the qprep output at "
+                                f"{qprep_out_path}."
+                            )
+                            qprep_error_check(qprep_out_path, self.FF)
                         atom1_in_pdb = rowLig1["atom_name"].strip("1234567890")
                         atom1_in_rdkit = rdLig1.GetAtomWithIdx(AtomIdx_Lig1).GetSymbol()
                         atom2_in_pdb = rowLig2["atom_name"].strip("1234567890")

From 3b7f2987cdb0e8e6257e35e552e7676e57f88b62 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Sat, 30 Aug 2025 22:09:11 +0200
Subject: [PATCH 13/20] change typo

---
 src/QligFEP/CLI/qprep_cli.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/QligFEP/CLI/qprep_cli.py b/src/QligFEP/CLI/qprep_cli.py
index 0b7acc48..7a8ce426 100644
--- a/src/QligFEP/CLI/qprep_cli.py
+++ b/src/QligFEP/CLI/qprep_cli.py
@@ -218,7 +218,7 @@ def _modify_residues(self, df, residues_to_modify):
             new_name = res_info["neutral_form"]
             distance = res_info["distance"]
 
-            # Change residue nameand remove atoms based on residue type
+            # Change residue name and remove atoms based on residue type
             mask = (modified_df["chain_id"] == chain) & (modified_df["residue_seq_number"] == res_num)
             modified_df.loc[mask, "residue_name"] = new_name
             atoms_to_remove = self._get_atoms_to_remove(old_name, new_name)
@@ -276,16 +276,6 @@ def _log_neutralization_stats(self):
         """Log neutralization statistics"""
         stats = self.stats
 
-        logger.info(
-            f"Neutralization statistics:\n"
-            f"  Total charged residues found: {stats['total_charged_residues']}"
-            f"  Residues outside boundary ({self.rest_bound:.1f}Å): {stats['residues_outside_boundary']}"
-            f"  Salt bridge pairs neutralized: {stats['salt_bridges_neutralized']}"
-            f"  Total residues neutralized: {stats['residues_neutralized']}"
-            f"  Original total charge: {stats['original_total_charge']:+d}"
-            f"  Final total charge: {stats['final_total_charge']:+d}"
-        )
-
         if stats["remaining_outside_charged"]:
             for res in stats["remaining_outside_charged"]:
                 logger.warning(

From c8e81e8e540d2463e82e41dcb8266527a85cef19 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Sat, 30 Aug 2025 22:10:01 +0200
Subject: [PATCH 14/20] max residue code length needs to be 3

---
 src/QligFEP/FF/AMBER14sb.lib | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/QligFEP/FF/AMBER14sb.lib b/src/QligFEP/FF/AMBER14sb.lib
index 06dc6ffb..6bcc70da 100644
--- a/src/QligFEP/FF/AMBER14sb.lib
+++ b/src/QligFEP/FF/AMBER14sb.lib
@@ -3452,7 +3452,7 @@
         head N
         tail C
 *--------------------------------------------------------------------------------
-{POPC}
+{POP}
     [atoms]
         ! Phosphocholine head group (PC residue atoms)
         1  C11    cC             0.910403

From 13536223b51c1cb4a07d541b1e0d3d58d36d1125 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Sat, 30 Aug 2025 23:05:10 +0200
Subject: [PATCH 15/20] add solution to convert popc molecules to Amber-like
 format within prepareation time

---
 src/QligFEP/CLI/_popc_utils.py | 340 +++++++++++++++++++++++++++++++++
 src/QligFEP/CLI/qprep_cli.py   |  37 +++-
 2 files changed, 374 insertions(+), 3 deletions(-)
 create mode 100644 src/QligFEP/CLI/_popc_utils.py

diff --git a/src/QligFEP/CLI/_popc_utils.py b/src/QligFEP/CLI/_popc_utils.py
new file mode 100644
index 00000000..f270415d
--- /dev/null
+++ b/src/QligFEP/CLI/_popc_utils.py
@@ -0,0 +1,340 @@
+from ..logger import logger
+from ..pdb_utils import write_dataframe_to_pdb
+
+
+def atom_element_mapping():
+    """
+    Create a mapping of POPC atom names to their correct elements.
+
+    Returns:
+        dict: {atom_name: element_symbol}
+    """
+    # Based on AMBER14sb_popc-only.lib atom types and molecular weights
+    element_mapping = {
+        # Carbon atoms (molecular weight ~12.01)
+        "C1": "C",
+        "C2": "C",
+        "C3": "C",
+        "C11": "C",
+        "C12": "C",
+        "C13": "C",
+        "C14": "C",
+        "C15": "C",
+        "C16": "C",
+        "C17": "C",
+        "C18": "C",
+        "C19": "C",
+        "C21": "C",
+        "C22": "C",
+        "C23": "C",
+        "C24": "C",
+        "C25": "C",
+        "C26": "C",
+        "C27": "C",
+        "C28": "C",
+        "C29": "C",
+        "C31": "C",
+        "C32": "C",
+        "C33": "C",
+        "C34": "C",
+        "C35": "C",
+        "C110": "C",
+        "C111": "C",
+        "C112": "C",
+        "C113": "C",
+        "C114": "C",
+        "C115": "C",
+        "C116": "C",
+        "C210": "C",
+        "C211": "C",
+        "C212": "C",
+        "C213": "C",
+        "C214": "C",
+        "C215": "C",
+        "C216": "C",
+        "C217": "C",
+        "C218": "C",
+        "CA1": "C",
+        "CA2": "C",
+        # Nitrogen atoms (molecular weight ~14.01)
+        "N4": "N",
+        "N31": "N",
+        # Oxygen atoms (molecular weight ~16.00)
+        "O7": "O",
+        "O9": "O",
+        "O10": "O",
+        "O11": "O",
+        "O12": "O",
+        "O14": "O",
+        "O16": "O",
+        "O21": "O",
+        "O22": "O",
+        "O31": "O",
+        "O32": "O",
+        "O33": "O",
+        "O34": "O",
+        "O35": "O",
+        # Phosphorus atoms (molecular weight ~30.97)
+        "P8": "P",
+        "P31": "P",
+        # Hydrogen atoms (molecular weight ~1.01) - all H atoms start with H
+        "HR": "H",
+        "HS": "H",
+        "HX": "H",
+        "HA": "H",
+        "HB": "H",
+        "H1A": "H",
+        "H1B": "H",
+        "H2A": "H",
+        "H2B": "H",
+        "H2R": "H",
+        "H2S": "H",
+        "H3A": "H",
+        "H3B": "H",
+        "H3C": "H",
+        "H3R": "H",
+        "H3S": "H",
+        "H4A": "H",
+        "H4B": "H",
+        "H4C": "H",
+        "H4R": "H",
+        "H4S": "H",
+        "H5A": "H",
+        "H5B": "H",
+        "H5C": "H",
+        "H5R": "H",
+        "H5S": "H",
+        "H6R": "H",
+        "H6S": "H",
+        "H7R": "H",
+        "H7S": "H",
+        "H8R": "H",
+        "H8S": "H",
+        "H9R": "H",
+        "H9S": "H",
+        "H10R": "H",
+        "H10S": "H",
+        "H11R": "H",
+        "H11S": "H",
+        "H12R": "H",
+        "H12S": "H",
+        "H13R": "H",
+        "H13S": "H",
+        "H14R": "H",
+        "H14S": "H",
+        "H15R": "H",
+        "H15S": "H",
+        "H16R": "H",
+        "H16S": "H",
+        "H16T": "H",
+        "H22R": "H",
+        "H22S": "H",
+        "H23R": "H",
+        "H23S": "H",
+        "H24R": "H",
+        "H24S": "H",
+        "H25R": "H",
+        "H25S": "H",
+        "H26R": "H",
+        "H26S": "H",
+        "H27R": "H",
+        "H27S": "H",
+        "H28R": "H",
+        "H28S": "H",
+        "H29R": "H",
+        "H20R": "H",
+        "H21R": "H",
+        "H21S": "H",
+        "H22A": "H",
+        "H22B": "H",
+        "H23A": "H",
+        "H23B": "H",
+        "H24A": "H",
+        "H24B": "H",
+        "H25A": "H",
+        "H25B": "H",
+        "H26A": "H",
+        "H26B": "H",
+        "H26C": "H",
+        "H27A": "H",
+        "H27B": "H",
+        "H28A": "H",
+        "H28B": "H",
+        "H28C": "H",
+    }
+
+    return element_mapping
+
+
+def pymemdyn_to_amber_mapping():
+    """
+    Direct mapping from pymemdyn POPC atom names to unified format.
+    This skips the buildH step and only handles heavy atoms.
+    """
+    return {
+        # Choline head group - direct mapping from pymemdyn to unified
+        "C1": "C34",  # Choline methyl carbon
+        "C2": "C35",  # Choline methyl carbon
+        "C3": "C33",  # Choline methyl carbon
+        "N4": "N31",  # Choline nitrogen
+        "C5": "C31",  # Choline CH2 carbon
+        "C6": "C32",  # Choline CH2 carbon (connected to phosphate)
+        # Phosphate group
+        "O7": "O32",  # Phosphate-choline oxygen
+        "P8": "P31",  # Phosphorus
+        "O9": "O34",  # Phosphate oxygen
+        "O10": "O33",  # Phosphate oxygen
+        "O11": "O31",  # Phosphate-glycerol oxygen
+        # Glycerol backbone
+        "C12": "C3",  # Glycerol CH2 (sn-3 position)
+        "C13": "C2",  # Glycerol CH (sn-2 position)
+        "C32": "C1",  # Glycerol CH2 (sn-1 position)
+        # sn-2 ester linkage and carbonyl
+        "O14": "O21",  # sn-2 ester oxygen
+        "C15": "C21",  # sn-2 ester carbonyl carbon
+        "O16": "O22",  # sn-2 ester carbonyl oxygen
+        # sn-1 ester linkage and carbonyl
+        "O33": "O11",  # sn-1 ester oxygen
+        "C34": "C11",  # sn-1 ester carbonyl carbon
+        "O35": "O12",  # sn-1 ester carbonyl oxygen
+        # sn-2 fatty acid chain (oleic acid chain)
+        "C17": "C22",  # First carbon of sn-2 chain
+        "C18": "C23",
+        "C19": "C24",
+        "C20": "C25",
+        "C21": "C26",
+        "C22": "C27",
+        "C23": "C28",
+        "C24": "C29",  # Before double bond
+        "C25": "C210",  # Double bond carbon
+        "C26": "C211",  # Double bond carbon
+        "C27": "C212",  # After double bond
+        "C28": "C213",
+        "C29": "C214",
+        "C30": "C215",
+        "C31": "C216",
+        "CA1": "C217",  # Additional carbons in oleic chain
+        "CA2": "C218",  # Terminal methyl
+        # sn-1 fatty acid chain (palmitic acid chain)
+        "C36": "C12",  # First carbon of sn-1 chain
+        "C37": "C13",
+        "C38": "C14",
+        "C39": "C15",
+        "C40": "C16",
+        "C41": "C17",
+        "C42": "C18",
+        "C43": "C19",
+        "C44": "C110",
+        "C45": "C111",
+        "C46": "C112",
+        "C47": "C113",
+        "C48": "C114",
+        "C49": "C115",
+        "C50": "C116",  # Terminal methyl of palmitic chain
+    }
+
+
+def has_pop_residues(pdb_data):
+    """
+    Check if the PDB data contains any POP residues.
+
+    Args:
+        pdb_data: DataFrame containing PDB data
+
+    Returns:
+        tuple: (bool: True if POP residues are found, int: Number of POP residues found)
+    """
+    # Use value_counts for more efficient counting
+    residue_counts = pdb_data["residue_name"].value_counts()
+    has_pop = "POP" in residue_counts
+
+    if has_pop:
+        # Count unique residue instances (chain_id + residue_seq_number combinations)
+        pop_residues = pdb_data[pdb_data["residue_name"] == "POP"]
+        pop_count = len(pop_residues.groupby(["chain_id", "residue_seq_number"]))
+        return True, pop_count
+    else:
+        return False, 0
+
+
+def convert_pymemdyn_to_unified_dataframe(pdb_data):
+    """Convert pymemdyn POPC format directly to unified format for DataFrame using vectorized operations."""
+    logger.info("Converting POP residues from pymemdyn to unified format...")
+
+    # Get mapping
+    mapping = pymemdyn_to_amber_mapping()
+
+    # Create a mask for POP residues
+    pop_mask = pdb_data["residue_name"] == "POP"
+
+    if not pop_mask.any():
+        return pdb_data
+
+    pop_residues_count = len(pdb_data[pop_mask].groupby(["chain_id", "residue_seq_number"]))
+    pdb_data_corrected = pdb_data.copy()
+
+    # Apply atom name mapping only to the POPC atoms
+    pop_atoms = pdb_data_corrected.loc[pop_mask, "atom_name"]
+    mapped_atoms = pop_atoms.map(mapping)
+
+    # keep original for unmapped atoms
+    unmapped_mask = mapped_atoms.isna()
+    mapped_atoms.loc[unmapped_mask] = pop_atoms.loc[unmapped_mask]
+    pdb_data_corrected.loc[pop_mask, "atom_name"] = mapped_atoms
+
+    # Set default values for missing fields
+    occupancy_mask = pop_mask & (
+        pdb_data_corrected["occupancy"].isna() | (pdb_data_corrected["occupancy"] == "")
+    )
+    temp_factor_mask = pop_mask & (
+        pdb_data_corrected["temp_factor"].isna() | (pdb_data_corrected["temp_factor"] == "")
+    )
+    charge_mask = pop_mask & (pdb_data_corrected["charge"].isna() | (pdb_data_corrected["charge"] == ""))
+
+    pdb_data_corrected.loc[occupancy_mask, "occupancy"] = 1.00
+    pdb_data_corrected.loc[temp_factor_mask, "temp_factor"] = 0.00
+    pdb_data_corrected.loc[charge_mask, "charge"] = ""
+
+    logger.debug(f"POP lipid conversion completed for {pop_residues_count} residue(s)")
+    logger.debug("Element symbols will be corrected based on atom names during file writing")
+
+    unmapped_atoms = set(pop_atoms.loc[unmapped_mask].unique()) if unmapped_mask.any() else set()
+    if unmapped_atoms:
+        logger.warning(f"Unmapped atoms (kept original names): {unmapped_atoms}")
+
+    return pdb_data_corrected
+
+
+def correct_popc_elements(df):
+    """
+    Correct element symbols for POP residues using vectorized operations.
+    """
+    element_mapping = atom_element_mapping()
+    pop_mask = df["residue_name"] == "POP"
+
+    if not pop_mask.any():
+        return df
+
+    df_corrected = df.copy()
+
+    pop_atoms = df_corrected.loc[pop_mask, "atom_name"]
+    corrected_elements = pop_atoms.map(element_mapping)
+
+    # For atoms not in mapping, guess from first character (fallback)
+    missing_mask = corrected_elements.isna()
+    corrected_elements.loc[missing_mask] = pop_atoms.loc[missing_mask].str[0].fillna("C")
+
+    df_corrected.loc[pop_mask, "element_symbol"] = corrected_elements
+    return df_corrected
+
+
+def df_to_pdb_corrected_element(df, output_file, header=None):
+    """
+    Write DataFrame to PDB with element correction for lipid atoms using vectorized operations.
+    """
+    has_pop, _ = has_pop_residues(df)
+    if has_pop:
+        df_corrected = correct_popc_elements(df)
+        write_dataframe_to_pdb(df_corrected, output_file, header=header)
+    else:
+        write_dataframe_to_pdb(df, output_file, header=header)
diff --git a/src/QligFEP/CLI/qprep_cli.py b/src/QligFEP/CLI/qprep_cli.py
index 7a8ce426..76b443b9 100644
--- a/src/QligFEP/CLI/qprep_cli.py
+++ b/src/QligFEP/CLI/qprep_cli.py
@@ -15,6 +15,11 @@
     write_dataframe_to_pdb,
 )
 from ..settings.settings import CONFIGS, FF_DIR
+from ._popc_utils import (
+    convert_pymemdyn_to_unified_dataframe,
+    df_to_pdb_corrected_element,
+    has_pop_residues,
+)
 from .utils import handle_cysbonds
 
 # NOTE: cysbonds will have \n after each bond -> `maketop MKC_p` is in a different line
@@ -527,6 +532,19 @@ def main(args: Optional[argparse.Namespace] = None, **kwargs) -> None:
         processing_steps.append("neutralized")
         logger.info("Charged residues neutralized")
 
+    # Step 4: POPC lipid renaming (only for AMBER14sb with POP residues)
+    lipid_conversion_performed = False
+    pop_count = 0
+    if args.FF == "AMBER14sb":
+        has_pop, pop_count = has_pop_residues(pdb_data)
+        if has_pop:
+            logger.info(f"Detected {pop_count} POP lipid residue(s) with AMBER14sb forcefield")
+            logger.info("Converting pymemdyn POPC atom names to unified format compatible with AMBER14sb")
+            logger.info("Note: Qprep will add missing hydrogens automatically during topology generation")
+            pdb_data = convert_pymemdyn_to_unified_dataframe(pdb_data)
+            processing_steps.append("lipid_renamed")
+            lipid_conversion_performed = True
+
     # Create final processed PDB file with descriptive name
     if processing_steps:
         processed_stem = f"{original_stem}_{'_'.join(processing_steps)}"
@@ -534,7 +552,11 @@ def main(args: Optional[argparse.Namespace] = None, **kwargs) -> None:
     else:
         processed_pdb_path = pdb_path
 
-    write_dataframe_to_pdb(pdb_data, processed_pdb_path)
+    # Use element-corrected writing if lipids were converted
+    if lipid_conversion_performed:
+        df_to_pdb_corrected_element(pdb_data, processed_pdb_path)
+    else:
+        write_dataframe_to_pdb(pdb_data, processed_pdb_path)
     pdb_file = str(processed_pdb_path)
     pdb_path = processed_pdb_path
     logger.info(f"Final processed protein saved as: {processed_pdb_path}")
@@ -568,8 +590,17 @@ def main(args: Optional[argparse.Namespace] = None, **kwargs) -> None:
     qprep_error_check(qprep_out_path, args.FF)
     logger.info("qprep run finished. Check the output `qprep.out` for more information.")
 
-    # Log neutralization summary if performed
-    if neutralization_stats and not args.skip_neutralization:
+    # Log lipid conversion summary if performed
+    if lipid_conversion_performed:
+        logger.info(
+            "LIPID CONVERSION SUMMARY\n"
+            f"Converted {pop_count} POP lipid residue(s) from pymemdyn to Amber-unified format. "
+            "With the correct heavy atom naming, qprep will add the missing hydrogen atoms. "
+            "For this, check the output top_p.pdb from this program."
+        )
+
+    # Log neutralization summary if performed and charged residues were found
+    if neutralization_stats and not args.skip_neutralization and neutralization_stats['total_charged_residues'] > 0:
         logger.info(
             "NEUTRALIZATION SUMMARY\n"
             f"Total charged residues processed: {neutralization_stats['total_charged_residues']}\n"

From ebd9f31cbb7c41b3de390f81f38a481319269c8e Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Mon, 1 Sep 2025 16:46:52 +0200
Subject: [PATCH 16/20] avoid populating results json with nan

---
 src/QligFEP/analyze_FEP.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/QligFEP/analyze_FEP.py b/src/QligFEP/analyze_FEP.py
index 0a803489..2ee3f700 100644
--- a/src/QligFEP/analyze_FEP.py
+++ b/src/QligFEP/analyze_FEP.py
@@ -416,9 +416,9 @@ def calculate_ddG(self, water_sys: str = "1.water", protein_sys: str = "2.protei
                 self.data["result"][delta_method].update(
                     {
                         fep: {
-                            f"{delta_method}_avg": ddG,
-                            f"{delta_method}_sem": ddG_sem,
-                            f"{delta_method}_std": ddG_std,
+                            f"{delta_method}_avg": (ddG if not np.isnan(ddG) else None),
+                            f"{delta_method}_sem": (ddG_sem if not np.isnan(ddG_sem) else None),
+                            f"{delta_method}_std": (ddG_std if not np.isnan(ddG_std) else None),
                             "from": w_fep["from"],  # the same for protein & water; can use either
                             "to": w_fep["to"],
                         }
@@ -472,11 +472,15 @@ def populate_mapping_dictionary(self, method, output_file: str | Path | None = N
             for fep in self.feps:
                 fep_dict = self.data["result"][method][fep]
                 if fep_dict["from"] == _from and fep_dict["to"] == _to:
+                    avg_val = fep_dict[f"{method}_avg"] if not np.isnan(fep_dict[f"{method}_avg"]) else None
+                    sem_val = fep_dict[f"{method}_sem"] if not np.isnan(fep_dict[f"{method}_sem"]) else None
+                    std_val = fep_dict[f"{method}_std"] if not np.isnan(fep_dict[f"{method}_std"]) else None
+
                     edge.update(
                         {
-                            "Q_ddG_avg": fep_dict[f"{method}_avg"],
-                            "Q_ddG_sem": fep_dict[f"{method}_sem"],
-                            "Q_ddG_std": fep_dict[f"{method}_std"],
+                            "Q_ddG_avg": avg_val,
+                            "Q_ddG_sem": sem_val,
+                            "Q_ddG_std": std_val,
                         }
                     )
         if output_file is not None:

From 40bc1b6e59d4c2df643ac6771d429cdfcad3f882 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Wed, 10 Sep 2025 10:11:24 +0200
Subject: [PATCH 17/20] change y axis to calc for consistency with the
 manuscript

---
 src/QligFEP/analyze_FEP.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/QligFEP/analyze_FEP.py b/src/QligFEP/analyze_FEP.py
index 2ee3f700..b9c945ad 100644
--- a/src/QligFEP/analyze_FEP.py
+++ b/src/QligFEP/analyze_FEP.py
@@ -650,7 +650,7 @@ def result_to_latex(res, latexify_each=False):  # TODO: move this out of this me
             f"{len(exp_values)}$)"
         )
         plt.xlabel("$\Delta\Delta G_{exp} (kcal/mol)$")  # noqa: W605
-        plt.ylabel("$\Delta\Delta G_{pred} (kcal/mol)$")  # noqa: W605
+        plt.ylabel("$\Delta\Delta G_{calc} (kcal/mol)$")  # noqa: W605
         plt.xlim(min_val, max_val)
         plt.ylim(min_val, max_val)
         ax.set_aspect("equal", adjustable="box")

From a00ce4e04413e224c69ce467ceb90bebc623b0ea Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Mon, 15 Sep 2025 17:35:05 +0200
Subject: [PATCH 18/20] update readme with instructions and other clarity
 improvements

---
 README.md | 50 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index e95f051c..73b9657c 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 ## ⚙️ Installation
 
-The current conda environment is available in the `environment.yml` file, but installing it with `conda env create -f environment.yml` will take a long time. Instead, we recommend that you use `mamba` or its lightweight version `micromamba`. Please check this Gist on how to [install micromamba](https://gist.github.com/David-Araripe/3ecd90bfbfd1c8e813812a203384b3c0).
+We recommend that you use `mamba` or, preferably, its lightweight version `micromamba`. Please check this link on how to [install it](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html).
 
 Once you have `micromamba` installed and have already cloned this repo, you can create the environment with:
 
@@ -22,7 +22,7 @@ python -m pip install -e .
 ```
 
 <details>
-<summary>In one line...</summary>
+<summary>To install everything in one line...</summary>
 
 ```bash
 micromamba create -n qligfep_new python=3.11 -c conda-forge -y && micromamba activate qligfep_new && micromamba install openff-toolkit=0.16.4 "openff-utilities>=0.1.12" openff-forcefields=2024.09.0 openmm=8.1.1 "openff-nagl>=0.3.8" lomap2 kartograf michellab::fkcombu -c conda-forge --yes && python -m pip install -e .
@@ -31,20 +31,38 @@ micromamba create -n qligfep_new python=3.11 -c conda-forge -y && micromamba act
 
 ### 🍎 MacOS
 
-The environment provided doesn't build on Mac due to missing libraries. If you're using this operating system, you'll have to create the environment by scratch through the commands:
+Similar to Linux, you can create the environment and install the required packages with:
 
 ``` bash
-micromamba create -n qligfep_new python=3.11 openff-toolkit=0.16.4 "openff-utilities>=0.1.12" openff-forcefields=2024.09.0 openmm=8.1.1 lomap2 kartograf michellab::fkcombu -c conda-forge --yes
+micromamba create -n qligfep_new python=3.11 openff-toolkit=0.16.4 "openff-utilities>=0.1.12" openff-forcefields=2024.09.0 openmm=8.1.1 lomap2 kartograf davidararipe::kcombu_bss -c conda-forge --yes
 micromamba activate qligfep_new
 python -m pip install joblib scipy tqdm
 python -m pip install -e .
 ```
 
+<details>
+<summary>To install everything in one line...</summary>
+
+```bash
+micromamba create -n qligfep_new python=3.11 openff-toolkit=0.16.4 "openff-utilities>=0.1.12" openff-forcefields=2024.09.0 openmm=8.1.1 lomap2 kartograf davidararipe::kcombu_bss -c conda-forge --yes && micromamba activate qligfep_new && python -m pip install joblib scipy tqdm && python -m pip install -e .
+```
+</details>
+
 ### 🛠️ Compiling Q
 
-To compile the Q binaries, you will need to have the `gfortran` compiler installed. To access this compiler on different HPCs, you can use the `module load` command. Check [here](/src/QligFEP/settings/settings.py) for a list of the different HPCs we have already ran RBFE simulations on.
+> 💻 **If you're working locally**, If you're working locally, install `gfortran` using micromamba to compile Q, which is required for QligFEP's topology generation via `qprep`. Run the following commands:
 
-E.g.: To compile Q on Snellius, you will have to run the following commands:
+```bash
+micromamba install gfortran=11.3.0 -c conda-forge --yes
+cd src/q6
+make all COMP=gcc && cd ../..
+```
+
+If you're working on an HPC system, you will want to access `gfortran` through a pre-existing module avaiable in your system. This is done using the `module load` command. Check [here](/src/QligFEP/settings/settings.py) for a list of the different HPCs we have already ran RBFE simulations on. Module availability and loading is system-dependent, so please check with your system administrator if you have any issues.
+
+_Example_:
+
+To compile Q on Snellius, you will have to run the following commands:
 ```bash
 module load 2021
 module load gompi/2021a
@@ -54,19 +72,21 @@ Finally, compiling Q can be done with the following commands:
 make all COMP=gcc && make mpi COMP=gcc
 ```
 
+_Note_: see how we don't use `make mpi` locally. This is because we don't need parallelization for running `qprep`, which is the only Q program we use locally.
+
 ## ⌨️ Command line interface (CLI)
 
 Now you're set with the qligfep package. This includes the command-linde-interfaces (CLIs):
 
 1. `qparams`: used to generate ligand parameters;
-1. `pdb2amber`: formats a PDB file to be used with Q's implementation of the AMBER forcefield;
-1. `qlomap`: wraps `Lomap` to generate the `.json` perturbation mapping;
-1. `qmapfep`: in-house developed method to generate the `.json` perturbation mapping, interactively visualize and add or remove edges.
-1. `qligfep`: main CLI for running QligFEP simulations.
-1. `setupFEP`: sets up all the the QligFEP files for a simulation, including protein and water systems.
-1. `qligfep_analyze`: CLI to analyze the results of a QligFEP simulation.
-1. `qcog`: calculates the center of geometry (COG) of a ligand in a PDB/SDF file. If multiple ligands are found in sdf, the program will calculate the COG for all of them
-1. `qprep_prot`: creates an input file for qprep (fortran program) and runs it to either: 1) solvate the protein structure; 2) create the water sphere.
+2. `pdb2amber`: formats a PDB file to be used with Q's implementation of the AMBER forcefield;
+3. `qlomap`: wraps `Lomap` to generate the `.json` perturbation mapping;
+4. `qmapfep`: in-house developed method to generate the `.json` perturbation mapping, interactively visualize and add or remove edges.
+5. `qligfep`: main CLI for running QligFEP simulations.
+6. `setupFEP`: sets up all the the QligFEP files for a simulation, including protein and water systems.
+7. `qligfep_analyze`: CLI to analyze the results of a QligFEP simulation.
+8. `qcog`: calculates the center of geometry (COG) of a ligand in a PDB/SDF file. If multiple ligands are found in sdf, the program will calculate the COG for all of them
+9. `qprep_prot`: creates an input file for qprep (fortran program) and runs it to either: 1) solvate the protein structure; 2) create the water sphere.
 
 # Q-GPU #
 Version control of **Q-GPU**, an adaptation of **Q** version 5.06 running on GPUs.
@@ -81,7 +101,7 @@ This version includes a translation of the original **Q** fortran code to C/CUDA
 
 
 ## Authors ##
-Chiel Jespers, Willem Jespers, Mauricio Esguerra, Johan Åqvist, Hugo Gutiérrez‐de‐Terán
+Chiel Jespers, David Araripe, Willem Jespers, Mauricio Esguerra, Johan Åqvist, Hugo Gutiérrez‐de‐Terán
 
 
 ## Installation ##

From 2fc096220765b86e74b4c0139f581894426c7250 Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Tue, 16 Sep 2025 16:52:15 +0200
Subject: [PATCH 19/20] add nagl to macos environment

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 73b9657c..f3a00bd0 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ micromamba create -n qligfep_new python=3.11 -c conda-forge -y && micromamba act
 Similar to Linux, you can create the environment and install the required packages with:
 
 ``` bash
-micromamba create -n qligfep_new python=3.11 openff-toolkit=0.16.4 "openff-utilities>=0.1.12" openff-forcefields=2024.09.0 openmm=8.1.1 lomap2 kartograf davidararipe::kcombu_bss -c conda-forge --yes
+micromamba create -n qligfep_new python=3.11 openff-toolkit=0.16.4 "openff-utilities>=0.1.12" openff-forcefields=2024.09.0 openmm=8.1.1 "openff-nagl>=0.3.8" lomap2 kartograf davidararipe::kcombu_bss -c conda-forge --yes
 micromamba activate qligfep_new
 python -m pip install joblib scipy tqdm
 python -m pip install -e .

From dfd6cf30fed5ed2ccc72998140d48586e8a870fb Mon Sep 17 00:00:00 2001
From: David-Araripe <david.araripe17@gmail.com>
Date: Tue, 16 Sep 2025 17:17:14 +0200
Subject: [PATCH 20/20] update tutorial

---
 tutorials/Tyk2/README.md | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/tutorials/Tyk2/README.md b/tutorials/Tyk2/README.md
index e030c35f..aeae740c 100644
--- a/tutorials/Tyk2/README.md
+++ b/tutorials/Tyk2/README.md
@@ -16,7 +16,13 @@ The workflow consists of the following steps:
 
 # Tutorial
 
-We start this tutorial preparing the ligands for the simulations. For this, go to the directory with the ligand files:
+## Prerequisites
+
+Before starting this tutorial, ensure that:
+
+**Q6 is compiled**: Navigate to `src/q6` directory and run `make all && make mpi` to compile the Q6 binaries before proceeding.
+
+We start this tutorial preparing the ligands for the simulations. Navigate to the directory with the ligand files:
 ```bash
 cd tutorials/Tyk2/ligprep
 ```
@@ -44,10 +50,11 @@ cp ligprep/tyk2_ligands/*.sdf ligprep/tyk2_ligands/lomap.json setupFEP/
 
 Visualizing and interacting with the generated perturbation network from `qlomap` is possible by using the `qmapfep` program. For this, _make sure you're working locally (not on a remote machine)_.
 
-Now that the `lomap.json` was created under `tyk2_ligands/lomap.json`, let's use both the input `.sdf` file and the generated mapping `.json` file to crate the visualizer. Run:
+Now that the `lomap.json` was created under `tyk2_ligands/lomap.json`, let's use both the input `.sdf` file and the generated mapping `.json` file to crate the visualizer. From the main Tyk2 directory, run:
 
 ```bash
-qmapfep -i tyk2_ligands.sdf -wd . -l tyk2_ligands/lomap.json 
+cd ../  # Go back to tutorials/Tyk2 directory
+qmapfep -i ligprep/tyk2_ligands.sdf -wd . -l ligprep/tyk2_ligands/lomap.json 
 ```
 
 Two new files should be created: **qmapfep.html** and **tyk2_ligands.json**. To visualize the perturbation network, open the `qmapfep.html` file in your browser.
@@ -67,21 +74,23 @@ To add new edges to the system, press *edit* on the top left corner and proceed
 
 ## Water sphere
 
-Now we just need to prepare our water sphere. The first step is to calculate the center of geometry of the ligand. For this, we can use the `qcog` command:
+Now we just need to prepare our water sphere. The first step is to calculate the center of geometry of the ligand. For this, we can use the `qcog` command (make sure you're in the main Tyk2 directory):
 ```bash
 qcog -i ligprep/tyk2_ligands.sdf
 ```
-The final value printed by this function is the geometric center of all ligands within the series (in the sdf file). You should see an output like like this:
+The final value printed by this function is the geometric center of all ligands within the series (in the sdf file). You should see an output like this:
 ```text
-| INFO     | QligFEP.CLI.cog_cli:main:116 - Center of geometry: [-4.727 26.163 -30.542]
+| INFO     | QligFEP.CLI.cog_cli:main:116 - Center of geometry: [-4.689 26.119 -30.570]
 ```
+**Note**: Use the center of geometry values from your own `qcog` calculation, as they may differ slightly from this example.
+
 We'll use this value as the center of our water sphere. We also use the Amber forcefield for this tutorial, so let's prepare the protein in:
 ```bash
 cd setupFEP/amber
 ```
-To generate the water sphere, we then run:
+To generate the water sphere, we then run (replace the coordinates with your own values from `qcog`):
 ```bash
-qprep_prot -cog -4.727 26.163 -30.542 -i protein.pdb -FF AMBER14sb -r 25
+qprep_prot -cog -4.689 26.119 -30.570 -i protein.pdb -FF AMBER14sb -r 25
 ```
 Done! No we move both the protein and the water `.pdb` files to the same directory containing our ligand files. To do this, run the command:
 ```bash
@@ -91,8 +100,9 @@ The next step is creating the job submission files to run the FEP calculations o
 
 ## Setup FEP
 
-For preparing the files, we run:
+For preparing the files, navigate to the setupFEP directory and run:
 ```bash
+cd ../  # Go back to setupFEP directory if you're still in setupFEP/amber
 setupFEP -FF AMBER14sb -r 25 -ts 2fs -j lomap.json -clean dcd -rs 42 -c SNELLIUS
 ```
 The explanation of the flags is as follows:
@@ -102,7 +112,7 @@ The explanation of the flags is as follows:
 - `-j lomap.json`: The perturbation network generated by lomap;
 - `-clean dcd`: The suffix of the files to be removed within the job run. Removing the `.dcd` files will save a lot of space, since they are the largest files generated by the simulation;
 - `-rs 42`: The random seed for the simulation.
-- `-c CSB`: The cluster for which the job submission files will be generated. This takes configurations from the `CLUSTER_DICT` global variable within the [settings.py](../../src/QligFEP/settings/settings.py#L139-L150) module.
+- `-c SNELLIUS`: The cluster for which the job submission files will be generated. This takes configurations from the `CLUSTER_DICT` global variable within the [settings.py](../../src/QligFEP/settings/settings.py#L139-L150) module.
 
 ## Restraint setting