OpenFreeEnergy · atravitz · Apr 24, 2025 · Apr 22, 2025 · Apr 23, 2025 · Apr 23, 2025
diff --git a/news/pretty_print.rst b/news/pretty_print.rst
@@ -0,0 +1,23 @@
+**Added:**
+
+* <news item>
+
+**Changed:**
+
+* Improved formatting of ``openfe gather`` output tables. Use ``--tsv`` to instead view the raw tsv formatted output (this was the default behavior as of v1.3.x)
+
+**Deprecated:**
+
+* <news item>
+
+**Removed:**
+
+* <news item>
+
+**Fixed:**
+
+* <news item>
+
+**Security:**
+
+* <news item>
diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
@@ -545,6 +545,27 @@ def _get_legs_from_result_jsons(
 
     return legs
 
+
+def rich_print_to_stdout(df: pd.DataFrame) -> None:
+    """Use rich to pretty print a table to stdout."""
+
+    from rich.console import Console
+    from rich.table import Table
+    from rich import box
+
+    table = Table(box=box.SQUARE)
+
+    for col in df.columns:
+        table.add_column(col)
+
+    for row_values in df.values:
+        row = [str(val) for val in row_values]
+        table.add_row(*row)
+
+    console = Console()
+    console.print(table)
+
+
 @click.command(
     'gather',
     short_help="Gather result jsons for network of RFE results into a TSV file"
@@ -558,26 +579,37 @@ def _get_legs_from_result_jsons(
     '--report',
     type=HyphenAwareChoice(['dg', 'ddg', 'raw'],
                            case_sensitive=False),
-    default="dg", show_default=True,
+    default="dg",
+    show_default=True,
     help=(
         "What data to report. 'dg' gives maximum-likelihood estimate of "
         "absolute deltaG,  'ddg' gives delta-delta-G, and 'raw' gives "
         "the raw result of the deltaG for a leg."
     )
 )
-@click.option('output', '-o',
-              type=click.File(mode='w'),
-              default='-')
+@click.option("output", "-o", type=click.File(mode="w"), default="-")
+@click.option(
+    "--tsv",
+    is_flag=True,
+    default=False,
+    help=("Results that are output to stdout will be formatted as tab-separated, "
+          "identical to the formatting used when writing to file."
+          "By default, the output table will be formatted for human-readability."
+    ),
+)
 @click.option(
-    '--allow-partial', is_flag=True, default=False,
+    "--allow-partial",
+    is_flag=True,
+    default=False,
     help=(
         "Do not raise errors if results are missing parts for some edges. "
         "(Skip those edges and issue warning instead.)"
-    )
+    ),
 )
 def gather(results:List[os.PathLike|str],
            output:os.PathLike|str,
            report:Literal['dg','ddg','raw'],
+           tsv:bool,
            allow_partial:bool
            ):
     """Gather simulation result JSON files of relative calculations to a tsv file.
@@ -622,11 +654,16 @@ def gather(results:List[os.PathLike|str],
     df = report_func(legs, allow_partial)
 
     # write output
-    if isinstance(output, click.utils.LazyFile):
+    is_output_file = isinstance(output, click.utils.LazyFile)
+    if is_output_file:
         click.echo(f"writing {report} output to '{output.name}'")
+    if is_output_file or tsv:
+        df.to_csv(output, sep="\t", lineterminator="\n", index=False)
+
+    # TODO: we can add a --pretty flag if we want this to be optional/preserve backwards compatibility
+    else:
+        rich_print_to_stdout(df)
 
-    # TODO: we can use rich to make this output prettier
-    df.to_csv(output, sep="\t", lineterminator='\n', index=False)
 
 PLUGIN = OFECommandPlugin(
     command=gather,

diff --git a/openfecli/tests/commands/test_gather.py b/openfecli/tests/commands/test_gather.py
@@ -129,7 +129,6 @@ def test_get_legs_from_result_jsons(self, capsys, sim_result):
 
 def test_no_results_found():
     runner = CliRunner(mix_stderr=False)
-
     cli_result = runner.invoke(gather, "not_a_file.txt")
     assert cli_result.exit_code == 1
     assert "No results JSON files found" in str(cli_result.stderr)
@@ -241,7 +240,7 @@ def test_cmet_full_results(self, cmet_result_dir, report, file_regression):
         results = [str(cmet_result_dir / f'results_{i}') for i in range(3)]
         args = ["--report", report]
         runner = CliRunner(mix_stderr=False)
-        cli_result = runner.invoke(gather, results + args + ['-o', '-'])
+        cli_result = runner.invoke(gather, results + args + ['--tsv'])
 
         assert_click_success(cli_result)
         file_regression.check(cli_result.output, extension='.tsv')
@@ -253,7 +252,7 @@ def test_cmet_missing_complex_leg(self, cmet_result_dir, report, file_regression
         results = [str(cmet_result_dir / d) for d in ['results_0_partial', 'results_1', "results_2"]]
         args = ["--report", report]
         runner = CliRunner(mix_stderr=False)
-        cli_result = runner.invoke(gather, results + args + ['-o', '-'])
+        cli_result = runner.invoke(gather, results + args + ['--tsv'])
 
         assert_click_success(cli_result)
         file_regression.check(cli_result.output, extension='.tsv')
@@ -263,7 +262,7 @@ def test_cmet_missing_edge(self, cmet_result_dir, report,file_regression):
         results = [str(cmet_result_dir / f'results_{i}_remove_edge') for i in range(3)]
         args = ["--report", report]
         runner = CliRunner(mix_stderr=False)
-        cli_result = runner.invoke(gather, results + args + ['-o', '-'])
+        cli_result = runner.invoke(gather, results + args + ['--tsv'])
         file_regression.check(cli_result.output, extension='.tsv')
 
         assert_click_success(cli_result)
@@ -274,7 +273,7 @@ def test_cmet_failed_edge(self, cmet_result_dir, report, file_regression):
         results = [str(cmet_result_dir / f'results_{i}_failed_edge') for i in range(3)]
         args = ["--report", report]
         runner = CliRunner(mix_stderr=False)
-        cli_result = runner.invoke(gather, results + args)
+        cli_result = runner.invoke(gather, results + args + ['--tsv'])
 
         assert_click_success(cli_result)
         file_regression.check(cli_result.output, extension=".tsv")
@@ -287,7 +286,7 @@ def test_cmet_too_few_edges_error(self, cmet_result_dir, allow_partial):
         if allow_partial:
             args += ['--allow-partial']
 
-        cli_result = runner.invoke(gather, results + args)
+        cli_result = runner.invoke(gather, results + args + ['--tsv'])
         assert cli_result.exit_code == 1
         assert (
             "The results network has 1 edge(s), but 3 or more edges are required"
@@ -311,11 +310,32 @@ def test_cmet_missing_all_complex_legs_allow_partial(self, cmet_result_dir, repo
         results = glob.glob(f"{cmet_result_dir}/results_*/*solvent*", recursive=True)
         args = ["--report", report, "--allow-partial"]
         runner = CliRunner(mix_stderr=False)
-        cli_result = runner.invoke(gather, results + args + ['-o', '-'])
+        cli_result = runner.invoke(gather, results + args + ['--tsv'])
 
         assert_click_success(cli_result)
         file_regression.check(cli_result.output, extension='.tsv')
 
+    @pytest.mark.parametrize('report', ["dg", "ddg", "raw"])
+    def test_pretty_print(self, cmet_result_dir, report, file_regression):
+        results = [str(cmet_result_dir / f'results_{i}') for i in range(3)]
+        args = ["--report", report]
+        runner = CliRunner(mix_stderr=False)
+        cli_result = runner.invoke(gather, results + args)
+        assert_click_success(cli_result)
+        # TODO: figure out how to mock terminal size, since it affects the table wrapping
+        # file_regression.check(cli_result.output, extension='.txt')
+
+    def test_write_to_file(self, cmet_result_dir):
+        runner = CliRunner(mix_stderr=False)
+        with runner.isolated_filesystem():
+            results = [str(cmet_result_dir / f'results_{i}') for i in range(3)]
+            fname = "output.tsv"
+            args = ["--report", "raw", "-o", fname]
+            cli_result = runner.invoke(gather, results + args)
+            assert "writing raw output to 'output.tsv'" in cli_result.output
+            assert pathlib.Path(fname).is_file()
+
+
 @pytest.mark.skipif(not os.path.exists(POOCH_CACHE) and not HAS_INTERNET,reason="Internet seems to be unavailable and test data is not cached locally.")
 @pytest.mark.parametrize('dataset', ['rbfe_results_serial_repeats', 'rbfe_results_parallel_repeats'])
 @pytest.mark.parametrize('report', ["", "dg", "ddg", "raw"])
@@ -342,7 +362,7 @@ def test_rbfe_gather(rbfe_result_dir, dataset, report, input_mode):
         results = glob.glob(f"{results}/*", recursive=True)
         assert len(results) > 1  # sanity check to make sure we're passing in multiple paths
 
-    cli_result = runner.invoke(gather, results + args + ['-o', '-'])
+    cli_result = runner.invoke(gather, results + args + ['--tsv'])
 
     assert_click_success(cli_result)
 
@@ -355,7 +375,7 @@ def test_rbfe_gather_single_repeats_dg_error(rbfe_result_dir):
     runner = CliRunner(mix_stderr=False)
     results = rbfe_result_dir("rbfe_results_parallel_repeats")
     args = ['report','dg']
-    cli_result = runner.invoke(gather, [f"{results}/replicate_0"] + args)
+    cli_result = runner.invoke(gather, [f"{results}/replicate_0"] + args + ['--tsv'])
     assert cli_result.exit_code == 1
 
 @pytest.mark.skipif(not os.path.exists(POOCH_CACHE) and not HAS_INTERNET,reason="Internet seems to be unavailable and test data is not cached locally.")
@@ -388,7 +408,7 @@ def test_missing_leg_allow_partial_disconnected(self, results_paths_serial_missi
         runner = CliRunner(mix_stderr=False)
         with pytest.warns():
             args =  ["--report", "dg", "--allow-partial"]
-            result = runner.invoke(gather, results_paths_serial_missing_legs + args)
+            result = runner.invoke(gather, results_paths_serial_missing_legs + args + ['--tsv'])
             assert result.exit_code == 1
             assert "The results network is disconnected" in str(result.stderr)
 
@@ -398,5 +418,5 @@ def test_missing_leg_allow_partial_(self, results_paths_serial_missing_legs: str
         # we *dont* want the suggestion to use --allow-partial if the user already used it!
         with pytest.warns(match='[^using the \-\-allow\-partial]'):
             args =  ["--report", "ddg", "--allow-partial"]
-            result = runner.invoke(gather, results_paths_serial_missing_legs + args)
-            assert_click_success(result)
+            result = runner.invoke(gather, results_paths_serial_missing_legs + args + ['--tsv'])
+            assert_click_success(result)
diff --git a/openfecli/tests/test_rbfe_tutorial.py b/openfecli/tests/test_rbfe_tutorial.py
@@ -118,7 +118,7 @@ def test_run_tyk2(tyk2_ligands, tyk2_protein, expected_transformations,
             result2 = runner.invoke(quickrun, [fn])
             assert_click_success(result2)
 
-        gather_result = runner.invoke(gather, ["--report", "ddg", '.'])
+        gather_result = runner.invoke(gather, ["--report", "ddg", '.', '--tsv'])
 
         assert_click_success(gather_result)
         assert gather_result.stdout == ref_gather