From 0c4b26f2465f7172a30dfc9ef0a6a8692b8959a7 Mon Sep 17 00:00:00 2001 From: Yuzuki <36879321+Yuzuki-S@users.noreply.github.com> Date: Tue, 22 Jul 2025 04:30:13 +0000 Subject: [PATCH 1/4] Add table --- cmd/eval/eval.go | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/cmd/eval/eval.go b/cmd/eval/eval.go index 902ca4ca..dc6e3c63 100644 --- a/cmd/eval/eval.go +++ b/cmd/eval/eval.go @@ -10,13 +10,21 @@ import ( "time" "github.com/MakeNowJust/heredoc" + "github.com/cli/go-gh/v2/pkg/tableprinter" "github.com/github/gh-models/internal/azuremodels" "github.com/github/gh-models/pkg/command" "github.com/github/gh-models/pkg/prompt" "github.com/github/gh-models/pkg/util" + "github.com/mgutz/ansi" "github.com/spf13/cobra" ) +var ( + lightGrayUnderline = ansi.ColorFunc("white+du") + red = ansi.ColorFunc("red") + green = ansi.ColorFunc("green") +) + // EvaluationSummary represents the overall evaluation summary type EvaluationSummary struct { Name string `json:"name"` @@ -167,6 +175,7 @@ func (h *evalCommandHandler) runEvaluation(ctx context.Context) error { for i, testCase := range h.evalFile.TestData { if !h.jsonOutput { + h.cfg.WriteToOut("-------------------------\n") h.cfg.WriteToOut(fmt.Sprintf("Running test case %d/%d...\n", i+1, totalTests)) } @@ -235,30 +244,46 @@ func (h *evalCommandHandler) runEvaluation(ctx context.Context) error { } func (h *evalCommandHandler) printTestResult(result TestResult, testPassed bool) { + printer := h.cfg.NewTablePrinter() if testPassed { - h.cfg.WriteToOut(" ✓ PASSED\n") + printer.AddField("Result", tableprinter.WithColor(lightGrayUnderline)) + printer.AddField("✓ PASSED", tableprinter.WithColor(green)) + printer.EndRow() } else { - h.cfg.WriteToOut(" ✗ FAILED\n") + printer.AddField("Result", tableprinter.WithColor(lightGrayUnderline)) + printer.AddField("✗ FAILED", tableprinter.WithColor(red)) + printer.EndRow() // Show the first 100 characters of the model response when test fails preview := result.ModelResponse if len(preview) > 100 { preview = preview[:100] + "..." } - h.cfg.WriteToOut(fmt.Sprintf(" Model Response: %s\n", preview)) + + printer.AddField("Model Response", tableprinter.WithColor(lightGrayUnderline)) + printer.AddField(preview) + printer.EndRow() } + printer.Render() + h.cfg.WriteToOut("\n") + table := h.cfg.NewTablePrinter() + table.AddHeader([]string{"EVALUATION", "RESULT", "SCORE", "CRITERIA"}, tableprinter.WithColor(lightGrayUnderline)) // Show evaluation details for _, evalResult := range result.EvaluationResults { - status := "✓" + status, color := "✓", green if !evalResult.Passed { - status = "✗" + status, color = "✗", red } - h.cfg.WriteToOut(fmt.Sprintf(" %s %s (score: %.2f)\n", - status, evalResult.EvaluatorName, evalResult.Score)) + table.AddField(evalResult.EvaluatorName) + table.AddField(status, tableprinter.WithColor(color)) + table.AddField(fmt.Sprintf("%.2f", evalResult.Score), tableprinter.WithColor(color)) + if evalResult.Details != "" { - h.cfg.WriteToOut(fmt.Sprintf(" %s\n", evalResult.Details)) + table.AddField(evalResult.Details) } + table.EndRow() } + table.Render() h.cfg.WriteToOut("\n") } From 57c00ee427300423c5abe39d64b5f3d6403d9c68 Mon Sep 17 00:00:00 2001 From: Yuzuki <36879321+Yuzuki-S@users.noreply.github.com> Date: Tue, 22 Jul 2025 04:44:33 +0000 Subject: [PATCH 2/4] Add error check --- cmd/eval/eval.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cmd/eval/eval.go b/cmd/eval/eval.go index dc6e3c63..6d718ed9 100644 --- a/cmd/eval/eval.go +++ b/cmd/eval/eval.go @@ -263,7 +263,12 @@ func (h *evalCommandHandler) printTestResult(result TestResult, testPassed bool) printer.AddField(preview) printer.EndRow() } - printer.Render() + + err := printer.Render() + if err != nil { + return + } + h.cfg.WriteToOut("\n") table := h.cfg.NewTablePrinter() @@ -283,7 +288,12 @@ func (h *evalCommandHandler) printTestResult(result TestResult, testPassed bool) } table.EndRow() } - table.Render() + + err = table.Render() + if err != nil { + return + } + h.cfg.WriteToOut("\n") } From e53d61368608336821cb0fc2e7b753412fdf856d Mon Sep 17 00:00:00 2001 From: Yuzuki <36879321+Yuzuki-S@users.noreply.github.com> Date: Tue, 22 Jul 2025 04:59:14 +0000 Subject: [PATCH 3/4] Update test --- cmd/eval/eval_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/eval/eval_test.go b/cmd/eval/eval_test.go index 90228766..59fc128f 100644 --- a/cmd/eval/eval_test.go +++ b/cmd/eval/eval_test.go @@ -312,7 +312,8 @@ evaluators: require.Contains(t, output, "Failing Test") require.Contains(t, output, "Running test case") require.Contains(t, output, "FAILED") - require.Contains(t, output, "Model Response: actual model response") + require.Contains(t, output, "Model Response") + require.Contains(t, output, "actual model response") }) t.Run("json output format", func(t *testing.T) { From 347c6de5b0088520df2393df3f52c62b3964afc9 Mon Sep 17 00:00:00 2001 From: Yuzuki <36879321+Yuzuki-S@users.noreply.github.com> Date: Tue, 22 Jul 2025 15:28:02 +1000 Subject: [PATCH 4/4] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- cmd/eval/eval.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/eval/eval.go b/cmd/eval/eval.go index 6d718ed9..566bd0df 100644 --- a/cmd/eval/eval.go +++ b/cmd/eval/eval.go @@ -285,6 +285,8 @@ func (h *evalCommandHandler) printTestResult(result TestResult, testPassed bool) if evalResult.Details != "" { table.AddField(evalResult.Details) + } else { + table.AddField("") } table.EndRow() }