From 04d7f6c58f7834459f7a4c68928e98914c85d85f Mon Sep 17 00:00:00 2001
From: Marais Rossouw
Date: Sat, 7 Jun 2025 17:14:44 +1000
Subject: [PATCH] feat: when evals fail, exit(1)

runEvaluation now returns the FailedTests sentinel error when at least one
test did not pass, so a failing evaluation surfaces as a command error
instead of a nil return. The command sets SilenceUsage for that error so
Cobra does not print the usage text alongside it.

The "Some tests failed." line is no longer written by printSummary; the
text now lives in the sentinel error's message.

The sentinel is matched with errors.Is rather than ==, consistent with the
require.ErrorIs assertions in the tests, so the check keeps working if the
error is ever wrapped.
---
 cmd/eval/eval.go      | 18 +++++++++++++++---
 cmd/eval/eval_test.go |  6 +++---
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/cmd/eval/eval.go b/cmd/eval/eval.go
index 7374ba69..6d91f860 100644
--- a/cmd/eval/eval.go
+++ b/cmd/eval/eval.go
@@ -48,6 +48,9 @@ type EvaluationResult struct {
 	Details string `json:"details,omitempty"`
 }
 
+// FailedTests is the sentinel error returned by runEvaluation when at least one test did not pass.
+var FailedTests = errors.New("❌ Some tests failed.")
+
 // NewEvalCommand returns a new command to evaluate prompts against models
 func NewEvalCommand(cfg *command.Config) *cobra.Command {
 	cmd := &cobra.Command{
@@ -106,7 +109,14 @@ func NewEvalCommand(cfg *command.Config) *cobra.Command {
 				jsonOutput: jsonOutput,
 			}
 
-			return handler.runEvaluation(cmd.Context())
+			err = handler.runEvaluation(cmd.Context())
+			if errors.Is(err, FailedTests) {
+				// Cobra by default will show the help message when an error occurs,
+				// which is not what we want for failed evaluations.
+				// Instead, we just want to exit with a non-zero code.
+				cmd.SilenceUsage = true
+			}
+			return err
 		},
 	}
 
@@ -206,6 +216,10 @@ func (h *evalCommandHandler) runEvaluation(ctx context.Context) error {
 		h.printSummary(passedTests, totalTests, passRate)
 	}
 
+	if totalTests-passedTests > 0 {
+		return FailedTests
+	}
+
 	return nil
 }
 
@@ -249,8 +263,6 @@ func (h *evalCommandHandler) printSummary(passedTests, totalTests int, passRate
 
 	if passedTests == totalTests {
 		h.cfg.WriteToOut("🎉 All tests passed!\n")
-	} else {
-		h.cfg.WriteToOut("❌ Some tests failed.\n")
 	}
 }
 
diff --git a/cmd/eval/eval_test.go b/cmd/eval/eval_test.go
index ed831705..33a2014a 100644
--- a/cmd/eval/eval_test.go
+++ b/cmd/eval/eval_test.go
@@ -291,7 +291,7 @@ evaluators:
 		cmd.SetArgs([]string{promptFile})
 
 		err = cmd.Execute()
-		require.NoError(t, err)
+		require.ErrorIs(t, err, FailedTests)
 
 		output := out.String()
 		require.Contains(t, output, "Failing Test")
@@ -361,7 +361,7 @@ evaluators:
 		cmd.SetArgs([]string{"--json", promptFile})
 
 		err = cmd.Execute()
-		require.NoError(t, err)
+		require.ErrorIs(t, err, FailedTests)
 
 		output := out.String()
 
@@ -534,7 +534,7 @@ evaluators:
 		cmd.SetArgs([]string{"--json", promptFile})
 
 		err = cmd.Execute()
-		require.NoError(t, err)
+		require.ErrorIs(t, err, FailedTests)
 
 		output := out.String()
 