Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions cmd/eval/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type EvaluationResult struct {
Details string `json:"details,omitempty"`
}

// FailedTests is the sentinel error returned by runEvaluation when at least
// one test did not pass. The command's run function checks for it in order to
// suppress Cobra's usage output for failed evaluations.
var FailedTests = errors.New("❌ Some tests failed.")

// NewEvalCommand returns a new command to evaluate prompts against models
func NewEvalCommand(cfg *command.Config) *cobra.Command {
cmd := &cobra.Command{
Expand Down Expand Up @@ -106,7 +108,14 @@ func NewEvalCommand(cfg *command.Config) *cobra.Command {
jsonOutput: jsonOutput,
}

return handler.runEvaluation(cmd.Context())
err = handler.runEvaluation(cmd.Context())
if err == FailedTests {
// Cobra by default will show the help message when an error occurs,
// which is not what we want for failed evaluations.
// Instead, we just want to exit with a non-zero code.
cmd.SilenceUsage = true
}
return err
},
}

Expand Down Expand Up @@ -206,6 +215,10 @@ func (h *evalCommandHandler) runEvaluation(ctx context.Context) error {
h.printSummary(passedTests, totalTests, passRate)
}

if totalTests-passedTests > 0 {
return FailedTests
}

return nil
}

Expand Down Expand Up @@ -249,8 +262,6 @@ func (h *evalCommandHandler) printSummary(passedTests, totalTests int, passRate

if passedTests == totalTests {
h.cfg.WriteToOut("🎉 All tests passed!\n")
} else {
h.cfg.WriteToOut("❌ Some tests failed.\n")
}
}

Expand Down
6 changes: 3 additions & 3 deletions cmd/eval/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ evaluators:
cmd.SetArgs([]string{promptFile})

err = cmd.Execute()
require.NoError(t, err)
require.ErrorIs(t, err, FailedTests)

output := out.String()
require.Contains(t, output, "Failing Test")
Expand Down Expand Up @@ -361,7 +361,7 @@ evaluators:
cmd.SetArgs([]string{"--json", promptFile})

err = cmd.Execute()
require.NoError(t, err)
require.ErrorIs(t, err, FailedTests)

output := out.String()

Expand Down Expand Up @@ -534,7 +534,7 @@ evaluators:
cmd.SetArgs([]string{"--json", promptFile})

err = cmd.Execute()
require.NoError(t, err)
require.ErrorIs(t, err, FailedTests)

output := out.String()

Expand Down
Loading