diff --git a/lib/benchmark_runner.rb b/lib/benchmark_runner.rb
index 8d5732c2..487d57a8 100644
--- a/lib/benchmark_runner.rb
+++ b/lib/benchmark_runner.rb
@@ -79,16 +79,20 @@ def render_graph(json_path)
   end
 
   # Checked system - error or return info if the command fails
-  def check_call(command, env: {}, raise_error: true, quiet: false)
+  def check_call(command, env: {}, raise_error: true, quiet: ENV['BENCHMARK_QUIET'] == '1')
     puts("+ #{command}") unless quiet
 
     result = {}
-    result[:success] = system(env, command)
+    if quiet
+      result[:success] = system(env, command, out: File::NULL, err: File::NULL)
+    else
+      result[:success] = system(env, command)
+    end
     result[:status] = $?
 
     unless result[:success]
-      puts "Command #{command.inspect} failed with exit code #{result[:status].exitstatus} in directory #{Dir.pwd}"
+      puts "Command #{command.inspect} failed with exit code #{result[:status].exitstatus} in directory #{Dir.pwd}" unless quiet
       raise RuntimeError.new if raise_error
     end
diff --git a/lib/benchmark_runner/cli.rb b/lib/benchmark_runner/cli.rb
new file mode 100644
index 00000000..c5343fd8
--- /dev/null
+++ b/lib/benchmark_runner/cli.rb
@@ -0,0 +1,107 @@
+# frozen_string_literal: true
+
+require 'fileutils'
+require_relative '../argument_parser'
+require_relative '../cpu_config'
+require_relative '../benchmark_runner'
+require_relative '../benchmark_suite'
+require_relative '../results_table_builder'
+
+module BenchmarkRunner
+  class CLI
+    attr_reader :args
+
+    def self.run(argv = ARGV)
+      args = ArgumentParser.parse(argv)
+      new(args).run
+    end
+
+    def initialize(args)
+      @args = args
+    end
+
+    def run
+      CPUConfig.configure_for_benchmarking(turbo: args.turbo)
+
+      # Create the output directory
+      FileUtils.mkdir_p(args.out_path)
+
+      ruby_descriptions = {}
+
+      # Benchmark with and without YJIT
+      bench_start_time = Time.now.to_f
+      bench_data = {}
+      bench_failures = {}
+      args.executables.each do |name, executable|
+        ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp
+
+        suite = BenchmarkSuite.new(
+          ruby: executable,
+          ruby_description: ruby_descriptions[name],
+          categories: args.categories,
+          name_filters: args.name_filters,
+          out_path: args.out_path,
+          harness: args.harness,
+          pre_init: args.with_pre_init,
+          no_pinning: args.no_pinning
+        )
+        bench_data[name], failures = suite.run
+        # Make it easier to query later.
+        bench_failures[name] = failures unless failures.empty?
+      end
+
+      bench_end_time = Time.now.to_f
+      bench_total_time = (bench_end_time - bench_start_time).to_i
+      puts("Total time spent benchmarking: #{bench_total_time}s")
+
+      if !bench_failures.empty?
+ puts("Failed benchmarks: #{bench_failures.map { |k, v| v.size }.sum}") + end + + puts + + # Build results table + builder = ResultsTableBuilder.new( + executable_names: ruby_descriptions.keys, + bench_data: bench_data, + include_rss: args.rss + ) + table, format = builder.build + + output_path = BenchmarkRunner.output_path(args.out_path, out_override: args.out_override) + + # Save the raw data as JSON + out_json_path = BenchmarkRunner.write_json(output_path, ruby_descriptions, bench_data) + + # Save data as CSV so we can produce tables/graphs in a spreasheet program + # NOTE: we don't do any number formatting for the output file because + # we don't want to lose any precision + BenchmarkRunner.write_csv(output_path, ruby_descriptions, table) + + # Save the output in a text file that we can easily refer to + output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures) + out_txt_path = output_path + ".txt" + File.open(out_txt_path, "w") { |f| f.write output_str } + + # Print the table to the console, with numbers truncated + puts(output_str) + + # Print JSON and PNG file names + puts + puts "Output:" + puts out_json_path + + if args.graph + puts BenchmarkRunner.render_graph(out_json_path) + end + + if !bench_failures.empty? + puts "\nFailed benchmarks:" + bench_failures.each do |name, data| + puts " #{name}: #{data.keys.join(", ")}" + end + exit(1) + end + end + end +end diff --git a/run_benchmarks.rb b/run_benchmarks.rb index b78e4402..4433905a 100755 --- a/run_benchmarks.rb +++ b/run_benchmarks.rb @@ -1,99 +1,5 @@ #!/usr/bin/env ruby -require 'pathname' -require 'fileutils' -require 'csv' -require 'json' -require 'shellwords' -require 'rbconfig' -require 'etc' -require 'yaml' -require_relative 'lib/cpu_config' -require_relative 'lib/benchmark_runner' -require_relative 'lib/benchmark_suite' -require_relative 'lib/argument_parser' -require_relative 'lib/results_table_builder' +require_relative 'lib/benchmark_runner/cli' -args = ArgumentParser.parse(ARGV) - -CPUConfig.configure_for_benchmarking(turbo: args.turbo) - -# Create the output directory -FileUtils.mkdir_p(args.out_path) - -ruby_descriptions = {} - -# Benchmark with and without YJIT -bench_start_time = Time.now.to_f -bench_data = {} -bench_failures = {} -args.executables.each do |name, executable| - ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp - - suite = BenchmarkSuite.new( - ruby: executable, - ruby_description: ruby_descriptions[name], - categories: args.categories, - name_filters: args.name_filters, - out_path: args.out_path, - harness: args.harness, - pre_init: args.with_pre_init, - no_pinning: args.no_pinning - ) - bench_data[name], failures = suite.run - # Make it easier to query later. - bench_failures[name] = failures unless failures.empty? -end - -bench_end_time = Time.now.to_f -bench_total_time = (bench_end_time - bench_start_time).to_i -puts("Total time spent benchmarking: #{bench_total_time}s") - -if !bench_failures.empty? 
- puts("Failed benchmarks: #{bench_failures.map { |k, v| v.size }.sum}") -end - -puts - -# Build results table -builder = ResultsTableBuilder.new( - executable_names: ruby_descriptions.keys, - bench_data: bench_data, - include_rss: args.rss -) -table, format = builder.build - -output_path = BenchmarkRunner.output_path(args.out_path, out_override: args.out_override) - -# Save the raw data as JSON -out_json_path = BenchmarkRunner.write_json(output_path, ruby_descriptions, bench_data) - -# Save data as CSV so we can produce tables/graphs in a spreasheet program -# NOTE: we don't do any number formatting for the output file because -# we don't want to lose any precision -BenchmarkRunner.write_csv(output_path, ruby_descriptions, table) - -# Save the output in a text file that we can easily refer to -output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures) -out_txt_path = output_path + ".txt" -File.open(out_txt_path, "w") { |f| f.write output_str } - -# Print the table to the console, with numbers truncated -puts(output_str) - -# Print JSON and PNG file names -puts -puts "Output:" -puts out_json_path - -if args.graph - puts BenchmarkRunner.render_graph(out_json_path) -end - -if !bench_failures.empty? - puts "\nFailed benchmarks:" - bench_failures.each do |name, data| - puts " #{name}: #{data.keys.join(", ")}" - end - exit(1) -end +BenchmarkRunner::CLI.run(ARGV) diff --git a/test/benchmark_runner_cli_test.rb b/test/benchmark_runner_cli_test.rb new file mode 100644 index 00000000..4b155494 --- /dev/null +++ b/test/benchmark_runner_cli_test.rb @@ -0,0 +1,336 @@ +require_relative 'test_helper' +require_relative '../lib/benchmark_runner/cli' +require_relative '../lib/argument_parser' +require 'tmpdir' +require 'fileutils' +require 'json' +require 'csv' + +describe BenchmarkRunner::CLI do + before do + @original_env = {} + ['WARMUP_ITRS', 'MIN_BENCH_ITRS', 'MIN_BENCH_TIME', 'BENCHMARK_QUIET'].each do |key| + @original_env[key] = ENV[key] + end + + # Set fast iteration counts for tests + ENV['WARMUP_ITRS'] = '0' + ENV['MIN_BENCH_ITRS'] = '1' + ENV['MIN_BENCH_TIME'] = '0' + # Suppress benchmark output during tests + ENV['BENCHMARK_QUIET'] = '1' + end + + after do + @original_env.each do |key, value| + if value.nil? 
+        ENV.delete(key)
+      else
+        ENV[key] = value
+      end
+    end
+  end
+
+  # Helper method to create args directly without parsing
+  def create_args(overrides = {})
+    defaults = {
+      executables: { 'interp' => [RbConfig.ruby], 'yjit' => [RbConfig.ruby, '--yjit'] },
+      out_path: nil,
+      out_override: nil,
+      harness: 'harness',
+      yjit_opts: '',
+      categories: [],
+      name_filters: [],
+      rss: false,
+      graph: false,
+      no_pinning: true,
+      turbo: true,
+      skip_yjit: false,
+      with_pre_init: nil
+    }
+    ArgumentParser::Args.new(**defaults.merge(overrides))
+  end
+
+  describe '.run class method' do
+    it 'parses ARGV and runs the CLI end-to-end' do
+      Dir.mktmpdir do |tmpdir|
+        # Test the full integration: argv array -> parse -> initialize -> run
+        output = capture_io do
+          BenchmarkRunner::CLI.run([
+            '--name_filters=fib',
+            '--out_path=' + tmpdir,
+            '--once',
+            '--no-pinning',
+            '--turbo'
+          ])
+        end.join
+
+        # Verify output contains expected information
+        assert_match(/fib/, output, "Output should mention the fib benchmark")
+        assert_match(/Total time spent benchmarking:/, output)
+        assert_match(/Output:/, output)
+
+        # Verify output files were created
+        json_files = Dir.glob(File.join(tmpdir, "output_*.json"))
+        assert_equal 1, json_files.size, "Should create exactly one JSON output file"
+      end
+    end
+  end
+
+  describe '#run integration test' do
+    it 'runs a simple benchmark end-to-end and produces all output files' do
+      Dir.mktmpdir do |tmpdir|
+        args = create_args(
+          name_filters: ['fib'],
+          out_path: tmpdir
+        )
+
+        # Run the CLI
+        cli = BenchmarkRunner::CLI.new(args)
+
+        # Capture output and run - should not raise errors
+        output = capture_io do
+          cli.run
+        end.join
+
+        # Verify output contains expected information
+        assert_match(/fib/, output, "Output should mention the fib benchmark")
+        assert_match(/Total time spent benchmarking:/, output)
+        assert_match(/Output:/, output)
+
+        # Verify JSON output file was created
+        json_files = Dir.glob(File.join(tmpdir, "output_*.json"))
+        assert_equal 1, json_files.size, "Should create exactly one JSON output file"
+
+        json_path = json_files.first
+        assert File.exist?(json_path), "JSON file should exist"
+
+        # Verify JSON content is valid and contains expected data
+        json_data = JSON.parse(File.read(json_path))
+        assert json_data.key?('metadata'), "JSON should contain metadata"
+        assert json_data.key?('raw_data'), "JSON should contain raw_data"
+
+        # Verify CSV output file was created
+        csv_path = json_path.sub('.json', '.csv')
+        assert File.exist?(csv_path), "CSV file should exist"
+
+        # Verify CSV content
+        csv_data = CSV.read(csv_path)
+        assert csv_data.size > 0, "CSV should have content"
+
+        # Verify TXT output file was created
+        txt_path = json_path.sub('.json', '.txt')
+        assert File.exist?(txt_path), "TXT file should exist"
+
+        # Verify TXT content
+        txt_content = File.read(txt_path)
+        assert_match(/fib/, txt_content, "TXT should contain benchmark results")
+      end
+    end
+
+    it 'handles multiple benchmarks with name filters' do
+      Dir.mktmpdir do |tmpdir|
+        args = create_args(
+          name_filters: ['fib', 'respond_to'],
+          out_path: tmpdir
+        )
+
+        cli = BenchmarkRunner::CLI.new(args)
+        output = capture_io { cli.run }.join
+
+        # Check both benchmarks ran
+        assert_match(/fib/, output)
+        assert_match(/respond_to/, output)
+
+        # Check output files were created
+        json_files = Dir.glob(File.join(tmpdir, "*.json"))
+        assert_equal 1, json_files.size
+
+        json_data = JSON.parse(File.read(json_files.first))
+        raw_data = json_data['raw_data']
+
+        # Verify data contains results for both benchmarks
+        assert raw_data.values.any? { |data| data.key?('fib') }
+        assert raw_data.values.any? { |data| data.key?('respond_to') }
+      end
+    end
+
+    it 'respects output path override' do
+      Dir.mktmpdir do |tmpdir|
+        custom_name = File.join(tmpdir, 'custom_output')
+
+        args = create_args(
+          name_filters: ['fib'],
+          out_path: tmpdir,
+          out_override: custom_name
+        )
+
+        cli = BenchmarkRunner::CLI.new(args)
+        capture_io { cli.run }
+
+        # Check that custom-named files were created
+        assert File.exist?(custom_name + '.json'), "Custom JSON file should exist"
+        assert File.exist?(custom_name + '.csv'), "Custom CSV file should exist"
+        assert File.exist?(custom_name + '.txt'), "Custom TXT file should exist"
+      end
+    end
+
+    it 'compares different ruby executables' do
+      skip "Requires actual ruby installations" unless ENV['RUN_INTEGRATION_TESTS']
+
+      Dir.mktmpdir do |tmpdir|
+        ruby_path = RbConfig.ruby
+
+        args = create_args(
+          executables: { 'test1' => [ruby_path], 'test2' => [ruby_path] },
+          name_filters: ['fib'],
+          out_path: tmpdir
+        )
+
+        cli = BenchmarkRunner::CLI.new(args)
+        output = capture_io { cli.run }.join
+
+        # Should show comparison between two executables
+        assert_match(/test1/, output)
+        assert_match(/test2/, output)
+
+        json_files = Dir.glob(File.join(tmpdir, "*.json"))
+        json_data = JSON.parse(File.read(json_files.first))
+
+        # Both executables should be in metadata
+        assert json_data['metadata'].key?('test1')
+        assert json_data['metadata'].key?('test2')
+
+        # Both should have raw data
+        assert json_data['raw_data'].key?('test1')
+        assert json_data['raw_data'].key?('test2')
+      end
+    end
+
+    it 'handles benchmark with category filter' do
+      Dir.mktmpdir do |tmpdir|
+        args = create_args(
+          categories: ['micro'],
+          name_filters: ['fib'],
+          out_path: tmpdir
+        )
+
+        cli = BenchmarkRunner::CLI.new(args)
+        output = capture_io { cli.run }.join
+
+        # Should run successfully
+        assert_match(/Total time spent benchmarking:/, output)
+
+        # Output files should exist
+        json_files = Dir.glob(File.join(tmpdir, "*.json"))
+        assert_equal 1, json_files.size
+      end
+    end
+
+    it 'creates sequential output files when no override specified' do
+      Dir.mktmpdir do |tmpdir|
+        # Run first benchmark
+        args1 = create_args(
+          name_filters: ['fib'],
+          out_path: tmpdir
+        )
+        cli1 = BenchmarkRunner::CLI.new(args1)
+        capture_io { cli1.run }
+
+        # Run second benchmark
+        args2 = create_args(
+          name_filters: ['respond_to'],
+          out_path: tmpdir
+        )
+        cli2 = BenchmarkRunner::CLI.new(args2)
+        capture_io { cli2.run }
+
+        # Should have two sets of output files
+        json_files = Dir.glob(File.join(tmpdir, "output_*.json")).sort
+        assert_equal 2, json_files.size
+        assert_match(/output_001\.json$/, json_files[0])
+        assert_match(/output_002\.json$/, json_files[1])
+      end
+    end
+
+    it 'includes RSS data when --rss flag is set' do
+      Dir.mktmpdir do |tmpdir|
+        args = create_args(
+          name_filters: ['fib'],
+          out_path: tmpdir,
+          rss: true
+        )
+
+        cli = BenchmarkRunner::CLI.new(args)
+        capture_io { cli.run }
+
+        # Output should reference RSS
+        txt_files = Dir.glob(File.join(tmpdir, "*.txt"))
+        txt_content = File.read(txt_files.first)
+        assert_match(/RSS/, txt_content, "Output should include RSS information")
+      end
+    end
+
+    it 'handles no matching benchmarks gracefully' do
+      Dir.mktmpdir do |tmpdir|
+        args = create_args(
+          name_filters: ['nonexistent_benchmark_xyz'],
+          out_path: tmpdir
+        )
+
+        cli = BenchmarkRunner::CLI.new(args)
+
+        # Should run without error but produce empty results
+        capture_io { cli.run }
+
+        # Should still create output files
+        json_files = Dir.glob(File.join(tmpdir, "*.json"))
+        assert_equal 1, json_files.size
+      end
+    end
+
+    it 'can be instantiated and have args accessed' do
+      args = create_args(name_filters: ['fib'])
+      cli = BenchmarkRunner::CLI.new(args)
+
+      assert_equal args, cli.args
+      assert_equal ['fib'], cli.args.name_filters
+    end
+
+    it 'prints benchmark timing information' do
+      Dir.mktmpdir do |tmpdir|
+        args = create_args(
+          name_filters: ['fib'],
+          out_path: tmpdir
+        )
+
+        cli = BenchmarkRunner::CLI.new(args)
+        output = capture_io { cli.run }.join
+
+        # Should show timing
+        assert_match(/Total time spent benchmarking: \d+s/, output)
+      end
+    end
+
+    it 'creates output directory if it does not exist' do
+      Dir.mktmpdir do |parent_tmpdir|
+        nested_dir = File.join(parent_tmpdir, 'nested', 'output', 'dir')
+        refute Dir.exist?(nested_dir), "Directory should not exist yet"
+
+        args = create_args(
+          name_filters: ['fib'],
+          out_path: nested_dir
+        )
+
+        cli = BenchmarkRunner::CLI.new(args)
+        capture_io { cli.run }
+
+        assert Dir.exist?(nested_dir), "Directory should be created"
+
+        # Verify files were created in the new directory
+        json_files = Dir.glob(File.join(nested_dir, "*.json"))
+        assert_equal 1, json_files.size
+      end
+    end
+  end
+end
diff --git a/test/run_benchmarks_integration_test.rb b/test/run_benchmarks_integration_test.rb
index f6bbaa7b..9cf5a888 100644
--- a/test/run_benchmarks_integration_test.rb
+++ b/test/run_benchmarks_integration_test.rb
@@ -3,51 +3,39 @@
 require 'tmpdir'
 require 'fileutils'
 
+# Tests for run_benchmarks.rb script integration
+# This complements benchmark_runner_cli_test.rb by testing:
+# - The script itself as a subprocess
+# - Script structure and permissions
+# - Benchmark metadata validation
 describe 'run_benchmarks.rb integration' do
   before do
     @script_path = File.expand_path('../run_benchmarks.rb', __dir__)
     @ruby_path = RbConfig.ruby
+    @original_env = ENV['BENCHMARK_QUIET']
   end
 
-  describe 'command-line parsing' do
-    it 'shows help with --help flag' do
-      skip 'Skipping integration test - requires full setup'
-    end
-
-    it 'handles --once flag' do
-      Dir.mktmpdir do |tmpdir|
-        # This would set ENV["WARMUP_ITRS"] = "0" and ENV["MIN_BENCH_ITRS"] = "1"
-        cmd = "#{@ruby_path} #{@script_path} --once --name_filters=fib --out_path=#{tmpdir} 2>&1"
-        result = `#{cmd}`
-
-        # Should run but may fail due to missing benchmarks - that's okay
-        # We're just checking the script can parse arguments
-        skip 'Requires benchmark environment' unless $?.success? || result.include?('Running benchmark')
-      end
+  after do
+    if @original_env.nil?
+      ENV.delete('BENCHMARK_QUIET')
+    else
+      ENV['BENCHMARK_QUIET'] = @original_env
+    end
   end
 
-  describe 'output files' do
-    it 'creates output files with correct naming convention' do
+  describe 'script execution as subprocess' do
+    it 'runs successfully as a standalone script' do
       Dir.mktmpdir do |tmpdir|
-        # Create some mock output files
-        File.write(File.join(tmpdir, 'output_001.csv'), 'test')
-        File.write(File.join(tmpdir, 'output_002.csv'), 'test')
+        # Test that the script can be invoked as a subprocess
+        ENV['BENCHMARK_QUIET'] = '1'
+        cmd = "#{@ruby_path} #{@script_path} --once --name_filters=fib --out_path=#{tmpdir} --no-pinning --turbo 2>&1"
+        result = `#{cmd}`
+        exit_status = $?.exitstatus
 
-        # The output_path function should find the next number
-        require_relative '../lib/benchmark_runner'
-        output_path = BenchmarkRunner.output_path(tmpdir)
-        expected_path = File.join(tmpdir, 'output_003')
-        assert_equal expected_path, output_path
+        assert_equal 0, exit_status, "Script should exit successfully. Output: #{result}"
+        assert_match(/Total time spent benchmarking:/, result)
       end
     end
-
-    it 'uses correct output file format' do
-      file_no = 42
-      expected = 'output_042.csv'
-      actual = 'output_%03d.csv' % file_no
-      assert_equal expected, actual
-    end
   end
 
   describe 'benchmark metadata' do