diff --git a/lib/benchmark_runner.rb b/lib/benchmark_runner.rb
index 4cc4a886..e3ea26de 100644
--- a/lib/benchmark_runner.rb
+++ b/lib/benchmark_runner.rb
@@ -16,31 +16,6 @@ def free_file_no(directory)
     end
   end
 
-  # Resolve the pre_init file path into a form that can be required
-  def expand_pre_init(path)
-    require 'pathname'
-
-    path = Pathname.new(path)
-
-    unless path.exist?
-      puts "--with-pre-init called with non-existent file!"
-      exit(-1)
-    end
-
-    if path.directory?
-      puts "--with-pre-init called with a directory, please pass a .rb file"
-      exit(-1)
-    end
-
-    library_name = path.basename(path.extname)
-    load_path = path.parent.expand_path
-
-    [
-      "-I", load_path,
-      "-r", library_name
-    ]
-  end
-
   # Sort benchmarks with headlines first, then others, then micro
   def sort_benchmarks(bench_names, metadata)
     headline_benchmarks = metadata.select { |_, meta| meta['category'] == 'headline' }.keys
@@ -51,36 +26,6 @@ def sort_benchmarks(bench_names, metadata)
     headline_names.sort + other_names.sort + micro_names.sort
   end
 
-  # Check which OS we are running
-  def os
-    @os ||= (
-      host_os = RbConfig::CONFIG['host_os']
-      case host_os
-      when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
-        :windows
-      when /darwin|mac os/
-        :macosx
-      when /linux/
-        :linux
-      when /solaris|bsd/
-        :unix
-      else
-        raise "unknown os: #{host_os.inspect}"
-      end
-    )
-  end
-
-  # Generate setarch prefix for Linux
-  def setarch_prefix
-    # Disable address space randomization (for determinism)
-    prefix = ["setarch", `uname -m`.strip, "-R"]
-
-    # Abort if we don't have permission (perhaps in a docker container).
-    return [] unless system(*prefix, "true", out: File::NULL, err: File::NULL)
-
-    prefix
-  end
-
   # Checked system - error or return info if the command fails
   def check_call(command, env: {}, raise_error: true, quiet: false)
     puts("+ #{command}") unless quiet
diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
new file mode 100644
index 00000000..83cc8bb0
--- /dev/null
+++ b/lib/benchmark_suite.rb
@@ -0,0 +1,234 @@
+# frozen_string_literal: true
+
+require 'json'
+require 'pathname'
+require 'fileutils'
+require 'shellwords'
+require 'etc'
+require 'yaml'
+require 'rbconfig'
+require_relative 'benchmark_filter'
+require_relative 'benchmark_runner'
+
+# BenchmarkSuite runs a collection of benchmarks and collects their results
+class BenchmarkSuite
+  BENCHMARKS_DIR = "benchmarks"
+  RACTOR_BENCHMARKS_DIR = "benchmarks-ractor"
+  RACTOR_ONLY_CATEGORY = ["ractor-only"].freeze
+  RACTOR_CATEGORY = ["ractor"].freeze
+  RACTOR_HARNESS = "harness-ractor"
+
+  attr_reader :ruby, :ruby_description, :categories, :name_filters, :out_path, :harness, :pre_init, :no_pinning, :bench_dir, :ractor_bench_dir
+
+  def initialize(ruby:, ruby_description:, categories:, name_filters:, out_path:, harness:, pre_init: nil, no_pinning: false)
+    @ruby = ruby
+    @ruby_description = ruby_description
+    @name_filters = name_filters
+    @out_path = out_path
+    @no_pinning = no_pinning
+    @pre_init = (expand_pre_init(pre_init) if pre_init)
+    # In ractor-only mode setup_benchmark_directories replaces the next two values
+    @categories = categories
+    @harness = harness
+    @ractor_only = categories == RACTOR_ONLY_CATEGORY
+    setup_benchmark_directories
+  end
+
+  # Run every selected benchmark and record its execution times.
+  # Returns [bench_data, bench_failures]: parsed results and exit statuses, both keyed by name.
+  def run
+    bench_data = {}
+    bench_failures = {}
+
+    bench_file_grouping.each do |bench_dir, bench_files|
+      bench_files.each_with_index do |entry, idx|
+        bench_name = entry.delete_suffix('.rb')
+        puts("Running benchmark \"#{bench_name}\" (#{idx+1}/#{bench_files.length})")
+
+        result_json_path = File.join(out_path, "temp#{Process.pid}.json")
+        result = run_single_benchmark(bench_dir, entry, result_json_path)
+        if result[:success]
+          bench_data[bench_name] = process_benchmark_result(result_json_path, result[:command])
+        else
+          bench_failures[bench_name] = result[:status].exitstatus
+          # The same path is reused for every benchmark; drop leftovers of a failed run
+          FileUtils.rm_f(result_json_path)
+        end
+      end
+    end
+
+    [bench_data, bench_failures]
+  end
+
+  private
+
+  def setup_benchmark_directories
+    # The ractor directory is the same in both modes; only the main directory differs
+    @ractor_bench_dir = RACTOR_BENCHMARKS_DIR
+    @bench_dir = @ractor_only ? RACTOR_BENCHMARKS_DIR : BENCHMARKS_DIR
+    return unless @ractor_only
+
+    # A ractor-only run always uses the ractor harness
+    # and clears the category list
+    @harness = RACTOR_HARNESS
+    @categories = []
+  end
+
+  def process_benchmark_result(result_json_path, command)
+    parsed = JSON.parse(File.read(result_json_path))
+    parsed["command_line"] = command
+    File.unlink(result_json_path)
+    parsed
+  end
+
+  def run_single_benchmark(bench_dir, entry, result_json_path)
+    # An entry that is not itself a .rb file is treated as a directory
+    # and run through the benchmark.rb file inside it
+    relative_path = File.join(bench_dir, entry)
+    relative_path = File.join(relative_path, 'benchmark.rb') unless relative_path.end_with?('.rb')
+
+    # Fix for jruby/jruby#7394 in JRuby 9.4.2.0
+    script_path = File.expand_path(relative_path)
+
+    # Exported so the benchmarked process can find where to write its JSON result
+    ENV["RESULT_JSON_PATH"] = result_json_path
+
+    # Everything that follows the platform-specific prefix on the command line
+    ruby_args = [
+      *ruby,
+      "-I", harness,
+      *pre_init,
+      script_path,
+    ].compact
+    command_line = (base_cmd + ruby_args).shelljoin
+
+    # Do the benchmarking, and hand back the exact command
+    # together with the outcome
+    outcome = BenchmarkRunner.check_call(command_line, env: benchmark_env, raise_error: false)
+    outcome[:command] = command_line
+    outcome
+  end
+
+  # Environment overrides passed to each benchmark command (computed once per suite)
+  def benchmark_env
+    @benchmark_env ||= begin
+      # When the Ruby running this script is not the first Ruby in PATH, shell commands
+      # like `bundle install` in a child process will not use the Ruby being benchmarked.
+      # It overrides PATH to guarantee the commands of the benchmarked Ruby will be used.
+      env = {}
+      ruby_path = `#{ruby.shelljoin} -e 'print RbConfig.ruby' 2> #{File::NULL}`
+      # Empty output means the probe failed; File.dirname("") would then put "." on PATH
+      if !ruby_path.empty? && ruby_path != RbConfig.ruby
+        env["PATH"] = "#{File.dirname(ruby_path)}:#{ENV["PATH"]}"
+
+        # chruby sets GEM_HOME and GEM_PATH in your shell. We have to unset it in the child
+        # process to avoid installing gems to the version that is running run_benchmarks.rb.
+        ["GEM_HOME", "GEM_PATH"].each do |var|
+          env[var] = nil if ENV.key?(var)
+        end
+      end
+      env
+    end
+  end
+
+  def bench_file_grouping
+    # Maps each benchmark directory to the entries selected from it
+    selected = { bench_dir => filtered_bench_entries(bench_dir, main_benchmark_filter) }
+    return selected unless benchmark_ractor_directory?
+
+    # We ignore the category filter here because everything in the
+    # benchmarks-ractor directory should be included when we're benchmarking the
+    # Ractor category
+    selected[ractor_bench_dir] = filtered_bench_entries(ractor_bench_dir, ractor_benchmark_filter)
+
+    selected
+  end
+
+  def main_benchmark_filter
+    return @main_benchmark_filter if @main_benchmark_filter
+
+    @main_benchmark_filter = BenchmarkFilter.new(
+      categories: categories, name_filters: name_filters, metadata: benchmarks_metadata
+    )
+  end
+
+  def ractor_benchmark_filter
+    return @ractor_benchmark_filter if @ractor_benchmark_filter
+    # No category restriction is passed here, only the name filters
+    @ractor_benchmark_filter = BenchmarkFilter.new(
+      categories: [], name_filters: name_filters, metadata: benchmarks_metadata
+    )
+  end
+
+  def benchmarks_metadata
+    @benchmarks_metadata ||= YAML.load_file('benchmarks.yml')
+  end
+
+  def filtered_bench_entries(dir, filter)
+    # Directory listing in sorted order, reduced to the entries the filter accepts
+    sorted_entries = Dir.children(dir).sort
+    sorted_entries.select { |name| filter.match?(name) }
+  end
+
+  def benchmark_ractor_directory?
+    categories == RACTOR_CATEGORY
+  end
+
+  # Whether the host OS is Linux; always true or false (the check is too cheap to memoize)
+  def linux?
+    RbConfig::CONFIG['host_os'].match?(/linux/)
+  end
+
+  # Command prefix put in front of every benchmark run (empty unless on Linux), built once
+  def base_cmd
+    return @base_cmd if @base_cmd
+    return @base_cmd = [] unless linux?
+
+    prefix = setarch_prefix
+    pinned = ruby_description.start_with?('ruby ') && !no_pinning
+
+    # Pin the process to one given core to improve caching and reduce variance on CRuby
+    # Other Rubies need to use multiple cores, e.g., for JIT threads
+    if pinned
+      # The last few cores of Intel CPU may be slow E-Cores, so avoid using the last one.
+      core = [(Etc.nprocessors / 2) - 1, 0].max
+      prefix.push("taskset", "-c", core.to_s)
+    end
+
+    @base_cmd = prefix
+  end
+
+  # Build the setarch command prefix, or an empty prefix when setarch cannot be used
+  def setarch_prefix
+    # -R disables address space randomization (for determinism)
+    candidate = ["setarch", `uname -m`.strip, "-R"]
+
+    # Probe with a no-op command: we may lack permission (perhaps in a docker container)
+    usable = system(*candidate, "true", out: File::NULL, err: File::NULL)
+
+    usable ? candidate : []
+  end
+
+  # Convert the --with-pre-init path into ruby flags: -I <its directory> -r <its base name>
+  def expand_pre_init(path)
+    pre_init_file = Pathname.new(path)
+
+    # Work out why the path is unusable, if it is
+    problem =
+      if !pre_init_file.exist?
+        "--with-pre-init called with non-existent file!"
+      elsif pre_init_file.directory?
+        "--with-pre-init called with a directory, please pass a .rb file"
+      end
+
+    if problem
+      puts problem
+      exit(-1)
+    end
+
+    # Required by name (extension stripped) from the file's absolute parent directory
+    directory = pre_init_file.parent.expand_path
+    feature = pre_init_file.basename(pre_init_file.extname)
+    ["-I", directory, "-r", feature]
+  end
+end
diff --git a/run_benchmarks.rb b/run_benchmarks.rb
index 9c7038d5..f4991686 100755
--- a/run_benchmarks.rb
+++ b/run_benchmarks.rb
@@ -11,8 +11,8 @@
 require_relative 'misc/stats'
 require_relative 'lib/cpu_config'
 require_relative 'lib/benchmark_runner'
+require_relative 'lib/benchmark_suite'
 require_relative 'lib/table_formatter'
-require_relative 'lib/benchmark_filter'
 require_relative 'lib/argument_parser'
 
 def mean(values)
@@ -23,135 +23,11 @@ def stddev(values)
   Stats.new(values).stddev
 end
 
-def benchmark_filter(categories:, name_filters:)
-  @benchmark_filter ||= {}
-  key = [categories, name_filters]
-  @benchmark_filter[key] ||= BenchmarkFilter.new(
-    categories: categories,
-    name_filters: name_filters,
-    metadata: benchmarks_metadata
-  )
-end
-
-def benchmarks_metadata
-  @benchmarks_metadata ||= YAML.load_file('benchmarks.yml')
-end
-
 def sort_benchmarks(bench_names)
+  benchmarks_metadata = YAML.load_file('benchmarks.yml')
   BenchmarkRunner.sort_benchmarks(bench_names, benchmarks_metadata)
 end
 
-# Run all the benchmarks and record execution times
-def run_benchmarks(ruby:, ruby_description:, categories:, name_filters:, out_path:, harness:, pre_init:, no_pinning:)
-  bench_data = {}
-  bench_failures = {}
-
-  bench_dir = "benchmarks"
-  ractor_bench_dir = "benchmarks-ractor"
-
-  if categories == ["ractor-only"]
-    bench_dir = ractor_bench_dir
-    harness = "harness-ractor"
-    categories = []
-  end
-
-  bench_file_grouping = {}
-
-  # Get the list of benchmark files/directories matching name filters
-  filter = benchmark_filter(categories: categories, name_filters: name_filters)
-  bench_file_grouping[bench_dir] = Dir.children(bench_dir).sort.filter do |entry|
-    filter.match?(entry)
-  end
-
-  if categories == ["ractor"]
-    # We ignore the category filter here because everything in the
-    # benchmarks-ractor directory should be included when we're benchmarking the
-    # Ractor category
-    ractor_filter = benchmark_filter(categories: [], name_filters: name_filters)
-    bench_file_grouping[ractor_bench_dir] = Dir.children(ractor_bench_dir).sort.filter do |entry|
-      ractor_filter.match?(entry)
-    end
-  end
-
-  if pre_init
-    pre_init = BenchmarkRunner.expand_pre_init(pre_init)
-  end
-
-
-  bench_file_grouping.each do |bench_dir, bench_files|
-    bench_files.each_with_index do |entry, idx|
-      bench_name = entry.gsub('.rb', '')
-
-      puts("Running benchmark \"#{bench_name}\" (#{idx+1}/#{bench_files.length})")
-
-      # Path to the benchmark runner script
-      script_path = File.join(bench_dir, entry)
-
-      if !script_path.end_with?('.rb')
-        script_path = File.join(script_path, 'benchmark.rb')
-      end
-
-      # Set up the environment for the benchmarking command
-      result_json_path = File.join(out_path, "temp#{Process.pid}.json")
-      ENV["RESULT_JSON_PATH"] = result_json_path
-
-      # Set up the benchmarking command
-      cmd = []
-      if BenchmarkRunner.os == :linux
-        cmd += BenchmarkRunner.setarch_prefix
-
-        # Pin the process to one given core to improve caching and reduce variance on CRuby
-        # Other Rubies need to use multiple cores, e.g., for JIT threads
-        if ruby_description.start_with?('ruby ') && !no_pinning
-          # The last few cores of Intel CPU may be slow E-Cores, so avoid using the last one.
-          cpu = [(Etc.nprocessors / 2) - 1, 0].max
-          cmd += ["taskset", "-c", "#{cpu}"]
-        end
-      end
-
-      # Fix for jruby/jruby#7394 in JRuby 9.4.2.0
-      script_path = File.expand_path(script_path)
-
-      cmd += [
-        *ruby,
-        "-I", harness,
-        *pre_init,
-        script_path,
-      ].compact
-
-      # When the Ruby running this script is not the first Ruby in PATH, shell commands
-      # like `bundle install` in a child process will not use the Ruby being benchmarked.
-      # It overrides PATH to guarantee the commands of the benchmarked Ruby will be used.
-      env = {}
-      ruby_path = `#{ruby.shelljoin} -e 'print RbConfig.ruby' 2> #{File::NULL}`
-      if ruby_path != RbConfig.ruby
-        env["PATH"] = "#{File.dirname(ruby_path)}:#{ENV["PATH"]}"
-
-        # chruby sets GEM_HOME and GEM_PATH in your shell. We have to unset it in the child
-        # process to avoid installing gems to the version that is running run_benchmarks.rb.
-        ["GEM_HOME", "GEM_PATH"].each do |var|
-          env[var] = nil if ENV.key?(var)
-        end
-      end
-
-      # Do the benchmarking
-      result = BenchmarkRunner.check_call(cmd.shelljoin, env: env, raise_error: false)
-
-      if result[:success]
-        bench_data[bench_name] = JSON.parse(File.read(result_json_path)).tap do |json|
-          json["command_line"] = cmd.shelljoin
-          File.unlink(result_json_path)
-        end
-      else
-        bench_failures[bench_name] = result[:status].exitstatus
-      end
-
-    end
-  end
-
-  [bench_data, bench_failures]
-end
-
 args = ArgumentParser.parse(ARGV)
 
 CPUConfig.configure_for_benchmarking(turbo: args.turbo)
@@ -169,7 +45,7 @@ def run_benchmarks(ruby:, ruby_description:, categories:, name_filters:, out_pat
 bench_data = {}
 bench_failures = {}
 args.executables.each do |name, executable|
-  bench_data[name], failures = run_benchmarks(
+  suite = BenchmarkSuite.new(
     ruby: executable,
     ruby_description: ruby_descriptions[name],
     categories: args.categories,
@@ -179,6 +55,7 @@ def run_benchmarks(ruby:, ruby_description:, categories:, name_filters:, out_pat
     pre_init: args.with_pre_init,
     no_pinning: args.no_pinning
   )
+  bench_data[name], failures = suite.run
   # Make it easier to query later.
   bench_failures[name] = failures unless failures.empty?
 end
diff --git a/test/benchmark_runner_test.rb b/test/benchmark_runner_test.rb
index 3ecd592f..20a7b615 100644
--- a/test/benchmark_runner_test.rb
+++ b/test/benchmark_runner_test.rb
@@ -49,64 +49,6 @@
     end
   end
 
-  describe '.expand_pre_init' do
-    it 'returns load path and require options for valid file' do
-      Dir.mktmpdir do |dir|
-        file = File.join(dir, 'pre_init.rb')
-        FileUtils.touch(file)
-
-        result = BenchmarkRunner.expand_pre_init(file)
-
-        assert_equal 4, result.length
-        assert_equal '-I', result[0]
-        assert_equal dir, result[1].to_s
-        assert_equal '-r', result[2]
-        assert_equal 'pre_init', result[3].to_s
-      end
-    end
-
-    it 'handles files with different extensions' do
-      Dir.mktmpdir do |dir|
-        file = File.join(dir, 'my_config.rb')
-        FileUtils.touch(file)
-
-        result = BenchmarkRunner.expand_pre_init(file)
-
-        assert_equal 'my_config', result[3].to_s
-      end
-    end
-
-    it 'handles nested directories' do
-      Dir.mktmpdir do |dir|
-        subdir = File.join(dir, 'config', 'initializers')
-        FileUtils.mkdir_p(subdir)
-        file = File.join(subdir, 'setup.rb')
-        FileUtils.touch(file)
-
-        result = BenchmarkRunner.expand_pre_init(file)
-
-        assert_equal subdir, result[1].to_s
-        assert_equal 'setup', result[3].to_s
-      end
-    end
-
-    it 'exits when file does not exist' do
-      out = capture_io do
-        assert_raises(SystemExit) { BenchmarkRunner.expand_pre_init('/nonexistent/file.rb') }
-      end
-      assert_includes out, "--with-pre-init called with non-existent file!\n"
-    end
-
-    it 'exits when path is a directory' do
-      Dir.mktmpdir do |dir|
-        out = capture_io do
-          assert_raises(SystemExit) { BenchmarkRunner.expand_pre_init(dir) }
-        end
-        assert_includes out, "--with-pre-init called with a directory, please pass a .rb file\n"
-      end
-    end
-  end
-
   describe '.sort_benchmarks' do
     before do
       @metadata = {
@@ -158,24 +100,6 @@
     end
   end
 
-  describe '.os' do
-    it 'detects the operating system' do
-      result = BenchmarkRunner.os
-      assert_includes [:linux, :macosx, :windows, :unix], result
-    end
-
-    it 'caches the os result' do
-      first_call = BenchmarkRunner.os
-      second_call = BenchmarkRunner.os
-      assert_equal second_call, first_call
-    end
-
-    it 'returns a symbol' do
-      result = BenchmarkRunner.os
-      assert_instance_of Symbol, result
-    end
-  end
-
   describe '.check_call' do
     it 'runs a successful command and returns success status' do
       result = nil
@@ -252,40 +176,6 @@
     end
   end
 
-  describe '.setarch_prefix' do
-    it 'returns an array' do
-      result = BenchmarkRunner.setarch_prefix
-      assert_instance_of Array, result
-    end
-
-    it 'returns setarch command on Linux with proper permissions' do
-      skip 'Not on Linux' unless BenchmarkRunner.os == :linux
-
-      prefix = BenchmarkRunner.setarch_prefix
-
-      # Should either return the prefix or empty array if no permission
-      assert_includes [0, 3], prefix.length
-
-      if prefix.length == 3
-        assert_equal 'setarch', prefix[0]
-        assert_equal '-R', prefix[2]
-      end
-    end
-
-    it 'returns empty array when setarch fails' do
-      skip 'Test requires Linux' unless BenchmarkRunner.os == :linux
-
-      # If we don't have permissions, it should return empty array
-      prefix = BenchmarkRunner.setarch_prefix
-      if prefix.empty?
-        assert_equal [], prefix
-      else
-        # If we do have permissions, verify the structure
-        assert_equal 3, prefix.length
-      end
-    end
-  end
-
   describe 'Stats integration' do
     it 'calculates mean correctly' do
       values = [1, 2, 3, 4, 5]
diff --git a/test/benchmark_suite_test.rb b/test/benchmark_suite_test.rb
new file mode 100644
index 00000000..ce953b91
--- /dev/null
+++ b/test/benchmark_suite_test.rb
@@ -0,0 +1,536 @@
+require_relative 'test_helper'
+require_relative '../lib/benchmark_suite'
+require 'tempfile'
+require 'tmpdir'
+require 'fileutils'
+require 'json'
+require 'yaml'
+
+describe BenchmarkSuite do
+  before do
+    @original_dir = Dir.pwd
+    @temp_dir = Dir.mktmpdir
+    Dir.chdir(@temp_dir)
+
+    # Create mock benchmarks directory structure
+    FileUtils.mkdir_p('benchmarks')
+    FileUtils.mkdir_p('benchmarks-ractor')
+    FileUtils.mkdir_p('harness')
+
+    # Create a simple benchmark file
+    File.write('benchmarks/simple.rb', <<~RUBY)
+      require 'json'
+      result = {
+        'warmup' => [0.001],
+        'bench' => [0.001, 0.0009, 0.0011],
+        'rss' => 10485760
+      }
+      File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+    RUBY
+
+    # Create benchmarks metadata
+    @metadata = {
+      'simple' => { 'category' => 'micro' },
+      'fib' => { 'category' => 'micro' }
+    }
+    File.write('benchmarks.yml', YAML.dump(@metadata))
+
+    @out_path = File.join(@temp_dir, 'output')
+    FileUtils.mkdir_p(@out_path)
+  end
+
+  after do
+    Dir.chdir(@original_dir)
+    FileUtils.rm_rf(@temp_dir)
+  end
+
+  describe '#initialize' do
+    it 'sets all required attributes' do
+      suite = BenchmarkSuite.new(
+        ruby: ['ruby'],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['micro'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness'
+      )
+
+      assert_equal ['ruby'], suite.ruby
+      assert_equal 'ruby 3.2.0', suite.ruby_description
+      assert_equal ['micro'], suite.categories
+      assert_equal [], suite.name_filters
+      assert_equal @out_path, suite.out_path
+      assert_equal 'harness', suite.harness
+      assert_nil suite.pre_init
+      assert_equal false, suite.no_pinning
+    end
+
+    it 'accepts optional parameters' do
+      suite = BenchmarkSuite.new(
+        ruby: ['ruby'],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      assert_equal true, suite.no_pinning
+    end
+
+    it 'sets bench_dir to BENCHMARKS_DIR by default' do
+      suite = BenchmarkSuite.new(
+        ruby: ['ruby'],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['micro'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness'
+      )
+
+      assert_equal 'benchmarks', suite.bench_dir
+      assert_equal 'benchmarks-ractor', suite.ractor_bench_dir
+      assert_equal 'harness', suite.harness
+      assert_equal ['micro'], suite.categories
+    end
+
+    it 'sets bench_dir to ractor directory and updates harness when ractor-only category is used' do
+      suite = BenchmarkSuite.new(
+        ruby: ['ruby'],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['ractor-only'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness'
+      )
+
+      assert_equal 'benchmarks-ractor', suite.bench_dir
+      assert_equal 'benchmarks-ractor', suite.ractor_bench_dir
+      assert_equal 'harness-ractor', suite.harness
+      assert_equal [], suite.categories
+    end
+
+    it 'keeps bench_dir as BENCHMARKS_DIR when ractor category is used' do
+      suite = BenchmarkSuite.new(
+        ruby: ['ruby'],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['ractor'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness'
+      )
+
+      assert_equal 'benchmarks', suite.bench_dir
+      assert_equal 'benchmarks-ractor', suite.ractor_bench_dir
+      assert_equal 'harness', suite.harness
+      assert_equal ['ractor'], suite.categories
+    end
+  end
+
+  describe '#run' do
+    it 'returns bench_data and bench_failures as a tuple' do
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      result = nil
+      capture_io do
+        result = suite.run
+      end
+
+      assert_instance_of Array, result
+      assert_equal 2, result.length
+
+      bench_data, bench_failures = result
+      assert_instance_of Hash, bench_data
+      assert_instance_of Hash, bench_failures
+    end
+
+    it 'runs matching benchmarks and collects results' do
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data, bench_failures = nil
+      capture_io do
+        bench_data, bench_failures = suite.run
+      end
+
+      assert_includes bench_data, 'simple'
+      assert_includes bench_data['simple'], 'warmup'
+      assert_includes bench_data['simple'], 'bench'
+      assert_includes bench_data['simple'], 'rss'
+      assert_includes bench_data['simple'], 'command_line'
+
+      assert_empty bench_failures
+    end
+
+    it 'prints progress messages while running' do
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      output = capture_io do
+        suite.run
+      end
+
+      assert_includes output[0], 'Running benchmark "simple"'
+    end
+
+    it 'records failures when benchmark script fails' do
+      # Create a failing benchmark
+      File.write('benchmarks/failing.rb', <<~RUBY)
+        exit(1)
+      RUBY
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['failing'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data, bench_failures = nil
+      capture_io do
+        bench_data, bench_failures = suite.run
+      end
+
+      assert_empty bench_data
+      assert_includes bench_failures, 'failing'
+      assert_equal 1, bench_failures['failing']
+    end
+
+    it 'handles benchmarks in subdirectories' do
+      # Create a benchmark in a subdirectory
+      FileUtils.mkdir_p('benchmarks/subdir')
+      File.write('benchmarks/subdir/benchmark.rb', <<~RUBY)
+        require 'json'
+        result = {
+          'warmup' => [0.001],
+          'bench' => [0.001],
+          'rss' => 10485760
+        }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['subdir'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data, bench_failures = nil
+      capture_io do
+        bench_data, bench_failures = suite.run
+      end
+
+      assert_includes bench_data, 'subdir'
+      assert_empty bench_failures
+    end
+
+    it 'handles ractor-only category' do
+      # Create a ractor benchmark
+      File.write('benchmarks-ractor/ractor_test.rb', <<~RUBY)
+        require 'json'
+        result = {
+          'warmup' => [0.001],
+          'bench' => [0.001],
+          'rss' => 10485760
+        }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['ractor-only'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data, bench_failures = nil
+      capture_io do
+        bench_data, bench_failures = suite.run
+      end
+
+      # When ractor-only is specified, it should use benchmarks-ractor directory
+      assert_includes bench_data, 'ractor_test'
+      assert_empty bench_failures
+
+      # harness should be updated to harness-ractor
+      assert_equal 'harness-ractor', suite.harness
+    end
+
+    it 'includes both regular and ractor benchmarks with ractor category' do
+      File.write('benchmarks-ractor/ractor_bench.rb', <<~RUBY)
+        require 'json'
+        result = {
+          'warmup' => [0.001],
+          'bench' => [0.001],
+          'rss' => 10485760
+        }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['ractor'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data = nil
+      capture_io do
+        bench_data, _ = suite.run
+      end
+
+      # With the ractor category the ractor directory is scanned too, and its
+      # entries bypass the category filter, so the ractor benchmark must have run
+      assert_includes bench_data, 'ractor_bench'
+    end
+
+    it 'expands pre_init when provided' do
+      # Create a pre_init file
+      pre_init_file = File.join(@temp_dir, 'pre_init.rb')
+      File.write(pre_init_file, "# Pre-initialization code\n")
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        pre_init: pre_init_file,
+        no_pinning: true
+      )
+
+      assert_instance_of Array, suite.pre_init
+      assert_equal 4, suite.pre_init.length
+      assert_equal '-I', suite.pre_init[0]
+      assert_equal @temp_dir, suite.pre_init[1].to_s
+      assert_equal '-r', suite.pre_init[2]
+      assert_equal 'pre_init', suite.pre_init[3].to_s
+    end
+
+    it 'handles pre_init files with a different base name' do
+      # Create a .rb pre_init file whose base name is not "pre_init"
+      pre_init_file = File.join(@temp_dir, 'my_config.rb')
+      File.write(pre_init_file, "# Config code\n")
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        pre_init: pre_init_file,
+        no_pinning: true
+      )
+
+      # Should extract filename without extension
+      assert_equal 'my_config', suite.pre_init[3].to_s
+    end
+
+    it 'handles pre_init in nested directories' do
+      # Create a pre_init file in nested directory
+      subdir = File.join(@temp_dir, 'config', 'initializers')
+      FileUtils.mkdir_p(subdir)
+      pre_init_file = File.join(subdir, 'setup.rb')
+      File.write(pre_init_file, "# Setup code\n")
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        pre_init: pre_init_file,
+        no_pinning: true
+      )
+
+      # Should use the nested directory as load path
+      assert_equal subdir, suite.pre_init[1].to_s
+      assert_equal 'setup', suite.pre_init[3].to_s
+    end
+
+    it 'exits when pre_init file does not exist' do
+      output = capture_io do
+        assert_raises(SystemExit) do
+          BenchmarkSuite.new(
+            ruby: [RbConfig.ruby],
+            ruby_description: 'ruby 3.2.0',
+            categories: [],
+            name_filters: ['simple'],
+            out_path: @out_path,
+            harness: 'harness',
+            pre_init: '/nonexistent/file.rb',
+            no_pinning: true
+          )
+        end
+      end
+      assert_includes output[0], '--with-pre-init called with non-existent file!'
+    end
+
+    it 'exits when pre_init path is a directory' do
+      output = capture_io do
+        assert_raises(SystemExit) do
+          BenchmarkSuite.new(
+            ruby: [RbConfig.ruby],
+            ruby_description: 'ruby 3.2.0',
+            categories: [],
+            name_filters: ['simple'],
+            out_path: @out_path,
+            harness: 'harness',
+            pre_init: @temp_dir,
+            no_pinning: true
+          )
+        end
+      end
+      assert_includes output[0], '--with-pre-init called with a directory'
+    end
+
+    it 'stores command_line in benchmark results' do
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data, _ = nil
+      capture_io do
+        bench_data, _ = suite.run
+      end
+
+      assert_includes bench_data['simple'], 'command_line'
+      assert_instance_of String, bench_data['simple']['command_line']
+      assert_includes bench_data['simple']['command_line'], 'simple.rb'
+    end
+
+    it 'cleans up temporary JSON files after successful run' do
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      capture_io do
+        suite.run
+      end
+
+      # Temporary files should be cleaned up
+      temp_files = Dir.glob(File.join(@out_path, 'temp*.json'))
+      assert_empty temp_files
+    end
+
+    it 'filters benchmarks by name_filters' do
+      # Create multiple benchmarks
+      File.write('benchmarks/bench_a.rb', <<~RUBY)
+        require 'json'
+        result = { 'warmup' => [0.001], 'bench' => [0.001], 'rss' => 10485760 }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      File.write('benchmarks/bench_b.rb', <<~RUBY)
+        require 'json'
+        result = { 'warmup' => [0.001], 'bench' => [0.001], 'rss' => 10485760 }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['bench_a'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data = nil
+      capture_io do
+        bench_data, _ = suite.run
+      end
+
+      assert_includes bench_data, 'bench_a'
+      refute_includes bench_data, 'bench_b'
+    end
+  end
+
+  describe 'integration with BenchmarkFilter' do
+    it 'uses BenchmarkFilter to match benchmarks' do
+      # Create benchmarks with different categories
+      File.write('benchmarks/micro_bench.rb', <<~RUBY)
+        require 'json'
+        result = { 'warmup' => [0.001], 'bench' => [0.001], 'rss' => 10485760 }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      metadata = {
+        'micro_bench' => { 'category' => 'micro' },
+        'simple' => { 'category' => 'other' }
+      }
+      File.write('benchmarks.yml', YAML.dump(metadata))
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['micro'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data = nil
+      capture_io do
+        bench_data, _ = suite.run
+      end
+
+      # Should only include micro category benchmarks
+      assert_includes bench_data, 'micro_bench'
+      refute_includes bench_data, 'simple'
+    end
+  end
+end