diff --git a/lib/benchmark_runner.rb b/lib/benchmark_runner.rb index e3ea26de..6333cbae 100644 --- a/lib/benchmark_runner.rb +++ b/lib/benchmark_runner.rb @@ -16,16 +16,6 @@ def free_file_no(directory) end end - # Sort benchmarks with headlines first, then others, then micro - def sort_benchmarks(bench_names, metadata) - headline_benchmarks = metadata.select { |_, meta| meta['category'] == 'headline' }.keys - micro_benchmarks = metadata.select { |_, meta| meta['category'] == 'micro' }.keys - - headline_names, bench_names = bench_names.partition { |name| headline_benchmarks.include?(name) } - micro_names, other_names = bench_names.partition { |name| micro_benchmarks.include?(name) } - headline_names.sort + other_names.sort + micro_names.sort - end - # Checked system - error or return info if the command fails def check_call(command, env: {}, raise_error: true, quiet: false) puts("+ #{command}") unless quiet diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb new file mode 100644 index 00000000..6bbc53f7 --- /dev/null +++ b/lib/results_table_builder.rb @@ -0,0 +1,170 @@ +require_relative '../misc/stats' +require 'yaml' + +class ResultsTableBuilder + SECONDS_TO_MS = 1000.0 + BYTES_TO_MIB = 1024.0 * 1024.0 + + def initialize(executable_names:, bench_data:, include_rss: false) + @executable_names = executable_names + @bench_data = bench_data + @include_rss = include_rss + @base_name = executable_names.first + @other_names = executable_names[1..] + @bench_names = compute_bench_names + end + + def build + table = [build_header] + format = build_format + + @bench_names.each do |bench_name| + next unless has_complete_data?(bench_name) + + row = build_row(bench_name) + table << row + end + + [table, format] + end + + private + + def has_complete_data?(bench_name) + @bench_data.all? { |(_k, v)| v[bench_name] } + end + + def build_header + header = ["bench"] + + @executable_names.each do |name| + header << "#{name} (ms)" << "stddev (%)" + header << "RSS (MiB)" if @include_rss + end + + @other_names.each do |name| + header << "#{name} 1st itr" + end + + @other_names.each do |name| + header << "#{@base_name}/#{name}" + end + + header + end + + def build_format + format = ["%s"] + + @executable_names.each do |_name| + format << "%.1f" << "%.1f" + format << "%.1f" if @include_rss + end + + @other_names.each do |_name| + format << "%.3f" + end + + @other_names.each do |_name| + format << "%.3f" + end + + format + end + + def build_row(bench_name) + t0s = extract_first_iteration_times(bench_name) + times_no_warmup = extract_benchmark_times(bench_name) + rsss = extract_rss_values(bench_name) + + base_t0, *other_t0s = t0s + base_t, *other_ts = times_no_warmup + base_rss, *other_rsss = rsss + + row = [bench_name] + build_base_columns(row, base_t, base_rss) + build_comparison_columns(row, other_ts, other_rsss) + build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts) + + row + end + + def build_base_columns(row, base_t, base_rss) + row << mean(base_t) + row << stddev_percent(base_t) + row << base_rss if @include_rss + end + + def build_comparison_columns(row, other_ts, other_rsss) + other_ts.zip(other_rsss).each do |other_t, other_rss| + row << mean(other_t) + row << stddev_percent(other_t) + row << other_rss if @include_rss + end + end + + def build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts) + ratio_1sts = other_t0s.map { |other_t0| base_t0 / other_t0 } + ratios = other_ts.map { |other_t| mean(base_t) / mean(other_t) } + row.concat(ratio_1sts) + row.concat(ratios) + end + + def extract_first_iteration_times(bench_name) + @executable_names.map do |name| + data = bench_data_for(name, bench_name) + (data['warmup'][0] || data['bench'][0]) * SECONDS_TO_MS + end + end + + def extract_benchmark_times(bench_name) + @executable_names.map do |name| + bench_data_for(name, bench_name)['bench'].map { |v| v * SECONDS_TO_MS } + end + end + + def extract_rss_values(bench_name) + @executable_names.map do |name| + bench_data_for(name, bench_name)['rss'] / BYTES_TO_MIB + end + end + + def bench_data_for(name, bench_name) + @bench_data[name][bench_name] + end + + def mean(values) + Stats.new(values).mean + end + + def stddev(values) + Stats.new(values).stddev + end + + def stddev_percent(values) + 100 * stddev(values) / mean(values) + end + + def compute_bench_names + benchmarks_metadata = YAML.load_file('benchmarks.yml') + sort_benchmarks(all_benchmark_names, benchmarks_metadata) + end + + def all_benchmark_names + @bench_data.values.flat_map(&:keys).uniq + end + + # Sort benchmarks with headlines first, then others, then micro + def sort_benchmarks(bench_names, metadata) + bench_names.sort_by { |name| [category_priority(name, metadata), name] } + end + + def category_priority(bench_name, metadata) + category = metadata.dig(bench_name, 'category') || 'other' + case category + when 'headline' then 0 + when 'micro' then 2 + else 1 + end + end +end diff --git a/run_benchmarks.rb b/run_benchmarks.rb index f4991686..e85cc897 100755 --- a/run_benchmarks.rb +++ b/run_benchmarks.rb @@ -8,25 +8,12 @@ require 'rbconfig' require 'etc' require 'yaml' -require_relative 'misc/stats' require_relative 'lib/cpu_config' require_relative 'lib/benchmark_runner' require_relative 'lib/benchmark_suite' require_relative 'lib/table_formatter' require_relative 'lib/argument_parser' - -def mean(values) - Stats.new(values).mean -end - -def stddev(values) - Stats.new(values).stddev -end - -def sort_benchmarks(bench_names) - benchmarks_metadata = YAML.load_file('benchmarks.yml') - BenchmarkRunner.sort_benchmarks(bench_names, benchmarks_metadata) -end +require_relative 'lib/results_table_builder' args = ArgumentParser.parse(ARGV) @@ -36,15 +23,14 @@ def sort_benchmarks(bench_names) FileUtils.mkdir_p(args.out_path) ruby_descriptions = {} -args.executables.each do |name, executable| - ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp -end # Benchmark with and without YJIT bench_start_time = Time.now.to_f bench_data = {} bench_failures = {} args.executables.each do |name, executable| + ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp + suite = BenchmarkSuite.new( ruby: executable, ruby_description: ruby_descriptions[name], @@ -61,9 +47,6 @@ def sort_benchmarks(bench_names) end bench_end_time = Time.now.to_f -# Get keys from all rows in case a benchmark failed for only some executables. -bench_names = sort_benchmarks(bench_data.map { |k, v| v.keys }.flatten.uniq) - bench_total_time = (bench_end_time - bench_start_time).to_i puts("Total time spent benchmarking: #{bench_total_time}s") @@ -73,55 +56,15 @@ def sort_benchmarks(bench_names) puts -# Table for the data we've gathered +# Build results table all_names = args.executables.keys base_name, *other_names = all_names -table = [["bench"]] -format = ["%s"] -all_names.each do |name| - table[0] += ["#{name} (ms)", "stddev (%)"] - format += ["%.1f", "%.1f"] - if args.rss - table[0] += ["RSS (MiB)"] - format += ["%.1f"] - end -end -other_names.each do |name| - table[0] += ["#{name} 1st itr"] - format += ["%.3f"] -end -other_names.each do |name| - table[0] += ["#{base_name}/#{name}"] - format += ["%.3f"] -end - -# Format the results table -bench_names.each do |bench_name| - # Skip this bench_name if we failed to get data for any of the executables. - next unless bench_data.all? { |(_k, v)| v[bench_name] } - - t0s = all_names.map { |name| (bench_data[name][bench_name]['warmup'][0] || bench_data[name][bench_name]['bench'][0]) * 1000.0 } - times_no_warmup = all_names.map { |name| bench_data[name][bench_name]['bench'].map { |v| v * 1000.0 } } - rsss = all_names.map { |name| bench_data[name][bench_name]['rss'] / 1024.0 / 1024.0 } - - base_t0, *other_t0s = t0s - base_t, *other_ts = times_no_warmup - base_rss, *other_rsss = rsss - - ratio_1sts = other_t0s.map { |other_t0| base_t0 / other_t0 } - ratios = other_ts.map { |other_t| mean(base_t) / mean(other_t) } - - row = [bench_name, mean(base_t), 100 * stddev(base_t) / mean(base_t)] - row << base_rss if args.rss - other_ts.zip(other_rsss).each do |other_t, other_rss| - row += [mean(other_t), 100 * stddev(other_t) / mean(other_t)] - row << other_rss if args.rss - end - - row += ratio_1sts + ratios - - table << row -end +builder = ResultsTableBuilder.new( + executable_names: all_names, + bench_data: bench_data, + include_rss: args.rss +) +table, format = builder.build output_path = nil if args.out_override diff --git a/test/benchmark_runner_test.rb b/test/benchmark_runner_test.rb index 20a7b615..ef450323 100644 --- a/test/benchmark_runner_test.rb +++ b/test/benchmark_runner_test.rb @@ -49,57 +49,6 @@ end end - describe '.sort_benchmarks' do - before do - @metadata = { - 'fib' => { 'category' => 'micro' }, - 'railsbench' => { 'category' => 'headline' }, - 'optcarrot' => { 'category' => 'headline' }, - 'some_bench' => { 'category' => 'other' }, - 'another_bench' => { 'category' => 'other' }, - 'zebra' => { 'category' => 'other' } - } - end - - it 'sorts benchmarks with headlines first, then others, then micro' do - bench_names = ['fib', 'some_bench', 'railsbench', 'another_bench', 'optcarrot'] - result = BenchmarkRunner.sort_benchmarks(bench_names, @metadata) - - # Headlines should be first - headline_indices = [result.index('railsbench'), result.index('optcarrot')] - assert_equal true, headline_indices.all? { |i| i < 2 } - - # Micro should be last - assert_equal 'fib', result.last - - # Others in the middle - other_indices = [result.index('some_bench'), result.index('another_bench')] - assert_equal true, other_indices.all? { |i| i >= 2 && i < result.length - 1 } - end - - it 'sorts alphabetically within categories' do - bench_names = ['zebra', 'another_bench', 'some_bench'] - result = BenchmarkRunner.sort_benchmarks(bench_names, @metadata) - assert_equal ['another_bench', 'some_bench', 'zebra'], result - end - - it 'handles empty list' do - result = BenchmarkRunner.sort_benchmarks([], @metadata) - assert_equal [], result - end - - it 'handles single benchmark' do - result = BenchmarkRunner.sort_benchmarks(['fib'], @metadata) - assert_equal ['fib'], result - end - - it 'handles only headline benchmarks' do - bench_names = ['railsbench', 'optcarrot'] - result = BenchmarkRunner.sort_benchmarks(bench_names, @metadata) - assert_equal ['optcarrot', 'railsbench'], result - end - end - describe '.check_call' do it 'runs a successful command and returns success status' do result = nil diff --git a/test/results_table_builder_test.rb b/test/results_table_builder_test.rb new file mode 100644 index 00000000..f5b811fc --- /dev/null +++ b/test/results_table_builder_test.rb @@ -0,0 +1,371 @@ +require_relative 'test_helper' +require_relative '../lib/results_table_builder' +require 'yaml' +require 'tmpdir' + +describe ResultsTableBuilder do + before do + @original_dir = Dir.pwd + @temp_dir = Dir.mktmpdir + Dir.chdir(@temp_dir) + + benchmarks_metadata = { + 'fib' => { 'category' => 'micro' }, + 'loop' => { 'category' => 'micro' }, + 'railsbench' => { 'category' => 'headline' }, + 'optcarrot' => { 'category' => 'headline' }, + 'zebra' => { 'category' => 'other' }, + 'apple' => { 'category' => 'other' }, + 'mango' => { 'category' => 'other' }, + 'some_bench' => { 'category' => 'other' }, + 'another_bench' => { 'category' => 'other' } + } + File.write('benchmarks.yml', YAML.dump(benchmarks_metadata)) + end + + after do + Dir.chdir(@original_dir) + FileUtils.rm_rf(@temp_dir) + end + + describe '#build' do + it 'builds a table with header and data rows' do + executable_names = ['ruby', 'ruby-yjit'] + bench_data = { + 'ruby' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1, 0.11, 0.09], + 'rss' => 1024 * 1024 * 10 + } + }, + 'ruby-yjit' => { + 'fib' => { + 'warmup' => [0.05], + 'bench' => [0.05, 0.06, 0.04], + 'rss' => 1024 * 1024 * 12 + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: executable_names, + bench_data: bench_data, + include_rss: false + ) + + table, format = builder.build + + assert_equal ['bench', 'ruby (ms)', 'stddev (%)', 'ruby-yjit (ms)', 'stddev (%)', 'ruby-yjit 1st itr', 'ruby/ruby-yjit'], table[0] + + assert_equal ['%s', '%.1f', '%.1f', '%.1f', '%.1f', '%.3f', '%.3f'], format + + assert_equal 'fib', table[1][0] + assert_in_delta 100.0, table[1][1], 1.0 + assert_in_delta 50.0, table[1][3], 1.0 + assert_in_delta 2.0, table[1][5], 0.1 + assert_in_delta 2.0, table[1][6], 0.1 + end + + it 'includes RSS columns when include_rss is true' do + executable_names = ['ruby'] + bench_data = { + 'ruby' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: executable_names, + bench_data: bench_data, + include_rss: true + ) + + table, format = builder.build + + assert_equal ['bench', 'ruby (ms)', 'stddev (%)', 'RSS (MiB)'], table[0] + + assert_equal ['%s', '%.1f', '%.1f', '%.1f'], format + + assert_in_delta 10.0, table[1][3], 0.1 + end + + it 'skips benchmarks with missing data' do + executable_names = ['ruby', 'ruby-yjit'] + bench_data = { + 'ruby' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + }, + 'loop' => { + 'warmup' => [0.2], + 'bench' => [0.2], + 'rss' => 1024 * 1024 * 10 + } + }, + 'ruby-yjit' => { + 'fib' => { + 'warmup' => [0.05], + 'bench' => [0.05], + 'rss' => 1024 * 1024 * 12 + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: executable_names, + bench_data: bench_data, + include_rss: false + ) + + table, _format = builder.build + + assert_equal 2, table.length + assert_equal 'fib', table[1][0] + end + + it 'handles multiple executables correctly' do + executable_names = ['ruby', 'ruby-yjit', 'ruby-rjit'] + bench_data = { + 'ruby' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + } + }, + 'ruby-yjit' => { + 'fib' => { + 'warmup' => [0.05], + 'bench' => [0.05], + 'rss' => 1024 * 1024 * 12 + } + }, + 'ruby-rjit' => { + 'fib' => { + 'warmup' => [0.07], + 'bench' => [0.07], + 'rss' => 1024 * 1024 * 11 + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: executable_names, + bench_data: bench_data, + include_rss: false + ) + + table, format = builder.build + + expected_header = [ + 'bench', + 'ruby (ms)', 'stddev (%)', + 'ruby-yjit (ms)', 'stddev (%)', + 'ruby-rjit (ms)', 'stddev (%)', + 'ruby-yjit 1st itr', + 'ruby-rjit 1st itr', + 'ruby/ruby-yjit', + 'ruby/ruby-rjit' + ] + assert_equal expected_header, table[0] + + expected_format = ['%s', '%.1f', '%.1f', '%.1f', '%.1f', '%.1f', '%.1f', '%.3f', '%.3f', '%.3f', '%.3f'] + assert_equal expected_format, format + end + + it 'uses bench data when warmup is missing' do + executable_names = ['ruby'] + bench_data = { + 'ruby' => { + 'fib' => { + 'warmup' => [], + 'bench' => [0.1, 0.11], + 'rss' => 1024 * 1024 * 10 + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: executable_names, + bench_data: bench_data, + include_rss: false + ) + + table, _format = builder.build + + assert_equal 2, table.length + assert_equal 'fib', table[1][0] + assert_in_delta 100.0, table[1][1], 5.0 + end + + it 'sorts benchmarks with headlines first, then others, then micro' do + executable_names = ['ruby'] + bench_data = { + 'ruby' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + }, + 'loop' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + }, + 'railsbench' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + }, + 'optcarrot' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: executable_names, + bench_data: bench_data, + include_rss: false + ) + + table, _format = builder.build + + bench_names = table[1..].map { |row| row[0] } + + assert_equal 'optcarrot', bench_names[0] + assert_equal 'railsbench', bench_names[1] + + assert_equal 'fib', bench_names[2] + assert_equal 'loop', bench_names[3] + end + + it 'sorts benchmarks alphabetically within other category' do + executable_names = ['ruby'] + bench_data = { + 'ruby' => { + 'zebra' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + }, + 'apple' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + }, + 'mango' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: executable_names, + bench_data: bench_data, + include_rss: false + ) + + table, _format = builder.build + + bench_names = table[1..].map { |row| row[0] } + + assert_equal ['apple', 'mango', 'zebra'], bench_names + end + + it 'handles single benchmark' do + executable_names = ['ruby'] + bench_data = { + 'ruby' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: executable_names, + bench_data: bench_data, + include_rss: false + ) + + table, _format = builder.build + + assert_equal 2, table.length + assert_equal 'fib', table[1][0] + end + + it 'handles only headline benchmarks' do + executable_names = ['ruby'] + bench_data = { + 'ruby' => { + 'railsbench' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + }, + 'optcarrot' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 1024 * 1024 * 10 + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: executable_names, + bench_data: bench_data, + include_rss: false + ) + + table, _format = builder.build + + bench_names = table[1..].map { |row| row[0] } + + assert_equal ['optcarrot', 'railsbench'], bench_names + end + + it 'sorts mixed categories correctly with multiple benchmarks' do + executable_names = ['ruby'] + bench_data = { + 'ruby' => { + 'fib' => { 'warmup' => [0.1], 'bench' => [0.1], 'rss' => 1024 * 1024 * 10 }, + 'some_bench' => { 'warmup' => [0.1], 'bench' => [0.1], 'rss' => 1024 * 1024 * 10 }, + 'railsbench' => { 'warmup' => [0.1], 'bench' => [0.1], 'rss' => 1024 * 1024 * 10 }, + 'another_bench' => { 'warmup' => [0.1], 'bench' => [0.1], 'rss' => 1024 * 1024 * 10 }, + 'optcarrot' => { 'warmup' => [0.1], 'bench' => [0.1], 'rss' => 1024 * 1024 * 10 } + } + } + + builder = ResultsTableBuilder.new( + executable_names: executable_names, + bench_data: bench_data, + include_rss: false + ) + + table, _format = builder.build + bench_names = table[1..].map { |row| row[0] } + + assert_equal 'optcarrot', bench_names[0] + assert_equal 'railsbench', bench_names[1] + + assert_equal 'another_bench', bench_names[2] + assert_equal 'some_bench', bench_names[3] + + assert_equal 'fib', bench_names[4] + end + end +end