diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py index 76f0e533389..ba6142a4826 100755 --- a/.github/scripts/extract_benchmark_results.py +++ b/.github/scripts/extract_benchmark_results.py @@ -229,11 +229,7 @@ def extract_ios_metric( elif method == "forward": if metric_name == "Clock Monotonic Time, s": - benchmark_result["metric"] = ( - "generate_time(ms)" - if "llama" in test_name - else "avg_inference_latency(ms)" - ) + benchmark_result["metric"] = "avg_inference_latency(ms)" benchmark_result["actualValue"] = metric_value * 1000 elif metric_name == "Memory Peak Physical, kB": @@ -241,9 +237,14 @@ def extract_ios_metric( benchmark_result["metric"] = "peak_inference_mem_usage(mb)" benchmark_result["actualValue"] = metric_value / 1024 - elif method == "generate" and metric_name == "Tokens Per Second, t/s": - benchmark_result["metric"] = "token_per_sec" - benchmark_result["actualValue"] = metric_value + elif method == "generate": + if metric_name == "Clock Monotonic Time, s": + benchmark_result["metric"] = "generate_time(ms)" + benchmark_result["actualValue"] = metric_value * 1000 + + elif metric_name == "Tokens Per Second, t/s": + benchmark_result["metric"] = "token_per_sec" + benchmark_result["actualValue"] = metric_value return benchmark_result diff --git a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm index 16c1c1c1d6a..332c3986b0b 100644 --- a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm +++ b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm @@ -82,7 +82,7 @@ @implementation LLaMATests return; } TokensPerSecondMetric *tokensPerSecondMetric = [TokensPerSecondMetric new]; - [testCase measureWithMetrics:@[ tokensPerSecondMetric, [XCTMemoryMetric new] ] + [testCase measureWithMetrics:@[ tokensPerSecondMetric, [XCTClockMetric new], [XCTMemoryMetric new] ] block:^{ tokensPerSecondMetric.tokenCount = 0; const auto status = runner->generate(