Add CacheBench dashboard (#6322)

huydhn · web-flow · commit 57cfb42e22a1 · 2025-02-21T17:01:00.000-08:00
Add a new benchmark dashboard for pytorch/pytorch#147546. This change is bigger than usual because I need to: * Add support for selecting different benchmarks from the same repo via a new `benchmarkName` parameter * Add a new mode dropdown list. There is now quite a lot of hardcoded benchmark-specific logic in the dashboard code, so I think it's time to see if we can refactor it away. ### Preview https://torchci-git-fork-huydhn-add-cachebench-page-fbopensource.vercel.app/benchmark/llms?repoName=pytorch%2Fpytorch&benchmarkName=TorchCache+Benchmark
diff --git a/clickhouse_db_schema/oss_ci_benchmark_v3_materialized_views/schema.sql b/clickhouse_db_schema/oss_ci_benchmark_v3_materialized_views/schema.sql
@@ -4,6 +4,7 @@ CREATE TABLE benchmark.oss_ci_benchmark_metadata (
     `repo` String,
     `benchmark_name` String,
     `benchmark_dtype` String,
+    `benchmark_mode` String,
     `model_name` String,
     `model_backend` String,
     `device` String,
@@ -19,6 +20,7 @@ ORDER BY
         repo,
         benchmark_name,
         benchmark_dtype,
+        benchmark_mode,
         model_name,
         model_backend,
         device,
@@ -34,6 +36,7 @@ SELECT
     repo AS repo,
     tupleElement(benchmark, 'name') AS benchmark_name,
     tupleElement(benchmark, 'dtype') AS benchmark_dtype,
+    tupleElement(benchmark, 'mode') AS benchmark_mode,
     tupleElement(model, 'name') AS model_name,
     tupleElement(model, 'backend') AS model_backend,
     IF(
@@ -54,7 +57,8 @@ SELECT
 FROM
     benchmark.oss_ci_benchmark_v3
 WHERE
-    timestamp >= toUnixTimestamp(toDateTime('2025-01-20 22:45:00'));
+    timestamp >= toUnixTimestamp(toDateTime('2025-02-19 00:00:00'))
+    AND tupleElement(benchmark, 'name') != 'sccache_stats';
 
 -- Below is the SQL query to backfill the view with all data from 2024 onward
 INSERT INTO
@@ -63,6 +67,7 @@ SELECT
     repo AS repo,
     tupleElement(benchmark, 'name') AS benchmark_name,
     tupleElement(benchmark, 'dtype') AS benchmark_dtype,
+    tupleElement(benchmark, 'mode') AS benchmark_mode,
     tupleElement(model, 'name') AS model_name,
     tupleElement(model, 'backend') AS model_backend,
     IF(
@@ -82,3 +87,5 @@ SELECT
     timestamp AS timestamp
 FROM
     benchmark.oss_ci_benchmark_v3
+WHERE
+    tupleElement(benchmark, 'name') != 'sccache_stats';
diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json b/torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json
@@ -4,6 +4,7 @@
     "branches": "Array(String)",
     "commits": "Array(String)",
     "device": "String",
+    "mode": "String",
     "dtypes": "Array(String)",
     "excludedMetrics": "Array(String)",
     "benchmarks": "Array(String)",
@@ -20,6 +21,7 @@
       "branches": ["main"],
       "commits": ["bb4bd5f00b35eaaecb47d17caddfbd69e1f733df"],
       "device": "",
+      "mode": "",
       "dtypes": [],
       "excludedMetrics": [],
       "benchmarks": ["PyTorch gpt-fast benchmark"],
diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
@@ -12,6 +12,7 @@ WITH benchmarks AS (
         o.metric.'name' AS metric,
         floor(arrayAvg(o.metric.'benchmark_values'), 2) AS actual,
         floor(toFloat64(o.metric.'target_value'), 2) AS target,
+        o.benchmark.'mode' AS mode,
         o.benchmark.'dtype' AS dtype,
         IF(
             empty(o.runners),
@@ -47,6 +48,14 @@ WITH benchmarks AS (
             JSONExtractString(
                 tupleElement(o.benchmark, 'extra_info')['args'],
                 'tensor_parallel_size'
+            ),
+            -- Used by Cachebench
+            'is_dynamic',
+            IF(
+                tupleElement(o.benchmark, 'extra_info')['is_dynamic'] = '',
+                'false',
+                -- Otherwise keep the recorded value (an empty value defaults to 'false' above)
+                tupleElement(o.benchmark, 'extra_info')['is_dynamic']
             )
         ) AS extra
     FROM
@@ -71,6 +80,10 @@ WITH benchmarks AS (
             has({backends: Array(String) }, o.model.'backend')
             OR empty({backends: Array(String) })
         )
+        AND (
+            o.benchmark.'mode' = {mode: String }
+            OR {mode: String } = ''
+        )
         AND (
             has({dtypes: Array(String) }, o.benchmark.'dtype')
             OR empty({dtypes: Array(String) })
@@ -91,6 +104,7 @@ SELECT DISTINCT
     metric,
     actual,
     target,
+    mode,
     dtype,
     device,
     arch,
@@ -117,6 +131,7 @@ ORDER BY
     workflow_id DESC,
     backend,
     model,
+    mode,
     dtype,
     device,
     metric
diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql
@@ -5,6 +5,7 @@ SELECT DISTINCT
     model_backend AS backend,
     metric_name AS metric,
     benchmark_dtype AS dtype,
+    benchmark_mode AS mode,
     device,
     arch
 FROM
@@ -49,4 +50,5 @@ ORDER BY
     model,
     metric,
     dtype,
+    mode,
     device
diff --git a/torchci/components/NavBar.tsx b/torchci/components/NavBar.tsx
@@ -60,6 +60,10 @@ function NavBar() {
       name: "TorchAO LLMs",
       href: "/benchmark/llms?repoName=pytorch%2Fao",
     },
+    {
+      name: "PT CacheBench",
+      href: "/benchmark/llms?repoName=pytorch%2Fpytorch&benchmarkName=TorchCache+Benchmark",
+    },
     {
       name: "vLLM v1",
       href: "/benchmark/llms?repoName=vllm-project%2Fvllm",
diff --git a/torchci/components/benchmark/llms/ModelGraphPanel.tsx b/torchci/components/benchmark/llms/ModelGraphPanel.tsx
@@ -30,6 +30,7 @@ export function GraphPanel({
   queryParams,
   granularity,
   repoName,
+  benchmarkName,
   modelName,
   backendName,
   dtypeName,
@@ -41,6 +42,7 @@ export function GraphPanel({
   queryParams: { [key: string]: any };
   granularity: Granularity;
   repoName: string;
+  benchmarkName: string;
   modelName: string;
   backendName: string;
   dtypeName: string;
@@ -170,6 +172,12 @@ export function GraphPanel({
                 } else {
                   record.display = `${model} / tp${tensorParallel}`;
                 }
+              } else if (
+                repoName === "pytorch/pytorch" &&
+                benchmarkName === "TorchCache Benchmark"
+              ) {
+                const isDynamic = record.extra!["is_dynamic"];
+                record.display = `${model} / ${isDynamic}`;
               } else {
                 record.display = model.includes(dtype)
                   ? model.includes(device)
diff --git a/torchci/components/benchmark/llms/SummaryPanel.tsx b/torchci/components/benchmark/llms/SummaryPanel.tsx
@@ -21,14 +21,15 @@ const getDeviceArch = (
 ) => {
   const d = device ? device : "";
   const a = arch ? arch : "";
-  return `${d} (${a})`;
+  return a === "" ? d : `${d} (${a})`;
 };
 
 export function SummaryPanel({
   startTime,
   stopTime,
   granularity,
   repoName,
+  benchmarkName,
   modelName,
   backendName,
   metricNames,
@@ -40,6 +41,7 @@ export function SummaryPanel({
   stopTime: dayjs.Dayjs;
   granularity: Granularity;
   repoName: string;
+  benchmarkName: string;
   modelName: string;
   backendName: string;
   metricNames: string[];
@@ -56,7 +58,13 @@ export function SummaryPanel({
   const rCommit = rPerfData.commit;
   const rData = rPerfData.data;
 
-  const data = combineLeftAndRight(repoName, lPerfData, rPerfData);
+  const data = combineLeftAndRight(
+    repoName,
+    benchmarkName,
+    lPerfData,
+    rPerfData
+  );
+  console.log(data);
   const columns: any[] = [
     {
       field: "metadata",
@@ -87,6 +95,10 @@ export function SummaryPanel({
           return `Invalid model name`;
         }
 
+        const mode =
+          metadata.mode !== undefined
+            ? `&modeName=${encodeURIComponent(metadata.mode)}`
+            : "";
         const dtype =
           metadata.dtype !== undefined
             ? `&dtypeName=${encodeURIComponent(metadata.dtype)}`
@@ -99,58 +111,90 @@ export function SummaryPanel({
 
         const url = `/benchmark/llms?startTime=${startTime}&stopTime=${stopTime}&granularity=${granularity}&repoName=${encodeURIComponent(
           repoName
+        )}&benchmarkName=${encodeURIComponent(
+          benchmarkName
         )}&modelName=${encodeURIComponent(
           model
-        )}${backend}${dtype}&deviceName=${encodeURIComponent(
+        )}${backend}${mode}${dtype}&deviceName=${encodeURIComponent(
           deviceName
         )}&archName=${encodeURIComponent(archName)}`;
 
+        const displayName =
+          metadata.origins.length !== 0
+            ? `${model} (${metadata.origins.join(",")})`
+            : model;
         return (
           <a href={url}>
-            <b>{model}</b>
+            <b>{displayName}</b>
           </a>
         );
       },
     },
   ];
 
-  const hasDtype = data.length > 0 && "dtype" in data[0] ? true : false;
-  if (hasDtype) {
+  const hasMode = data.length > 0 && "mode" in data[0] ? true : false;
+  if (hasMode) {
     columns.push({
-      field: "dtype",
-      headerName: "Quantization",
+      field: "mode",
+      headerName: "Mode",
       flex: 1,
       renderCell: (params: GridRenderCellParams<any>) => {
         return `${params.value}`;
       },
     });
   }
 
-  const hasBackend = data.length > 0 && "backend" in data[0] ? true : false;
-  if (hasBackend) {
+  if (repoName === "vllm-project/vllm") {
     columns.push({
-      field: "backend",
-      headerName: "Backend",
+      field: "tensor_parallel_size",
+      headerName: "Tensor parallel",
+      flex: 1,
+      renderCell: (params: GridRenderCellParams<any>) => {
+        return `${params.value}`;
+      },
+    });
+
+    columns.push({
+      field: "request_rate",
+      headerName: "Request rate",
       flex: 1,
       renderCell: (params: GridRenderCellParams<any>) => {
         return `${params.value}`;
       },
     });
   }
 
-  if (repoName === "vllm-project/vllm") {
+  if (
+    repoName === "pytorch/pytorch" &&
+    benchmarkName === "TorchCache Benchmark"
+  ) {
     columns.push({
-      field: "tensor_parallel_size",
-      headerName: "Tensor parallel",
+      field: "is_dynamic",
+      headerName: "Is dynamic?",
       flex: 1,
       renderCell: (params: GridRenderCellParams<any>) => {
         return `${params.value}`;
       },
     });
+  }
 
+  const hasDtype = data.length > 0 && "dtype" in data[0] ? true : false;
+  if (hasDtype) {
     columns.push({
-      field: "request_rate",
-      headerName: "Request rate",
+      field: "dtype",
+      headerName: "Quantization",
+      flex: 1,
+      renderCell: (params: GridRenderCellParams<any>) => {
+        return `${params.value}`;
+      },
+    });
+  }
+
+  const hasBackend = data.length > 0 && "backend" in data[0] ? true : false;
+  if (hasBackend && benchmarkName !== "TorchCache Benchmark") {
+    columns.push({
+      field: "backend",
+      headerName: "Backend",
       flex: 1,
       renderCell: (params: GridRenderCellParams<any>) => {
         return `${params.value}`;
diff --git a/torchci/components/benchmark/llms/common.tsx b/torchci/components/benchmark/llms/common.tsx
@@ -15,6 +15,9 @@ export const EXCLUDED_METRICS: string[] = [
   "std_itl_ms",
   "std_tpot_ms",
   "std_ttft_ms",
+  "cold_compile_time(s)",
+  "warm_compile_time(s)",
+  "speedup_pct",
 ];
 export const DEFAULT_MODEL_NAME = "All Models";
 export const SCALE = 2;
@@ -59,6 +62,10 @@ export const IS_INCREASING_METRIC_VALUE_GOOD: { [k: string]: boolean } = {
   p99_ttft_ms: false,
   requests_per_second: true,
   tokens_per_second: true,
+  "Cold compile time (s)": false,
+  "Warm compile time (s)": false,
+  Speedup: true,
+  "Speedup (%)": true,
 };
 export const METRIC_DISPLAY_SHORT_HEADERS: { [k: string]: string } = {
   "memory_bandwidth(GB/s)": "Bandwidth",
@@ -70,10 +77,13 @@ export const METRIC_DISPLAY_SHORT_HEADERS: { [k: string]: string } = {
   "peak_inference_mem_usage(mb)": "InferenceMem",
   "peak_load_mem_usuage(mb)": "LoadMem",
   "generate_time(ms)": "GenerateTime",
+  "Cold compile time (s)": "ColdCompTime",
+  "Warm compile time (s)": "WarmCompTime",
 };
 export const DEFAULT_DEVICE_NAME = "All Devices";
 export const DEFAULT_ARCH_NAME = "All Platforms";
 export const DEFAULT_DTYPE_NAME = "All DType";
+export const DEFAULT_MODE_NAME = "All Modes";
 export const DEFAULT_BACKEND_NAME = "All Backends";
 
 // Only used by ExecuTorch for now
@@ -94,6 +104,7 @@ export interface LLMsBenchmarkData {
   metric: string;
   actual: number;
   target: number;
+  mode?: string;
   dtype: string;
   device: string;
   arch: string;
diff --git a/torchci/lib/benchmark/llmUtils.ts b/torchci/lib/benchmark/llmUtils.ts
diff --git a/torchci/pages/benchmark/llms.tsx b/torchci/pages/benchmark/llms.tsx