
Commit a131826

[Benchmark] Deprecate v2 (#9238)
Issue: pytorch/test-infra#6294. Remove the benchmark v2 schema logic, but keep storing v3 results under a v3 folder, since we might have a higher schema version in the future. The next step is to introduce failure handling for benchmark records.
1 parent 630d0cc commit a131826
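To make the retained storage layout concrete, here is a minimal Python sketch (with a hypothetical artifact file name) of where the extractor now writes results: everything goes under a versioned v3 subfolder, so a future schema bump would only need a new folder name.

import os

# The extractor now hard-codes the v3 subfolder instead of looping over
# {"v2": [...], "v3": [...]}; a hypothetical future v4 would get its own folder.
output_dir = os.path.join("benchmark-results", "v3")
os.makedirs(output_dir, exist_ok=True)

# One JSON file per artifacts-by-job input; the file name here is invented.
output_file = "artifacts-by-job-123.json"
print(os.path.join(output_dir, output_file))  # benchmark-results/v3/artifacts-by-job-123.json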

File tree

3 files changed: +54 -171 lines changed

.github/scripts/extract_benchmark_results.py

Lines changed: 48 additions & 135 deletions
@@ -86,36 +86,6 @@ def parse_args() -> Any:
         action=ValidateDir,
         help="the directory to keep the benchmark results",
     )
-    parser.add_argument(
-        "--repo",
-        type=str,
-        required=True,
-        help="which GitHub repo this workflow run belongs to",
-    )
-    parser.add_argument(
-        "--head-branch",
-        type=str,
-        required=True,
-        help="the head branch that runs",
-    )
-    parser.add_argument(
-        "--workflow-name",
-        type=str,
-        required=True,
-        help="the name of the benchmark workflow",
-    )
-    parser.add_argument(
-        "--workflow-run-id",
-        type=int,
-        required=True,
-        help="the id of the benchmark workflow",
-    )
-    parser.add_argument(
-        "--workflow-run-attempt",
-        type=int,
-        required=True,
-        help="which retry of the workflow this is",
-    )
     parser.add_argument(
         "--benchmark-configs",
         type=str,
@@ -153,9 +123,10 @@ def extract_android_benchmark_results(
         # This is to handle the case where there is no benchmark results
         warning(f"Fail to load the benchmark results from {artifact_s3_url}")
         return []
+    return []
 
 
-def initialize_ios_metadata(test_name: str) -> Dict[str, any]:
+def initialize_ios_metadata(test_name: str) -> Dict[str, Any]:
     """
     Extract the benchmark metadata from the test name, for example:
         test_forward_llama2_pte_iOS_17_2_1_iPhone15_4
@@ -364,14 +335,7 @@ def transform(
     app_type: str,
     benchmark_results: List,
     benchmark_config: Dict[str, str],
-    repo: str,
-    head_branch: str,
-    workflow_name: str,
-    workflow_run_id: int,
-    workflow_run_attempt: int,
     job_name: str,
-    job_id: int,
-    schema_version: str,
 ) -> List:
     """
     Transform the benchmark results into the format writable into the benchmark database
@@ -381,87 +345,51 @@ def transform(
     for r in benchmark_results:
         r["deviceInfo"]["device"] = job_name
 
-    if schema_version == "v2":
-        # TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3
-        return [
-            {
-                # GH-info to identify where the benchmark is run
-                "repo": repo,
-                "head_branch": head_branch,
-                "workflow_id": workflow_run_id,
-                "run_attempt": workflow_run_attempt,
-                "job_id": job_id,
-                # The model
-                "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
-                "dtype": (
-                    r["benchmarkModel"]["quantization"]
-                    if r["benchmarkModel"]["quantization"]
-                    else "unknown"
-                ),
-                # The metric value
-                "metric": r["metric"],
-                "actual": r["actualValue"],
-                "target": r["targetValue"],
-                # The device
-                "device": r["deviceInfo"]["device"],
-                "arch": r["deviceInfo"].get("os", ""),
-                # Not used here, just set it to something unique here
-                "filename": workflow_name,
-                "test_name": app_type,
-                "runner": job_name,
-            }
-            for r in benchmark_results
-        ]
-    elif schema_version == "v3":
-        v3_benchmark_results = []
-        # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
-        return [
-            {
-                "benchmark": {
-                    "name": "ExecuTorch",
-                    "mode": "inference",
-                    "extra_info": {
-                        "app_type": app_type,
-                        # Just keep a copy of the benchmark config here
-                        "benchmark_config": json.dumps(benchmark_config),
-                    },
-                },
-                "model": {
-                    "name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
-                    "type": "OSS model",
-                    "backend": benchmark_config.get(
-                        "config", r["benchmarkModel"].get("backend", "")
-                    ),
-                },
-                "metric": {
-                    "name": r["metric"],
-                    "benchmark_values": [r["actualValue"]],
-                    "target_value": r["targetValue"],
-                    "extra_info": {
-                        "method": r.get("method", ""),
-                    },
-                },
-                "runners": [
-                    {
-                        "name": r["deviceInfo"]["device"],
-                        "type": r["deviceInfo"]["os"],
-                        "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
-                        "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
-                    }
-                ],
-            }
-            for r in benchmark_results
-        ]
+    # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+    return [
+        {
+            "benchmark": {
+                "name": "ExecuTorch",
+                "mode": "inference",
+                "extra_info": {
+                    "app_type": app_type,
+                    # Just keep a copy of the benchmark config here
+                    "benchmark_config": json.dumps(benchmark_config),
+                },
+            },
+            "model": {
+                "name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
+                "type": "OSS model",
+                "backend": benchmark_config.get(
+                    "config", r["benchmarkModel"].get("backend", "")
+                ),
+            },
+            "metric": {
+                "name": r["metric"],
+                "benchmark_values": [r["actualValue"]],
+                "target_value": r["targetValue"],
+                "extra_info": {
+                    "method": r.get("method", ""),
+                },
+            },
+            "runners": [
+                {
+                    "name": r["deviceInfo"]["device"],
+                    "type": r["deviceInfo"]["os"],
+                    "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
+                    "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
+                }
+            ],
+        }
+        for r in benchmark_results
+    ]
 
 
 def main() -> None:
     args = parse_args()
 
     # Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
-    all_benchmark_results = {
-        "v2": [],
-        "v3": [],
-    }
+    all_benchmark_results = []
     benchmark_config = {}
 
     with open(args.artifacts) as f:
@@ -482,7 +410,7 @@ def main() -> None:
             benchmark_config = read_benchmark_config(
                 artifact_s3_url, args.benchmark_configs
            )
-
+            benchmark_results = []
             if app_type == "ANDROID_APP":
                 benchmark_results = extract_android_benchmark_results(
                     job_name, artifact_type, artifact_s3_url
@@ -494,32 +422,17 @@ def main() -> None:
                 )
 
             if benchmark_results:
-                for schema in all_benchmark_results.keys():
-                    results = transform(
-                        app_type,
-                        benchmark_results,
-                        benchmark_config,
-                        args.repo,
-                        args.head_branch,
-                        args.workflow_name,
-                        args.workflow_run_id,
-                        args.workflow_run_attempt,
-                        job_name,
-                        extract_job_id(args.artifacts),
-                        schema,
-                    )
-                    all_benchmark_results[schema].extend(results)
-
-    for schema in all_benchmark_results.keys():
-        if not all_benchmark_results.get(schema):
-            continue
-
-        output_dir = os.path.join(args.output_dir, schema)
-        os.makedirs(output_dir, exist_ok=True)
+                results = transform(
+                    app_type, benchmark_results, benchmark_config, job_name
+                )
+                all_benchmark_results.extend(results)
 
+    # add v3 in case we have higher version of schema
+    output_dir = os.path.join(args.output_dir, "v3")
+    os.makedirs(output_dir, exist_ok=True)
     output_file = os.path.basename(args.artifacts)
     with open(f"{output_dir}/{output_file}", "w") as f:
-        json.dump(all_benchmark_results[schema], f)
+        json.dump(all_benchmark_results, f)
 
 
 if __name__ == "__main__":
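For reference, below is a self-contained Python sketch of the v3 record that the slimmed-down transform() now emits, built from a made-up Android result. The field names come straight from the diff above; all values (model, backend, device, latency) are invented for illustration.

import json

# A made-up raw result in the shape the extractor reads from device-farm
# artifacts (keys taken from the diff above, values invented).
r = {
    "benchmarkModel": {"name": "llama2", "backend": "xnnpack", "quantization": "8bit"},
    "metric": "avg_inference_latency(ms)",
    "actualValue": 123.4,
    "targetValue": 0,
    "deviceInfo": {
        "device": "samsung_galaxy_s22",  # hypothetical job/device name
        "os": "Android 13",
        "availMem": "",
        "totalMem": "",
    },
}
benchmark_config = {"model": "llama2", "config": "xnnpack_q8"}  # hypothetical

# Same mapping as the new transform() body: one v3 record per raw result.
v3_record = {
    "benchmark": {
        "name": "ExecuTorch",
        "mode": "inference",
        "extra_info": {
            "app_type": "ANDROID_APP",
            "benchmark_config": json.dumps(benchmark_config),
        },
    },
    "model": {
        "name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
        "type": "OSS model",
        "backend": benchmark_config.get("config", r["benchmarkModel"].get("backend", "")),
    },
    "metric": {
        "name": r["metric"],
        "benchmark_values": [r["actualValue"]],
        "target_value": r["targetValue"],
        "extra_info": {"method": r.get("method", "")},
    },
    "runners": [
        {
            "name": r["deviceInfo"]["device"],
            "type": r["deviceInfo"]["os"],
            "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
            "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
        }
    ],
}
print(json.dumps(v3_record, indent=2))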

.github/workflows/android-perf.yml

Lines changed: 3 additions & 18 deletions
@@ -462,29 +462,14 @@ jobs:
           ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
             --artifacts "${ARTIFACTS_BY_JOB}" \
             --output-dir benchmark-results \
-            --repo ${{ github.repository }} \
-            --head-branch ${{ github.head_ref || github.ref_name }} \
-            --workflow-name "${{ github.workflow }}" \
-            --workflow-run-id ${{ github.run_id }} \
-            --workflow-run-attempt ${{ github.run_attempt }} \
             --benchmark-configs benchmark-configs
         done
 
-        for SCHEMA in v2 v3; do
-          for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
-          done
+        for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do
+          cat "${BENCHMARK_RESULTS}"
+          echo
         done
 
-      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
-      - name: Upload the benchmark results (v2)
-        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
-        with:
-          benchmark-results-dir: benchmark-results/v2
-          dry-run: false
-          schema-version: v2
-
       - name: Upload the benchmark results (v3)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
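The shell loop above only prints each v3 file for debugging. A rough Python equivalent (assuming the same benchmark-results/v3 layout the workflow uses) would be:

import glob
import json

# Print every v3 result file the extractor wrote, mirroring the shell loop above.
for path in sorted(glob.glob("benchmark-results/v3/*.json")):
    with open(path) as f:
        results = json.load(f)
    print(f"{path}: {len(results)} records")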

.github/workflows/apple-perf.yml

Lines changed: 3 additions & 18 deletions
@@ -521,29 +521,14 @@ jobs:
           ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
             --artifacts "${ARTIFACTS_BY_JOB}" \
             --output-dir benchmark-results \
-            --repo ${{ github.repository }} \
-            --head-branch ${{ github.head_ref || github.ref_name }} \
-            --workflow-name "${{ github.workflow }}" \
-            --workflow-run-id ${{ github.run_id }} \
-            --workflow-run-attempt ${{ github.run_attempt }} \
             --benchmark-configs benchmark-configs
         done
 
-        for SCHEMA in v2 v3; do
-          for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
-          done
+        for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do
+          cat "${BENCHMARK_RESULTS}"
+          echo
         done
 
-      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
-      - name: Upload the benchmark results (v2)
-        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
-        with:
-          benchmark-results-dir: benchmark-results/v2
-          dry-run: false
-          schema-version: v2
-
       - name: Upload the benchmark results (v3)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
