Skip to content

Commit c661a34

Browse files
authored
Upload benchmark results in v3 schema to ossci-benchmarks S3 bucket (#5845)
After #5839, it's time to update the GHA to upload to S3. I'll update the S3 lambda replicator in a separate PR. ### Testing * Locally ``` # Backward compatibility, upload to both dynamoDB and S3 for v2 schema $python upload_benchmark_results.py --benchmark-results-dir benchmark-results-dir-for-testing/v2 --schema-version v2 --dry-run INFO:root:Uploading benchmark-results-dir-for-testing/v2/android-artifacts-31017223108.json to dynamoDB (v2) INFO:root:Writing 16 documents to DynamoDB torchci-oss-ci-benchmark INFO:root:Upload benchmark-results-dir-for-testing/v2/android-artifacts-31017223108.json to s3://ossci-benchmarks/v2/pytorch/executorch/12345/31017223108/android-artifacts-31017223108.json INFO:root:Uploading benchmark-results-dir-for-testing/v2/android-artifacts-31017223431.json to dynamoDB (v2) INFO:root:Writing 12 documents to DynamoDB torchci-oss-ci-benchmark INFO:root:Upload benchmark-results-dir-for-testing/v2/android-artifacts-31017223431.json to s3://ossci-benchmarks/v2/pytorch/executorch/12345/31017223431/android-artifacts-31017223431.json # We use only S3 for v3 schema $python upload_benchmark_results.py --benchmark-results-dir benchmark-results-dir-for-testing/v3 --schema-version v3 INFO:root:Upload benchmark-results-dir-for-testing/v3/mock.json to s3://ossci-benchmarks/v3/pytorch/pytorch/1/1/mock.json ``` * CI * v2 https://github.com/pytorch/test-infra/actions/runs/11606273442/job/32318017857?pr=5845#step:4:55 * v3 https://github.com/pytorch/test-infra/actions/runs/11606273442/job/32318017857?pr=5845#step:5:43 * Test PR on ExecuTorch to use the new version https://github.com/pytorch/executorch/actions/runs/11606339159 to see that the files are uploaded to S3 https://github.com/pytorch/executorch/actions/runs/11606339159/job/32318826449#step:8:87
1 parent 25dab1e commit c661a34

File tree

6 files changed

+110
-16
lines changed

6 files changed

+110
-16
lines changed

.github/actions/upload-benchmark-results/action.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ inputs:
66
required: True
77
dry-run:
88
default: 'true'
9+
# TODO (huydhn): Use this to gate the migration to oss_ci_benchmark_v3 on S3
10+
schema-version:
11+
default: 'v2'
912

1013
runs:
1114
using: composite
@@ -16,21 +19,22 @@ runs:
1619
set -eux
1720
python3 -mpip install boto3==1.35.33
1821
19-
# TODO (huydhn): Once the generic benchmark database is ready, this will be
20-
# uploaded to S3 instead
21-
- name: Upload benchmark results to DynamoDB
22+
- name: Upload benchmark results
2223
shell: bash
2324
env:
2425
BENCHMARK_RESULTS_DIR: ${{ inputs.benchmark-results-dir }}
2526
DRY_RUN: ${{ inputs.dry-run }}
27+
SCHEMA_VERSION: ${{ inputs.schema-version }}
2628
run: |
2729
set -eux
2830
2931
if [[ "${DRY_RUN}" == "true" ]]; then
3032
python3 "${GITHUB_ACTION_PATH}/../../scripts/upload_benchmark_results.py" \
3133
--benchmark-results-dir "${BENCHMARK_RESULTS_DIR}" \
34+
--schema-version "${SCHEMA_VERSION}" \
3235
--dry-run
3336
else
3437
python3 "${GITHUB_ACTION_PATH}/../../scripts/upload_benchmark_results.py" \
35-
--benchmark-results-dir "${BENCHMARK_RESULTS_DIR}"
38+
--benchmark-results-dir "${BENCHMARK_RESULTS_DIR}" \
39+
--schema-version "${SCHEMA_VERSION}"
3640
fi
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{}, {"repo": "pytorch/pytorch"}, {"repo": "pytorch/pytorch", "workflow_id": 1}, {"repo": "pytorch/pytorch", "workflow_id": 1, "job_id": 1}]

.github/scripts/upload_benchmark_results.py

Lines changed: 91 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# This source code is licensed under the BSD-style license found in the
66
# LICENSE file in the root directory of this source tree.
77

8+
import gzip
89
import hashlib
910
import json
1011
import logging
@@ -21,6 +22,9 @@
2122
logging.basicConfig(level=logging.INFO)
2223

2324

25+
OSSCI_BENCHMARKS_BUCKET = "ossci-benchmarks"
26+
27+
2428
class ValidateDir(Action):
2529
def __call__(
2630
self,
@@ -57,6 +61,13 @@ def parse_args() -> Any:
5761
default="torchci-oss-ci-benchmark",
5862
help="the name of the DynamoDB table to upload to",
5963
)
64+
# v3 is defined at torchci/clickhouse_queries/oss_ci_benchmark_v3/query.sql
65+
parser.add_argument(
66+
"--schema-version",
67+
choices=["v2", "v3"],
68+
required=True,
69+
help="the database schema to use",
70+
)
6071

6172
return parser.parse_args()
6273

@@ -69,7 +80,6 @@ def default(self, o: Any) -> Any:
6980
return super().default(o)
7081

7182

72-
# TODO (huydhn): This can be replaced by S3 path once we move to S3
7383
def generate_partition_key(doc: Dict[str, Any]) -> str:
7484
"""
7585
Generate an unique partition key for the document on DynamoDB
@@ -106,24 +116,95 @@ def upload_to_dynamodb(
106116
batch.put_item(Item=doc)
107117

108118

119+
def generate_s3_path(filepath: str, schema_version: str) -> Optional[str]:
    """
    Build the S3 object key for a benchmark-results JSON file.

    The key has the shape {schema_version}/{repo}/{workflow_id}/{job_id}/{filename},
    where the repo/workflow/job metadata is taken from the first record in the
    file that carries all three fields.

    Returns "" (falsy) when the file is empty or no record has enough metadata
    to build a key, so callers can skip the upload.
    """
    with open(filepath) as f:
        docs = json.load(f)

    if not docs:
        info(f"{filepath} is empty")
        return ""

    for doc in docs:
        repo = doc.get("repo", "")
        workflow_id = doc.get("workflow_id", 0)
        job_id = doc.get("job_id", 0)
        servicelab_experiment_id = doc.get("servicelab_experiment_id", 0)
        servicelab_trial_id = doc.get("servicelab_trial_id", 0)

        # Also handle service lab records here: they identify a run by
        # experiment/trial instead of GitHub workflow/job ids
        workflow_id = workflow_id if workflow_id else servicelab_experiment_id
        job_id = job_id if job_id else servicelab_trial_id

        # We just need one record here to get some metadata to generate the s3 path
        if repo and workflow_id and job_id:
            break

    if not repo or not workflow_id or not job_id:
        info(
            f"{filepath} is without any information about the repo, workflow, or job id"
        )
        return ""

    filename = os.path.basename(filepath)
    # FIX: the key must end with the actual file name (previously the computed
    # `filename` was unused and a literal placeholder ended up in the key)
    return f"{schema_version}/{repo}/{workflow_id}/{job_id}/{filename}"
150+
151+
152+
def upload_to_s3(
    s3_bucket: str,
    filepath: str,
    schema_version: str,
    dry_run: bool = True,
) -> None:
    """
    Upload one benchmark-results JSON file to S3, gzip-compressed.

    Skips the file (with a log line) when no S3 key can be derived from its
    records; does nothing beyond logging when dry_run is set.
    """
    s3_path = generate_s3_path(filepath, schema_version)
    if not s3_path:
        info(f"Could not generate an S3 path for {filepath}, skipping...")
        return

    info(f"Upload {filepath} to s3://{s3_bucket}/{s3_path}")
    if dry_run:
        return

    # Copied from upload stats script
    with open(filepath) as f:
        body = gzip.compress(f.read().encode())
    target = boto3.resource("s3").Object(
        f"{s3_bucket}",
        f"{s3_path}",
    )
    target.put(
        Body=body,
        ContentEncoding="gzip",
        ContentType="application/json",
    )
178+
179+
109180
def main() -> None:
    """Upload every JSON results file in the input directory to the database(s)."""
    args = parse_args()
    schema_version = args.schema_version

    for entry in os.listdir(args.benchmark_results_dir):
        if not entry.endswith(".json"):
            continue

        filepath = os.path.join(args.benchmark_results_dir, entry)

        # NB: This is for backward compatibility before we move to schema v3;
        # v2 records keep going to DynamoDB in addition to S3
        if schema_version == "v2":
            with open(filepath) as f:
                info(f"Uploading {filepath} to dynamoDB ({schema_version})")
                # NB: DynamoDB only accepts decimal number, not float
                docs = json.load(f, parse_float=Decimal)
                upload_to_dynamodb(
                    dynamodb_table=args.dynamodb_table,
                    docs=docs,
                    generate_partition_key=generate_partition_key,
                    dry_run=args.dry_run,
                )

        # Both v2 and v3 results are mirrored to the S3 benchmarks bucket
        upload_to_s3(
            s3_bucket=OSSCI_BENCHMARKS_BUCKET,
            filepath=filepath,
            schema_version=schema_version,
            dry_run=args.dry_run,
        )
127208

128209

129210
if __name__ == "__main__":

.github/workflows/test_upload_benchmark_results.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,16 @@ jobs:
1313
steps:
1414
- uses: actions/checkout@v3
1515

16-
- name: Test upload the benchmark results
16+
- name: Test upload the benchmark results (v2)
1717
uses: ./.github/actions/upload-benchmark-results
1818
with:
19-
benchmark-results-dir: .github/scripts/benchmark-results-dir-for-testing
19+
benchmark-results-dir: .github/scripts/benchmark-results-dir-for-testing/v2
20+
schema-version: v2
21+
dry-run: true
22+
23+
- name: Test upload the benchmark results (v3)
24+
uses: ./.github/actions/upload-benchmark-results
25+
with:
26+
benchmark-results-dir: .github/scripts/benchmark-results-dir-for-testing/v3
27+
schema-version: v3
2028
dry-run: true

0 commit comments

Comments
 (0)