Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions .github/workflows/update-s3-html.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
name: Update S3 HTML indices for download.pytorch.org

on:
  schedule:
    # Update the indices every 30 minutes
    - cron: "*/30 * * * *"
  workflow_dispatch:

# OIDC token is needed for aws-actions/configure-aws-credentials role assumption.
permissions:
  id-token: write
  contents: read

jobs:
  update:
    runs-on: ubuntu-22.04
    environment: pytorchbot-env
    strategy:
      matrix:
        prefix: ["whl", "whl/test", "whl/nightly", "whl/lts/1.8"]
      # Keep regenerating the other prefixes even if one of them fails.
      fail-fast: false
    container:
      image: continuumio/miniconda3:4.12.0
    steps:
      - name: configure aws credentials
        id: aws_creds
        uses: aws-actions/configure-aws-credentials@v3
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_update
          aws-region: us-east-1
      - name: Checkout repository test-infra
        uses: actions/checkout@v3
        with:
          repository: pytorch/test-infra
          ref: ${{ github.ref }}
      - name: Update s3 html index
        shell: bash
        run: |
          set -ex

          # "conda activate" only works after the shell hook has been loaded;
          # in a fresh non-interactive container shell it would otherwise fail
          # with CommandNotFoundError.
          eval "$(conda shell.bash hook)"

          # Create Conda Environment
          conda create --quiet -y --prefix run_env python="3.8"
          conda activate ./run_env

          # Install requirements
          pip install -r s3_management/requirements.txt
          python s3_management/manage.py --generate-pep503 ${{ matrix.prefix }}
3 changes: 3 additions & 0 deletions s3_management/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# s3_management

This directory houses scripts to maintain the S3 HTML indices for https://download.pytorch.org/whl
73 changes: 73 additions & 0 deletions s3_management/backup_conda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env python3
# Downloads domain pytorch and library packages from channel
# And backs them up to S3
# Do not use unless you know what you are doing
# Usage: python backup_conda.py --version 1.6.0

import argparse
import hashlib
import os
import urllib.request
from typing import List, Optional

import boto3
import conda.api

# Module-level S3 handles; all backups land in the "pytorch-backup" bucket.
S3 = boto3.resource('s3')
BUCKET = S3.Bucket('pytorch-backup')
# Conda platform subdirectories that are searched for packages.
_known_subdirs = ["linux-64", "osx-64", "osx-arm64", "win-64"]


def compute_md5(path: str) -> str:
    """Return the hex MD5 digest of the file at *path*.

    Hashes in fixed-size chunks so large conda packages (hundreds of MB)
    do not have to be loaded into memory at once.
    """
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        # 1 MiB chunks: large enough to be fast, small enough to bound memory.
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            md5.update(chunk)
    return md5.hexdigest()


def download_conda_package(package: str, version: Optional[str] = None,
                           depends: Optional[str] = None,
                           channel: Optional[str] = None) -> List[str]:
    """Download matching conda packages into ./<subdir>/ and return their paths.

    Args:
        package: conda package name to query (e.g. "pytorch").
        version: if given, keep only packages with this exact version string.
        depends: if given, keep only packages whose depends list contains this
            exact entry (e.g. "pytorch 1.6.0").
        channel: conda channel to query; None queries the default channels.

    Returns:
        List of relative file paths ("<subdir>/<filename>") whose md5 checksum
        matched the repodata; mismatching files are skipped.
    """
    packages = conda.api.SubdirData.query_all(
        package,
        channels=[channel] if channel is not None else None,
        subdirs=_known_subdirs,
    )
    rc = []

    for pkg in packages:
        if version is not None and pkg.version != version:
            continue
        if depends is not None and depends not in pkg.depends:
            continue

        print(f"Downloading {pkg.url}...")
        os.makedirs(pkg.subdir, exist_ok=True)
        fname = f"{pkg.subdir}/{pkg.fn}"
        if not os.path.exists(fname):
            with open(fname, "wb") as f, urllib.request.urlopen(pkg.url) as url:
                f.write(url.read())
        if compute_md5(fname) != pkg.md5:
            print(f"md5 of {fname} is {compute_md5(fname)} does not match {pkg.md5}")
            # Delete the corrupt file: otherwise the os.path.exists() check
            # above would keep skipping the download and the package would
            # never be backed up on any future run.
            os.remove(fname)
            continue
        rc.append(fname)

    return rc

def upload_to_s3(prefix: str, fnames: List[str]) -> None:
    """Upload each local file in *fnames* to the backup bucket under *prefix*."""
    for local_path in fnames:
        remote_key = f"{prefix}/{local_path}"
        BUCKET.upload_file(local_path, remote_key)
        print(local_path)



if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--version",
        help="PyTorch Version to backup",
        type=str,
        required=True,
    )
    options = parser.parse_args()
    dest_prefix = f"v{options.version}/conda"

    # Back up the main pytorch package for the requested version.
    downloaded = download_conda_package("pytorch", channel="pytorch",
                                        version=options.version)
    upload_to_s3(dest_prefix, downloaded)

    # Then back up each domain library built against that pytorch version.
    for libname in ["torchvision", "torchaudio", "torchtext"]:
        print(f"processing {libname}")
        downloaded = download_conda_package(libname, channel="pytorch",
                                            depends=f"pytorch {options.version}")
        upload_to_s3(dest_prefix, downloaded)
Loading