Skip to content

Commit 8acbaa9

Browse files
authored
Refactor wheel upload job to a separate job running on GH ephemeral runner (#4877)
To run the upload step in a separate job on GH ephemeral runners, we need: 1. A specific artifact name for each binary, so that the upload job can find the correct one. 2. A new GHA `setup-binary-upload` that: (a) downloads the artifacts from GitHub; (b) runs `pkg-helpers`, which is needed to figure out the correct S3 bucket and path to upload to. 3. A new GHA reusable workflow `_binary_upload` that uploads the artifacts to S3: (a) it runs on the GH ephemeral runner `ubuntu-22.04`; (b) only this job has access to the credentials; the build job no longer has that privilege. A small caveat is that the upload job depends on the build job with its whole configuration matrix, so it can only run after all build configurations have finished successfully, not as each individual build finishes. The PR is quite big, so I will do a similar follow-up for the conda build after this, using the same `_binary_upload` reusable workflow.
1 parent 5a8239d commit 8acbaa9

File tree

6 files changed

+226
-109
lines changed

6 files changed

+226
-109
lines changed

.github/actions/setup-binary-builds/action.yml

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ inputs:
77
description: If set to any value, don't use sudo to clean the workspace
88
required: false
99
type: string
10-
default: ""
10+
default: ''
1111
ref:
12-
description: Works as stated in actions/checkout, but the default value is recursive
12+
description: Works as stated in actions/checkout
1313
required: false
1414
type: string
1515
default: nightly
@@ -19,15 +19,27 @@ inputs:
1919
type: string
2020
default: recursive
2121
setup-miniconda:
22-
description: Works as stated in actions/checkout, but the default value is recursive
22+
description: Set to true if setup-miniconda is needed
2323
required: false
2424
type: boolean
2525
default: false
2626
python-version:
27-
description: Works as stated in actions/checkout, but the default value is recursive
27+
description: The target Python version
28+
required: true
29+
type: string
30+
cuda-version:
31+
description: The target CUDA version
32+
required: true
33+
type: string
34+
arch:
35+
description: The target ARCH
36+
required: true
37+
type: string
38+
upload-to-base-bucket:
39+
description: One of the parameters used by pkg-helpers
2840
required: false
2941
type: boolean
30-
default: false
42+
default: no
3143

3244
runs:
3345
using: composite
@@ -62,11 +74,13 @@ runs:
6274
shell: bash
6375
env:
6476
PYTHON_VERSION: ${{ inputs.python-version }}
77+
CU_VERSION: ${{ inputs.cuda-version }}
78+
ARCH: ${{ inputs.arch }}
6579
run: |
6680
set -euxo pipefail
6781
# Set artifact name here since github actions doesn't have string manipulation tools
6882
# and "/" is not allowed in artifact names
69-
echo "ARTIFACT_NAME=${REPOSITORY/\//_}_${REF}_${PYTHON_VERSION}" >> "${GITHUB_ENV}"
83+
echo "ARTIFACT_NAME=${REPOSITORY/\//_}_${REF}_${PYTHON_VERSION}_${CU_VERSION}_${ARCH}" >> "${GITHUB_ENV}"
7084
- name: Setup miniconda (for pytorch_pkg_helpers)
7185
if: ${{ inputs.setup-miniconda == 'true' }}
7286
uses: conda-incubator/[email protected]
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
name: Set up binary upload jobs
2+
3+
description: Setup a GitHub ephemeral runner to upload binary wheel and conda artifacts
4+
5+
inputs:
6+
repository:
7+
description: The repository name, i.e. pytorch/vision
8+
required: true
9+
type: string
10+
ref:
11+
description: Part of the artifact name
12+
required: false
13+
type: string
14+
default: ''
15+
python-version:
16+
description: Part of the artifact name
17+
required: true
18+
type: string
19+
cuda-version:
20+
description: Part of the artifact name
21+
required: true
22+
type: string
23+
arch:
24+
description: Part of the artifact name
25+
required: true
26+
type: string
27+
upload-to-base-bucket:
28+
description: One of the parameters used by pkg-helpers
29+
required: false
30+
type: boolean
31+
default: no
32+
33+
runs:
34+
using: composite
35+
steps:
36+
- uses: actions/setup-python@v4
37+
with:
38+
python-version: '3.11'
39+
cache: pip
40+
41+
- name: Set the artifact name
42+
shell: bash
43+
env:
44+
REPOSITORY: ${{ inputs.repository }}
45+
REF: ${{ inputs.ref }}
46+
PYTHON_VERSION: ${{ inputs.python-version }}
47+
CU_VERSION: ${{ inputs.cuda-version }}
48+
ARCH: ${{ inputs.arch }}
49+
run: |
50+
set -ex
51+
52+
# Set artifact name here since github actions doesn't have string manipulation tools
53+
# and "/" is not allowed in artifact names
54+
echo "ARTIFACT_NAME=${REPOSITORY/\//_}_${REF}_${PYTHON_VERSION}_${CU_VERSION}_${ARCH}" >> "${GITHUB_ENV}"
55+
56+
- name: Generate env variables from pytorch_pkg_helpers
57+
shell: bash
58+
env:
59+
REPOSITORY: ${{ inputs.repository }}
60+
REF: ${{ inputs.ref }}
61+
PYTHON_VERSION: ${{ inputs.python-version }}
62+
CU_VERSION: ${{ inputs.cuda-version }}
63+
ARCH: ${{ inputs.arch }}
64+
run: |
65+
set -ex
66+
67+
python -m pip install tools/pkg-helpers
68+
69+
BUILD_ENV_FILE="${RUNNER_TEMP}/build_env_${GITHUB_RUN_ID}"
70+
python -m pytorch_pkg_helpers > "${BUILD_ENV_FILE}"
71+
72+
cat "${BUILD_ENV_FILE}"
73+
echo "BUILD_ENV_FILE=${BUILD_ENV_FILE}" >> "${GITHUB_ENV}"
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
name: upload
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
repository:
7+
description: 'Repository to checkout, defaults to ""'
8+
default: ''
9+
type: string
10+
ref:
11+
description: 'Reference to checkout, defaults to "nightly"'
12+
default: 'nightly'
13+
type: string
14+
build-matrix:
15+
description: "Build matrix to utilize"
16+
default: ''
17+
type: string
18+
architecture:
19+
description: Architecture to build for x86_64 for default Linux, or aarch64 for Linux aarch64 builds
20+
required: false
21+
type: string
22+
default: ''
23+
trigger-event:
24+
description: "Trigger Event in caller that determines whether or not to upload"
25+
type: string
26+
default: ''
27+
28+
jobs:
29+
upload:
30+
runs-on: ubuntu-22.04
31+
environment: ${{(inputs.trigger-event == 'push' && (startsWith(github.event.ref, 'refs/heads/nightly') || startsWith(github.event.ref, 'refs/tags/v'))) && 'pytorchbot-env' || ''}}
32+
strategy:
33+
fail-fast: false
34+
matrix: ${{ fromJSON(inputs.build-matrix) }}
35+
timeout-minutes: 30
36+
name: ${{ matrix.build_name }}
37+
steps:
38+
- uses: actions/checkout@v3
39+
40+
# For pytorch_pkg_helpers which we need to run to generate the artifact name and target S3 buckets
41+
- uses: ./.github/actions/setup-binary-upload
42+
with:
43+
repository: ${{ inputs.repository }}
44+
ref: ${{ inputs.ref }}
45+
python-version: ${{ matrix.python_version }}
46+
cuda-version: ${{ matrix.desired_cuda }}
47+
arch: ${{ inputs.architecture }}
48+
upload-to-base-bucket: ${{ matrix.upload_to_base_bucket }}
49+
50+
- uses: ./.github/actions/set-channel
51+
52+
- name: Download the artifact
53+
uses: actions/download-artifact@v3
54+
with:
55+
name: ${{ env.ARTIFACT_NAME }}
56+
path: ${{ inputs.repository }}/dist/
57+
58+
- name: Configure aws credentials (pytorch account)
59+
if: ${{ inputs.trigger-event == 'push' && startsWith(github.event.ref, 'refs/heads/nightly') }}
60+
uses: aws-actions/configure-aws-credentials@v3
61+
with:
62+
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels
63+
aws-region: us-east-1
64+
65+
- name: Configure aws credentials (pytorch account)
66+
if: ${{ env.CHANNEL == 'test' && startsWith(github.event.ref, 'refs/tags/v') }}
67+
uses: aws-actions/configure-aws-credentials@v3
68+
with:
69+
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_test_build_wheels
70+
aws-region: us-east-1
71+
72+
- name: Nightly or release RC
73+
if: ${{ (inputs.trigger-event == 'push' && startsWith(github.event.ref, 'refs/heads/nightly')) || (env.CHANNEL == 'test' && startsWith(github.event.ref, 'refs/tags/')) }}
74+
shell: bash
75+
run: |
76+
set -ex
77+
echo "NIGHTLY_OR_TEST=1" >> "${GITHUB_ENV}"
78+
79+
- name: Upload package to pytorch.org
80+
shell: bash
81+
working-directory: ${{ inputs.repository }}
82+
run: |
83+
set -ex
84+
85+
# shellcheck disable=SC1090
86+
source "${BUILD_ENV_FILE}"
87+
88+
pip install awscli==1.32.18
89+
90+
AWS_CMD="aws s3 cp --dryrun"
91+
if [[ "${NIGHTLY_OR_TEST:-0}" == "1" ]]; then
92+
AWS_CMD="aws s3 cp"
93+
fi
94+
95+
for pkg in dist/*; do
96+
${AWS_CMD} "$pkg" "${PYTORCH_S3_BUCKET_PATH}" --acl public-read
97+
done

.github/workflows/build_wheels_linux.yml

Lines changed: 12 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,6 @@ on:
6565
required: false
6666
type: boolean
6767
default: true
68-
# TODO (huydhn): Remove them once all libraries using Nova has removed them
69-
secrets:
70-
AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID:
71-
description: "AWS Access Key passed from caller workflow"
72-
required: false
73-
AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY:
74-
description: "AWS Secret Access Key passed from caller workflow"
75-
required: false
7668

7769
permissions:
7870
id-token: write
@@ -93,7 +85,6 @@ jobs:
9385
ARCH: ${{ inputs.architecture }}
9486
name: ${{ matrix.build_name }}
9587
runs-on: ${{ matrix.validation_runner }}
96-
environment: ${{(inputs.trigger-event == 'push' || startsWith(github.event.ref, 'refs/tags/')) && 'pytorchbot-env' || ''}}
9788
container:
9889
image: ${{ matrix.container_image }}
9990
options: ${{ matrix.gpu_arch_type == 'cuda' && '--gpus all' || ' ' }}
@@ -153,6 +144,8 @@ jobs:
153144
ref: ${{ inputs.ref }}
154145
setup-miniconda: ${{ inputs.setup-miniconda }}
155146
python-version: ${{ env.PYTHON_VERSION }}
147+
cuda-version: ${{ env.CU_VERSION }}
148+
arch: ${{ env.ARCH }}
156149
- name: Combine Env Var and Build Env Files
157150
if: ${{ inputs.env-var-script != '' }}
158151
working-directory: ${{ inputs.repository }}
@@ -235,31 +228,16 @@ jobs:
235228
echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found"
236229
${CONDA_RUN} python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}"
237230
fi
238-
# TODO (huydhn): Move the following step to a separate build job
239-
- name: Configure aws credentials (pytorch account)
240-
if: ${{ inputs.trigger-event == 'push' && startsWith(github.event.ref, 'refs/heads/nightly') }}
241-
uses: aws-actions/configure-aws-credentials@v3
242-
with:
243-
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels
244-
aws-region: us-east-1
245-
- name: Configure aws credentials (pytorch account)
246-
if: ${{ env.CHANNEL == 'test' && startsWith(github.event.ref, 'refs/tags/') }}
247-
uses: aws-actions/configure-aws-credentials@v3
248-
with:
249-
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_test_build_wheels
250-
aws-region: us-east-1
251-
- name: Upload package to pytorch.org
252-
if: ${{ (inputs.trigger-event == 'push' && startsWith(github.event.ref, 'refs/heads/nightly')) || (env.CHANNEL == 'test' && startsWith(github.event.ref, 'refs/tags/')) }}
253-
shell: bash -l {0}
254-
working-directory: ${{ inputs.repository }}
255-
run: |
256-
set -euxo pipefail
257-
source "${BUILD_ENV_FILE}"
258-
${CONDA_RUN} pip install awscli
259-
for pkg in dist/*; do
260-
# PYTORCH_S3_BUCKET_PATH derived from pkg-helpers
261-
${CONDA_RUN} aws s3 cp "$pkg" "${PYTORCH_S3_BUCKET_PATH}" --acl public-read
262-
done
231+
232+
upload:
233+
needs: build
234+
uses: ./.github/workflows/_binary_upload.yml
235+
with:
236+
repository: ${{ inputs.repository }}
237+
ref: ${{ inputs.ref }}
238+
build-matrix: ${{ inputs.build-matrix }}
239+
architecture: ${{ inputs.architecture }}
240+
trigger-event: ${{ inputs.trigger-event }}
263241

264242
concurrency:
265243
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}

.github/workflows/build_wheels_macos.yml

Lines changed: 13 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,6 @@ on:
5959
description: "The key created when saving a cache and the key used to search for a cache."
6060
default: ""
6161
type: string
62-
# TODO (huydhn): Remove them once all libraries using Nova has removed them
63-
secrets:
64-
AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID:
65-
description: "AWS Access Key passed from caller workflow"
66-
required: false
67-
AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY:
68-
description: "AWS Secret Access Key passed from caller workflow"
69-
required: false
7062

7163
permissions:
7264
id-token: write
@@ -82,9 +74,9 @@ jobs:
8274
PACKAGE_TYPE: wheel
8375
REPOSITORY: ${{ inputs.repository }}
8476
REF: ${{ inputs.ref }}
77+
CU_VERSION: ${{ matrix.desired_cuda }}
8578
name: ${{ matrix.build_name }}
8679
runs-on: ${{ inputs.runner-type }}
87-
environment: ${{(inputs.trigger-event == 'push' || startsWith(github.event.ref, 'refs/tags/')) && 'pytorchbot-env' || ''}}
8880
# If a build is taking longer than 60 minutes on these runners we need
8981
# to have a conversation
9082
timeout-minutes: 60
@@ -115,6 +107,8 @@ jobs:
115107
ref: ${{ inputs.ref }}
116108
setup-miniconda: false
117109
python-version: ${{ env.PYTHON_VERSION }}
110+
cuda-version: ${{ env.CU_VERSION }}
111+
arch: ${{ env.ARCH }}
118112
- name: Combine Env Var and Build Env Files
119113
if: ${{ inputs.env-var-script != '' }}
120114
working-directory: ${{ inputs.repository }}
@@ -123,7 +117,7 @@ jobs:
123117
- name: Install delocate-wheel
124118
run: |
125119
set -euxo pipefail
126-
${CONDA_RUN} python3 -m pip install delocate
120+
${CONDA_RUN} python3 -m pip install delocate==0.10.7
127121
- name: Install torch dependency
128122
run: |
129123
set -euxo pipefail
@@ -209,37 +203,20 @@ jobs:
209203
${CONDA_RUN} python3 "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}"
210204
fi
211205
export PATH=${OLD_PATH}
212-
# TODO (huydhn): Move the following step to a separate build job
213-
- name: Configure aws credentials (pytorch account)
214-
if: ${{ inputs.trigger-event == 'push' && startsWith(github.event.ref, 'refs/heads/nightly') }}
215-
uses: aws-actions/configure-aws-credentials@v3
216-
with:
217-
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels
218-
aws-region: us-east-1
219-
- name: Configure aws credentials (pytorch account)
220-
if: ${{ env.CHANNEL == 'test' && startsWith(github.event.ref, 'refs/tags/') }}
221-
uses: aws-actions/configure-aws-credentials@v3
222-
with:
223-
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_test_build_wheels
224-
aws-region: us-east-1
225-
- name: Upload package to pytorch.org
226-
if: ${{ (inputs.trigger-event == 'push' && startsWith(github.event.ref, 'refs/heads/nightly')) || (env.CHANNEL == 'test' && startsWith(github.event.ref, 'refs/tags/')) }}
227-
shell: bash -l {0}
228-
working-directory: ${{ inputs.repository }}
229-
run: |
230-
set -euxo pipefail
231-
# shellcheck disable=SC1090
232-
source "${BUILD_ENV_FILE}"
233-
${CONDA_RUN} pip install awscli
234-
for pkg in dist/*; do
235-
# PYTORCH_S3_BUCKET_PATH derived from pkg-helpers
236-
${CONDA_RUN} aws s3 cp "$pkg" "${PYTORCH_S3_BUCKET_PATH}" --acl public-read
237-
done
238206
- name: Clean up disk space
239207
if: always()
240208
continue-on-error: true
241209
uses: ./test-infra/.github/actions/check-disk-space
242210

211+
upload:
212+
needs: build
213+
uses: ./.github/workflows/_binary_upload.yml
214+
with:
215+
repository: ${{ inputs.repository }}
216+
ref: ${{ inputs.ref }}
217+
build-matrix: ${{ inputs.build-matrix }}
218+
trigger-event: ${{ inputs.trigger-event }}
219+
243220
concurrency:
244221
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}
245222
cancel-in-progress: true

0 commit comments

Comments
 (0)