Skip to content

Commit 7b643ae

Browse files
committed
Update on "[ET-VK] Introduce generic export pass for fusing Q/DQ nodes"
## Context

When quantizing models with the PT2E quantization flow, quantize/dequantize nodes will be inserted into the graph. However, these quantize/dequantize nodes must be fused with operators such as `aten.linear.default` to produce nodes corresponding to quantized operators (e.g. `weight_int8pack_mm`) in order for quantized operator implementations to be called at runtime.

Currently, the op fusion is done by the `fuse_dequant_linear.py` pass; however, this only handles one specific fusion pattern to generate a `weight_int8pack_mm` operator. As more quantized operators are to be supported in ET-VK via the PT2E quantization flow, a more generic fusion pass is needed that can handle a variety of fusion patterns.

## Changes

* Introduce the `FuseQuantizedOpsTransform()` pass. I elected to introduce a new pass under the `backends/vulkan/_passes` directory, as opposed to modifying the existing pass, because I anticipate the majority of the fusion patterns to be specific to ET-VK.
* Remove the existing `FuseDequantLinearPass()`.
* Switch to using the `FuseQuantizedOpsTransform` pass instead of the old `FuseDequantLinear` pass.
* Add `test_vulkan_passes` Python test to test export passes.
* Some small refactors to `test_vulkan_delegate` Python test to improve code organization.

Differential Revision: [D73794042](https://our.internmc.facebook.com/intern/diff/D73794042/) [ghstack-poisoned]
2 parents c55ef19 + 3cd336d commit 7b643ae

File tree

94 files changed

+2019
-695
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+2019
-695
lines changed

.ci/scripts/gather_benchmark_configs.py

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"apple_iphone_15": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d",
2222
"apple_iphone_15+ios_18": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/12c8b15c-8d03-4e07-950d-0a627e7595b4",
2323
"samsung_galaxy_s22": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa",
24+
"samsung_galaxy_s22_private": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/ea6b049d-1508-4233-9a56-5d9eacbe1078",
2425
"samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",
2526
"google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",
2627
"google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",

.github/scripts/extract_benchmark_results.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,10 @@ def transform(
349349
# Overwrite the device name here with the job name as it has more information about
350350
# the device, i.e. Samsung Galaxy S22 5G instead of just Samsung
351351
for r in benchmark_results:
352-
r["deviceInfo"]["device"] = job_name
352+
is_private_device = job_report.get("is_private_instance", False)
353+
r["deviceInfo"]["device"] = (
354+
f"{job_name} (private)" if is_private_device else job_name
355+
)
353356

354357
# From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
355358
return [
@@ -363,6 +366,7 @@ def transform(
363366
"benchmark_config": json.dumps(benchmark_config),
364367
"job_conclusion": "SUCCESS",
365368
"job_arn": job_report.get("arn", ""),
369+
"instance_arn": job_report.get("instance_arn", ""),
366370
},
367371
},
368372
"model": {

.github/workflows/_link_check.yml

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
on:
2+
workflow_call:
3+
inputs:
4+
ref:
5+
type: string
6+
required: true
7+
8+
jobs:
9+
lint-urls:
10+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
11+
with:
12+
runner: linux.2xlarge
13+
docker-image: executorch-ubuntu-22.04-linter
14+
submodules: 'none'
15+
fetch-depth: 0
16+
ref: ${{ inputs.ref }}
17+
timeout: 90
18+
script: |
19+
./scripts/lint_urls.sh $(
20+
[ "${{ github.event_name }}" = "pull_request" ] \
21+
&& git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
22+
|| [ "${{ github.event_name }}" = "push" ] \
23+
&& git diff --name-only ${{ github.event.before }} ${{ github.sha }}
24+
)
25+
26+
lint-xrefs:
27+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
28+
with:
29+
runner: linux.2xlarge
30+
docker-image: executorch-ubuntu-22.04-linter
31+
submodules: 'none'
32+
fetch-depth: 0
33+
ref: ${{ inputs.ref }}
34+
timeout: 90
35+
script: |
36+
./scripts/lint_xrefs.sh $(
37+
[ "${{ github.event_name }}" = "pull_request" ] \
38+
&& git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
39+
|| [ "${{ github.event_name }}" = "push" ] \
40+
&& git diff --name-only ${{ github.event.before }} ${{ github.sha }}
41+
)

.github/workflows/android-perf-private-device-experiment.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ on:
2323
description: Target devices to run benchmark
2424
required: false
2525
type: string
26-
default: google_pixel_3_private_rooted
26+
default: samsung_galaxy_s22_private
2727
benchmark_configs:
2828
description: The list of configs used the benchmark
2929
required: false
@@ -39,7 +39,7 @@ on:
3939
description: Target devices to run benchmark
4040
required: false
4141
type: string
42-
default: google_pixel_3_private_rooted
42+
default: samsung_galaxy_s22_private
4343
benchmark_configs:
4444
description: The list of configs used the benchmark
4545
required: false
@@ -58,5 +58,5 @@ jobs:
5858
contents: read
5959
with:
6060
models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' }}
61-
devices: google_pixel_3_private_rooted
61+
devices: samsung_galaxy_s22_private
6262
benchmark_configs: ${{ inputs.benchmark_configs }}

.github/workflows/apple-perf-private-device-experiment.yml

+7-9
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,16 @@
11
name: apple-perf (private devices)
22

33
on:
4-
# TODO (huydhn): Disable the schedule run until we land the change to add device pool and device name
5-
# to separate between public and private iOS devices
6-
# schedule:
7-
# - cron: 0 0,4,8,12,16,20 * * *
4+
schedule:
5+
- cron: 0 0,4,8,12,16,20 * * *
86
pull_request:
97
paths:
108
- .github/workflows/apple-perf-private-device-experiment.yml
11-
# push:
12-
# branches:
13-
# - main
14-
# paths:
15-
# - .github/workflows/apple-perf-private-device-experiment.yml
9+
push:
10+
branches:
11+
- main
12+
paths:
13+
- .github/workflows/apple-perf-private-device-experiment.yml
1614
# Note: GitHub has an upper limit of 10 inputs
1715
workflow_dispatch:
1816
inputs:

.github/workflows/doc-build.yml

-14
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,6 @@ on:
1414
- cron: '0 0 * * *'
1515

1616
jobs:
17-
check-urls:
18-
runs-on: ubuntu-latest
19-
steps:
20-
- uses: actions/checkout@v3
21-
- name: Check URLs
22-
run: bash ./scripts/check_urls.sh
23-
24-
check-xrefs:
25-
runs-on: ubuntu-latest
26-
steps:
27-
- uses: actions/checkout@v3
28-
- name: Check Links
29-
run: bash ./scripts/check_xrefs.sh
30-
3117
build:
3218
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
3319
permissions:

.github/workflows/lint.yml

+5
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ jobs:
6464
6565
exit $RC
6666
67+
link-check:
68+
uses: ./.github/workflows/_link_check.yml
69+
with:
70+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
71+
6772
android-java-format:
6873
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
6974
permissions:

.github/workflows/nightly.yml

+6
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,9 @@ jobs:
3030
test-infra-ref: main
3131
updatebot-token: ${{ secrets.UPDATEBOT_TOKEN }}
3232
pytorchbot-token: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
33+
34+
link-check:
35+
needs: update-pytorch-commit-hash
36+
uses: ./.github/workflows/_link_check.yml
37+
with:
38+
ref: ${{ github.sha }}

.github/workflows/pull.yml

+14-13
Original file line numberDiff line numberDiff line change
@@ -481,37 +481,38 @@ jobs:
481481
build-tool: buck2
482482
docker-image: executorch-ubuntu-22.04-clang12
483483

484-
unittest-arm:
484+
unittest-arm-backend-with-no-fvp:
485+
name: unittest-arm-backend-with-no-fvp
485486
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
486487
permissions:
487488
id-token: write
488489
contents: read
490+
strategy:
491+
matrix:
492+
include:
493+
- test_arm_baremetal: test_pytest_ops
494+
- test_arm_baremetal: test_pytest_models
495+
fail-fast: false
489496
with:
490497
runner: linux.2xlarge
491498
docker-image: executorch-ubuntu-22.04-arm-sdk
492499
submodules: 'recursive'
493500
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
494501
timeout: 90
495502
script: |
496-
set -eux
497-
498503
# The generic Linux job chooses to use base env, not the one setup by the image
499504
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
500505
conda activate "${CONDA_ENV}"
501506
502-
BUILD_TOOL="cmake"
503-
504-
# Setup MacOS dependencies as there is no Docker support on MacOS atm
505-
PYTHON_EXECUTABLE=python \
506-
CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON" \
507-
EXECUTORCH_BUILD_ARM_BAREMETAL=ON \
508-
.ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
507+
source .ci/scripts/utils.sh
508+
install_executorch "--use-pt-pinned-commit"
509509
510-
# Install Arm dependencies
511510
.ci/scripts/setup-arm-baremetal-tools.sh
512511
513-
# Run pytest without simulator
514-
backends/arm/test/test_arm_baremetal.sh test_pytest
512+
ARM_TEST=${{ matrix.test_arm_baremetal }}
513+
514+
# Test test_arm_baremetal.sh with test
515+
backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}"
515516
516517
test-llama-runner-qnn-linux:
517518
name: test-llama-runner-qnn-linux

.github/workflows/trunk.yml

+15-29
Original file line numberDiff line numberDiff line change
@@ -176,12 +176,22 @@ jobs:
176176
# Test selective build
177177
PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"
178178
179-
test-arm-backend-delegation:
180-
name: test-arm-backend-delegation
179+
test-arm-backend:
180+
name: test-arm-backend
181181
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
182182
permissions:
183183
id-token: write
184184
contents: read
185+
strategy:
186+
matrix:
187+
include:
188+
- test_arm_baremetal: test_pytest_ops_ethosu_fvp
189+
- test_arm_baremetal: test_pytest_models_ethosu_fvp
190+
- test_arm_baremetal: test_run_ethosu_fvp
191+
- test_arm_baremetal: test_models_tosa
192+
- test_arm_baremetal: test_models_ethos-u55
193+
- test_arm_baremetal: test_models_ethos-u85
194+
fail-fast: false
185195
with:
186196
runner: linux.2xlarge.memory
187197
docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -202,34 +212,10 @@ jobs:
202212
# Hopefully this is high enough for this setup.
203213
sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
204214
205-
# Test ethos-u delegate examples with run.sh
206-
backends/arm/test/test_arm_baremetal.sh test_full_ethosu_fvp
207-
208-
209-
test-arm-reference-delegation:
210-
name: test-arm-reference-delegation
211-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
212-
permissions:
213-
id-token: write
214-
contents: read
215-
with:
216-
runner: linux.2xlarge.memory
217-
docker-image: executorch-ubuntu-22.04-arm-sdk
218-
submodules: 'recursive'
219-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
220-
timeout: 90
221-
script: |
222-
# The generic Linux job chooses to use base env, not the one setup by the image
223-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
224-
conda activate "${CONDA_ENV}"
225-
226-
source .ci/scripts/utils.sh
227-
install_executorch "--use-pt-pinned-commit"
228-
229-
.ci/scripts/setup-arm-baremetal-tools.sh
215+
ARM_TEST=${{ matrix.test_arm_baremetal }}
230216
231-
# Run arm unit tests using the simulator
232-
backends/arm/test/test_arm_baremetal.sh test_pytest_ethosu_fvp
217+
# Test test_arm_baremetal.sh with test
218+
backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}"
233219
234220
test-arm-cortex-m-size-test:
235221
name: test-arm-cortex-m-size-test

0 commit comments

Comments
 (0)