Skip to content

Commit bd32947

Browse files
committed
Update on "Parallelize optimized op_log_softmax"
Straightforward application of parallel_for. Differential Revision: [D76831122](https://our.internmc.facebook.com/intern/diff/D76831122/) [ghstack-poisoned]
2 parents a04da54 + 6f8e61b commit bd32947

File tree

83 files changed

+2692
-426
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+2692
-426
lines changed

.ci/scripts/test_model.sh

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,22 @@ test_model_with_qnn() {
188188
EXPORT_SCRIPT=edsr
189189
# Additional deps for edsr
190190
pip install piq
191+
elif [[ "${MODEL_NAME}" == "cvt" ]]; then
192+
EXPORT_SCRIPT=cvt
193+
elif [[ "${MODEL_NAME}" == "dit" ]]; then
194+
EXPORT_SCRIPT=dit
195+
elif [[ "${MODEL_NAME}" == "efficientnet" ]]; then
196+
EXPORT_SCRIPT=efficientnet
197+
elif [[ "${MODEL_NAME}" == "focalnet" ]]; then
198+
EXPORT_SCRIPT=focalnet
199+
elif [[ "${MODEL_NAME}" == "mobilevit_v1" ]]; then
200+
EXPORT_SCRIPT=mobilevit_v1
201+
elif [[ "${MODEL_NAME}" == "mobilevit_v2" ]]; then
202+
EXPORT_SCRIPT=mobilevit_v2
203+
elif [[ "${MODEL_NAME}" == "pvt" ]]; then
204+
EXPORT_SCRIPT=pvt
205+
elif [[ "${MODEL_NAME}" == "swin" ]]; then
206+
EXPORT_SCRIPT=swin_transformer
191207
elif [[ "${MODEL_NAME}" == "albert" ]]; then
192208
EXPORT_SCRIPT=albert
193209
elif [[ "${MODEL_NAME}" == "bert" ]]; then
@@ -196,6 +212,8 @@ test_model_with_qnn() {
196212
EXPORT_SCRIPT=distilbert
197213
elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
198214
EXPORT_SCRIPT=eurobert
215+
elif [[ "${MODEL_NAME}" == "roberta" ]]; then
216+
EXPORT_SCRIPT=roberta
199217
else
200218
echo "Unsupported model $MODEL_NAME"
201219
exit 1
@@ -210,10 +228,13 @@ test_model_with_qnn() {
210228
"dl3"|"mv3"|"mv2"|"ic4"|"ic3"|"vit"|"mb"|"w2l")
211229
SCRIPT_FOLDER=scripts
212230
;;
213-
"albert"|"bert"|"distilbert")
231+
"cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
232+
SCRIPT_FOLDER=oss_scripts
233+
;;
234+
"albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
214235
pip install evaluate
215236
SCRIPT_FOLDER=oss_scripts
216-
# Bert models running in 16bit will encounter op validation fail on some operations,
237+
# 16-bit models will hit op validation failures on some operations,
217238
# which requires CHIPSET >= SM8550.
218239
QNN_CHIPSET=SM8550
219240
;;

.github/workflows/apple.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ jobs:
3939
id: set_version
4040
shell: bash
4141
run: |
42-
VERSION="0.7.0.$(TZ='PST8PDT' date +%Y%m%d)"
42+
VERSION="0.8.0.$(TZ='PST8PDT' date +%Y%m%d)"
4343
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
4444
4545
build-demo-ios:

.github/workflows/trunk.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ jobs:
470470
docker-image: executorch-ubuntu-22.04-qnn-sdk
471471
submodules: 'recursive'
472472
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
473-
timeout: 900
473+
timeout: 90
474474
script: |
475475
# The generic Linux job chooses to use base env, not the one setup by the image
476476
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
@@ -489,14 +489,14 @@ jobs:
489489
strategy:
490490
matrix:
491491
dtype: [fp32]
492-
model: [albert, bert, distilbert] # eurobert requires transfomer >= 4.48.0, skip for now
492+
model: [cvt, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] # eurobert requires transformers >= 4.48.0, skip for now
493493
fail-fast: false
494494
with:
495495
runner: linux.2xlarge
496496
docker-image: executorch-ubuntu-22.04-qnn-sdk
497497
submodules: 'recursive'
498498
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
499-
timeout: 900
499+
timeout: 90
500500
script: |
501501
# The generic Linux job chooses to use base env, not the one setup by the image
502502
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")

CMakeLists.txt

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -706,11 +706,7 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
706706

707707
add_executable(executor_runner ${_executor_runner__srcs})
708708
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
709-
if(APPLE)
710-
target_link_options(executor_runner PRIVATE "LINKER:-dead_strip")
711-
else()
712-
target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")
713-
endif()
709+
target_link_options_gc_sections(executor_runner)
714710
endif()
715711
target_link_libraries(executor_runner ${_executor_runner_libs})
716712
target_compile_options(executor_runner PUBLIC ${_common_compile_options})

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from .convert_split_to_slice import ConvertSplitToSlicePass # noqa
2323
from .convert_squeezes_to_view import ConvertSqueezesToViewPass # noqa
2424
from .convert_to_clamp import ConvertToClampPass # noqa
25+
from .decompose_acosh_pass import DecomposeAcoshPass # noqa
2526
from .decompose_atan_pass import DecomposeAtanPass # noqa
2627
from .decompose_avg_pool2d import DecomposeAvgPool2d # noqa
2728
from .decompose_batch_norm_no_stats import DecomposeBatchNormNoStatsPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
ConvertSplitToSlicePass,
2626
ConvertSqueezesToViewPass,
2727
ConvertToClampPass,
28+
DecomposeAcoshPass,
2829
DecomposeAtanPass,
2930
DecomposeAvgPool2d,
3031
DecomposeBatchNormNoStatsPass,
@@ -151,6 +152,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
151152

152153
def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
153154
self.add_pass(DecomposeRoundPass())
155+
self.add_pass(DecomposeAcoshPass())
154156
self.add_pass(DecomposeSqrtPass())
155157
self.add_pass(DecomposeAtanPass())
156158
self.add_pass(ConvertIntPowToMuls())
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
# pyre-unsafe
7+
8+
from executorch.backends.arm._passes import ArmPass
9+
from executorch.exir.dialects._ops import ops as exir_ops
10+
11+
# For MI case
12+
edge_acosh_op = exir_ops.edge.aten.acosh.default
13+
14+
15+
class DecomposeAcoshPass(ArmPass):
16+
"""
17+
Decomposes acosh to supported TOSA-operations.
18+
This decomposition is based on the mathematical identity:
19+
acosh(x) = log(x + sqrt((x-1)(x+1)))
20+
"""
21+
22+
def call_operator(self, op, args, kwargs, meta, updated=False):
23+
24+
if op is not edge_acosh_op:
25+
return super().call_operator(op, args, kwargs, meta, updated)
26+
27+
log_op, sqrt_op, mul_op, sub_op, add_op, add_op_scalar = (
28+
exir_ops.edge.aten.log.default,
29+
exir_ops.edge.aten.sqrt.default,
30+
exir_ops.edge.aten.mul.Tensor,
31+
exir_ops.edge.aten.sub.Scalar,
32+
exir_ops.edge.aten.add.Tensor,
33+
exir_ops.edge.aten.add.Scalar,
34+
)
35+
36+
x = args[0]
37+
38+
# (x-1)(x+1)
39+
sub = super().call_operator(sub_op, (x, 1.0), {}, meta, True)
40+
add = super().call_operator(add_op_scalar, (x, 1.0), {}, meta, True)
41+
mul = super().call_operator(mul_op, (sub, add), {}, meta, True)
42+
43+
# sqrt((x-1)(x+1))
44+
sqrt = super().call_operator(sqrt_op, (mul,), {}, meta, True)
45+
46+
# x + sqrt((x-1)(x+1))
47+
add = super().call_operator(add_op, (x, sqrt), {}, meta, True)
48+
49+
# out = ln(x + sqrt((x-1)(x+1)))
50+
out = super().call_operator(log_op, (add,), {}, meta, True)
51+
52+
return out

backends/arm/_passes/insert_table_ops.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ class TableOps:
5555
exir_ops.edge.aten.hardsigmoid.default: torch.nn.functional.hardsigmoid,
5656
exir_ops.edge.aten.hardswish.default: torch.nn.functional.hardswish,
5757
exir_ops.edge.aten.sinh.default: torch.sinh,
58+
exir_ops.edge.aten.acosh.default: torch.acosh,
5859
}
5960

6061
# Targets that must be treated explicitly

backends/arm/_passes/match_arg_ranks_pass.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def __init__(self, exported_program):
5151
exir_ops.edge.aten.gt.Tensor,
5252
exir_ops.edge.aten.ge.Tensor,
5353
exir_ops.edge.aten.lt.Tensor,
54+
exir_ops.edge.aten.le.Tensor,
5455
exir_ops.edge.aten.pow.Tensor_Tensor,
5556
exir_ops.edge.aten.where.self,
5657
]

backends/arm/_passes/replace_scalar_with_tensor_pass.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
exir_ops.edge.aten.gt.Scalar: exir_ops.edge.aten.gt.Tensor,
3333
exir_ops.edge.aten.ge.Scalar: exir_ops.edge.aten.ge.Tensor,
3434
exir_ops.edge.aten.lt.Scalar: exir_ops.edge.aten.lt.Tensor,
35+
exir_ops.edge.aten.le.Scalar: exir_ops.edge.aten.le.Tensor,
3536
exir_ops.edge.aten.ne.Scalar: exir_ops.edge.aten.ne.Tensor,
3637
torch.ops.aten.add.Scalar: torch.ops.aten.add.Tensor,
3738
torch.ops.aten.sub.Scalar: torch.ops.aten.sub.Tensor,
@@ -43,6 +44,7 @@
4344
torch.ops.aten.gt.Scalar: torch.ops.aten.gt.Tensor,
4445
torch.ops.aten.ge.Scalar: torch.ops.aten.ge.Tensor,
4546
torch.ops.aten.lt.Scalar: torch.ops.aten.lt.Tensor,
47+
torch.ops.aten.le.Scalar: torch.ops.aten.le.Tensor,
4648
torch.ops.aten.ne.Scalar: torch.ops.aten.ne.Tensor,
4749
}
4850

0 commit comments

Comments
 (0)