Skip to content

Commit ef99b85

Browse files
committed
Update
[ghstack-poisoned]
2 parents 4f6424b + 8e55d73 commit ef99b85

File tree

216 files changed

+5330
-1191
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

216 files changed

+5330
-1191
lines changed

.ci/scripts/build_android_instrumentation.sh

Lines changed: 0 additions & 21 deletions
This file was deleted.

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ run_and_verify() {
154154
EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. One of the players is dribbling the ball, while the others are in various"
155155
else
156156
# set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
157-
EXPECTED_PREFIX="ASSISTANT:"
157+
EXPECTED_PREFIX="ASSISTANT: image"
158158
fi
159159
if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
160160
echo "Expected result prefix: ${EXPECTED_PREFIX}"

.github/workflows/_android.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
with:
1515
runner: linux.2xlarge
1616
docker-image: executorch-ubuntu-22.04-clang12-android
17-
submodules: 'true'
17+
submodules: 'recursive'
1818
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
1919
timeout: 90
2020
upload-artifact: android-apps
@@ -36,7 +36,8 @@ jobs:
3636
cp ${BUILD_AAR_DIR}/executorch.aar $ARTIFACTS_DIR_NAME
3737
3838
mkdir -p ${ARTIFACTS_DIR_NAME}/library_test_dir
39-
bash .ci/scripts/build_android_instrumentation.sh
39+
bash extension/android/executorch_android/android_test_setup.sh
40+
(cd extension/android; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:assembleAndroidTest)
4041
cp extension/android/executorch_android/build/outputs/apk/androidTest/debug/executorch_android-debug-androidTest.apk "${ARTIFACTS_DIR_NAME}/library_test_dir"
4142
4243
mkdir -p ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ jobs:
345345
with:
346346
runner: linux.2xlarge
347347
docker-image: executorch-ubuntu-22.04-clang12-android
348-
submodules: 'true'
348+
submodules: 'recursive'
349349
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
350350
timeout: 90
351351
upload-artifact: android-apps

.github/workflows/android-release-artifacts.yml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ on:
1111
description: Upload the AAR to maven staging repository
1212
required: false
1313
type: boolean
14+
schedule:
15+
- cron: 0 10 * * *
1416

1517
concurrency:
1618
group: ${{ github.workflow }}-${{ github.ref }}
@@ -26,6 +28,10 @@ jobs:
2628
shell: bash
2729
run: |
2830
VERSION="${{ inputs.version }}"
31+
if [ -z "$VERSION" ]; then
32+
echo "No version name specified. Will create a snapshot AAR"
33+
exit 0
34+
fi
2935
if curl -I "https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}/executorch.aar" | grep "200 OK"; then
3036
echo "AAR already exists at https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}/executorch.aar"
3137
echo "Will skip build/upload"
@@ -45,7 +51,7 @@ jobs:
4551
secrets-env: EXECUTORCH_MAVEN_SIGNING_KEYID EXECUTORCH_MAVEN_SIGNING_PASSWORD EXECUTORCH_MAVEN_CENTRAL_PASSWORD EXECUTORCH_MAVEN_CENTRAL_USERNAME EXECUTORCH_MAVEN_SIGNING_GPG_KEY_CONTENTS
4652
runner: linux.2xlarge
4753
docker-image: executorch-ubuntu-22.04-clang12-android
48-
submodules: 'true'
54+
submodules: 'recursive'
4955
ref: ${{ github.sha }}
5056
timeout: 90
5157
upload-artifact: android-apps
@@ -107,6 +113,8 @@ jobs:
107113
pip install awscli==1.32.18
108114
AWS_CMD="aws s3 cp"
109115
VERSION="${{ inputs.version }}"
110-
VERSION_NAME="${VERSION:-temp_snapshot}"
111-
${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION_NAME}/executorch.aar --acl public-read
112-
${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION_NAME}/executorch.aar.sha256sums --acl public-read
116+
if [ -z "$VERSION" ]; then
117+
VERSION="snapshot-$(date +"%Y%m%d")"
118+
fi
119+
${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION}/executorch.aar --acl public-read
120+
${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION}/executorch.aar.sha256sums --acl public-read

.mypy.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ ignore_missing_imports = True
8080
[mypy-serializer.*]
8181
ignore_missing_imports = True
8282

83+
[mypy-tosa_tools.*]
84+
ignore_missing_imports = True
85+
8386
[mypy-setuptools.*]
8487
ignore_missing_imports = True
8588

CONTRIBUTING.md

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ executorch
5858
│ ├── <a href="exir/verification">verification</a> - IR verification.
5959
├── <a href="extension">extension</a> - Extensions built on top of the runtime.
6060
│ ├── <a href="extension/android">android</a> - ExecuTorch wrappers for Android apps. Please refer to the <a href="docs/source/using-executorch-android.md">Android documentation</a> and <a href="https://pytorch.org/executorch/main/javadoc/">Javadoc</a> for more information.
61-
│ ├── <a href="extension/apple">apple</a> - ExecuTorch wrappers for iOS apps. Please refer to the <a href="docs/source/using-executorch-ios.md">iOS documentation</a> and <a href="https://pytorch.org/executorch/stable/apple-runtime.html">how to integrate into Apple platform</a> for more information.
61+
│ ├── <a href="extension/apple">apple</a> - ExecuTorch wrappers for iOS apps. Please refer to the <a href="docs/source/using-executorch-ios.md">iOS documentation</a> and <a href="https://pytorch.org/executorch/main/using-executorch-ios.html">how to integrate into Apple platform</a> for more information.
6262
│ ├── <a href="extension/aten_util">aten_util</a> - Converts to and from PyTorch ATen types.
6363
│ ├── <a href="extension/data_loader">data_loader</a> - 1st party data loader implementations.
6464
│ ├── <a href="extension/evalue_util">evalue_util</a> - Helpers for working with EValue objects.
@@ -102,6 +102,8 @@ executorch
102102
## Contributing workflow
103103
We actively welcome your pull requests (PRs).
104104

105+
If you're completely new to open-source projects, GitHub, or ExecuTorch, please see our [New Contributor Guide](./docs/source/new-contributor-guide.md) for a step-by-step walkthrough on making your first contribution. Otherwise, read on.
106+
105107
1. [Claim an issue](#claiming-issues), if present, before starting work. If an
106108
issue doesn't cover the work you plan to do, consider creating one to provide
107109
context about it, and to build consensus about the scope and solution.
@@ -407,18 +409,9 @@ for basics.
407409
- If the reviewers have requests or questions, follow up with them.
408410
- The goal of the reviewer is to ensure that the code in the `main` branch of
409411
the repo is consistent, maintainable, and of high quality.
410-
1. Once the PR has been approved,
411-
- If you have the "write permission" in this repo, you can merge it yourself
412-
by clicking the "Squash and merge" button once it is green and all CI
413-
signals are passing.
414-
- If you don't have "write permission" in this repo, the reviewer will take
415-
care of the PR. The reviewer may import the PR into Meta's internal system
416-
to validate it against internal CI.
417-
- If the PR is approved but not merged within 5 business days, please comment
418-
on the PR to ask about its status.
419-
- Note that if the `main` [CI](#continuous-integration) jobs are broken, we
420-
will only merge PRs that fix the broken jobs until all critical jobs are
421-
fixed.
412+
1. Once the PR has been approved, you can merge it yourself
413+
by clicking the "Squash and merge" button once it is
414+
green and all CI signals are passing.
422415

423416
&nbsp;
424417

backends/apple/coreml/README.md

Lines changed: 1 addition & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
# ExecuTorch Core ML Delegate
22

3-
43
This subtree contains the Core ML Delegate implementation for ExecuTorch.
5-
Core ML is an optimized framework for running machine learning models on Apple devices. The delegate is the mechanism for leveraging the Core ML framework to accelerate operators when running on Apple devices.
4+
Core ML is an optimized framework for running machine learning models on Apple devices. The delegate is the mechanism for leveraging the Core ML framework to accelerate operators when running on Apple devices. To learn how to use the CoreML delegate, see the [documentation](https://github.com/pytorch/executorch/blob/main/docs/source/backends-coreml.md).
65

76
## Layout
87
- `compiler/` : Lowers a module to Core ML backend.
@@ -19,110 +18,6 @@ Core ML is an optimized framework for running machine learning models on Apple d
1918
- `workspace` : Xcode workspace for the runtime.
2019
- `third-party/`: External dependencies.
2120

22-
## Partition and Delegation
23-
24-
To delegate a Program to the **Core ML** backend, the client must call `to_backend` with the **CoreMLPartitioner**.
25-
26-
```python
27-
import torch
28-
import executorch.exir
29-
30-
from executorch.backends.apple.coreml.compiler import CoreMLBackend
31-
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
32-
33-
class Model(torch.nn.Module):
34-
def __init__(self):
35-
super().__init__()
36-
37-
def forward(self, x):
38-
return torch.sin(x)
39-
40-
source_model = Model()
41-
example_inputs = (torch.ones(1), )
42-
43-
# Export the source model to Edge IR representation
44-
aten_program = torch.export.export(source_model, example_inputs)
45-
edge_program_manager = executorch.exir.to_edge(aten_program)
46-
47-
# Delegate to Core ML backend
48-
delegated_program_manager = edge_program_manager.to_backend(CoreMLPartitioner())
49-
50-
# Serialize delegated program
51-
executorch_program = delegated_program_manager.to_executorch()
52-
with open("model.pte", "wb") as f:
53-
f.write(executorch_program.buffer)
54-
```
55-
56-
The module will be fully or partially delegated to **Core ML**, depending on whether all or only some of its ops are supported by the **Core ML** backend. Users may force certain ops to be skipped with `CoreMLPartitioner(skip_ops_for_coreml_delegation=...)`.
57-
58-
The `to_backend` implementation is a thin wrapper over [coremltools](https://apple.github.io/coremltools/docs-guides/); `coremltools` is responsible for converting an **ExportedProgram** to an **MLModel**. The converted **MLModel** data is saved, flattened, and returned as bytes to **ExecuTorch**.
59-
60-
## Quantization
61-
62-
To quantize a Program in a way that Core ML favors, the client may use **CoreMLQuantizer**.
63-
64-
```python
65-
import torch
66-
import executorch.exir
67-
68-
from torch.export import export_for_training
69-
from torch.ao.quantization.quantize_pt2e import (
70-
convert_pt2e,
71-
prepare_pt2e,
72-
prepare_qat_pt2e,
73-
)
74-
75-
from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
76-
from coremltools.optimize.torch.quantization.quantization_config import (
77-
LinearQuantizerConfig,
78-
QuantizationScheme,
79-
)
80-
81-
class Model(torch.nn.Module):
82-
def __init__(self) -> None:
83-
super().__init__()
84-
self.conv = torch.nn.Conv2d(
85-
in_channels=3, out_channels=16, kernel_size=3, padding=1
86-
)
87-
self.relu = torch.nn.ReLU()
88-
89-
def forward(self, x: torch.Tensor) -> torch.Tensor:
90-
a = self.conv(x)
91-
return self.relu(a)
92-
93-
source_model = Model()
94-
example_inputs = (torch.randn((1, 3, 256, 256)), )
95-
96-
pre_autograd_aten_dialect = export_for_training(source_model, example_inputs).module()
97-
98-
quantization_config = LinearQuantizerConfig.from_dict(
99-
{
100-
"global_config": {
101-
"quantization_scheme": QuantizationScheme.symmetric,
102-
"activation_dtype": torch.quint8,
103-
"weight_dtype": torch.qint8,
104-
"weight_per_channel": True,
105-
}
106-
}
107-
)
108-
quantizer = CoreMLQuantizer(quantization_config)
109-
110-
# For post-training quantization, use `prepare_pt2e`
111-
# For quantization-aware training, use `prepare_qat_pt2e`
112-
prepared_graph = prepare_pt2e(pre_autograd_aten_dialect, quantizer)
113-
114-
prepared_graph(*example_inputs)
115-
converted_graph = convert_pt2e(prepared_graph)
116-
```
117-
118-
The `converted_graph` is the quantized torch model and can be delegated to **Core ML** in the same way, through **CoreMLPartitioner**.
119-
120-
## Runtime
121-
122-
To execute a Core ML delegated program, the application must link to the `coremldelegate` library. Once it is linked, no additional steps are required: when running the program, ExecuTorch calls the Core ML runtime to execute the Core ML delegated part of the program.
123-
124-
Please follow the instructions described in the [Core ML setup](/backends/apple/coreml/setup.md) to link the `coremldelegate` library.
125-
12621
## Help & Improvements
12722
If you have problems or questions or have suggestions for ways to make
12823
implementation and testing better, please create an issue on [github](https://www.github.com/pytorch/executorch/issues).

backends/arm/_passes/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,15 @@
2020
from .convert_to_clamp import ConvertToClampPass # noqa
2121
from .decompose_batchnorm_pass import DecomposeBatchNormPass # noqa
2222
from .decompose_div_pass import DecomposeDivPass # noqa
23+
from .decompose_gelu_pass import DecomposeGeluPass # noqa
2324
from .decompose_layernorm_pass import DecomposeLayerNormPass # noqa
2425
from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass # noqa
2526
from .decompose_linear_pass import DecomposeLinearPass # noqa
2627
from .decompose_meandim_pass import DecomposeMeanDimPass # noqa
2728
from .decompose_select import DecomposeSelectPass # noqa
2829
from .decompose_softmax_pass import DecomposeSoftmaxPass # noqa
2930
from .decompose_softmax_unstable_pass import DecomposeSoftmaxUnstablePass # noqa
31+
from .decompose_sqrt_pass import DecomposeSqrtPass # noqa
3032
from .decompose_var_pass import DecomposeVarPass # noqa
3133
from .fold_qdq_with_annotated_qparams_pass import ( # noqa
3234
FoldAndAnnotateQParamsPass,

backends/arm/_passes/arm_pass_manager.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,15 @@
2525
ConvertToClampPass,
2626
DecomposeBatchNormPass,
2727
DecomposeDivPass,
28+
DecomposeGeluPass,
2829
DecomposeLayerNormPass,
2930
DecomposeLeakyReLUPass,
3031
DecomposeLinearPass,
3132
DecomposeMeanDimPass,
3233
DecomposeSelectPass,
3334
DecomposeSoftmaxPass,
3435
DecomposeSoftmaxUnstablePass,
36+
DecomposeSqrtPass,
3537
DecomposeVarPass,
3638
FoldAndAnnotateQParamsPass,
3739
FuseBatchnorm2DPass,
@@ -115,6 +117,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
115117
return self._transform(exported_program.graph_module)
116118

117119
def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
120+
self.add_pass(DecomposeSqrtPass())
118121
self.add_pass(ReplaceScalarWithTensorArgPassTOSAMI())
119122
self.add_pass(FuseQuantizedActivationPass())
120123
self.add_pass(RemoveGetItemPass())
@@ -130,6 +133,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
130133
self.add_pass(ConvertMeanDimToAveragePoolPass())
131134
self.add_pass(DecomposeDivPass())
132135
self.add_pass(DecomposeSoftmaxPass())
136+
self.add_pass(DecomposeGeluPass())
133137
self.add_pass(ConvertFullLikeToFullPass())
134138
self.add_pass(ConvertToClampPass())
135139
self.add_pass(ConvertMinMaxPass())
@@ -162,12 +166,22 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
162166

163167
return self._transform(exported_program.graph_module)
164168

169+
def _tosa_1_0_int_quantized_pipeline(self, exported_program: ExportedProgram):
170+
return self._tosa_080_BI_pipeline(exported_program)
171+
172+
def _tosa_1_0_fp_pipeline(self, exported_program: ExportedProgram):
173+
return self._tosa_080_MI_pipeline(exported_program)
174+
165175
def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
166176
"""Apply passes before transforming program to backend"""
167177
if self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+BI"):
168178
return self._tosa_080_BI_pipeline(exported_program)
169179
elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+MI"):
170180
return self._tosa_080_MI_pipeline(exported_program)
181+
elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-1.0+FP"):
182+
return self._tosa_1_0_fp_pipeline(exported_program)
183+
elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-1.0+INT"):
184+
return self._tosa_1_0_int_quantized_pipeline(exported_program)
171185
else:
172186
raise NotImplementedError(
173187
f"No pass pipeline implemented for {self.tosa_spec=}"
@@ -181,6 +195,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
181195
self.add_pass(DecomposeMeanDimPass())
182196
self.add_pass(DecomposeDivPass())
183197
self.add_pass(DecomposeLeakyReLUPass())
198+
self.add_pass(DecomposeSqrtPass())
184199

185200
if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
186201
# Numerically stable softmax uses amax which is not supported on Ethos-U55

backends/arm/_passes/cast_int64_pass.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from torch._export.utils import is_buffer
1313

1414
logger = logging.getLogger(__name__)
15-
logger.setLevel(logging.WARNING)
1615

1716

1817
class CastInt64BuffersToInt32Pass(ExportPass):

0 commit comments

Comments
 (0)