Update

swolchok · swolchok · commit db7c3cd6840d · 2025-07-24T17:59:20.000-07:00
[ghstack-poisoned]
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
@@ -128,7 +128,7 @@ def ethosu_compile_spec(
         self.compiler_flags.append("--output-format=raw")
         self.compiler_flags.append("--debug-force-regor")
 
-        base_tosa_version = "TOSA-0.80+BI"
+        base_tosa_version = "TOSA-1.0+INT"
         if "u55" in target:
             # Add the Ethos-U55 extension marker
             base_tosa_version += "+u55"
diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh
@@ -228,7 +228,7 @@ test_models_ethos-u85() { # End to End model tests using model_test.py
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=mv2 --extra_flags="-DET_ATOL=2.00 -DET_RTOL=2.00"
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-512 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03"
-    python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"
+    #python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"  # Takes long time to run
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=resnet18 --extra_flags="-DET_ATOL=0.2 -DET_RTOL=0.2"
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=resnet50 --extra_flags="-DET_ATOL=0.2 -DET_RTOL=0.2"
diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
@@ -228,6 +228,9 @@ def _lower_ep_to_edge(
     """
     Lower an ExportedProgram to an EdgeProgramManager (in edge IR).
     """
+    # Apply passes which transform the ExportedProgram before it gets lowered to edge.
+    expo_program = apply_torch_ops_passes(expo_program)
+
     # Call to_edge to convert the graph to edge IR.
     # Note: dim_order is skipped (https://github.com/pytorch/executorch/issues/3704)
     edge_prog_manager = to_edge(
@@ -263,9 +266,6 @@ def export_to_edge(
     # Export the model into an ExportedProgram.
     expo_program = trace(model, inputs)
 
-    # Apply passes which transform the ExportedProgram before it gets lowered to edge.
-    expo_program = apply_torch_ops_passes(expo_program)
-
     # Lower the model to edge IR.
     edge_prog_manager = _lower_ep_to_edge(
         expo_program, dump_graphs, constant_methods, core_aten_exceptions
diff --git a/backends/cadence/aot/ops_registrations.py b/backends/cadence/aot/ops_registrations.py
@@ -276,6 +276,14 @@
     "requantize.per_tensor_out(Tensor input, float in_scale, int in_zero_point, float out_scale, "
     "int out_zero_point, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)"
 )
+lib.define(
+    "roi_align_box_processor.out(Tensor rois, int output_size_h, int output_size_w, "
+    "int sampling_ratio, bool aligned, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+    "roi_align_box_processor(Tensor rois, int output_size_h, int output_size_w, "
+    "int sampling_ratio, bool aligned) -> (Tensor out)"
+)
 
 # Custom ops with aten namespace. Need to specify the lib var as FRAGMENT type as aten library is already defined
 aten_lib = Library("aten", "FRAGMENT")
@@ -1038,3 +1046,14 @@ def idma_store_impl(
     channel: int = 0,
 ) -> torch.Tensor:
     return copy_idma_copy_impl(src, task_num, channel)
+
+
+@register_fake("cadence::roi_align_box_processor")
+def roi_align_box_processor_meta(
+    rois: torch.Tensor,
+    output_size_h: int,
+    output_size_w: int,
+    sampling_ratio: int,
+    aligned: bool,
+) -> torch.Tensor:
+    return rois.new_empty((rois.shape[0], 80), dtype=torch.uint8)
diff --git a/backends/cadence/aot/replace_ops.py b/backends/cadence/aot/replace_ops.py
@@ -2328,12 +2328,15 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
 
             # Extract an argument to a separate full op.
             with graph_module.graph.inserting_before(mul_node):
-                full_tensor = graph_module.graph.call_function(
+                full_node = graph_module.graph.call_function(
                     torch.ops.aten.full.default, args=([1], full_arg)
                 )
+                full_node.meta = mul_node.meta
+                full_node.meta["val"] = [1]
                 new_mul_node = graph_module.graph.call_function(
-                    torch.ops.aten.mul.Tensor, args=(x_arg, full_tensor)
+                    torch.ops.aten.mul.Tensor, args=(x_arg, full_node)
                 )
+                new_mul_node.meta = mul_node.meta
             # Replace the old mul with a newly created mul.
             mul_node.replace_all_uses_with(new_mul_node)
             graph_module.graph.erase_node(mul_node)
diff --git a/backends/xnnpack/test/recipes/test_xnnpack_recipes.py b/backends/xnnpack/test/recipes/test_xnnpack_recipes.py
@@ -18,7 +18,7 @@
 from executorch.examples.models.model_factory import EagerModelFactory
 from executorch.examples.xnnpack import MODEL_NAME_TO_OPTIONS, QuantType
 from executorch.exir.schema import DelegateCall, Program
-from executorch.export import export, ExportRecipe
+from executorch.export import export, ExportRecipe, recipe_registry
 from torch import nn
 from torch.testing._internal.common_quantization import TestHelperModules
 
@@ -27,6 +27,7 @@ class TestXnnpackRecipes(unittest.TestCase):
     def setUp(self) -> None:
         torch._dynamo.reset()
         super().setUp()
+        recipe_registry.register_backend_recipe_provider(XNNPACKRecipeProvider())
 
     def tearDown(self) -> None:
         super().tearDown()
diff --git a/examples/arm/setup.sh b/examples/arm/setup.sh
@@ -60,7 +60,7 @@ fi
 
 # Vela
 vela_repo_url="https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela"
-vela_rev="8cac2b9a7204b57125a8718049519b091a98846c"
+vela_rev="d37febc1715edf0d236c2ff555739a8a9aadcf9a"
 
 # MLSDK dependencies
 mlsdk_manifest_dir="ml-sdk-for-vulkan-manifest"
diff --git a/examples/qualcomm/oss_scripts/llama/CMakeLists.txt b/examples/qualcomm/oss_scripts/llama/CMakeLists.txt
@@ -4,7 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-
 # model sharding with custom op
 set(CUSTOM_OP_SRCS_FILE
     "${EXECUTORCH_SOURCE_DIR}/extension/llm/custom_ops/op_fallback.cpp"
@@ -50,9 +49,7 @@ list(APPEND _llama_runner__srcs)
 # build qnn llama runner
 add_executable(qnn_llama_runner ${_llama_runner__srcs})
 target_include_directories(
-  qnn_llama_runner
-  PUBLIC
-    ${_common_include_directories}
+  qnn_llama_runner PUBLIC ${_common_include_directories}
 )
 
 executorch_target_link_options_shared_lib(quantized_ops_lib)
@@ -66,7 +63,6 @@ target_link_libraries(
   extension_llm_runner
   extension_module
   extension_tensor
-  tokenizers
   gflags
   custom_ops
   quantized_ops_lib
@@ -75,8 +71,7 @@ target_link_libraries(
 )
 
 target_include_directories(
-  qnn_llama_runner
-  PUBLIC ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
+  qnn_llama_runner PUBLIC ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
 
 target_compile_options(qnn_llama_runner PUBLIC ${_common_compile_options})
diff --git a/extension/llm/apple/CMakeLists.txt b/extension/llm/apple/CMakeLists.txt
@@ -42,3 +42,7 @@ set_source_files_properties(
   ${OBJC_SOURCES} PROPERTIES COMPILE_FLAGS "-fobjc-arc" "-fno-exceptions"
                                                         "-fno-rtti"
 )
+
+set_target_properties(extension_llm_apple PROPERTIES
+  XCODE_ATTRIBUTE_BUILD_LIBRARY_FOR_DISTRIBUTION YES
+)
diff --git a/pytest.ini b/pytest.ini
@@ -33,7 +33,8 @@ addopts =
     exir/program/test
     exir/tests/
     # executorch/export
-    executorch/export/tests
+    export/tests
+    --ignore=export/tests/test_export_stages.py
     # kernels/
     kernels/prim_ops/test
     kernels/quantized
diff --git a/scripts/build_apple_frameworks.sh b/scripts/build_apple_frameworks.sh
@@ -249,9 +249,9 @@ cp -r $FRAMEWORK_EXECUTORCH_HEADERS_PATH/executorch/runtime/core/portable_type/c
 
 cp "$SOURCE_ROOT_DIR/extension/apple/$FRAMEWORK_EXECUTORCH_MODULE_NAME/Exported/"*.h "$FRAMEWORK_EXECUTORCH_HEADERS_PATH/$FRAMEWORK_EXECUTORCH_MODULE_NAME"
 
-cat > "$FRAMEWORK_EXECUTORCH_HEADERS_PATH/$FRAMEWORK_EXECUTORCH_MODULE_NAME/module.modulemap" << EOF
+cat > "$FRAMEWORK_EXECUTORCH_HEADERS_PATH/module.modulemap" << EOF
 module ${FRAMEWORK_EXECUTORCH_MODULE_NAME} {
-  umbrella header "${FRAMEWORK_EXECUTORCH_MODULE_NAME}.h"
+  umbrella header "${FRAMEWORK_EXECUTORCH_MODULE_NAME}/${FRAMEWORK_EXECUTORCH_MODULE_NAME}.h"
   export *
 }
 EOF

Original file line number	Diff line number	Diff line change
`@@ -42,3 +42,7 @@ set_source_files_properties(`
`42`	`42`	`${OBJC_SOURCES} PROPERTIES COMPILE_FLAGS "-fobjc-arc" "-fno-exceptions"`
`43`	`43`	`"-fno-rtti"`
`44`	`44`	`)`
	`45`	`+`
	`46`	`+set_target_properties(extension_llm_apple PROPERTIES`
	`47`	`+ XCODE_ATTRIBUTE_BUILD_LIBRARY_FOR_DISTRIBUTION YES`
	`48`	`+)`