
Commit dcb3ca0

Update
[ghstack-poisoned]
2 parents 3bfdade + aab8021 commit dcb3ca0


49 files changed: +2972 -1480 lines

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-90f1e7bed15ca5e48c61c5b6dc5ad4810524f82f
+ab43fe4bdf5ccd82897f0e982c451a0127bd175e

CMakeLists.txt

Lines changed: 63 additions & 85 deletions
@@ -45,7 +45,9 @@
 # ~~~
 #

-cmake_minimum_required(VERSION 3.24)
+# TODO Lower to 3.24 when XNNPACK dependency is updated to include
+# https://github.com/google/XNNPACK/commit/c690daa67f883e1b627aadf7684c06797e9a0684
+cmake_minimum_required(VERSION 3.29)
 project(executorch)

 include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
@@ -176,99 +178,74 @@ endif()

 if(EXECUTORCH_BUILD_CPUINFO)
   # --- cpuinfo
-  set(CPUINFO_SOURCE_DIR
-      "${CMAKE_CURRENT_LIST_DIR}/backends/xnnpack/third-party/cpuinfo"
+  set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+      ${CMAKE_POSITION_INDEPENDENT_CODE}
   )
-  set(CPUINFO_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/cpuinfo")
-  set(CPUINFO_INSTALL_DIR "${CPUINFO_BINARY_DIR}/install")
-  set(CPUINFO_LIBRARY
-      "${CPUINFO_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libcpuinfo.a"
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+  set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")
+  set(CPUINFO_BUILD_TOOLS
+      OFF
+      CACHE BOOL ""
   )
-
-  get_extra_cmake_args_for_external_project(CPUINFO_EXTRA_CMAKE_ARGS)
-  ExternalProject_Add(
-    cpuinfoExternalProject
-    SOURCE_DIR "${CPUINFO_SOURCE_DIR}"
-    BINARY_DIR "${CPUINFO_BINARY_DIR}"
-    INSTALL_DIR "${CPUINFO_INSTALL_DIR}"
-    BUILD_BYPRODUCTS "${CPUINFO_LIBRARY}"
-    CMAKE_ARGS "${CPUINFO_EXTRA_CMAKE_ARGS}"
-               -D
-               CMAKE_POSITION_INDEPENDENT_CODE=ON
-               -D
-               CPUINFO_BUILD_TOOLS=OFF
-               -D
-               CPUINFO_BUILD_UNIT_TESTS=OFF
-               -D
-               CPUINFO_BUILD_MOCK_TESTS=OFF
-               -D
-               CPUINFO_BUILD_BENCHMARKS=OFF
-               -D
-               CPUINFO_LIBRARY_TYPE=static
-               -D
-               CPUINFO_LOG_LEVEL=error
-               -D
-               CMAKE_INSTALL_PREFIX=<INSTALL_DIR>
-               -D
-               CLOG_SOURCE_DIR="${CPUINFO_SOURCE_DIR}/deps/clog"
+  set(CPUINFO_BUILD_UNIT_TESTS
+      OFF
+      CACHE BOOL ""
+  )
+  set(CPUINFO_BUILD_MOCK_TESTS
+      OFF
+      CACHE BOOL ""
+  )
+  set(CPUINFO_BUILD_BENCHMARKS
+      OFF
+      CACHE BOOL ""
+  )
+  set(CPUINFO_LIBRARY_TYPE
+      "static"
+      CACHE STRING ""
   )
-  add_library(cpuinfo STATIC IMPORTED)
-  set_property(TARGET cpuinfo PROPERTY IMPORTED_LOCATION "${CPUINFO_LIBRARY}")
-  add_dependencies(cpuinfo cpuinfoExternalProject)
-  # Trailing slash matters here! "Move everything from the temporary pthreadpool
-  # install directory to the proper install directory."
-  install(DIRECTORY "${CPUINFO_INSTALL_DIR}/"
-          DESTINATION "${CMAKE_INSTALL_PREFIX}"
+  set(CPUINFO_LOG_LEVEL
+      "error"
+      CACHE STRING ""
+  )
+  set(CLOG_SOURCE_DIR "${CPUINFO_SOURCE_DIR}/deps/clog")
+  add_subdirectory("${CPUINFO_SOURCE_DIR}")
+  set(CMAKE_POSITION_INDEPENDENT_CODE
+      ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG}
   )
 endif()

 if(EXECUTORCH_BUILD_PTHREADPOOL)
   # --- pthreadpool
-  set(PTHREADPOOL_SOURCE_DIR
-      "${CMAKE_CURRENT_LIST_DIR}/backends/xnnpack/third-party/pthreadpool"
+  set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+      ${CMAKE_POSITION_INDEPENDENT_CODE}
   )
-  include(ExternalProject)
-  include(GNUInstallDirs)
-  set(PTHREADPOOL_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/pthreadpool")
-  set(PTHREADPOOL_INSTALL_DIR "${PTHREADPOOL_BINARY_DIR}/install")
-  set(PTHREADPOOL_LIBRARY
-      "${PTHREADPOOL_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libpthreadpool.a"
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+  set(PTHREADPOOL_SOURCE_DIR "backends/xnnpack/third-party/pthreadpool")
+  set(PTHREADPOOL_BUILD_TESTS
+      OFF
+      CACHE BOOL ""
+  )
+  set(PTHREADPOOL_BUILD_BENCHMARKS
+      OFF
+      CACHE BOOL ""
+  )
+  set(PTHREADPOOL_LIBRARY_TYPE
+      "static"
+      CACHE STRING ""
+  )
+  set(PTHREADPOOL_ALLOW_DEPRECATED_API
+      ON
+      CACHE BOOL ""
   )
-  get_extra_cmake_args_for_external_project(PTHREADPOOL_EXTRA_CMAKE_ARGS)
   if(APPLE)
-    list(APPEND PTHREADPOOL_EXTRA_CMAKE_ARGS -D
-         PTHREADPOOL_SYNC_PRIMITIVE=condvar
+    set(PTHREADPOOL_SYNC_PRIMITIVE
+        "condvar"
+        CACHE STRING ""
     )
   endif()
-  ExternalProject_Add(
-    pthreadpoolExternalProject
-    SOURCE_DIR "${PTHREADPOOL_SOURCE_DIR}"
-    BINARY_DIR "${PTHREADPOOL_BINARY_DIR}"
-    INSTALL_DIR "${PTHREADPOOL_INSTALL_DIR}"
-    BUILD_BYPRODUCTS "${PTHREADPOOL_LIBRARY}"
-    CMAKE_ARGS ${PTHREADPOOL_EXTRA_CMAKE_ARGS}
-               -D
-               PTHREADPOOL_BUILD_TESTS=OFF
-               -D
-               PTHREADPOOL_BUILD_BENCHMARKS=OFF
-               -D
-               PTHREADPOOL_LIBRARY_TYPE=static
-               -D
-               PTHREADPOOL_ALLOW_DEPRECATED_API=ON
-               -D
-               CMAKE_POSITION_INDEPENDENT_CODE=ON
-               -D
-               CMAKE_INSTALL_PREFIX=<INSTALL_DIR>
-  )
-  add_library(pthreadpool STATIC IMPORTED)
-  set_property(
-    TARGET pthreadpool PROPERTY IMPORTED_LOCATION "${PTHREADPOOL_LIBRARY}"
-  )
-  add_dependencies(pthreadpool pthreadpoolExternalProject)
-  # Trailing slash matters here! "Move everything from the temporary pthreadpool
-  # install directory to the proper install directory."
-  install(DIRECTORY "${PTHREADPOOL_INSTALL_DIR}/"
-          DESTINATION "${CMAKE_INSTALL_PREFIX}"
+  add_subdirectory("${PTHREADPOOL_SOURCE_DIR}")
+  set(CMAKE_POSITION_INDEPENDENT_CODE
+      ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG}
   )
 endif()

@@ -585,6 +562,10 @@ if(EXECUTORCH_BUILD_EXTENSION_LLM)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/tokenizers)
 endif()

+if(EXECUTORCH_BUILD_EXTENSION_LLM_APPLE)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/apple)
+endif()
+
 if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/runner)
 endif()
@@ -756,10 +737,7 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   endif()

   set(CMAKE_EXECUTABLE_SUFFIX ".html")
-  target_link_options(
-    executor_runner PUBLIC -sALLOW_MEMORY_GROWTH --embed-file
-    "${WASM_MODEL_DIR}@/"
-  )
+  target_link_options(executor_runner PUBLIC -sALLOW_MEMORY_GROWTH --embed-file "${WASM_MODEL_DIR}@/")
 endif()
endif()

Package.swift

Lines changed: 5 additions & 0 deletions
@@ -69,6 +69,11 @@ let products = deliverables([
       "c++",
     ],
   ],
+  "executorch_llm": [
+    "targets": [
+      "executorch",
+    ],
+  ],
   "kernels_llm": [:],
   "kernels_optimized": [
     "frameworks": [

backends/apple/coreml/compiler/coreml_preprocess.py

Lines changed: 2 additions & 2 deletions
@@ -365,7 +365,7 @@ def preprocess_model(

         match model_type:
             case CoreMLBackend.MODEL_TYPE.COMPILED_MODEL:
-                shutil.rmtree(str(model_path.resolve()))
+                shutil.rmtree(str(model_path.resolve()), ignore_errors=True)
                 model_path = model_dir_path / MODEL_PATHS.COMPILED_MODEL.value
                 compiled_model_path = mlmodel.get_compiled_model_path()
                 shutil.move(
@@ -396,7 +396,7 @@ def preprocess_model(
             for key, value in model_debug_info.debugSymbolToHandles.items()
         }

-        shutil.rmtree(str(dir_path.resolve()))
+        shutil.rmtree(str(dir_path.resolve()), ignore_errors=True)
         return PreprocessResult(
             processed_bytes=processed_bytes,
             debug_handle_map=debug_handle_map,
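
Why ignore_errors=True matters here: by default shutil.rmtree raises FileNotFoundError when the target directory does not exist, so cleaning up an already-removed path would crash preprocessing; with ignore_errors=True the deletion becomes best-effort. A minimal sketch of the difference (the temp path below is hypothetical):

import shutil
import tempfile
from pathlib import Path

# Hypothetical path that does not exist on disk.
missing = Path(tempfile.gettempdir()) / "coreml-preprocess-demo-does-not-exist"

try:
    shutil.rmtree(missing)  # default: raises FileNotFoundError for a missing path
except FileNotFoundError:
    print("rmtree raised on the missing path")

shutil.rmtree(missing, ignore_errors=True)  # best-effort: silently a no-op
print("rmtree with ignore_errors=True returned normally")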

backends/apple/coreml/partition/coreml_partitioner.py

Lines changed: 92 additions & 11 deletions
@@ -28,12 +28,21 @@

 class OperatorsSupportedForCoreMLBackend(OperatorSupportBase):
     def __init__(
-        self, skip_ops_for_coreml_delegation: Optional[List[str]] = None
+        self,
+        skip_ops_for_coreml_delegation: Optional[List[str]] = None,
+        lower_full_graph: bool = False,
     ) -> None:
         if skip_ops_for_coreml_delegation is None:
             skip_ops_for_coreml_delegation = []
         super().__init__()
         self.skip_ops_for_coreml_delegation = skip_ops_for_coreml_delegation
+        self.lower_full_graph = lower_full_graph
+        self._logged_msgs = set()
+
+    def log_once(self, msg: str) -> None:
+        if msg not in self._logged_msgs:
+            logging.info(msg)
+            self._logged_msgs.add(msg)

     def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
         # get_attr node can always be supported on any backend
@@ -44,14 +53,63 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
             # skip ops if specified by user
             node_target_name = getattr(node.target, "__name__", "").lower()
             if node_target_name in (self.skip_ops_for_coreml_delegation or []):
+                self.log_once(
+                    "Skipping op for CoreML delegation because it is in skip_ops_for_coreml_delegation: "
+                    + node_target_name
+                )
+                assert (
+                    not self.lower_full_graph
+                ), "Cannot have skip_ops_for_coreml_delegation when lower_full_graph is True"
                 return False
+
+            # TODO: enable this after bugs in ExecuTorch's partitioner are fixed
+            # # If lower_full_graph=False, do not partition nodes with symbolic args because it can result in symbolic args
+            # # in the placeholders due to partitioning, which CoreML does not support
+            # if not self.lower_full_graph and any(
+            #     isinstance(arg, torch.fx.Node)
+            #     and isinstance(
+            #         arg.meta.get("val", None),
+            #         (torch.SymInt, torch.SymBool, torch.SymFloat),
+            #     )
+            #     for arg in node.args
+            # ):
+            #     self.log_once(
+            #         "Skipping op for CoreML delegation because it contains symbolic args: "
+            #         + node_target_name
+            #     )
+            #     assert not self.lower_full_graph
+            #     return False
+
             # query coremltools to see if node is supported
-            return ct.converters.mil.frontend.torch.is_torch_fx_node_supported(node)
+            is_supported = ct.converters.mil.frontend.torch.is_torch_fx_node_supported(
+                node
+            )
+            if not is_supported:
+                if self.lower_full_graph:
+                    raise NotImplementedError(
+                        f"""CoreML does not support the op {node_target_name}, but you have set lower_full_graph=True in the CoreMLPartitioner.
+
+Please set lower_full_graph=False in the CoreMLPartitioner to allow running unsupported ops outside of CoreML. Note that setting lower_full_graph=False may affect performance of CoreML and the available features.
+As an alternative to setting lower_full_graph=False, you can try rewriting your model to avoid using this op.
+
+Also consider filing an issue with Apple's coremltools repo to request support for the op: https://github.com/apple/coremltools/issues
+Do not file an issue with ExecuTorch for op support.
+"""
+                    )
+                self.log_once(
+                    "Skipping op for CoreML delegation because it is not supported by CoreML: "
+                    + node_target_name
+                )
+            return is_supported
         # cowardly refuse to support all other types of node:
         # 1. placeholder / output nodes should not be tagged
         #    reference: https://github.com/pytorch/executorch/pull/1398
         # 2. call_module / call_method should have been replaced with call_function?
         else:
+            self.log_once(
+                "Skipping op for CoreML delegation because it is not get_attr or call_function: "
+                + node.op
+            )
             return False


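The commented-out block above encodes a real constraint: when lower_full_graph=False, partitioning a node whose inputs carry symbolic values (SymInt/SymBool/SymFloat stored in an FX node's meta["val"]) can leave symbolic placeholders at partition boundaries, which CoreML cannot consume. A standalone sketch of that check, with a hypothetical helper name, to make the intent concrete:

import torch

def has_symbolic_args(node: torch.fx.Node) -> bool:
    # True if any FX-node input carries a symbolic value in its "val" metadata,
    # mirroring the disabled check in is_node_supported above.
    return any(
        isinstance(arg, torch.fx.Node)
        and isinstance(
            arg.meta.get("val", None),
            (torch.SymInt, torch.SymBool, torch.SymFloat),
        )
        for arg in node.args
    )
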
@@ -62,6 +120,8 @@ def __init__(
         skip_ops_for_coreml_delegation: Optional[List[str]] = None,
         compile_specs: Optional[List[CompileSpec]] = None,
         take_over_mutable_buffer: Optional[bool] = True,
+        lower_full_graph: bool = False,
+        take_over_constant_data: bool = True,
     ) -> None:
         if skip_ops_for_coreml_delegation is None:
             skip_ops_for_coreml_delegation = []
@@ -71,6 +131,20 @@ def __init__(
             compile_specs=compile_specs if compile_specs is not None else [],
         )
         self.take_over_mutable_buffer = take_over_mutable_buffer
+        self.lower_full_graph = lower_full_graph
+        self.take_over_constant_data = take_over_constant_data
+        self._logged_msgs = set()
+
+        if self.lower_full_graph:
+            assert (
+                len(self.skip_ops_for_coreml_delegation) == 0
+            ), "When lower_full_graph=True, you cannot set skip_ops_for_coreml_delegation"
+            assert (
+                self.take_over_constant_data
+            ), "When lower_full_graph=True, you must set take_over_constant_data=True"
+            assert (
+                self.take_over_mutable_buffer
+            ), "When lower_full_graph=True, you must set take_over_mutable_buffer=True"

     def partition(self, exported_program: ExportedProgram) -> PartitionResult:
         # Run the CapabilityBasedPartitioner to return the largest possible
@@ -80,7 +154,9 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:

         capability_partitioner = CapabilityBasedPartitioner(
             exported_program.graph_module,
-            OperatorsSupportedForCoreMLBackend(self.skip_ops_for_coreml_delegation),
+            OperatorsSupportedForCoreMLBackend(
+                self.skip_ops_for_coreml_delegation, self.lower_full_graph
+            ),
             allows_single_node_partition=True,
         )
         partition_list = capability_partitioner.propose_partitions()
@@ -90,7 +166,8 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
                 node.meta["delegation_tag"] = tag
             partition_tags[tag] = self.delegation_spec

-        tag_constant_data(exported_program)
+        if self.take_over_constant_data:
+            tag_constant_data(exported_program)
         if self.take_over_mutable_buffer:
             logger.info(
                 "Core ML partitioner will take over torch mutable buffer as Core ML state, "
@@ -105,12 +182,18 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
             tagged_exported_program=exported_program, partition_tags=partition_tags
         )

+    def log_once(self, msg: str) -> None:
+        if msg not in self._logged_msgs:
+            logging.info(msg)
+            self._logged_msgs.add(msg)
+
     def ops_to_not_decompose(
         self, ep: ExportedProgram
     ) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
         do_not_decompose = []
-        op_support = OperatorsSupportedForCoreMLBackend()
-        _logged_warnings = set()
+        op_support = OperatorsSupportedForCoreMLBackend(
+            self.skip_ops_for_coreml_delegation, self.lower_full_graph
+        )

         # CoreML prevents certain ops (like triu) from lowering to CoreML when put in the ExecuTorch op namespace
         # TODO: upstream fixes, but pending ET consuming a new published version of coremltools with the
@@ -134,9 +217,7 @@ def ops_to_not_decompose(
             except Exception as e:
                 # CoreML's op_support.is_node_supported will sometimes throw
                 # for unsupported ops, rather than returning False
-                warn_str = f"Encountered exception when checking node support: {e}"
-                if warn_str not in _logged_warnings:
-                    logger.warning(warn_str)
-                    _logged_warnings.add(warn_str)
-
+                self.log_once(
+                    f"Encountered exception when checking node support, treating node as unsupported: {e}"
+                )
         return do_not_decompose, None
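
Taken together, the new partitioner flags work like this: lower_full_graph=True turns any op CoreML cannot lower into a hard NotImplementedError during partitioning instead of a silent fallback to the portable runtime, forbids skip_ops_for_coreml_delegation, and requires both constant data and mutable buffers to be taken over by the delegate; take_over_constant_data=False skips the tag_constant_data step. A minimal usage sketch, assuming the usual ExecuTorch export entry points (torch.export.export and executorch.exir.to_edge_transform_and_lower); the toy model and shapes are hypothetical:

import torch
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
from executorch.exir import to_edge_transform_and_lower

class TinyModel(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(8, 4)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.relu(self.linear(x))

ep = torch.export.export(TinyModel().eval(), (torch.randn(1, 8),))

# lower_full_graph=True: every op must lower to CoreML, or partitioning
# raises NotImplementedError naming the unsupported op.
partitioner = CoreMLPartitioner(lower_full_graph=True)

edge = to_edge_transform_and_lower(ep, partitioner=[partitioner])
et_program = edge.to_executorch()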
