pytorch
diff --git a/‎.ci/docker/common/install_java.sh
Lines changed: 12 additions & 0 deletions b/‎.ci/docker/common/install_java.sh
Lines changed: 12 additions & 0 deletions
diff --git a/‎.ci/docker/ubuntu/Dockerfile
Lines changed: 4 additions & 0 deletions b/‎.ci/docker/ubuntu/Dockerfile
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/workflows/doc-build.yml
Lines changed: 6 additions & 0 deletions b/‎.github/workflows/doc-build.yml
Lines changed: 6 additions & 0 deletions
diff --git a/‎CODEOWNERS
Lines changed: 8 additions & 8 deletions b/‎CODEOWNERS
Lines changed: 8 additions & 8 deletions
diff --git a/‎backends/apple/coreml/test/test_coreml_partitioner.py
Lines changed: 1 addition & 1 deletion b/‎backends/apple/coreml/test/test_coreml_partitioner.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/apple/mps/runtime/MPSBackend.mm
Lines changed: 5 additions & 2 deletions b/‎backends/apple/mps/runtime/MPSBackend.mm
Lines changed: 5 additions & 2 deletions
diff --git a/‎backends/arm/runtime/EthosUBackend.cpp
Lines changed: 5 additions & 2 deletions b/‎backends/arm/runtime/EthosUBackend.cpp
Lines changed: 5 additions & 2 deletions
diff --git a/‎backends/cadence/aot/remove_ops.py
Lines changed: 66 additions & 0 deletions b/‎backends/cadence/aot/remove_ops.py
Lines changed: 66 additions & 0 deletions
diff --git a/‎backends/cadence/aot/tests/test_remove_ops_passes.py
Lines changed: 52 additions & 0 deletions b/‎backends/cadence/aot/tests/test_remove_ops_passes.py
Lines changed: 52 additions & 0 deletions
diff --git a/‎backends/mediatek/runtime/NeuronBackend.cpp
Lines changed: 6 additions & 2 deletions b/‎backends/mediatek/runtime/NeuronBackend.cpp
Lines changed: 6 additions & 2 deletions
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+
+apt-get update
+
+apt-get install -y --no-install-recommends openjdk-17-jdk
@@ -30,6 +30,10 @@ ARG BUCK2_VERSION
 COPY ./common/install_buck.sh install_buck.sh
 RUN bash ./install_buck.sh && rm install_buck.sh
 
+# Install Java: install_java.sh installs OpenJDK 17 (openjdk-17-jdk) via apt.
+# NOTE(review): presumably required by the Gradle javadoc step in the
+# doc-build workflow -- confirm. The script is removed once it has run.
+COPY ./common/install_java.sh install_java.sh
+RUN bash ./install_java.sh && rm install_java.sh
+
 # Setup user
 COPY ./common/install_user.sh install_user.sh
 RUN bash ./install_user.sh && rm install_user.sh
 
@@ -68,6 +68,12 @@ jobs:
         make html
         cd ..
 
+        # Build the Javadoc for extension/android with the Gradle wrapper, copy
+        # the generated build/docs/javadoc directory into ${RUNNER_DOCS_DIR},
+        # then return to the directory we started from.
+        cd extension/android
+        ./gradlew javadoc
+        cp -rf build/docs/javadoc "${RUNNER_DOCS_DIR}"
+        cd ../..
+
         # If it's main branch, add noindex tag to all .html files to exclude from Google Search indexing.
         echo "GitHub Ref: ${GITHUB_REF}"
         if [[ "${{ github.ref }}" == 'refs/heads/main' ]]; then
 
@@ -15,7 +15,7 @@
 /backends/vulkan @SS-JIA
 /backends/xnnpack @digantdesai @mcr229
 
-/build @GregoryComer @dbort @kirklandsign
+/build @GregoryComer @kirklandsign
 
 /codegen @larryliu0820 @lucylq
 
@@ -47,32 +47,32 @@
 /extension/apple @shoumikhin
 /extension/aten_util @JacobSzwejbka
 /extension/benchmark @tarun292
-/extension/data_loader @JacobSzwejbka @lucylq @dbort
-/extension/evalue_util @GregoryComer @dbort
+/extension/data_loader @JacobSzwejbka @lucylq
+/extension/evalue_util @GregoryComer
 /extension/export_util @kimishpatel
 /extension/flat_tensor @lucylq
 /extension/gguf_util @larryliu0820
 /extension/kernel_util @kimishpatel @manuelcandales
 /extension/llm @jackzhxng @iseeyuan @larryliu0820
-/extension/memory_allocator @JacobSzwejbka @dbort
+/extension/memory_allocator @JacobSzwejbka
 /extension/module @shoumikhin
 /extension/parallel @kimishpatel
 /extension/pybindings @JacobSzwejbka @larryliu0820
 /extension/pytree @JacobSzwejbka
-/extension/runner_util @dbort
+# /extension/runner_util @dbort
 /extension/tensor @shoumikhin
-/extension/testing_util @dbort
+# /extension/testing_util @dbort
 /extension/threadpool @kimishpatel
 /extension/training @JacobSzwejbka
 
 /kernels @manuelcandales
 
 /profiler @tarun292 @Gasoonjia
 
-/runtime @dbort @JacobSzwejbka @lucylq
+/runtime @JacobSzwejbka @lucylq
 /runtime/backend @cccclai
 
-/schema @dbort @JacobSzwejbka @lucylq
+/schema @JacobSzwejbka @lucylq
 
 /scripts @GregoryComer
 
 
@@ -117,7 +117,7 @@ def forward(self, q, k, v, mask):
         v = torch.randn(batch_size, n_heads, max_seq_length, embedding_dim)
         mask = torch.randn(seq_len, max_seq_length)
         example_inputs = (q, k, v, mask)
-        ep = torch.export.export(model, example_inputs)
+        ep = torch.export.export(model, example_inputs, strict=True)
         coreml_partitioner = CoreMLPartitioner()
 
         # Using to_edge_transform_and_lower, we expect SDPA will be preserved and show up in delegated graph
 
@@ -43,8 +43,11 @@ bool is_available() const override {
       BackendInitContext& context,
       FreeableBuffer* processed,
       ArrayRef<CompileSpec> compile_specs) const override {
-    auto executor = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
-        context.get_runtime_allocator(), mps::delegate::MPSExecutor);
+    auto executor = context.get_runtime_allocator()->allocateInstance<mps::delegate::MPSExecutor>();
+    if (executor == nullptr) {
+      return Error::MemoryAllocationFailed;
+    }
+
     // NOTE: Since we use placement new and since this type is not trivially
     // destructible, we must call the destructor manually in destroy().
     new (executor) mps::delegate::MPSExecutor;
 
@@ -120,8 +120,11 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
     }
 
     MemoryAllocator* allocator = context.get_runtime_allocator();
-    ExecutionHandle* handle =
-        ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(allocator, ExecutionHandle);
+    ExecutionHandle* handle = allocator->allocateInstance<ExecutionHandle>();
+    if (handle == nullptr) {
+      return Error::MemoryAllocationFailed;
+    }
+
     handle->processed = processed;
 
     // Return the same buffer we were passed - this data will be
 
@@ -807,6 +807,72 @@ def remove_branched(
                 user.replace_all_uses_with(node.args[0])
 
 
+class RemoveCatFromSliceCopyPass(ExportPass):
+    def _remove_unused_cat(self, graph_module: torch.fx.GraphModule) -> None:
+        slice_copy_nodes = [
+            node
+            for node in graph_module.graph.nodes
+            if node.target == exir_ops.edge.aten.slice_copy.Tensor
+        ]
+        for slice_copy_node in slice_copy_nodes:
+            slice_dim, start_idx, end_idx, step = 0, 0, float("inf"), 1
+            input_node, *other_args = slice_copy_node.args
+            if len(other_args) >= 1:
+                slice_dim = other_args[0]
+            if len(other_args) >= 2:
+                start_idx = other_args[1]
+            if len(other_args) >= 3:
+                end_idx = other_args[2]
+            if len(other_args) >= 4:
+                step = other_args[3]
+            if step != 1:
+                continue
+            slice_copy_dtype = slice_copy_node.meta["val"].dtype
+            if input_node.target != exir_ops.edge.aten.cat.default:
+                continue
+            cat_dtype = input_node.meta["val"].dtype
+            if slice_copy_dtype != cat_dtype:
+                continue
+            cat_dim = input_node.args[1:]
+            if len(cat_dim) == 0:
+                cat_dim = 0
+            if cat_dim != slice_dim:
+                continue
+            cat_output_shape = input_node.meta["val"].shape
+            start_idx = (
+                cat_output_shape[cat_dim] + start_idx if start_idx < 0 else start_idx
+            )
+            end_idx = (
+                cat_output_shape[cat_dim]
+                if end_idx > cat_output_shape[cat_dim]
+                else end_idx
+            )
+            base_idx = 0
+            cat_input_to_keep = None
+            for cat_input_node in input_node.args[0]:
+                cat_input_dtype = cat_input_node.meta["val"].dtype
+                if slice_copy_dtype != cat_input_dtype:
+                    continue
+                cat_input_shape = cat_input_node.meta["val"].shape
+
+                # check if the slice range overlaps with the cat range
+                if (
+                    base_idx <= start_idx
+                    and end_idx <= list(cat_input_shape)[cat_dim] + base_idx
+                ):
+                    cat_input_to_keep = cat_input_node
+                    break
+                base_idx += list(cat_input_shape)[cat_dim]
+            if cat_input_to_keep is not None:
+                slice_copy_node.replace_input_with(input_node, cat_input_to_keep)
+
+    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        self._remove_unused_cat(graph_module)
+        graph_module.recompile()
+        graph_module.graph.eliminate_dead_code()
+        return super().call(graph_module)
+
+
 # The following class consolidates functions to remove ops that are redundant
 # in Jarvis. Currently, each function in this class iterates over each node of
 # the graph module once. In future, we could consolidate them into a monolithic
 
@@ -22,6 +22,7 @@
 from executorch.backends.cadence.aot.remove_ops import (
     RemoveAliasCopyOpPass,
     RemoveBranchedQuantDequant,
+    RemoveCatFromSliceCopyPass,
     RemoveCloneOpPass,
     RemoveContiguousOpPass,
     RemoveDetachCopyPass,
@@ -741,3 +742,54 @@ def forward(self, x):
                 },
             )
         )
+
+    def test_remove_cat_from_slice_copy_all_removal(self) -> None:
+        class M(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, x, y):
+                x1 = torch.cat((x, y), 0)  # (2, 4)
+                return torch.slice_copy(x1, dim=0, start=0, end=1)
+
+        inputs = tuple(torch.randn(2, 4) for _ in range(2))
+        graph_module = export_to_edge(M(), inputs).exported_program().graph_module
+        p = RemoveCatFromSliceCopyPass()
+        graph_module = cast(PassResult, p(graph_module)).graph_module
+
+        # Ensure both cat nodes were removed
+        self.assertEqual(count_node(graph_module, exir_ops.edge.aten.cat.default), 0)
+
+    def test_remove_cat_from_slice_copy_no_removal(self) -> None:
+        class M(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, x, y):
+                x1 = torch.cat((x, y), 0)  # (2, 4)
+                return torch.slice_copy(x1, dim=0, start=0, end=3)
+
+        inputs = tuple(torch.randn(2, 4) for _ in range(2))
+        graph_module = export_to_edge(M(), inputs).exported_program().graph_module
+        p = RemoveCatFromSliceCopyPass()
+        graph_module = cast(PassResult, p(graph_module)).graph_module
+
+        # Ensure both cat nodes were removed
+        self.assertEqual(count_node(graph_module, exir_ops.edge.aten.cat.default), 1)
+
+    def test_remove_cat_from_slice_copy_zero_range(self) -> None:
+        class M(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, x, y):
+                x1 = torch.cat((x, y), 0)  # (2, 4)
+                return torch.slice_copy(x1, dim=0, start=0, end=0)
+
+        inputs = tuple(torch.randn(2, 4) for _ in range(2))
+        graph_module = export_to_edge(M(), inputs).exported_program().graph_module
+        p = RemoveCatFromSliceCopyPass()
+        graph_module = cast(PassResult, p(graph_module)).graph_module
+
+        # Ensure both cat nodes were removed
+        self.assertEqual(count_node(graph_module, exir_ops.edge.aten.cat.default), 0)
@@ -68,8 +68,12 @@ Result<DelegateHandle*> NeuronBackend::init(
       processed->size());
 
   MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
-  NeuronExecuTorchDelegate* delegate = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
-      runtime_allocator, NeuronExecuTorchDelegate);
+  NeuronExecuTorchDelegate* delegate =
+      runtime_allocator->allocateInstance<NeuronExecuTorchDelegate>();
+  if (delegate == nullptr) {
+    return Error::MemoryAllocationFailed;
+  }
+
   new (delegate) NeuronExecuTorchDelegate();
 
   if (delegate == nullptr) {