pytorch
diff --git a/‎.github/workflows/apple-perf.yml
Lines changed: 1 addition & 2 deletions b/‎.github/workflows/apple-perf.yml
Lines changed: 1 addition & 2 deletions
diff --git a/‎.github/workflows/apple.yml
Lines changed: 2 additions & 4 deletions b/‎.github/workflows/apple.yml
Lines changed: 2 additions & 4 deletions
diff --git a/‎backends/qualcomm/_passes/__init__.py
Lines changed: 4 additions & 0 deletions b/‎backends/qualcomm/_passes/__init__.py
Lines changed: 4 additions & 0 deletions
diff --git a/‎backends/qualcomm/_passes/annotate_adaptive_avg_pool1d.py
Lines changed: 45 additions & 0 deletions b/‎backends/qualcomm/_passes/annotate_adaptive_avg_pool1d.py
Lines changed: 45 additions & 0 deletions
diff --git a/‎backends/qualcomm/_passes/annotate_quant_attrs.py
Lines changed: 2 additions & 1 deletion b/‎backends/qualcomm/_passes/annotate_quant_attrs.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎backends/qualcomm/_passes/annotate_stack.py
Lines changed: 2 additions & 1 deletion b/‎backends/qualcomm/_passes/annotate_stack.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎backends/qualcomm/_passes/annotate_unbind.py
Lines changed: 2 additions & 1 deletion b/‎backends/qualcomm/_passes/annotate_unbind.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎backends/qualcomm/_passes/decompose_roll.py
Lines changed: 93 additions & 0 deletions b/‎backends/qualcomm/_passes/decompose_roll.py
Lines changed: 93 additions & 0 deletions
diff --git a/‎backends/qualcomm/_passes/expand_broadcast_tensor_shape.py
Lines changed: 1 addition & 2 deletions b/‎backends/qualcomm/_passes/expand_broadcast_tensor_shape.py
Lines changed: 1 addition & 2 deletions
diff --git a/‎backends/qualcomm/_passes/fold_qdq.py
Lines changed: 1 addition & 2 deletions b/‎backends/qualcomm/_passes/fold_qdq.py
Lines changed: 1 addition & 2 deletions
@@ -386,8 +386,7 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Build ExecuTorch iOS frameworks"
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
-          scripts/build_apple_frameworks.sh --Release --Debug --coreml --custom --mps --optimized --portable --quantized --xnnpack
+        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output scripts/build_apple_frameworks.sh
         echo "::endgroup::"
 
         # NB: Although exported models can be copied to this directory and bundled together with the
 
@@ -173,8 +173,7 @@ jobs:
         backends/apple/mps/install_requirements.sh
 
         # Build iOS Frameworks
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
-        scripts/build_apple_frameworks.sh --Release --Debug --coreml --custom --mps --optimized --portable --quantized --xnnpack
+        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output scripts/build_apple_frameworks.sh
 
         # Bundle iOS Frameworks
         for FRAMEWORK in "${FRAMEWORKS[@]}"; do (
@@ -314,8 +313,7 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Build ExecuTorch iOS frameworks"
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
-          scripts/build_apple_frameworks.sh --Release --Debug --coreml --custom --mps --optimized --portable --quantized --xnnpack
+        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output scripts/build_apple_frameworks.sh
         echo "::endgroup::"
 
         echo "::group::Build ExecuTorch benchmark app"
 
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+from .annotate_adaptive_avg_pool1d import AnnotateAdaptiveAvgPool1D
 from .annotate_quant_attrs import AnnotateQuantAttrs
 from .annotate_stack import AnnotateStack
 from .annotate_unbind import AnnotateUnbind
@@ -16,6 +17,7 @@
 from .decompose_einsum import DecomposeEinsum
 from .decompose_expm1 import DecomposeExpM1
 from .decompose_linalg_vector_norm import DecomposeLinalgVectorNorm
+from .decompose_roll import DecomposeRoll
 from .decompose_silu import DecomposeSilu
 from .expand_broadcast_tensor_shape import ExpandBroadcastTensorShape
 from .fixed_linear_keep_dim import FixedLinearKeepDim
@@ -39,6 +41,7 @@
 
 
 __all__ = [
+    AnnotateAdaptiveAvgPool1D,
     AnnotateQuantAttrs,
     AnnotateStack,
     AnnotateUnbind,
@@ -51,6 +54,7 @@
     DecomposeEinsum,
     DecomposeExpM1,
     DecomposeLinalgVectorNorm,
+    DecomposeRoll,
     DecomposeSilu,
     ExpandBroadcastTensorShape,
     FixedLinearKeepDim,
 
@@ -0,0 +1,45 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import torch
+from executorch.backends.qualcomm.builders.node_visitor import q_ops
+from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
+from executorch.exir.pass_base import ExportPass, PassResult
+from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
+
+from .utils import get_quant_attrs
+
+
+class AnnotateAdaptiveAvgPool1D(ExportPass):
+    """
+    Add "quant_attrs" to graph nodes' meta from the QDQ information
+    generated after quantization process.
+    adaptive_avg_pool1d got decomposed to unsqueeze -> adaptive_avg_pool2d -> squeeze
+    """
+
+    decomp_ops = [torch.ops.aten.adaptive_avg_pool2d.default]
+
+    def __init__(self, edge_program: torch.export.ExportedProgram):
+        super(AnnotateAdaptiveAvgPool1D, self).__init__()
+        self.edge_program = edge_program
+
+    def _annotate_adaptive_avg_pool1d(self, graph_module: torch.fx.GraphModule):
+        partitions = get_source_partitions(
+            graph_module.graph, [torch.ops.aten.adaptive_avg_pool1d.default]
+        )
+        for src_partitions in partitions.values():
+            for src_partition in src_partitions:
+                output = src_partition.output_nodes[0]
+                if (list(output.users)[0].target) in q_ops:
+                    quant_attrs = get_quant_attrs(
+                        self.edge_program, list(output.users)[0]
+                    )
+                    for n in src_partition.nodes:
+                        n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        self._annotate_adaptive_avg_pool1d(graph_module)
+        graph_module.recompile()
+        return PassResult(graph_module, True)
@@ -7,6 +7,7 @@
 from typing import Any, Dict
 
 import torch
+from executorch.backends.qualcomm.builders.node_visitor import dq_ops, q_ops
 from executorch.backends.qualcomm.builders.utils import get_parameter
 from executorch.backends.qualcomm.utils.constants import (
     QCOM_DTYPE,
@@ -20,7 +21,7 @@
 )
 from executorch.exir.pass_base import ExportPass, PassResult
 
-from .utils import dq_ops, get_quant_attrs, q_ops
+from .utils import get_quant_attrs
 
 
 class AnnotateQuantAttrs(ExportPass):
 
@@ -4,11 +4,12 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 import torch
+from executorch.backends.qualcomm.builders.node_visitor import q_ops
 from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
 
-from .utils import get_quant_attrs, q_ops
+from .utils import get_quant_attrs
 
 
 class AnnotateStack(ExportPass):
 
@@ -4,11 +4,12 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 import torch
+from executorch.backends.qualcomm.builders.node_visitor import dq_ops
 from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
 
-from .utils import dq_ops, get_quant_attrs
+from .utils import get_quant_attrs
 
 
 class AnnotateUnbind(ExportPass):
 
@@ -0,0 +1,93 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import torch
+
+from executorch.exir.pass_base import ExportPass, PassResult
+
+from .utils import copy_nn_module_stack
+
+
+class SliceCopy(torch.nn.Module):
+    def __init__(self, val_shape, shifts, dims):
+        super().__init__()
+        self.val_shape = val_shape
+        if dims[0] is None:
+            self.shifts = [shifts[0] % torch.numel(torch.tensor(val_shape))]
+        else:
+            self.shifts = [shift % val_shape[dim] for shift, dim in zip(shifts, dims)]
+        self.dims = dims
+
+    def forward(self, x):
+        if self.dims[0] is None:
+            y = x.flatten()
+            y = torch.cat((y[-self.shifts[0] :], y[: -self.shifts[0]]))
+            return y.view(self.val_shape)
+
+        for shift, dim in zip(self.shifts, self.dims):
+            x = torch.cat(
+                (
+                    x[(slice(None),) * dim + (slice(-shift, None),)],
+                    x[(slice(None),) * dim + (slice(0, -shift),)],
+                ),
+                dim=dim,
+            )
+        return x
+
+
+class DecomposeRoll(ExportPass):
+    """
+    Decompose roll into slice and cat.
+
+    For every node whose target name contains "roll", a SliceCopy module is
+    built from the input's meta["val"], exported, and its nodes are copied
+    into this graph in front of the roll node, which is then erased.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        """Rewrite roll nodes in place and always report the graph as modified."""
+        graph = graph_module.graph
+        for node in graph.nodes:
+            # NOTE(review): substring match on the stringified target; any op
+            # whose name merely contains "roll" is matched as well.
+            if "roll" in str(node.target):
+                input_node, shifts = node.args[0], node.args[1]
+                # dims is optional; a missing third arg is passed on as None
+                dims = node.args[2] if len(node.args) == 3 else None
+
+                # Normalize shifts and dims to lists
+                shifts = shifts if isinstance(shifts, (list, tuple)) else [shifts]
+                dims = dims if isinstance(dims, (list, tuple)) else [dims]
+
+                # Trace the slice/cat replacement using the input's recorded value
+                model = SliceCopy(input_node.meta["val"].shape, shifts, dims)
+                decomposed_module = torch.export.export(
+                    model, (input_node.meta["val"],), strict=True
+                ).module()
+
+                with graph.inserting_before(node):
+                    # remap is used to map original node values to new node values,
+                    # which ensures that reference to nodes are correctly updated in the new graph
+                    # The "x" key is expected to match the placeholder name, which
+                    # presumably follows SliceCopy.forward's parameter name.
+                    remap = {"x": input_node}
+
+                    for decomposed_node in decomposed_module.graph.nodes:
+                        # presumably carries the roll node's nn_module_stack over
+                        # to the decomposed node -- helper is defined in .utils
+                        copy_nn_module_stack(node, decomposed_node)
+                        # no need to copy existent 'output'
+                        if decomposed_node.op == "output":
+                            # iterate over a copy: replace_input_with is called on
+                            # each user while looping over node.users
+                            for user in node.users.copy():
+                                # remap
+                                # args[0][0] selects the first value returned by
+                                # the decomposed module
+                                user.replace_input_with(
+                                    node,
+                                    remap[decomposed_node.args[0][0]],
+                                )
+                        # no need to copy existent placeholders
+                        elif decomposed_node.op == "placeholder":
+                            # replace node map from string to graph node
+                            remap[decomposed_node] = remap.pop(decomposed_node.name)
+                        else:
+                            # copy the node into this graph, translating its
+                            # arguments through remap (bound as a default argument)
+                            remap[decomposed_node] = graph.node_copy(
+                                decomposed_node,
+                                arg_transform=lambda x, remap=remap: remap[x],
+                            )
+
+                    # the roll node has been replaced for all of its users above
+                    graph.erase_node(node)
+
+        graph.eliminate_dead_code()
+        graph_module.recompile()
+        return PassResult(graph_module, True)
@@ -5,12 +5,11 @@
 # LICENSE file in the root directory of this source tree.
 
 import torch
+from executorch.backends.qualcomm.builders.node_visitor import dq_ops
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 from executorch.exir.passes import dead_code_elimination_pass
 
-from .utils import dq_ops
-
 
 class ExpandBroadcastTensorShape(ExportPass):
     """
 
@@ -4,14 +4,13 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 import torch
+from executorch.backends.qualcomm.builders.node_visitor import dq_ops, q_ops
 from executorch.backends.qualcomm.builders.utils import is_parameter
 from executorch.backends.qualcomm.utils.constants import QCOM_BYPASS_NODE
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 from executorch.exir.passes import dead_code_elimination_pass
 
-from .utils import dq_ops, q_ops
-
 
 class FoldQDQ(ExportPass):
     """