
Commit d345278

Authored by SS-JIA, committed by facebook-github-bot
Fix stories model export (#3622)
Summary: Pull Request resolved: #3622

## Context

With the number of newly added operators, exporting the Stories model via the `export_llama` script is currently broken. This changeset mainly upgrades `VulkanPartitioner` to allow finer-grained control over which nodes are partitioned:

1. Update how operators are listed, and allow specifying features for specific operators.
2. Check a node's arguments and output to verify that they are valid before marking the node as supported.
3. Allow the partitioner to select only operators that support dynamic shapes.

ghstack-source-id: 226482740
Reviewed By: copyrightly, jorgep31415
Differential Revision: D57385871
fbshipit-source-id: 0ac6f3c1394541d2a5b3db9a443cca4b48dd89eb
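As a rough illustration of how the new `require_dynamic_shapes` knob is meant to be used, here is a minimal sketch of a standard ExecuTorch lowering flow; the toy module is a hypothetical stand-in for the Stories model, and this is not code from `export_llama` itself:

```python
import torch

from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner
from executorch.exir import to_edge


class TinyModel(torch.nn.Module):
    # Hypothetical toy model standing in for the Stories model.
    def forward(self, x, y):
        return torch.relu(x + y)


inputs = (torch.randn(1, 4), torch.randn(1, 4))
edge = to_edge(torch.export.export(TinyModel(), inputs))

# With require_dynamic_shapes=True, the partitioner only claims nodes whose
# OpFeatures entry reports supports_dynamic_shape=True.
edge = edge.to_backend(VulkanPartitioner({"require_dynamic_shapes": True}))
```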
Parent commit: 6b6c1fa

File tree: 5 files changed, +248 −63 lines

backends/vulkan/partitioner/TARGETS (+1)

```diff
@@ -5,6 +5,7 @@ oncall("executorch")
 runtime.python_library(
     name = "vulkan_partitioner",
     srcs = [
+        "supported_ops.py",
         "vulkan_partitioner.py",
     ],
     visibility = [
```
backends/vulkan/partitioner/supported_ops.py (new file, +148)

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import operator

from executorch.exir.dialects._ops import ops as exir_ops


class OpFeatures:
    __slots__ = ["supports_texture", "supports_buffer", "supports_dynamic_shape"]

    def __init__(
        self,
        supports_dynamic_shape: bool = False,
        supports_buffer: bool = False,
        supports_texture: bool = True,
    ):
        self.supports_dynamic_shape = supports_dynamic_shape
        self.supports_texture = supports_texture
        self.supports_buffer = supports_buffer


class OpList:
    def __init__(self):
        self._ops = {}

    def __getitem__(self, op):
        if op not in self._ops:
            self._ops[op] = OpFeatures()
        return self._ops[op]

    def __contains__(self, op):
        return op in self._ops


PRIM_OPS = [
    operator.getitem,
]

BINARY_OPS = [
    exir_ops.edge.aten.add.Tensor,
    exir_ops.edge.aten.sub.Tensor,
    exir_ops.edge.aten.mul.Tensor,
    exir_ops.edge.aten.div.Tensor,
    exir_ops.edge.aten.div.Tensor_mode,
    exir_ops.edge.aten.pow.Tensor_Tensor,
]

UNARY_OPS = [
    exir_ops.edge.aten.abs.default,
    exir_ops.edge.aten.clamp.default,
    exir_ops.edge.aten.gelu.default,
    exir_ops.edge.aten.hardtanh.default,
    exir_ops.edge.aten.relu.default,
    exir_ops.edge.aten.sigmoid.default,
    exir_ops.edge.aten.sqrt.default,
    exir_ops.edge.aten.tanh.default,
]

MATMUL_OPS = [
    exir_ops.edge.aten.bmm.default,
    exir_ops.edge.aten.mm.default,
    exir_ops.edge.aten.addmm.default,
    exir_ops.edge.aten.linear.default,
]

POOLING_OPS = [
    exir_ops.edge.aten.max_pool2d_with_indices.default,
]

CONVOLUTION_OPS = [
    exir_ops.edge.aten.convolution.default,
]

REDUCTION_OPS = [
    exir_ops.edge.aten.sum.dim_IntList,
    exir_ops.edge.aten._softmax.default,
    exir_ops.edge.aten._log_softmax.default,
]

NORMALIZATION_OPS = [
    exir_ops.edge.aten.native_layer_norm.default,
]

SHAPE_MANIPULATION_OPS = [
    exir_ops.edge.aten.unsqueeze_copy.default,
    exir_ops.edge.aten.view_copy.default,
    exir_ops.edge.aten.permute_copy.default,
    exir_ops.edge.aten.t_copy.default,
]

INDEXING_OPS = [
    exir_ops.edge.aten.select_copy.int,
    exir_ops.edge.aten.slice_copy.Tensor,
]

ORCHESTRATION_OPS = [
    exir_ops.edge.aten.cat.default,
    exir_ops.edge.aten.split_with_sizes_copy.default,
    exir_ops.edge.aten.split.Tensor,
    exir_ops.edge.aten.repeat.default,
]

CREATION_OPS = [
    exir_ops.edge.aten.clone.default,
    exir_ops.edge.aten.full.default,
]


def register_prim_ops(ops: OpList):
    for op in PRIM_OPS:
        ops[op].supports_texture = True
        ops[op].supports_buffer = True
        ops[op].supports_dynamic_shape = True


def register_no_dynamic_shape_ops(ops: OpList):
    for op in [
        *REDUCTION_OPS,
        *NORMALIZATION_OPS,
        *SHAPE_MANIPULATION_OPS,
        *INDEXING_OPS,
        *ORCHESTRATION_OPS,
        *CREATION_OPS,
    ]:
        ops[op].supports_dynamic_shape = False


def register_dynamic_shape_ops(ops: OpList):
    for op in [
        *BINARY_OPS,
        *UNARY_OPS,
        *MATMUL_OPS,
        *POOLING_OPS,
        *CONVOLUTION_OPS,
    ]:
        ops[op].supports_dynamic_shape = True


def enumerate_supported_ops():
    ops = OpList()
    register_prim_ops(ops)
    register_no_dynamic_shape_ops(ops)
    register_dynamic_shape_ops(ops)
    return ops
```
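To make the registry's behavior concrete, here is a short usage sketch of the module above. Note the caveat baked into `OpList.__getitem__`: indexing an unknown op silently registers it with default `OpFeatures`, so membership should be tested with `in` first, exactly as the partitioner does:

```python
from executorch.backends.vulkan.partitioner.supported_ops import enumerate_supported_ops
from executorch.exir.dialects._ops import ops as exir_ops

ops = enumerate_supported_ops()

# Membership check, mirroring the partitioner's first test.
target = exir_ops.edge.aten.add.Tensor
assert target in ops

# add.Tensor is registered through register_dynamic_shape_ops, so:
features = ops[target]
assert features.supports_dynamic_shape  # True
assert features.supports_texture        # True (the OpFeatures default)

# native_layer_norm goes through register_no_dynamic_shape_ops instead:
assert not ops[exir_ops.edge.aten.native_layer_norm.default].supports_dynamic_shape
```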

backends/vulkan/partitioner/vulkan_partitioner.py (+86 −60)

```diff
@@ -4,12 +4,13 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import operator
 from typing import Any, Dict, final, List, Optional
 
 import executorch.backends.vulkan.serialization.vulkan_graph_schema as vk_graph_schema
 
 import torch
+
+from executorch.backends.vulkan.partitioner.supported_ops import enumerate_supported_ops
 from executorch.backends.vulkan.vulkan_preprocess import VulkanBackend
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from executorch.exir.backend.partitioner import (
@@ -19,88 +20,113 @@
 )
 from executorch.exir.backend.utils import tag_constant_data
 from executorch.exir.dialects._ops import ops as exir_ops
+
+from torch._subclasses.fake_tensor import FakeTensor
 from torch.export.exported_program import ExportedProgram
 from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner
 
 from torch.fx.passes.operator_support import OperatorSupportBase
 
 
 class VulkanSupportedOperators(OperatorSupportBase):
-    def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
-        supported = node.op == "call_function" and node.target in [
-            # Binary arithmetic operators
-            exir_ops.edge.aten.add.Tensor,
-            exir_ops.edge.aten.sub.Tensor,
-            exir_ops.edge.aten.mul.Tensor,
-            exir_ops.edge.aten.div.Tensor,
-            exir_ops.edge.aten.div.Tensor_mode,
-            exir_ops.edge.aten.pow.Tensor_Tensor,
-            # Unary operators
-            exir_ops.edge.aten.abs.default,
-            exir_ops.edge.aten.clamp.default,
-            exir_ops.edge.aten.gelu.default,
-            exir_ops.edge.aten.hardtanh.default,
-            exir_ops.edge.aten.relu.default,
-            exir_ops.edge.aten.sigmoid.default,
-            exir_ops.edge.aten.sqrt.default,
-            exir_ops.edge.aten.tanh.default,
-            # Matrix multiplication operators
-            exir_ops.edge.aten.bmm.default,
+    _ops = enumerate_supported_ops()
+
+    def __init__(self, require_dynamic_shape: bool = False):
+        super().__init__()
+        self.require_dynamic_shapes = require_dynamic_shape
+
+    def node_val_is_compatible(self, node_val: Any) -> bool:
+        # Skip nodes that don't have a value
+        if node_val is None:
+            return True
+
+        # TODO(ssjia) support symbolic ints
+        if isinstance(node_val, torch.SymInt):
+            return False
+
+        if isinstance(node_val, FakeTensor):
+            # Vulkan currently only supports tensors of up to 4D
+            if len(node_val.shape) > 4:
+                return False
+
+        if isinstance(node_val, (list, tuple)):
+            for item in node_val:
+                if not self.node_val_is_compatible(item):
+                    return False
+
+        return True
+
+    def all_args_compatible(self, node: torch.fx.Node) -> bool:
+        node_val = node.meta.get("val", None)
+        if not self.node_val_is_compatible(node_val):
+            return False
+
+        for arg in node.args:
+            if not isinstance(arg, torch.fx.Node):
+                continue
+
+            arg_val = arg.meta.get("val", None)
+            if not self.node_val_is_compatible(arg_val):
+                return False
+
+        return True
+
+    def is_linear_permute(self, node: torch.fx.Node) -> bool:
+        if node.target not in [
+            exir_ops.edge.aten.t_copy.default,
+            exir_ops.edge.aten.permute_copy.default,
+        ]:
+            return False
+
+        if len(node.users) != 1:
+            return False
+
+        if list(node.users.keys())[0].target in [
             exir_ops.edge.aten.mm.default,
             exir_ops.edge.aten.addmm.default,
-            # Pooling operators
-            exir_ops.edge.aten.max_pool2d_with_indices.default,
-            # Sum
-            exir_ops.edge.aten.sum.dim_IntList,
-            # Convolution operators
-            exir_ops.edge.aten.convolution.default,
-            # Normalization
-            exir_ops.edge.aten.native_layer_norm.default,
-            # Shape-related operators
-            exir_ops.edge.aten.select_copy.int,
-            exir_ops.edge.aten.unsqueeze_copy.default,
-            exir_ops.edge.aten.view_copy.default,
-            # Copy-releated operators
-            exir_ops.edge.aten.permute_copy.default,
-            exir_ops.edge.aten.clone.default,
-            exir_ops.edge.aten.cat.default,
-            exir_ops.edge.aten.split_with_sizes_copy.default,
-            exir_ops.edge.aten.split.Tensor,
-            exir_ops.edge.aten.slice_copy.Tensor,
-            exir_ops.edge.aten.repeat.default,
-            # Softmax
-            exir_ops.edge.aten._softmax.default,
-            exir_ops.edge.aten._log_softmax.default,
-            # Other
-            operator.getitem,
-            exir_ops.edge.aten.full.default,
-        ]
-        return supported
-
-
-def parse_compile_options(
-    compile_options: Optional[Dict[str, Any]] = None
-) -> List[CompileSpec]:
+        ]:
+            return True
+
+        return False
+
+    def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
+        if self.is_linear_permute(node):
+            return True
+
+        if node.target not in VulkanSupportedOperators._ops:
+            return False
+
+        features = VulkanSupportedOperators._ops[node.target]
+
+        if self.require_dynamic_shapes and not features.supports_dynamic_shape:
+            return False
+
+        return self.all_args_compatible(node)
+
+
+def parse_compile_options(compile_options: Dict[str, Any]) -> List[CompileSpec]:
     compile_specs = []
-    if compile_options is None:
-        return compile_specs
 
     for key, value in compile_options.items():
         if isinstance(
             value, (vk_graph_schema.VkStorageType, vk_graph_schema.VkMemoryLayout)
         ):
             value_bytes = int(value).to_bytes(4, byteorder="little")
             compile_specs.append(CompileSpec(key, value_bytes))
-        else:
-            raise RuntimeError(f"Invalid compile option {key} with type {type(value)}")
+
+        # Unhandled options are ignored
 
     return compile_specs
 
 
 @final
 class VulkanPartitioner(Partitioner):
     def __init__(self, compile_options: Optional[Dict[str, Any]] = None) -> None:
-        compile_spec = parse_compile_options(compile_options)
+        self.options: Dict[str, Any] = {}
+        if compile_options is not None:
+            self.options = compile_options
+
+        compile_spec = parse_compile_options(self.options)
         self.delegation_spec = DelegationSpec(VulkanBackend.__name__, compile_spec)
 
     def partition(self, exported_program: ExportedProgram) -> PartitionResult:
@@ -110,7 +136,7 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
 
         capability_partitioner = CapabilityBasedPartitioner(
             exported_program.graph_module,
-            VulkanSupportedOperators(),
+            VulkanSupportedOperators(self.options.get("require_dynamic_shapes", False)),
             allows_single_node_partition=True,
         )
         partition_list = capability_partitioner.propose_partitions()
```
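Two behavioral notes on the hunk above. First, `is_node_supported` accepts `t_copy`/`permute_copy` nodes whose single user is an `mm`/`addmm` via `is_linear_permute`, presumably so they can be handled together with the matmul they feed even though they are not dynamic-shape ops on their own. Second, `parse_compile_options` now silently skips unrecognized keys instead of raising, which is what allows `require_dynamic_shapes` to ride in through the same options dict. A quick sketch of the serialization path (the option key names here are hypothetical examples; the enum member is picked generically so the sketch does not depend on specific member names):

```python
import executorch.backends.vulkan.serialization.vulkan_graph_schema as vk_graph_schema
from executorch.backends.vulkan.partitioner.vulkan_partitioner import parse_compile_options

# Pick some VkMemoryLayout member; which one is irrelevant for this sketch.
layout = next(iter(vk_graph_schema.VkMemoryLayout))

options = {
    # Enum-typed values become 4-byte little-endian CompileSpecs.
    "memory_layout_override": layout,
    # Non-enum values such as this flag are now silently skipped rather
    # than raising a RuntimeError as before.
    "require_dynamic_shapes": True,
}

for spec in parse_compile_options(options):
    print(spec.key, spec.value)  # only memory_layout_override is emitted
```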

backends/vulkan/serialization/vulkan_graph_builder.py (+11)

```diff
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import logging
 import operator
 from types import NoneType
 from typing import cast, List, Optional, Union
@@ -36,6 +37,9 @@ def __init__(self, program: ExportedProgram) -> None:
         # Mapping from Node to VkValue id
         self.node_to_value_ids = {}
 
+        # For logging
+        self.seen_ops = set()
+
     @staticmethod
     def get_vk_datatype(torch_dtype: torch.dtype) -> vk_graph_schema.VkDataType:
         if torch_dtype == torch.bool:
@@ -230,6 +234,7 @@ def get_or_create_value_for(self, arg: _Argument):
             or isinstance(arg, torch.device)
             or isinstance(arg, torch.dtype)
             or isinstance(arg, torch.layout)
+            or isinstance(arg, torch.memory_format)
         ):
             return self.create_null_value()
         elif isinstance(arg, _ScalarType):
@@ -271,6 +276,8 @@ def process_getitem_node(self, node: Node) -> None:
     def process_call_function_node(self, node) -> None:
         operator_call_args = []
 
+        self.seen_ops.add(node.target)
+
         for i, schema_arg in enumerate(node.target._schema.arguments):
             if not schema_arg.kwarg_only and i < len(node.args):
                 function_arg = node.args[i]
@@ -325,6 +332,10 @@ def build_graph(self) -> vk_graph_schema.VkGraph:
         for node in self.program.graph_module.graph.nodes:
             self.process_node(node)
 
+        logging.info("Operators included in this Vulkan partition: ")
+        for op in self.seen_ops:
+            logging.info(f"    {op.__name__}")
+
         return vk_graph_schema.VkGraph(
             version="0",
             chain=self.chain,
```
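Since the new operator summary goes through the standard `logging` module at INFO level, it only shows up if the host process opts in; a minimal sketch (the operator names in the sample output are illustrative):

```python
import logging

# Enable INFO logs before lowering so the Vulkan graph builder prints its
# per-partition operator summary from build_graph().
logging.basicConfig(level=logging.INFO)

# ... run the usual export + to_backend(VulkanPartitioner(...)) flow here ...
#
# Expected output, roughly:
#   INFO:root:Operators included in this Vulkan partition:
#   INFO:root:    add.Tensor
#   INFO:root:    mm.default
```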
