Commit c5174eb

fix nanobind differences (and others)

1 parent 8984cf8

10 files changed: +195 −61 lines changed

examples/mwe.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -131,6 +131,8 @@ def pats():
         .finalize_memref_to_llvm()
         # Convert Func to LLVM (always needed).
         .convert_func_to_llvm()
+        .convert_arith_to_llvm()
+        .convert_cf_to_llvm()
         # Convert Index to LLVM (always needed).
         .convert_index_to_llvm()
         # Convert remaining unrealized_casts (always needed).
```
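Background for the two new conversion calls: upstream MLIR removed the legacy `arith`/`cf` lowering patterns from `convert-func-to-llvm` (see the docstring deletion in `mlir/extras/runtime/passes.py` below), so pipelines that previously relied on them must now lower those dialects explicitly. A minimal sketch of the same lowering chain written against the upstream MLIR Python bindings (the tiny module is illustrative):

```python
from mlir.ir import Context, Module
from mlir.passmanager import PassManager

with Context():
    module = Module.parse("module { func.func @f() { return } }")
    # Same chain as above, spelled as a textual pass pipeline; note that
    # convert-arith-to-llvm and convert-cf-to-llvm are now explicit.
    pm = PassManager.parse(
        "builtin.module("
        "finalize-memref-to-llvm,"
        "convert-func-to-llvm,"
        "convert-arith-to-llvm,"
        "convert-cf-to-llvm,"
        "convert-index-to-llvm,"
        "reconcile-unrealized-casts)"
    )
    pm.run(module.operation)
    print(module)
```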

examples/vectorization_e2e.ipynb

Lines changed: 2 additions & 0 deletions
```diff
@@ -424,6 +424,8 @@
     " .finalize_memref_to_llvm()\n",
     " # Convert Func to LLVM (always needed).\n",
     " .convert_func_to_llvm()\n",
+    " .convert_arith_to_llvm()\n",
+    " .convert_cf_to_llvm()\n",
     " # Convert Index to LLVM (always needed).\n",
     " .convert_index_to_llvm()\n",
     " # Convert remaining unrealized_casts (always needed).\n",
```

mlir/extras/dialects/ext/arith.py

Lines changed: 8 additions & 1 deletion
```diff
@@ -7,6 +7,7 @@
 from typing import Optional, Tuple, Union
 
 from bytecode import ConcreteBytecode
+from einspect.structs import PyTypeObject
 
 from ...ast.canonicalize import StrictTransformer, Canonicalizer, BytecodePatcher
 from ...ast.util import ast_call
@@ -138,7 +139,13 @@ def index_cast(
     )
 
 
-class ArithValueMeta(type(Value)):
+nb_meta_cls = type(Value)
+
+_Py_TPFLAGS_BASETYPE = 1 << 10
+PyTypeObject.from_object(nb_meta_cls).tp_flags |= _Py_TPFLAGS_BASETYPE
+
+
+class ArithValueMeta(nb_meta_cls):
     """Metaclass that orchestrates the Python object protocol
     (i.e., calling __new__ and __init__) for Indexing dialect extension values
     (created using `mlir_value_subclass`).
```
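Why the `tp_flags` poke: the commit works around nanobind's metaclass not having `Py_TPFLAGS_BASETYPE` set, which makes `class ArithValueMeta(type(Value))` fail under nanobind-built bindings; the patch uses `einspect` to flip the flag on `type(Value)` before subclassing it. A small standalone sketch of how the flag can be observed from pure Python (only CPython's documented `__flags__` attribute is used):

```python
# Py_TPFLAGS_BASETYPE marks a type as allowing Python-level subclassing.
_Py_TPFLAGS_BASETYPE = 1 << 10

def is_subclassable(cls: type) -> bool:
    # tp_flags is exposed read-only on every type object as __flags__.
    return bool(cls.__flags__ & _Py_TPFLAGS_BASETYPE)

print(is_subclassable(int))   # True: most built-in types set the flag
print(is_subclassable(bool))  # False: bool explicitly forbids subclassing
```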

mlir/extras/dialects/ext/memref.py

Lines changed: 36 additions & 2 deletions
```diff
@@ -1,5 +1,6 @@
 import inspect
-from typing import Sequence, Union
+from functools import cached_property, reduce
+from typing import Sequence, Union, Tuple
 
 import numpy as np
 
@@ -129,7 +130,40 @@ def store(
 
 
 @register_value_caster(MemRefType.static_typeid)
-class MemRef(Value, ShapedValue):
+class MemRef(Value):
+    @cached_property
+    def literal_value(self) -> np.ndarray:
+        if not self.is_constant:
+            raise ValueError("Can't build literal from non-constant value")
+        return np.array(DenseElementsAttr(self.owner.opview.value), copy=False)
+
+    @cached_property
+    def _shaped_type(self) -> ShapedType:
+        return ShapedType(self.type)
+
+    def has_static_shape(self) -> bool:
+        return self._shaped_type.has_static_shape
+
+    def has_rank(self) -> bool:
+        return self._shaped_type.has_rank
+
+    @cached_property
+    def rank(self) -> int:
+        return self._shaped_type.rank
+
+    @cached_property
+    def shape(self) -> Tuple[int, ...]:
+        return tuple(self._shaped_type.shape)
+
+    @cached_property
+    def n_elements(self) -> int:
+        assert self.has_static_shape()
+        return reduce(lambda acc, v: acc * v, self._shaped_type.shape, 1)
+
+    @cached_property
+    def dtype(self) -> Type:
+        return self._shaped_type.element_type
+
     def __str__(self):
         return f"{self.__class__.__name__}({self.get_name()}, {self.type})"
```
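The same block of shape helpers replaces the `ShapedValue` mixin here and in `Tensor`/`Vector` below; under nanobind the old multiple-inheritance arrangement (a bound `Value` base plus a plain-Python mixin) no longer cooperates, so the helpers are inlined per class. A runnable stand-in showing the `cached_property` + `reduce` pattern these helpers rely on (the class is hypothetical, not part of the repo):

```python
from functools import cached_property, reduce
from typing import Tuple

class ShapedThing:
    """Hypothetical stand-in mirroring the helpers added to MemRef above."""

    def __init__(self, shape):
        self._shape = tuple(shape)

    @cached_property
    def shape(self) -> Tuple[int, ...]:
        return self._shape

    @cached_property
    def n_elements(self) -> int:
        # Same reduce-based product as MemRef.n_elements.
        return reduce(lambda acc, v: acc * v, self._shape, 1)

t = ShapedThing((4, 8))
assert t.n_elements == 32
assert "n_elements" in t.__dict__  # cached_property memoizes per instance
```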

mlir/extras/dialects/ext/tensor.py

Lines changed: 36 additions & 2 deletions
```diff
@@ -1,5 +1,6 @@
 import inspect
 from dataclasses import dataclass
+from functools import cached_property, reduce
 from typing import Any, List, Optional, Tuple, Union, Sequence
 
 # noinspection PyUnresolvedReferences
@@ -20,7 +21,7 @@
 from ....dialects.linalg.opdsl.lang.emitter import _is_index_type
 from ....dialects.tensor import *
 from ....dialects.transform.structured import _get_int_array_array_attr
-from ....ir import RankedTensorType, ShapedType, Type, Value
+from ....ir import RankedTensorType, ShapedType, Type, Value, DenseElementsAttr
 
 S = ShapedType.get_dynamic_size()
 
@@ -109,7 +110,40 @@ def insert_slice(
 
 # TODO(max): unify vector/memref/tensor
 @register_value_caster(RankedTensorType.static_typeid)
-class Tensor(ShapedValue, ArithValue):
+class Tensor(ArithValue):
+    @cached_property
+    def literal_value(self) -> np.ndarray:
+        if not self.is_constant:
+            raise ValueError("Can't build literal from non-constant value")
+        return np.array(DenseElementsAttr(self.owner.opview.value), copy=False)
+
+    @cached_property
+    def _shaped_type(self) -> ShapedType:
+        return ShapedType(self.type)
+
+    def has_static_shape(self) -> bool:
+        return self._shaped_type.has_static_shape
+
+    def has_rank(self) -> bool:
+        return self._shaped_type.has_rank
+
+    @cached_property
+    def rank(self) -> int:
+        return self._shaped_type.rank
+
+    @cached_property
+    def shape(self) -> Tuple[int, ...]:
+        return tuple(self._shaped_type.shape)
+
+    @cached_property
+    def n_elements(self) -> int:
+        assert self.has_static_shape()
+        return reduce(lambda acc, v: acc * v, self._shaped_type.shape, 1)
+
+    @cached_property
+    def dtype(self) -> Type:
+        return self._shaped_type.element_type
+
     def __getitem__(self, idx: tuple) -> "Tensor":
         loc = get_user_code_loc()
```
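`literal_value` converts a constant tensor's `DenseElementsAttr` straight into a NumPy array without copying, relying on the buffer protocol the MLIR Python bindings implement for dense attributes. A minimal sketch against the upstream bindings showing the same zero-copy round trip:

```python
import numpy as np
from mlir.ir import Context, DenseElementsAttr

with Context():
    a = np.arange(6, dtype=np.int32).reshape(2, 3)
    attr = DenseElementsAttr.get(a)    # dense<...> : tensor<2x3xi32>
    back = np.array(attr, copy=False)  # buffer-protocol view, no copy
    assert (back == a).all()
```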

mlir/extras/dialects/ext/vector.py

Lines changed: 41 additions & 4 deletions
```diff
@@ -1,5 +1,8 @@
 import inspect
-from typing import List
+from functools import cached_property, reduce
+from typing import List, Tuple, Type
+
+import numpy as np
 
 from ._shaped_value import ShapedValue
 from .arith import ArithValue, FastMathFlags, constant, Scalar
@@ -10,11 +13,45 @@
 # noinspection PyUnresolvedReferences
 from ....dialects.vector import *
 from ....extras import types as T
-from ....ir import AffineMap, VectorType, Value
+from ....ir import AffineMap, VectorType, Value, DenseElementsAttr, ShapedType
 
 
 @register_value_caster(VectorType.static_typeid)
-class Vector(ShapedValue, ArithValue):
+class Vector(ArithValue):
+
+    @cached_property
+    def literal_value(self) -> np.ndarray:
+        if not self.is_constant:
+            raise ValueError("Can't build literal from non-constant value")
+        return np.array(DenseElementsAttr(self.owner.opview.value), copy=False)
+
+    @cached_property
+    def _shaped_type(self) -> ShapedType:
+        return ShapedType(self.type)
+
+    def has_static_shape(self) -> bool:
+        return self._shaped_type.has_static_shape
+
+    def has_rank(self) -> bool:
+        return self._shaped_type.has_rank
+
+    @cached_property
+    def rank(self) -> int:
+        return self._shaped_type.rank
+
+    @cached_property
+    def shape(self) -> Tuple[int, ...]:
+        return tuple(self._shaped_type.shape)
+
+    @cached_property
+    def n_elements(self) -> int:
+        assert self.has_static_shape()
+        return reduce(lambda acc, v: acc * v, self._shaped_type.shape, 1)
+
+    @cached_property
+    def dtype(self) -> Type:
+        return self._shaped_type.element_type
+
     def __getitem__(self, idx: tuple) -> "Vector":
         loc = get_user_code_loc()
 
@@ -105,7 +142,7 @@ def transfer_read(
     if isinstance(padding, int):
         padding = constant(padding, type=source.type.element_type)
     if in_bounds is None:
-        in_bounds = [None] * len(permutation_map.results)
+        raise ValueError("in_bounds cannot be None")
 
     return _transfer_read(
         vector=vector_t,
```
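The `transfer_read` change is behavioral, not cosmetic: the old fallback built `in_bounds = [None] * n`, likely rejected by the nanobind-built builders where pybind11 used to coerce it, so callers must now pass an explicit list of booleans. A pure-Python sketch of the new contract (the helper is illustrative, not the repo's function):

```python
from typing import List, Optional

def transfer_read_sketch(n_results: int, in_bounds: Optional[List[bool]] = None):
    # New contract: fail loudly instead of materializing [None] * n_results.
    if in_bounds is None:
        raise ValueError("in_bounds cannot be None")
    assert len(in_bounds) == n_results
    return in_bounds

transfer_read_sketch(2, in_bounds=[True, True])  # ok
# transfer_read_sketch(2)  # -> ValueError: in_bounds cannot be None
```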

mlir/extras/runtime/passes.py

Lines changed: 44 additions & 27 deletions
```diff
@@ -252,6 +252,11 @@ def affine_expand_index_ops(self):
         self.add_pass("affine-expand-index-ops")
         return self
 
+    def affine_expand_index_ops_as_affine(self):
+        """Lower affine operations operating on indices into affine.apply operations"""
+        self.add_pass("affine-expand-index-ops-as-affine")
+        return self
+
     def affine_loop_coalescing(self):
         """Coalesce nested loops with independent bounds into a single loop"""
         self.add_pass("affine-loop-coalescing")
@@ -1363,10 +1368,6 @@ def convert_func_to_llvm(
         returns are updated accordingly. Block argument types are updated to use
         LLVM IR types.
 
-        Note that until https://github.com/llvm/llvm-project/issues/70982 is resolved,
-        this pass includes patterns that lower `arith` and `cf` to LLVM. This is legacy
-        code due to when they were all converted in the same pass.
-
         Args:
             use-bare-ptr-memref-call-conv: Replace FuncOp's MemRef arguments with bare pointers to the MemRef element types
             index-bitwidth: Bitwidth of the index type, 0 to use size of machine word
@@ -1398,12 +1399,12 @@ def convert_gpu_launch_to_vulkan_launch(self):
         self.add_pass("convert-gpu-launch-to-vulkan-launch")
         return self
 
-    def convert_gpu_to_llvm_spv(self, index_bitwidth: int = None):
+    def convert_gpu_to_llvm_spv(self, use_64bit_index: bool = None):
         """Generate LLVM operations to be ingested by a SPIR-V backend for gpu operations
         Args:
-            index-bitwidth: Bitwidth of the index type, 0 to use size of machine word
+            use-64bit-index: Use 64-bit integers to convert index types
         """
-        self.add_pass("convert-gpu-to-llvm-spv", index_bitwidth=index_bitwidth)
+        self.add_pass("convert-gpu-to-llvm-spv", use_64bit_index=use_64bit_index)
         return self
 
     def convert_gpu_to_nvvm(
@@ -1597,6 +1598,20 @@ def convert_memref_to_spirv(
         )
         return self
 
+    def convert_mesh_to_mpi(self):
+        """Convert Mesh dialect to MPI dialect.
+
+        This pass converts communication operations from the Mesh dialect to the
+        MPI dialect.
+        If it finds a global named "static_mpi_rank" it will use that splat value
+        instead of calling MPI_Comm_rank. This allows optimizations like constant
+        shape propagation and fusion because shard/partition sizes depend on the
+        rank.
+
+        """
+        self.add_pass("convert-mesh-to-mpi")
+        return self
+
     def convert_nvgpu_to_nvvm(self):
         """Convert NVGPU dialect to NVVM dialect
 
@@ -1715,17 +1730,26 @@ def convert_tensor_to_spirv(self, emulate_lt_32_bit_scalar_types: bool = None):
         )
         return self
 
-    def convert_to_llvm(self, filter_dialects: List[str] = None):
+    def convert_to_llvm(self, filter_dialects: List[str] = None, dynamic: bool = None):
         """Convert to LLVM via dialect interfaces found in the input IR
 
         This is a generic pass to convert to LLVM, it uses the
         `ConvertToLLVMPatternInterface` dialect interface to delegate to dialects
         the injection of conversion patterns.
 
+        If `dynamic` is set to `true`, the pass will look for
+        `ConvertToLLVMAttrInterface` attributes and use them to further configure
+        the conversion process. This option also uses the `DataLayoutAnalysis`
+        analysis to configure the type converter. Enabling this option incurs in
+        extra overhead.
+
         Args:
             filter-dialects: Test conversion patterns of only the specified dialects
+            dynamic: Use op conversion attributes to configure the conversion
         """
-        self.add_pass("convert-to-llvm", filter_dialects=filter_dialects)
+        self.add_pass(
+            "convert-to-llvm", filter_dialects=filter_dialects, dynamic=dynamic
+        )
         return self
 
     def convert_to_spirv(
@@ -2082,23 +2106,6 @@ def finalize_memref_to_llvm(
         )
         return self
 
-    def finalizing_bufferize(self):
-        """Finalize a partial bufferization
-
-        A bufferize pass that finalizes a partial bufferization by removing
-        remaining `bufferization.to_tensor` and `bufferization.to_buffer` operations.
-
-        The removal of those operations is only possible if the operations only
-        exist in pairs, i.e., all uses of `bufferization.to_tensor` operations are
-        `bufferization.to_buffer` operations.
-
-        This pass will fail if not all operations can be removed or if any operation
-        with tensor typed operands remains.
-
-        """
-        self.add_pass("finalizing-bufferize")
-        return self
-
     def fold_memref_alias_ops(self):
         """Fold memref alias ops into consumer load/store ops
 
@@ -2201,6 +2208,7 @@ def gpu_module_to_binary(
         l: List[str] = None,
         opts: str = None,
         format: str = None,
+        section: str = None,
     ):
         """Transforms a GPU module into a GPU binary.
@@ -2219,9 +2227,15 @@
             l: Extra files to link to.
             opts: Command line options to pass to the tools.
             format: The target representation of the compilation process.
+            section: ELF section where binary is to be located.
         """
         self.add_pass(
-            "gpu-module-to-binary", toolkit=toolkit, l=l, opts=opts, format=format
+            "gpu-module-to-binary",
+            toolkit=toolkit,
+            l=l,
+            opts=opts,
+            format=format,
+            section=section,
         )
         return self
 
@@ -2893,6 +2907,7 @@ def one_shot_bufferize(
         no_analysis_func_filter: List[str] = None,
         function_boundary_type_conversion: str = None,
         must_infer_memory_space: bool = None,
+        use_encoding_for_memory_space: bool = None,
         test_analysis_only: bool = None,
         print_conflicts: bool = None,
         unknown_type_conversion: str = None,
@@ -3017,6 +3032,7 @@ def one_shot_bufferize(
             no-analysis-func-filter: Skip analysis of functions with these symbol names.Set copyBeforeWrite to true when bufferizing them.
             function-boundary-type-conversion: Controls layout maps when bufferizing function signatures.
             must-infer-memory-space: The memory space of an memref types must always be inferred. If unset, a default memory space of 0 is used otherwise.
+            use-encoding-for-memory-space: Use the Tensor encoding attribute for the memory space. Exclusive to the 'must-infer-memory-space' option
             test-analysis-only: Test only: Only run inplaceability analysis and annotate IR
             print-conflicts: Test only: Annotate IR with RaW conflicts. Requires test-analysis-only.
             unknown-type-conversion: Controls layout maps for non-inferrable memref types.
@@ -3036,6 +3052,7 @@ def one_shot_bufferize(
             no_analysis_func_filter=no_analysis_func_filter,
             function_boundary_type_conversion=function_boundary_type_conversion,
             must_infer_memory_space=must_infer_memory_space,
+            use_encoding_for_memory_space=use_encoding_for_memory_space,
             test_analysis_only=test_analysis_only,
             print_conflicts=print_conflicts,
             unknown_type_conversion=unknown_type_conversion,
```
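Two of the new pass-builder knobs in action, as a hedged sketch (it assumes the chainable builder exported by this module is named `Pipeline` and that `run_pipeline` is the repo's driver, as the examples suggest):

```python
from mlir.extras.runtime.passes import Pipeline, run_pipeline

# Illustrative: bufferize using tensor encodings as memory spaces, then take
# the generic interface-driven route to LLVM with dynamic attribute lookup.
pipeline = (
    Pipeline()
    .one_shot_bufferize(use_encoding_for_memory_space=True)
    .convert_to_llvm(dynamic=True)
    .reconcile_unrealized_casts()
)
# module = run_pipeline(module, pipeline)  # module: an mlir.ir.Module
```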

tests/test_async.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -67,6 +67,7 @@ def test_simple_parfor(ctx: MLIRContext, backend: LLVMJITBackend):
         .convert_arith_to_llvm()
         .finalize_memref_to_llvm()
         .convert_func_to_llvm()
+        .convert_cf_to_llvm()
         .reconcile_unrealized_casts(),
     generate_kernel_wrapper=True,
     generate_return_consumer=True,
```
