makslevental
diff --git a/tests/test_arith.py b/tests/test_arith.py
Lines changed: 2 additions & 6 deletions
diff --git a/tests/test_async.py b/tests/test_async.py
Lines changed: 1 addition & 3 deletions
diff --git a/tests/test_func.py b/tests/test_func.py
Lines changed: 17 additions & 21 deletions
diff --git a/tests/test_gpu.py b/tests/test_gpu.py
Lines changed: 26 additions & 29 deletions
diff --git a/tests/test_linalg.py b/tests/test_linalg.py
Lines changed: 1 addition & 4 deletions
diff --git a/tests/test_llvm.py b/tests/test_llvm.py
Lines changed: 1 addition & 2 deletions
diff --git a/tests/test_memref.py b/tests/test_memref.py
Lines changed: 6 additions & 7 deletions
@@ -1,13 +1,9 @@
-from textwrap import dedent
-
 import mlir.extras.types as T
 import pytest
 
 from mlir.extras.ast.canonicalize import canonicalize
 from mlir.extras.dialects.ext import arith
-from mlir.extras.dialects.ext.arith import Scalar
 from mlir.extras.dialects.ext.func import func
-
 # noinspection PyUnresolvedReferences
 from mlir.extras.testing import (
     mlir_ctx as ctx,
@@ -50,8 +46,8 @@ def test_arithmetic(ctx: MLIRContext):
         one // two
     except ValueError as e:
         assert (
-            str(e)
-            == "floordiv not supported for lhs=Scalar(%cst = arith.constant 1.000000e+00 : f32)"
+                str(e)
+                == "floordiv not supported for lhs=Scalar(%cst = arith.constant 1.000000e+00 : f32)"
         )
     one % two
 
 
@@ -1,13 +1,11 @@
 import platform
-
-import numpy as np
 from textwrap import dedent
 
+import numpy as np
 import pytest
 
 from mlir.extras.runtime.passes import Pipeline
 from mlir.extras.runtime.refbackend import LLVMJITBackend
-
 # noinspection PyUnresolvedReferences
 from mlir.extras.testing import mlir_ctx as ctx, filecheck, MLIRContext, backend
 
 
@@ -1,20 +1,17 @@
 import inspect
 import sys
 import threading
-from textwrap import dedent
 from typing import TypeVar
 
-import pytest
-
 import mlir.extras.types as T
+import pytest
+from mlir.ir import FunctionType
 
 from mlir.extras.ast.canonicalize import canonicalize
 from mlir.extras.context import mlir_mod_ctx, RAIIMLIRContextModule
+from mlir.extras.dialects.ext import linalg, arith, scf
 from mlir.extras.dialects.ext.arith import constant
 from mlir.extras.dialects.ext.func import func
-from mlir.extras.dialects.ext import linalg, arith, scf, memref
-from mlir.ir import FunctionType
-
 # noinspection PyUnresolvedReferences
 from mlir.extras.testing import (
     mlir_ctx as ctx,
@@ -46,7 +43,8 @@ def demo_fun1():
 
 
 def test_declare_byte_rep(ctx: MLIRContext):
-    def demo_fun1(): ...
+    def demo_fun1():
+        ...
 
     if sys.version_info.minor == 13:
         assert demo_fun1.__code__.co_code == b"\x95\x00g\x00"
@@ -152,9 +150,9 @@ def foo1():
 
 @func(generics=list(map(TypeVar, ["M", "N"])))
 def matmul_i32_i32(
-    A: "T.memref(M, N, T.i32())",
-    B: "T.memref(M, N, T.i32())",
-    C: "T.memref(M, N, T.i32())",
+        A: "T.memref(M, N, T.i32())",
+        B: "T.memref(M, N, T.i32())",
+        C: "T.memref(M, N, T.i32())",
 ):
     linalg.matmul(A, B, C)
 
@@ -171,12 +169,11 @@ def test_func_no_context_2(ctx: MLIRContext):
 
 
 def test_generics_just_args(ctx: MLIRContext):
-
     @func(generics=generics)
     def mat_product_kernel(
-        A: "T.memref(M, K, dtype)",
-        B: "T.memref(K, N, dtype)",
-        C: "T.memref(M, N, dtype)",
+            A: "T.memref(M, K, dtype)",
+            B: "T.memref(K, N, dtype)",
+            C: "T.memref(M, N, dtype)",
     ):
         one = arith.constant(1.0, dtype)
 
@@ -195,9 +192,9 @@ def test_generics_closure(ctx: MLIRContext):
 
     @func(generics=generics)
     def mat_product_kernel(
-        A: "T.memref(M, K, dtype)",
-        B: "T.memref(K, N, dtype)",
-        C: "T.memref(M, N, dtype)",
+            A: "T.memref(M, K, dtype)",
+            B: "T.memref(K, N, dtype)",
+            C: "T.memref(M, N, dtype)",
     ):
         one = arith.constant(1, dtype)
 
@@ -212,15 +209,14 @@ def mat_product_kernel(
 
 
 def test_generics_with_canonicalizations(ctx: MLIRContext):
-
     generics = M, K, N, dtype = list(map(TypeVar, ["M", "K", "N", "dtype"]))
 
     @func(generics=generics)
     @canonicalize(using=(arith.canonicalizer, scf.canonicalizer))
     def mat_product_kernel(
-        A: "T.memref(M, K, dtype)",
-        B: "T.memref(K, N, dtype)",
-        C: "T.memref(M, N, dtype)",
+            A: "T.memref(M, K, dtype)",
+            B: "T.memref(K, N, dtype)",
+            C: "T.memref(M, N, dtype)",
     ):
         x = arith.constant(1, index=True)
         y = arith.constant(1, index=True)
 
@@ -17,7 +17,6 @@
 from mlir.extras.ast.canonicalize import canonicalize
 from mlir.extras.dialects.ext import arith, scf, memref, rocdl, gpu
 from mlir.extras.dialects.ext.func import func
-
 # noinspection PyUnresolvedReferences
 from mlir.extras.dialects.ext.gpu import (
     all_reduce,
@@ -38,7 +37,6 @@
 from mlir.extras.dialects.ext.scf import forall, in_parallel_
 from mlir.extras.dialects.ext.vector import outer, load, shuffle, print_
 from mlir.extras.runtime.passes import run_pipeline, Pipeline
-
 # noinspection PyUnresolvedReferences
 from mlir.extras.testing import (
     mlir_ctx as ctx,
@@ -78,10 +76,10 @@ def test_forall_insert_slice_no_region_with_for_with_gpu_mapping(ctx: MLIRContex
     alpha = arith.constant(1, T.f32())
 
     for i, j in forall(
-        [1, 1],
-        [2, 2],
-        [3, 3],
-        device_mapping=[thread("x"), thread("y")],
+            [1, 1],
+            [2, 2],
+            [3, 3],
+            device_mapping=[thread("x"), thread("y")],
     ):
         a = memref.load(x, (i, j))
         b = memref.load(y, (i, j))
@@ -119,9 +117,9 @@ class MyClass1(metaclass=GPUModuleMeta, targets=["#nvvm.target"]):
         @gpu_func(emit=True)
         @canonicalize(using=scf.canonicalizer)
         def mat_product_kernel(
-            A: T.memref(M, N, T.f32()),
-            B: T.memref(N, K, T.f32()),
-            C: T.memref(M, K, T.f32()),
+                A: T.memref(M, N, T.f32()),
+                B: T.memref(N, K, T.f32()),
+                C: T.memref(M, K, T.f32()),
         ):
             x = block_idx.x
             y = block_idx.y
@@ -156,9 +154,9 @@ class MyClass1(metaclass=GPUModuleMeta, targets=["#nvvm.target"]):
         @gpu_func(emit=True, emit_grid=True)
         @canonicalize(using=scf.canonicalizer)
         def mat_product_kernel(
-            A: T.memref(M, N, T.f32()),
-            B: T.memref(N, K, T.f32()),
-            C: T.memref(M, K, T.f32()),
+                A: T.memref(M, N, T.f32()),
+                B: T.memref(N, K, T.f32()),
+                C: T.memref(M, K, T.f32()),
         ):
             x = block_idx.x
             y = block_idx.y
@@ -214,9 +212,9 @@ class MyClass1(metaclass=GPUModuleMeta, targets=["#nvvm.target"]):
         @gpu_func(emit=True, emit_grid=True)
         @canonicalize(using=scf.canonicalizer)
         def mat_product_kernel(
-            A: T.memref(M, N, T.f32()),
-            B: T.memref(N, K, T.f32()),
-            C: T.memref(M, K, T.f32()),
+                A: T.memref(M, N, T.f32()),
+                B: T.memref(N, K, T.f32()),
+                C: T.memref(M, K, T.f32()),
         ):
             x = block_idx.x
             y = block_idx.y
@@ -283,9 +281,9 @@ class MyClass1(metaclass=GPUModuleMeta, targets=["#nvvm.target"]):
         @gpu_func(emit=True, emit_grid=True)
         @canonicalize(using=scf.canonicalizer)
         def mat_product_kernel(
-            A: T.memref(M, N, T.f32()),
-            B: T.memref(N, K, T.f32()),
-            C: T.memref(M, K, T.f32()),
+                A: T.memref(M, N, T.f32()),
+                B: T.memref(N, K, T.f32()),
+                C: T.memref(M, K, T.f32()),
         ):
             x = block_idx.x
             y = block_idx.y
@@ -349,7 +347,7 @@ def main():
         data = memref.alloc((2, 6), T.i32())
         sum = memref.alloc((2,), T.i32())
 
-        power_csts = [arith.constant(0)] + [arith.constant(2**i) for i in range(5)]
+        power_csts = [arith.constant(0)] + [arith.constant(2 ** i) for i in range(5)]
         odd_csts = [
             arith.constant(3),
             arith.constant(6),
@@ -440,7 +438,7 @@ def main():
         data = memref.alloc((2, 6), T.i32())
         sum = memref.alloc((2,), T.i32())
 
-        power_csts = [arith.constant(0)] + [arith.constant(2**i) for i in range(5)]
+        power_csts = [arith.constant(0)] + [arith.constant(2 ** i) for i in range(5)]
         odd_csts = [
             arith.constant(3),
             arith.constant(6),
@@ -710,14 +708,13 @@ def _():
 
 
 def test_amdgpu(ctx: MLIRContext):
-
     set_container_module(ctx.module)
 
     M, K, N, dtype = 32, 32, 32, T.f32()
 
     @gpu_func
     def mat_product_kernel(
-        A: T.memref(M, K, dtype), B: T.memref(K, N, dtype), C: T.memref(M, N, dtype)
+            A: T.memref(M, K, dtype), B: T.memref(K, N, dtype), C: T.memref(M, N, dtype)
     ):
         x = block_dim.x * block_idx.x + thread_idx.x
         y = block_dim.y * block_idx.y + thread_idx.y
@@ -829,7 +826,7 @@ def test_amdgpu_square(ctx: MLIRContext):
 
     @gpu_func
     def mat_product_kernel(
-        A: T.memref(M, K, dtype), B: T.memref(K, N, dtype), C: T.memref(M, N, dtype)
+            A: T.memref(M, K, dtype), B: T.memref(K, N, dtype), C: T.memref(M, N, dtype)
     ):
         x = block_dim.x * block_idx.x + thread_idx.x
         y = block_dim.y * block_idx.y + thread_idx.y
@@ -943,9 +940,9 @@ def test_amdgpu_vector(ctx: MLIRContext):
 
     @gpu_func
     def smol_matmul(
-        A: T.memref(M, K, T.f32()),
-        B: T.memref(K, N, T.f32()),
-        C: T.memref(M, N, T.f32()),
+            A: T.memref(M, K, T.f32()),
+            B: T.memref(K, N, T.f32()),
+            C: T.memref(M, N, T.f32()),
     ):
         cst = arith.constant(np.full([4], 0.0, np.float32), T.vector(4, T.f32()))
         cst_0 = arith.constant(
@@ -1186,9 +1183,9 @@ def test_amdgpu_vector_wmma(ctx: MLIRContext):
     @gpu_func
     @canonicalize(using=scf.canonicalizer)
     def smol_matmul(
-        a: T.memref(M, K, T.f16()),
-        b: T.memref(K, N, T.f16()),
-        c: T.memref(M, N, T.f16()),
+            a: T.memref(M, K, T.f16()),
+            b: T.memref(K, N, T.f16()),
+            c: T.memref(M, N, T.f16()),
     ):
         lIdx = thread_idx.x
         # a and b fragments are stored in 8 VGPRs each, in packed format, so 16 elements each for a and b
 
@@ -1,10 +1,7 @@
-from textwrap import dedent
-
+import mlir.extras.types as T
 import pytest
 
-import mlir.extras.types as T
 from mlir.extras.dialects.ext import linalg, memref, tensor
-
 # noinspection PyUnresolvedReferences
 from mlir.extras.testing import (
     MLIRContext,
 
@@ -1,11 +1,10 @@
 from textwrap import dedent
 
+import mlir.extras.types as T
 import pytest
 
-import mlir.extras.types as T
 from mlir.extras.dialects.ext import llvm
 from mlir.extras.dialects.ext.func import func
-
 # noinspection PyUnresolvedReferences
 from mlir.extras.testing import MLIRContext, filecheck, mlir_ctx as ctx
 from util import llvm_bindings_not_installed
 
@@ -24,7 +24,6 @@
     yield_,
     canonicalizer,
 )
-
 # noinspection PyUnresolvedReferences
 from mlir.extras.testing import (
     mlir_ctx as ctx,
@@ -187,8 +186,8 @@ def test_ellipsis_and_full_slice_plus_coordinate_1(ctx: MLIRContext):
         w = mem[1, :, :, :, :]
     except IndexError as e:
         assert (
-            str(e)
-            == "Too many indices for shaped type with rank: 5 non-None/Ellipsis indices for dim 4."
+                str(e)
+                == "Too many indices for shaped type with rank: 5 non-None/Ellipsis indices for dim 4."
         )
 
 
@@ -205,7 +204,7 @@ def test_ellipsis_and_full_slice_plus_coordinate_2(ctx: MLIRContext):
 
     golden_w_1_strides = (np.array(golden_w_1.strides) // dtype_size_in_bytes).tolist()
     golden_w_1_rank_reduce_strides = (
-        np.array(golden_w_1_rank_reduce.strides) // dtype_size_in_bytes
+            np.array(golden_w_1_rank_reduce.strides) // dtype_size_in_bytes
     ).tolist()
     golden_w_2_strides = (np.array(golden_w_2.strides) // dtype_size_in_bytes).tolist()
     golden_w_3_strides = (np.array(golden_w_3.strides) // dtype_size_in_bytes).tolist()
@@ -214,7 +213,7 @@ def test_ellipsis_and_full_slice_plus_coordinate_2(ctx: MLIRContext):
 
     golden_w_1_offset = get_np_view_offset(golden_w_1) // dtype_size_in_bytes
     golden_w_1_rank_reduce_offset = (
-        get_np_view_offset(golden_w_1_rank_reduce) // dtype_size_in_bytes
+            get_np_view_offset(golden_w_1_rank_reduce) // dtype_size_in_bytes
     )
     golden_w_2_offset = get_np_view_offset(golden_w_2) // dtype_size_in_bytes
     golden_w_3_offset = get_np_view_offset(golden_w_3) // dtype_size_in_bytes
@@ -276,10 +275,10 @@ def test_ellipsis_and_full_slice_plus_coordinate_3(ctx: MLIRContext):
     golden_w_8_strides = (np.array(golden_w_8.strides) // dtype_size_in_bytes).tolist()
     golden_w_9_strides = (np.array(golden_w_9.strides) // dtype_size_in_bytes).tolist()
     golden_w_10_strides = (
-        np.array(golden_w_10.strides) // dtype_size_in_bytes
+            np.array(golden_w_10.strides) // dtype_size_in_bytes
     ).tolist()
     golden_w_11_strides = (
-        np.array(golden_w_11.strides) // dtype_size_in_bytes
+            np.array(golden_w_11.strides) // dtype_size_in_bytes
     ).tolist()
 
     golden_w_1_offset = get_np_view_offset(golden_w_1) // dtype_size_in_bytes