diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a143738c..80fedf99 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -59,6 +59,25 @@ jobs:
           python-version: ${{ matrix.py_version }}
           allow-prereleases: true
 
+      - name: Free disk space
+        if: contains(matrix.os, 'ubuntu')
+        uses: descriptinc/free-disk-space@main
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false # This frees space on the wrong partition.
+
+      - uses: Jimver/cuda-toolkit@v0.2.15
+        if: contains(matrix.os, 'ubuntu')
+        id: cuda-toolkit
+        with:
+          cuda: '12.4.1'
+          linux-local-args: '["--toolkit"]'
+          log-file-suffix: "${{ matrix.os }}-${{ matrix.py_version }}.txt"
+
       - name: Install and configure
         shell: bash
         run: |
diff --git a/mlir/extras/dialects/ext/gpu.py b/mlir/extras/dialects/ext/gpu.py
index b2e4ec2c..8d7af826 100644
--- a/mlir/extras/dialects/ext/gpu.py
+++ b/mlir/extras/dialects/ext/gpu.py
@@ -166,9 +166,7 @@ def __init__(
             if isinstance(t, str):
                 targets[i] = Attribute.parse(t)
         _ods_context = get_default_loc_context(loc)
-        super().__init__(targets=ArrayAttr.get(targets), loc=loc, ip=ip)
-        self.regions[0].blocks.append()
-        self.operation.attributes["sym_name"] = (
+        sym_name = (
             sym_name
             if (
                 issubclass(type(sym_name), Attribute)
@@ -176,6 +174,10 @@ def __init__(
             )
             else AttrBuilder.get("SymbolNameAttr")(sym_name, context=_ods_context)
         )
+        super().__init__(
+            sym_name=sym_name, targets=ArrayAttr.get(targets), loc=loc, ip=ip
+        )
+        self.regions[0].blocks.append()
 
     @property
     def body(self):
diff --git a/tests/test_nvgpu_nvvm.py b/tests/test_nvgpu_nvvm.py
index 91badbc5..b4d34c3d 100644
--- a/tests/test_nvgpu_nvvm.py
+++ b/tests/test_nvgpu_nvvm.py
@@ -1,4 +1,5 @@
 import re
+import subprocess
 from pathlib import Path
 from textwrap import dedent
 
@@ -8,12 +9,12 @@
 from mlir.dialects.memref import cast
 from mlir.dialects.nvgpu import (
     TensorMapDescriptorType,
-    TensorMapSwizzleKind,
+    TensorMapInterleaveKind,
     TensorMapL2PromoKind,
     TensorMapOOBKind,
-    TensorMapInterleaveKind,
+    TensorMapSwizzleKind,
+    tma_create_descriptor,
 )
-from mlir.dialects.nvgpu import tma_create_descriptor
 from mlir.dialects.transform import any_op_t
 from mlir.dialects.transform.extras import named_sequence
 from mlir.dialects.transform.structured import MatchInterfaceEnum
@@ -21,15 +22,15 @@
 from mlir import _mlir_libs
 
 from mlir.extras.ast.canonicalize import canonicalize
-from mlir.extras.dialects.ext import arith, memref, scf, gpu, linalg, transform, nvgpu
+from mlir.extras.dialects.ext import arith, gpu, linalg, memref, nvgpu, scf, transform
 from mlir.extras.dialects.ext.func import func
 from mlir.extras.dialects.ext.gpu import smem_space
 from mlir.extras.dialects.ext.llvm import llvm_ptr_t
-from mlir.extras.runtime.passes import run_pipeline, Pipeline
+from mlir.extras.runtime.passes import Pipeline, run_pipeline
 from mlir.extras.runtime.refbackend import LLVMJITBackend
 
 # noinspection PyUnresolvedReferences
-from mlir.extras.testing import mlir_ctx as ctx, filecheck, MLIRContext
+from mlir.extras.testing import MLIRContext, filecheck, mlir_ctx as ctx
 from mlir.extras.util import find_ops
 
 # needed since the fix isn't defined here nor conftest.py
@@ -200,7 +201,8 @@ def payload():
     compute_linspace_val.emit()
 
     @func
-    def printMemrefF32(x: T.memref(T.f32())): ...
+    def printMemrefF32(x: T.memref(T.f32())):
+        ...
 
     printMemrefF32_.append(printMemrefF32)
 
@@ -421,8 +423,15 @@
 
 CUDA_RUNTIME_LIB_PATH = Path(_mlir_libs.__file__).parent / f"libmlir_cuda_runtime.so"
 
+NVIDIA_GPU = False  # set True below only if nvidia-smi runs successfully
+try:
+    subprocess.check_output("nvidia-smi")
+    NVIDIA_GPU = True
+except Exception:
+    print("No Nvidia GPU in system!")
+
 # based on https://github.com/llvm/llvm-project/blob/9cc2122bf5a81f7063c2a32b2cb78c8d615578a1/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir#L6
-@pytest.mark.skipif(not CUDA_RUNTIME_LIB_PATH.exists(), reason="no cuda library")
+@pytest.mark.skipif(not NVIDIA_GPU, reason="no NVIDIA GPU detected")
 def test_transform_mma_sync_matmul_f16_f16_accum_run(ctx: MLIRContext, capfd):
     range_ = scf.range_
 
@@ -549,7 +558,8 @@ def payload():
     compute_linspace_val.emit()
 
     @func
-    def printMemrefF32(x: T.memref(T.f32())): ...
+    def printMemrefF32(x: T.memref(T.f32())):
+        ...
 
     printMemrefF32_.append(printMemrefF32)
 