diff --git a/cmake/functions.cmake b/cmake/functions.cmake
index cbd173e75..1ba8accc9 100644
--- a/cmake/functions.cmake
+++ b/cmake/functions.cmake
@@ -117,4 +117,41 @@ function(gc_add_mlir_dialect_library name)
     if(GcInterface IN_LIST ARGN)
         target_link_libraries(obj.${name} PUBLIC GcInterface)
     endif()
-endfunction()
\ No newline at end of file
+endfunction()
+
+# Adds an MLIR tool target and links it with GcInterface and the MLIR libraries.
+# All arguments are forwarded to add_mlir_tool(). LLVM_LINK_COMPONENTS and
+# MLIR_LINK_COMPONENTS get the default values below only if the caller has not
+# defined them. The tool name is appended to the global GC_TOOLS property.
+# NOTE: this is a macro, so the variables set here stay defined in the caller.
+macro(gc_add_mlir_tool name)
+    # the dependency list copied from mlir/tools/mlir-cpu-runner/CMakeLists.txt of upstream
+    if(NOT DEFINED LLVM_LINK_COMPONENTS)
+        set(LLVM_LINK_COMPONENTS
+            Core
+            Support
+            nativecodegen
+            native
+        )
+    endif()
+    if(NOT DEFINED MLIR_LINK_COMPONENTS)
+        gc_set_mlir_link_components(MLIR_LINK_COMPONENTS
+            MLIRAnalysis
+            MLIRBuiltinToLLVMIRTranslation
+            MLIRExecutionEngine
+            MLIRIR
+            MLIRJitRunner
+            MLIRLLVMDialect
+            MLIRLLVMToLLVMIRTranslation
+            MLIRToLLVMIRTranslationRegistration
+            MLIRParser
+            MLIRTargetLLVMIRExport
+            MLIRSupport
+        )
+    endif()
+    add_mlir_tool(${ARGV})
+    # LLVM_LINK_COMPONENTS is processed by LLVM cmake in add_llvm_executable
+    target_link_libraries(${name} PRIVATE GcInterface ${MLIR_LINK_COMPONENTS})
+    llvm_update_compile_flags(${name})
+    set_property(GLOBAL APPEND PROPERTY GC_TOOLS ${name})
+endmacro()
diff --git a/include/gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h b/include/gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h index c54f0d94e..282c68503 100644 --- a/include/gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h +++ b/include/gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h @@ -240,6 +240,7 @@ struct OclModule { struct OclModuleBuilderOpts { StringRef funcName = {}; + bool printIr = false; bool enableObjectDump = false; ArrayRef sharedLibPaths = {}; void (*pipeline)(OpPassManager &) = nullptr; @@ -267,6 +268,7 @@ struct OclModuleBuilder { private: ModuleOp mlirModule; + const bool printIr; const bool enableObjectDump; const ArrayRef sharedLibPaths; void (*const pipeline)(OpPassManager &); diff --git
a/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp b/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp index 6a33c9d05..1f12bc2f2 100644 --- a/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp +++ b/lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp @@ -749,7 +749,8 @@ ArrayRef getArgTypes(const StringRef &funcName, ModuleOp &mod) { OclModuleBuilder::OclModuleBuilder(ModuleOp module, const OclModuleBuilderOpts &opts) - : mlirModule(module), enableObjectDump(opts.enableObjectDump), + : mlirModule(module), printIr(opts.printIr), + enableObjectDump(opts.enableObjectDump), sharedLibPaths(opts.sharedLibPaths), pipeline(opts.pipeline ? opts.pipeline @@ -799,6 +800,10 @@ OclModuleBuilder::build(const OclRuntime::Ext &ext) { auto staticMain = createStaticMain(mod, funcName, argTypes); + if (printIr) { + mod.dump(); + } + ExecutionEngineOptions opts; opts.jitCodeGenOptLevel = llvm::CodeGenOptLevel::Aggressive; opts.enableObjectDump = enableObjectDump; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a1af4a91a..873924bd4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,3 +1,21 @@ +################################################################################ +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# SPDX-License-Identifier: Apache-2.0 +################################################################################ + add_subdirectory(dnnl) add_subdirectory(gc-cpu-runner) +add_subdirectory(gc-gpu-runner) add_subdirectory(gc-opt) diff --git a/src/gc-cpu-runner/CMakeLists.txt b/src/gc-cpu-runner/CMakeLists.txt index eaab1242d..a0037d6b7 100644 --- a/src/gc-cpu-runner/CMakeLists.txt +++ b/src/gc-cpu-runner/CMakeLists.txt @@ -29,38 +29,8 @@ if(GC_DEV_LINK_LLVM_DYLIB) MLIRExecutionEngineShared MLIRJitRunner ) -else() - # the dependency list copied from mlir/tools/mlir-cpu-runner/CMakeLists.txt of upstream - set(LLVM_LINK_COMPONENTS - Core - Support - nativecodegen - native - ) - set(MLIR_LINK_COMPONENTS - MLIRAnalysis - MLIRBuiltinToLLVMIRTranslation - MLIRExecutionEngine - MLIRIR - MLIRJitRunner - MLIRLLVMDialect - MLIRLLVMToLLVMIRTranslation - MLIRToLLVMIRTranslationRegistration - MLIRParser - MLIRTargetLLVMIRExport - MLIRSupport - ) endif() -#LLVM_LINK_COMPONENTS is processed by LLVM cmake in add_llvm_executable -set(gc_cpu_runner_libs - ${MLIR_LINK_COMPONENTS} - GcCpuRuntime) -add_mlir_tool(gc-cpu-runner - gc-cpu-runner.cpp - -) -llvm_update_compile_flags(gc-cpu-runner) - -target_link_libraries(gc-cpu-runner PRIVATE GcInterface ${gc_cpu_runner_libs}) +gc_add_mlir_tool(gc-cpu-runner gc-cpu-runner.cpp) +target_link_libraries(gc-cpu-runner PRIVATE GcCpuRuntime) mlir_check_all_link_libraries(gc-cpu-runner) diff --git a/src/gc-gpu-runner/CMakeLists.txt b/src/gc-gpu-runner/CMakeLists.txt new file mode 100644 index 000000000..47a685353 --- /dev/null +++ b/src/gc-gpu-runner/CMakeLists.txt @@ -0,0 +1,28 @@ +################################################################################ +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# SPDX-License-Identifier: Apache-2.0 +################################################################################ + +if(NOT GC_ENABLE_TOOLS OR NOT GC_ENABLE_IMEX) + message(STATUS "Gpu runner is not enabled.") + return() +endif() + +gc_add_mlir_tool(gc-gpu-runner GpuRunner.cpp) +target_link_libraries(gc-gpu-runner PRIVATE + GcJitWrapper + GcGpuOclRuntime +) +mlir_check_all_link_libraries(gc-gpu-runner)
diff --git a/src/gc-gpu-runner/GpuRunner.cpp b/src/gc-gpu-runner/GpuRunner.cpp
new file mode 100644
index 000000000..6cae0dd51
--- /dev/null
+++ b/src/gc-gpu-runner/GpuRunner.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "gc/ExecutionEngine/Driver/Driver.h"
+#include "gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h"
+#include "gc/Transforms/Passes.h"
+#include "gc/Utils/Error.h"
+
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/ExecutionEngine/JitRunner.h"
+#include "mlir/ExecutionEngine/OptUtils.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/Support/FileUtilities.h"
+#include "mlir/Tools/ParseUtilities.h"
+#include "mlir/Transforms/Passes.h"
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace mlir;
+
+namespace {
+/// Command line options of the GPU runner.
+///
+/// The options are data members, so they are registered when an instance is
+/// constructed; create it before calling llvm::cl::ParseCommandLineOptions().
+struct Options {
+  llvm::cl::OptionCategory runnerCategory{"GPU runner options"};
+  /// Input file with the IR to be executed, "-" by default.
+  llvm::cl::opt<std::string> inputFilename{
+      llvm::cl::Positional, llvm::cl::desc("<input file>"),
+      llvm::cl::init("-"), llvm::cl::cat(runnerCategory)};
+  /// Name of the entry point; findFunc() selects one when it is left empty.
+  llvm::cl::opt<std::string> mainFuncName{
+      "e",
+      llvm::cl::desc("The function to be executed. If not specified, the "
+                     "first matching function in the module to be used."),
+      llvm::cl::value_desc("function name"), llvm::cl::cat(runnerCategory)};
+  /// When set, main() installs an empty pass pipeline.
+  llvm::cl::opt<bool> skipPipeline{
+      "skip-pipeline",
+      llvm::cl::desc("Skip the GPU pipeline. It's expected, that the input is "
+                     "already lowered with 'gc-opt --gc-gpu-pipeline'."),
+      llvm::cl::init(false), llvm::cl::cat(runnerCategory)};
+  /// Library paths handed over to the module builder.
+  // NOTE: use a single cl::desc per option, a later one replaces the earlier.
+  llvm::cl::list<std::string> sharedLibs{
+      "shared-libs",
+      llvm::cl::desc("Comma separated library paths to link dynamically."),
+      llvm::cl::MiscFlags::CommaSeparated,
+      llvm::cl::value_desc("library paths"), llvm::cl::cat(runnerCategory)};
+  /// Forwarded to OclModuleBuilderOpts::printIr.
+  llvm::cl::opt<bool> printIr{
+      "print-ir",
+      llvm::cl::desc("Print the resulting IR before the execution."),
+      llvm::cl::init(false), llvm::cl::cat(runnerCategory)};
+  /// When not empty, the compiled object is dumped to this file.
+  llvm::cl::opt<std::string> objDumpFile{
+      "obj-dump-file",
+      llvm::cl::desc("Dump the compiled object to the specified file."),
+      llvm::cl::value_desc("file path"), llvm::cl::cat(runnerCategory)};
+};
+} // namespace
+
+/// Determines the function to execute and checks its signature.
+///
+/// With --skip-pipeline the entry point must take (!llvm.ptr, !llvm.ptr, i64),
+/// otherwise it must take no arguments. If no name was given with -e, the
+/// first public, non-external function with such a signature is selected and
+/// stored in opts.mainFuncName. All failures are reported via gcReportErr().
+void findFunc(Options &opts, ModuleOp mod) {
+  bool (*matcher)(ArrayRef<Type>, ModuleOp &);
+
+  if (opts.skipPipeline) {
+    matcher = [](ArrayRef<Type> args, ModuleOp &mod) {
+      if (args.size() != 3)
+        return false;
+      auto ctx = mod.getContext();
+      auto ptrType = LLVM::LLVMPointerType::get(ctx);
+      return args[0] == ptrType && args[1] == ptrType &&
+             args[2] == IntegerType::get(ctx, 64);
+    };
+  } else {
+    matcher = [](ArrayRef<Type> args, ModuleOp &) { return args.empty(); };
+  }
+
+  if (opts.mainFuncName.empty()) {
+    auto setFuncName = [&](auto funcOp) {
+      if (funcOp && !funcOp.isExternal() && funcOp.isPublic() &&
+          matcher(funcOp.getArgumentTypes(), mod)) {
+        opts.mainFuncName = funcOp.getName().str();
+        return true;
+      }
+      return false;
+    };
+
+    for (auto &op : mod.getBody()->getOperations()) {
+      if (setFuncName(dyn_cast<LLVM::LLVMFuncOp>(op)) ||
+          setFuncName(dyn_cast<func::FuncOp>(op))) {
+        return;
+      }
+    }
+    gcReportErr("No matching function found.");
+  }
+
+  ArrayRef<Type> args;
+  if (auto llvmFunc = mod.lookupSymbol<LLVM::LLVMFuncOp>(opts.mainFuncName)) {
+    args = llvmFunc.getArgumentTypes();
+  } else if (auto func = mod.lookupSymbol<func::FuncOp>(opts.mainFuncName)) {
+    args = func.getArgumentTypes();
+  } else {
+    gcReportErr("The function '", opts.mainFuncName.c_str(), "' not found.");
+  }
+
+  if (!matcher(args, mod)) {
+    // Report exactly one message, the one describing the expected signature.
+    if (opts.skipPipeline) {
+      gcReportErr("The function '", opts.mainFuncName.c_str(),
+                  "' signature does not match (!llvm.ptr, !llvm.ptr, i64).");
+    } else {
+      gcReportErr("The function '", opts.mainFuncName.c_str(),
+                  "' must have no arguments.");
+    }
+  }
+}
+
+/// Parses the input IR, builds the OpenCL module and runs the entry point.
+int main(int argc, char **argv) {
+  Options opts;
+  llvm::cl::ParseCommandLineOptions(argc, argv, "GraphCompiler GPU runner\n");
+
+  std::string errMsg;
+  auto file = openInputFile(opts.inputFilename, &errMsg);
+  if (!file) {
+    gcReportErr("Failed to read input IR: ", errMsg.c_str());
+  }
+
+  auto srcMgr = std::make_shared<llvm::SourceMgr>();
+  srcMgr->AddNewSourceBuffer(std::move(file), SMLoc());
+  MLIRContext mlirCtx{gc::initCompilerAndGetDialects()};
+  auto mlirMod = parseSourceFile<ModuleOp>(srcMgr, {&mlirCtx});
+  if (!mlirMod) {
+    // A null module means the parsing has failed; it must not be dereferenced.
+    gcReportErr("Failed to parse input IR.");
+  }
+  findFunc(opts, *mlirMod);
+
+  gc::gpu::OclModuleBuilderOpts builderOpts;
+  // The builder options only reference the paths, this vector owns the array.
+  SmallVector<StringRef> sharedLibs(opts.sharedLibs.begin(),
+                                    opts.sharedLibs.end());
+  builderOpts.funcName = opts.mainFuncName;
+  builderOpts.printIr = opts.printIr;
+  builderOpts.enableObjectDump = !opts.objDumpFile.getValue().empty();
+  builderOpts.sharedLibPaths = sharedLibs;
+  builderOpts.pipeline =
+      opts.skipPipeline ? [](OpPassManager &) {} : [](OpPassManager &pm) {
+        gc::GPUPipelineOptions pipelineOpts;
+        pipelineOpts.isUsmArgs = false;
+        pipelineOpts.callFinish = true;
+        populateGPUPipeline(pm, pipelineOpts);
+      };
+
+  gc::gpu::OclModuleBuilder builder{mlirMod, builderOpts};
+  auto runtime = gcGetOrReport(gc::gpu::OclRuntime::get());
+  auto oclMod = gcGetOrReport(builder.build(runtime));
+  assert(oclMod->isStatic);
+
+  if (!opts.objDumpFile.getValue().empty()) {
+    gcLogD("Dumping the compiled object to ", opts.objDumpFile.getValue());
+    oclMod->dumpToObjectFile(opts.objDumpFile.getValue());
+  }
+
+  auto queue = gcGetOrReport(runtime.createQueue());
+  gc::gpu::OclContext ctx{runtime, queue};
+  gc::gpu::StaticExecutor<0> exec{oclMod};
+  gcLogD("Executing function ", opts.mainFuncName.c_str(), "()");
+  exec(ctx);
+  gcGetOrReport(ctx.finish());
+  gcGetOrReport(runtime.releaseQueue(queue));
+  return 0;
+}
diff --git a/test/mlir/test/CMakeLists.txt b/test/mlir/test/CMakeLists.txt index d631a194f..0e2764c79 100644 --- a/test/mlir/test/CMakeLists.txt +++ b/test/mlir/test/CMakeLists.txt @@ -32,7 +32,7 @@ set(GC_OPT_TEST_DEPENDS if(GC_ENABLE_IMEX) include(imex) - list(APPEND GC_OPT_TEST_DEPENDS GcOpenclRuntime) + list(APPEND GC_OPT_TEST_DEPENDS gc-gpu-runner) endif() if(GC_ENABLE_BINDINGS_PYTHON) diff --git a/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose.mlir b/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose.mlir index 27d1cbb63..1302d6809 100644 --- a/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose.mlir +++ b/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose.mlir @@ -1,6 +1,5 @@ -// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \ -// RUN: | gc-cpu-runner -e main --entry-point-result=void \ -// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s +// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s + module{ func.func @linalg_matmul(%arg0:
tensor<128x256xf16>, diff --git a/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose_sep.mlir b/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose_sep.mlir index 7047a728b..0a06bda8e 100644 --- a/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose_sep.mlir +++ b/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose_sep.mlir @@ -1,6 +1,5 @@ -// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \ -// RUN: | gc-cpu-runner -e main --entry-point-result=void \ -// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s +// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s + module{ func.func @linalg_matmul(%arg0: tensor<128x256xf16>, diff --git a/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_64x64.mlir b/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_64x64.mlir index 3978c457d..8b5fbbdea 100644 --- a/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_64x64.mlir +++ b/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_64x64.mlir @@ -1,6 +1,5 @@ -// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \ -// RUN: | gc-cpu-runner -e main --entry-point-result=void \ -// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s +// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s + module{ func.func @linalg_matmul(%arg0: tensor<64x64xf16>, diff --git a/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096.mlir b/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096.mlir index c08fc99a0..cb3f59728 100644 --- a/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096.mlir +++ b/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096.mlir @@ -1,6 +1,4 @@ -// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \ -// RUN: | gc-cpu-runner -e main --entry-point-result=void \ -// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s 
+// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s module { func.func @linalg_mlp(%arg0: tensor<32x4096xf16>, %arg1: tensor<4096x4096xf16>, %arg2 : tensor<32x4096xf16>, diff --git a/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096_transpose.mlir b/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096_transpose.mlir index f1b662981..ff88f71ec 100644 --- a/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096_transpose.mlir +++ b/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096_transpose.mlir @@ -1,6 +1,4 @@ -// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \ -// RUN: | gc-cpu-runner -e main --entry-point-result=void \ -// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s +// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s module { func.func @linalg_mlp(%arg0: tensor<32x4096xf16>, %arg1: tensor<4096x4096xf16>, %arg2 : tensor<32x4096xf16>, diff --git a/test/mlir/test/gc/gpu-runner/mlp.mlir b/test/mlir/test/gc/gpu-runner/mlp.mlir index c6cf901a7..6f914beb8 100644 --- a/test/mlir/test/gc/gpu-runner/mlp.mlir +++ b/test/mlir/test/gc/gpu-runner/mlp.mlir @@ -1,4 +1,5 @@ -// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" | gc-cpu-runner -e main -entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s +// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s + #map0 = affine_map<(d0, d1) -> (d1)> #map1 = affine_map<(d0, d1) -> (d0, d1)> #map2 = affine_map<(d0, d1, d2) -> (d0, d2)> diff --git a/test/mlir/test/lit.cfg.py b/test/mlir/test/lit.cfg.py index 09b0451e5..08fc42a33 100644 --- a/test/mlir/test/lit.cfg.py +++ b/test/mlir/test/lit.cfg.py @@ -49,7 +49,7 @@ llvm_config.with_environment("PATH", config.llvm_tools_dir, append_path=True) tool_dirs = [config.gc_tools_dir, config.llvm_tools_dir] -tools = ["gc-opt", "gc-cpu-runner"] +tools = 
["gc-opt", "gc-cpu-runner", "gc-gpu-runner"] llvm_config.add_tool_substitutions(tools, tool_dirs)