Skip to content

[clang-sycl-linker] Add AOT compilation support for Intel GPUs/CPUs #133194

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 13, 2025
28 changes: 27 additions & 1 deletion clang/test/Driver/clang-sycl-linker-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
//
// Test a simple case with a random file (not bitcode) as input.
// RUN: touch %t.o
// RUN: not clang-sycl-linker -triple spirv64 %t.o -o a.spv 2>&1 \
// RUN: not clang-sycl-linker -triple=spirv64 %t.o -o a.spv 2>&1 \
// RUN: | FileCheck %s --check-prefix=FILETYPEERROR
// FILETYPEERROR: Unsupported file type
//
Expand All @@ -31,3 +31,29 @@
// RUN: not clang-sycl-linker --dry-run -triple=spirv64 %t_1.bc %t_2.bc --library-path=%T --device-libs=lib1.bc,lib2.bc,lib3.bc -o a.spv 2>&1 \
// RUN: | FileCheck %s --check-prefix=DEVLIBSERR2
// DEVLIBSERR2: '{{.*}}lib3.bc' SYCL device library file is not found
//
// Test AOT compilation for an Intel GPU.
// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t_1.bc %t_2.bc -o a.out 2>&1 \
// RUN: --ocloc-options="-a -b" \
// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU
// AOT-INTEL-GPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
// AOT-INTEL-GPU-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv
// AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device bmg_g21 -a -b {{.*}}-output a_0.out -file [[SPIRVTRANSLATIONOUT]]_0.spv
//
// Test AOT compilation for an Intel CPU.
// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=graniterapids %t_1.bc %t_2.bc -o a.out 2>&1 \
// RUN: --opencl-aot-options="-a -b" \
// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU
// AOT-INTEL-CPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
// AOT-INTEL-CPU-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv
// AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o a_0.out [[SPIRVTRANSLATIONOUT]]_0.spv
//
// Check that the output file must be specified.
// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc 2>& 1 \
// RUN: | FileCheck %s --check-prefix=NOOUTPUT
// NOOUTPUT: Output file must be specified
//
// Check that the target triple must be specified.
// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc -o a.out 2>& 1 \
// RUN: | FileCheck %s --check-prefix=NOTARGET
// NOTARGET: Target triple must be specified
160 changes: 150 additions & 10 deletions clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// target-specific device code.
//===---------------------------------------------------------------------===//

#include "clang/Basic/OffloadArch.h"
#include "clang/Basic/Version.h"

#include "llvm/ADT/StringExtras.h"
Expand Down Expand Up @@ -54,6 +55,7 @@
using namespace llvm;
using namespace llvm::opt;
using namespace llvm::object;
using namespace clang;

/// Save intermediary results.
static bool SaveTemps = false;
Expand Down Expand Up @@ -128,6 +130,12 @@ const OptTable &getOptTable() {
exit(EXIT_FAILURE);
}

/// Returns the parent directory of the path that sys::fs::getMainExecutable
/// reports for this tool.
/// \param Name Forwarded unchanged as the first argument of
/// sys::fs::getMainExecutable.
std::string getMainExecutable(const char *Name) {
  // The address of this very function is handed over alongside \p Name; the
  // function pointer is routed through intptr_t on its way to void *.
  void *AddrInBinary = reinterpret_cast<void *>(
      reinterpret_cast<intptr_t>(&getMainExecutable));
  std::string ExecutablePath = sys::fs::getMainExecutable(Name, AddrInBinary);
  return sys::path::parent_path(ExecutablePath).str();
}

Expected<StringRef> createTempFile(const ArgList &Args, const Twine &Prefix,
StringRef Extension) {
SmallString<128> OutputFile;
Expand All @@ -145,6 +153,40 @@ Expected<StringRef> createTempFile(const ArgList &Args, const Twine &Prefix,
return TempFiles.back();
}

/// Resolve the tool called \p Name to a path that can be executed.
/// \param Args Parsed command line; only consulted for the dry-run flag.
/// \param Name File name of the program to look for.
/// \param Paths Directories tried first.
/// \returns \p Name unchanged in dry-run mode, otherwise the located path, or
/// an error when neither lookup finds the program.
Expected<std::string> findProgram(const ArgList &Args, StringRef Name,
                                  ArrayRef<StringRef> Paths) {
  // Nothing is launched in a dry run, so the bare name is good enough.
  if (Args.hasArg(OPT_dry_run))
    return Name.str();

  // First attempt: restrict the lookup to the directories supplied.
  ErrorOr<std::string> Located = sys::findProgramByName(Name, Paths);
  if (Located)
    return *Located;

  // Second attempt: repeat the lookup without the explicit directory list.
  Located = sys::findProgramByName(Name);
  if (Located)
    return *Located;

  return createStringError(Located.getError(),
                           "Unable to find '" + Name + "' in path");
}

/// Write a command line to llvm::errs(): the first element wrapped in double
/// quotes, followed by the remaining elements separated by single spaces and
/// a trailing newline. An empty \p CmdArgs prints nothing.
void printCommands(ArrayRef<StringRef> CmdArgs) {
  if (CmdArgs.empty())
    return;

  // Everything after the first element is printed unquoted.
  ArrayRef<StringRef> Rest = CmdArgs.drop_front();
  llvm::errs() << " \"" << CmdArgs.front() << "\" "
               << llvm::join(Rest.begin(), Rest.end(), " ") << "\n";
}

/// Run the program at \p ExecutablePath, passing \p Args as its argument list.
/// The command line is echoed via printCommands when either Verbose or DryRun
/// is set, and nothing is launched when DryRun is set.
/// \returns an error naming the executable's file name when
/// sys::ExecuteAndWait yields a nonzero result; success otherwise.
Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) {
  const bool ShouldEcho = Verbose || DryRun;
  if (ShouldEcho)
    printCommands(Args);

  // A dry run stops after echoing the command.
  if (DryRun)
    return Error::success();

  if (sys::ExecuteAndWait(ExecutablePath, Args) != 0) {
    std::string ToolName = sys::path::filename(ExecutablePath).str();
    return createStringError("'%s' failed", ToolName.c_str());
  }
  return Error::success();
}

Expected<SmallVector<std::string>> getInput(const ArgList &Args) {
// Collect all input bitcode files to be passed to the device linking stage.
SmallVector<std::string> BitcodeFiles;
Expand Down Expand Up @@ -338,6 +380,92 @@ static Error runSPIRVCodeGen(StringRef File, const ArgList &Args,
return Error::success();
}

/// Ahead-of-time compile a SPIR-V file for Intel CPUs by invoking the
/// opencl-aot tool.
/// The command has the form:
///   opencl-aot --device=cpu [extra options] -o <OutputFile> <InputFile>
/// \param InputFile The input SPIR-V file.
/// \param OutputFile The output file name.
/// \param Args Parsed linker arguments; supplies the dry-run flag used when
/// locating the tool and the value of the opencl-aot-options option, which is
/// split on spaces and forwarded.
/// \returns an error if opencl-aot cannot be located or the command fails.
static Error runAOTCompileIntelCPU(StringRef InputFile, StringRef OutputFile,
                                   const ArgList &Args) {
  // Look next to this executable first; findProgram retries without that
  // directory if the first lookup fails.
  Expected<std::string> ToolPath =
      findProgram(Args, "opencl-aot", {getMainExecutable("opencl-aot")});
  if (!ToolPath)
    return ToolPath.takeError();

  SmallVector<StringRef, 8> Cmd;
  Cmd.push_back(*ToolPath);
  Cmd.push_back("--device=cpu");

  // User-supplied options: split on single spaces, empty pieces discarded.
  Args.getLastArgValue(OPT_opencl_aot_options_EQ)
      .split(Cmd, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false);

  for (StringRef Tail : {StringRef("-o"), OutputFile, InputFile})
    Cmd.push_back(Tail);

  return executeCommands(*ToolPath, Cmd);
}

/// Ahead-of-time compile a SPIR-V file for Intel GPUs by invoking the ocloc
/// tool.
/// The command has the form:
///   ocloc -output_no_suffix -spirv_input -device <arch> [extra options]
///         -output <OutputFile> -file <InputFile>
/// \param InputFile The input SPIR-V file.
/// \param OutputFile The output file name.
/// \param Args Parsed linker arguments; supplies the dry-run flag used when
/// locating the tool, the arch value passed as the device, and the value of
/// the ocloc-options option, which is split on spaces and forwarded.
/// \returns an error if ocloc cannot be located, no arch was given, or the
/// command fails.
static Error runAOTCompileIntelGPU(StringRef InputFile, StringRef OutputFile,
                                   const ArgList &Args) {
  // Look next to this executable first; findProgram retries without that
  // directory if the first lookup fails.
  Expected<std::string> ToolPath =
      findProgram(Args, "ocloc", {getMainExecutable("ocloc")});
  if (!ToolPath)
    return ToolPath.takeError();

  StringRef Device(Args.getLastArgValue(OPT_arch_EQ));
  if (Device.empty())
    return createStringError(inconvertibleErrorCode(),
                             "Arch must be specified for AOT compilation");

  SmallVector<StringRef, 8> Cmd;
  Cmd.push_back(*ToolPath);
  // -output_no_suffix prevents ocloc from modifying the image name.
  for (StringRef Head : {StringRef("-output_no_suffix"),
                         StringRef("-spirv_input"), StringRef("-device"),
                         Device})
    Cmd.push_back(Head);

  // User-supplied options: split on single spaces, empty pieces discarded.
  Args.getLastArgValue(OPT_ocloc_options_EQ)
      .split(Cmd, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false);

  for (StringRef Tail :
       {StringRef("-output"), OutputFile, StringRef("-file"), InputFile})
    Cmd.push_back(Tail);

  return executeCommands(*ToolPath, Cmd);
}

/// Run AOT compilation for Intel CPU/GPU.
/// Dispatches on the value of the arch option: Intel GPU architectures go to
/// runAOTCompileIntelGPU, Intel CPU architectures to runAOTCompileIntelCPU.
/// \param InputFile The input SPIR-V file.
/// \param OutputFile The output file name.
/// \param Args Encompasses all arguments required for linking and wrapping
/// device code and will be parsed to generate options required to be passed
/// into the SYCL AOT compilation step.
/// \returns the result of the selected AOT step, or an error naming the
/// requested arch when it is neither an Intel GPU nor an Intel CPU arch.
static Error runAOTCompile(StringRef InputFile, StringRef OutputFile,
                           const ArgList &Args) {
  StringRef ArchName = Args.getLastArgValue(OPT_arch_EQ);
  // The local is deliberately not called OffloadArch so that it does not hide
  // the type of the same name for the rest of this function.
  OffloadArch Arch = StringToOffloadArch(ArchName);
  if (IsIntelGPUOffloadArch(Arch))
    return runAOTCompileIntelGPU(InputFile, OutputFile, Args);
  if (IsIntelCPUOffloadArch(Arch))
    return runAOTCompileIntelCPU(InputFile, OutputFile, Args);

  // Include the offending value so the user can see what was rejected.
  return createStringError(inconvertibleErrorCode(),
                           "Unsupported arch: '" + ArchName + "'");
}

/// Performs the following steps:
/// 1. Link input device code (user code and SYCL device library code).
/// 2. Run SPIR-V code generation.
Expand All @@ -349,7 +477,7 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
// Link all input bitcode files and SYCL device library files, if any.
auto LinkedFile = linkDeviceCode(Files, Args, C);
if (!LinkedFile)
reportError(LinkedFile.takeError());
return LinkedFile.takeError();

// TODO: SYCL post link functionality involves device code splitting and will
// result in multiple bitcode codes.
Expand All @@ -358,15 +486,24 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
SmallVector<std::string> SplitModules;
SplitModules.emplace_back(*LinkedFile);

bool IsAOTCompileNeeded = IsIntelOffloadArch(
StringToOffloadArch(Args.getLastArgValue(OPT_arch_EQ)));

// SPIR-V code generation step.
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
auto Stem = OutputFile.rsplit('.').first;
std::string SPVFile(Stem);
SPVFile.append("_" + utostr(I) + ".spv");
auto Err = runSPIRVCodeGen(SplitModules[I], Args, SPVFile, C);
if (Err)
StringRef Stem = OutputFile.rsplit('.').first;
std::string SPVFile = (Stem + "_" + Twine(I) + ".spv").str();
if (Error Err = runSPIRVCodeGen(SplitModules[I], Args, SPVFile, C))
return Err;
SplitModules[I] = SPVFile;
if (!IsAOTCompileNeeded) {
SplitModules[I] = SPVFile;
} else {
// AOT compilation step.
std::string AOTFile = (Stem + "_" + Twine(I) + ".out").str();
if (Error Err = runAOTCompile(SPVFile, AOTFile, Args))
return Err;
SplitModules[I] = AOTFile;
}
}

// Write the final output into file.
Expand Down Expand Up @@ -440,9 +577,12 @@ int main(int argc, char **argv) {
DryRun = Args.hasArg(OPT_dry_run);
SaveTemps = Args.hasArg(OPT_save_temps);

OutputFile = "a.out";
if (Args.hasArg(OPT_o))
OutputFile = Args.getLastArgValue(OPT_o);
if (!Args.hasArg(OPT_o))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need these additional checks here?

Thanks

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for the AOT compilation functionality these changes are not strictly required, so I could remove them if we want to. For the OPT_o check, I believe there was an earlier review comment noting that the default a.out name conflicts with clang's default a.out name, so I made OPT_o mandatory. For OPT_triple_eq, we use it throughout clang-sycl-linker assuming it is nonempty.

reportError(createStringError("Output file must be specified"));
OutputFile = Args.getLastArgValue(OPT_o);

if (!Args.hasArg(OPT_triple_EQ))
reportError(createStringError("Target triple must be specified"));

if (Args.hasArg(OPT_spirv_dump_device_code_EQ)) {
Arg *A = Args.getLastArg(OPT_spirv_dump_device_code_EQ);
Expand Down
8 changes: 8 additions & 0 deletions clang/tools/clang-sycl-linker/SYCLLinkOpts.td
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,11 @@ def spirv_dump_device_code_EQ : Joined<["--", "-"], "spirv-dump-device-code=">,
// Boolean flag spelled --print-linked-module (double-dash prefix only); it
// takes no value.
def print_linked_module : Flag<["--"], "print-linked-module">,
Flags<[LinkerOnlyOption]>,
HelpText<"Print the linked module's IR for testing">;

// Joined option accepted with either a double- or single-dash prefix:
// --ocloc-options=<value> or -ocloc-options=<value>. The value carries the
// extra options intended for ocloc (Intel GPU AOT compilation).
def ocloc_options_EQ : Joined<["--", "-"], "ocloc-options=">,
Flags<[LinkerOnlyOption]>,
HelpText<"Options passed to ocloc for Intel GPU AOT compilation">;

// Joined option accepted with either a double- or single-dash prefix:
// --opencl-aot-options=<value> or -opencl-aot-options=<value>. The value
// carries the extra options intended for opencl-aot (Intel CPU AOT
// compilation).
def opencl_aot_options_EQ : Joined<["--", "-"], "opencl-aot-options=">,
Flags<[LinkerOnlyOption]>,
HelpText<"Options passed to opencl-aot for Intel CPU AOT compilation">;