diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index da2a153d819f9..ad52e3ff931eb 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -3,7 +3,9 @@ use std::ops::Deref;
 use std::{iter, ptr};
 
 pub(crate) mod autodiff;
+pub(crate) mod gpu_device;
 pub(crate) mod gpu_offload;
+pub(crate) mod gpu_wrapper;
 
 use libc::{c_char, c_uint, size_t};
 use rustc_abi as abi;
diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_device.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_device.rs
new file mode 100644
index 0000000000000..63416743ca322
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/builder/gpu_device.rs
@@ -0,0 +1,117 @@
+use std::ffi::{CString, c_uint};
+
+use llvm::Linkage::*;
+use rustc_codegen_ssa::back::write::CodegenContext;
+
+use crate::llvm::{self, Linkage};
+use crate::{LlvmCodegenBackend, SimpleCx};
+
+/// Like [`add_global_in_addrspace`], but also sets the unnamed-address attribute of the new
+/// global to `UnnamedAddr::Global`.
+fn add_unnamed_global_in_addrspace<'ll>(
+    cx: &SimpleCx<'ll>,
+    name: &str,
+    initializer: &'ll llvm::Value,
+    l: Linkage,
+    addrspace: u32,
+) -> &'ll llvm::Value {
+    let llglobal = add_global_in_addrspace(cx, name, initializer, l, addrspace);
+    // SAFETY: `llglobal` is the global variable that was created on the line above.
+    unsafe { llvm::LLVMSetUnnamedAddress(llglobal, llvm::UnnamedAddr::Global) };
+    llglobal
+}
+
+/// Adds a constant global called `name` to `cx.llmod`, placed in address space `addrspace`.
+///
+/// The global takes its type from `initializer` and is given linkage `l`.
+///
+/// # Panics
+///
+/// Panics if `name` contains an interior NUL byte.
+pub(crate) fn add_global_in_addrspace<'ll>(
+    cx: &SimpleCx<'ll>,
+    name: &str,
+    initializer: &'ll llvm::Value,
+    l: Linkage,
+    addrspace: u32,
+) -> &'ll llvm::Value {
+    let c_name = CString::new(name).unwrap();
+    let llglobal: &'ll llvm::Value = llvm::add_global_in_addrspace(
+        cx.llmod,
+        cx.val_ty(initializer),
+        &c_name,
+        addrspace as c_uint,
+    );
+    llvm::set_global_constant(llglobal, true);
+    llvm::set_linkage(llglobal, l);
+    llvm::set_initializer(llglobal, initializer);
+    llglobal
+}
+
+/// Builds a throwaway LLVM module holding the device-side OpenMP configuration globals and dumps
+/// it as textual IR to `rustmagic-openmp-amdgcn-amd-amdhsa-gfx90a.ll`.
+///
+/// The module and its context are created and disposed inside this function; `_old_cx` is unused.
+/// The set of globals mirrors the reference IR quoted at the bottom of this file.
+#[allow(unused)]
+pub(crate) fn gen_asdf<'ll>(cgcx: &CodegenContext<LlvmCodegenBackend>, _old_cx: &SimpleCx<'ll>) {
+    let llcx = unsafe { llvm::LLVMRustContextCreate(false) };
+    let module_name = CString::new("offload.wrapper.module").unwrap();
+    let llmod = unsafe { llvm::LLVMModuleCreateWithNameInContext(module_name.as_ptr(), llcx) };
+    let cx = SimpleCx::new(llmod, llcx, cgcx.pointer_size);
+
+    // Zero-initialized `__omp_rtl_*` flags, all in address space 1.
+    let zero = cx.get_const_i32(0);
+    for name in [
+        "__omp_rtl_debug_kind",
+        "__omp_rtl_assume_teams_oversubscription",
+        "__omp_rtl_assume_threads_oversubscription",
+        "__omp_rtl_assume_no_thread_state",
+        // Listed in the reference IR below alongside the other four flags.
+        "__omp_rtl_assume_no_nested_parallelism",
+    ] {
+        add_unnamed_global_in_addrspace(&cx, name, zero, WeakODRLinkage, 1);
+    }
+    add_unnamed_global_in_addrspace(
+        &cx,
+        "__oclc_ABI_version",
+        cx.get_const_i32(500),
+        WeakODRLinkage,
+        4,
+    );
+
+    // Keep the path alive in a named local for the duration of the FFI call.
+    let ir_path = CString::new("rustmagic-openmp-amdgcn-amd-amdhsa-gfx90a.ll").unwrap();
+    unsafe {
+        llvm::LLVMPrintModuleToFile(llmod, ir_path.as_ptr(), std::ptr::null_mut());
+
+        // Clean up
+        llvm::LLVMDisposeModule(llmod);
+        llvm::LLVMContextDispose(llcx);
+    }
+    // TODO: addressspace 1 or 4
+}
+// source_filename = "mem.cpp"
+// GPU: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// CPU: target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+// target triple = "amdgcn-amd-amdhsa"
+//
+// @__omp_rtl_debug_kind = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
+// @__omp_rtl_assume_teams_oversubscription = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
+// @__omp_rtl_assume_threads_oversubscription = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
+// @__omp_rtl_assume_no_thread_state = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
+// 
@__omp_rtl_assume_no_nested_parallelism = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0 +// @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// +// !llvm.module.flags = !{!0, !1, !2, !3, !4} +// !opencl.ocl.version = !{!5} +// !llvm.ident = !{!6, !7} +// +// !0 = !{i32 1, !"amdhsa_code_object_version", i32 500} +// !1 = !{i32 1, !"wchar_size", i32 4} +// !2 = !{i32 7, !"openmp", i32 51} +// !3 = !{i32 7, !"openmp-device", i32 51} +// !4 = !{i32 8, !"PIC Level", i32 2} +// !5 = !{i32 2, i32 0} +// !6 = !{!"clang version 20.1.5-rust-1.89.0-nightly (https://github.com/rust-lang/llvm-project.git c1118fdbb3024157df7f4cfe765f2b0b4339e8a2)"} +// !7 = !{!"AMD clang version 19.0.0git (https://github.com/RadeonOpenCompute/llvm-project roc-6.4.0 25133 c7fe45cf4b819c5991fe208aaa96edf142730f1d)"} diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs index 1280ab1442a09..da7bde86463bc 100644 --- a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs +++ b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs @@ -12,7 +12,7 @@ use crate::llvm::{self, Linkage, Type, Value}; use crate::{LlvmCodegenBackend, SimpleCx, attributes}; pub(crate) fn handle_gpu_code<'ll>( - _cgcx: &CodegenContext, + cgcx: &CodegenContext, cx: &'ll SimpleCx<'_>, ) { // The offload memory transfer type for each kernel @@ -26,8 +26,8 @@ pub(crate) fn handle_gpu_code<'ll>( kernels.push(kernel); } } - gen_call_handling(&cx, &kernels, &o_types); + crate::builder::gpu_wrapper::gen_image_wrapper_module(&cgcx); } // What is our @1 here? 
A magic global, used in our data_{begin/update/end}_mapper:
diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_wrapper.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_wrapper.rs
new file mode 100644
index 0000000000000..037208d656a81
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/builder/gpu_wrapper.rs
@@ -0,0 +1,120 @@
+use std::ffi::CString;
+
+use llvm::Linkage::*;
+use rustc_abi::Align;
+use rustc_codegen_ssa::back::write::CodegenContext;
+use rustc_codegen_ssa::traits::BaseTypeCodegenMethods;
+
+use crate::builder::gpu_offload::*;
+use crate::llvm::{self, Visibility};
+use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx};
+
+/// Creates a named, non-packed LLVM struct type `name` in `cx.llcx` with the fields `tys`.
+///
+/// # Panics
+///
+/// Panics if `name` contains an interior NUL byte.
+pub(crate) fn create_struct_ty<'ll>(
+    cx: &'ll SimpleCx<'_>,
+    name: &str,
+    tys: &[&'ll llvm::Type],
+) -> &'ll llvm::Type {
+    let entry_struct_name = CString::new(name).unwrap();
+    unsafe {
+        let entry_struct = llvm::LLVMStructCreateNamed(cx.llcx, entry_struct_name.as_ptr());
+        llvm::LLVMStructSetBody(entry_struct, tys.as_ptr(), tys.len() as u32, 0);
+        entry_struct
+    }
+}
+
+// We don't copy types from other functions because we generate a new module and context.
+// Bringing in types from other contexts would likely cause issues.
+/// Builds the `offload.wrapper.module` LLVM module and dumps it as textual IR to
+/// `rustmagic.openmp.image.wrapper.ll`.
+///
+/// # Panics
+///
+/// Panics if `ModuleLlvm::new_simple` returns an error.
+pub(crate) fn gen_image_wrapper_module(cgcx: &CodegenContext<LlvmCodegenBackend>) {
+    // NOTE(review): data layout and triple are hard-coded to AMD GPU values; confirm that this is
+    // intended for the wrapper module.
+    let dl_cstr = CString::new("e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9").unwrap();
+    let target_cstr = CString::new("amdgcn-amd-amdhsa").unwrap();
+    let name = "offload.wrapper.module";
+    // Lend the strings via `as_ptr` instead of `into_raw`: the latter gives up ownership and,
+    // with nobody calling `CString::from_raw` afterwards, leaked both allocations.
+    let m = ModuleLlvm::new_simple(name, dl_cstr.as_ptr(), target_cstr.as_ptr(), cgcx).unwrap();
+    let cx = SimpleCx::new(m.llmod(), m.llcx, cgcx.pointer_size);
+    let tptr = cx.type_ptr();
+    let ti64 = cx.type_i64();
+    let ti32 = cx.type_i32();
+    let ti16 = cx.type_i16();
+
+    let entry_fields = [ti64, ti16, ti16, ti32, tptr, tptr, ti64, ti64, tptr];
+    create_struct_ty(&cx, "__tgt_offload_entry", &entry_fields);
+    create_struct_ty(&cx, "__tgt_device_image", &[tptr, tptr, tptr, tptr]);
+    create_struct_ty(&cx, "__tgt_bin_desc", &[ti32, tptr, tptr, tptr]);
+
+    let offload_entry_ty = add_tgt_offload_entry(&cx);
+    let offload_entry_arr = cx.type_array(offload_entry_ty, 0);
+
+    // The `__start_`/`__stop_` globals are set up identically: constant, external, hidden.
+    for marker in ["__start_omp_offloading_entries", "__stop_omp_offloading_entries"] {
+        let c_name = CString::new(marker).unwrap();
+        let llglobal = llvm::add_global(cx.llmod, offload_entry_arr, &c_name);
+        llvm::set_global_constant(llglobal, true);
+        llvm::set_linkage(llglobal, ExternalLinkage);
+        llvm::set_visibility(llglobal, Visibility::Hidden);
+    }
+
+    let c_name = CString::new("__dummy.omp_offloading_entries").unwrap();
+    let llglobal = llvm::add_global(cx.llmod, offload_entry_arr, &c_name);
+    llvm::set_global_constant(llglobal, true);
+    llvm::set_linkage(llglobal, InternalLinkage);
+    let c_section_name = CString::new("omp_offloading_entries").unwrap();
+    llvm::set_section(llglobal, &c_section_name);
+    let zeroinit = cx.const_null(offload_entry_arr);
+    llvm::set_initializer(llglobal, zeroinit);
+
+    //@llvm.compiler.used = appending global [1 x ptr] [ptr @__dummy.omp_offloading_entries], section "llvm.metadata"
+    let arr_val = cx.const_array(tptr, &[llglobal]);
+    let c_section_name = CString::new("llvm.metadata").unwrap();
+    let llglobal = add_global(&cx, "llvm.compiler.used", arr_val, AppendingLinkage);
+    llvm::set_section(llglobal, &c_section_name);
+    llvm::set_global_constant(llglobal, false);
+
+    let mapper_fn_ty = cx.type_func(&[tptr], cx.type_void());
+    for fn_name in ["__tgt_unregister_lib", "__tgt_register_lib"] {
+        crate::declare::declare_simple_fn(
+            &cx,
+            fn_name,
+            llvm::CallConv::CCallConv,
+            llvm::UnnamedAddr::No,
+            llvm::Visibility::Default,
+            mapper_fn_ty,
+        );
+    }
+    crate::declare::declare_simple_fn(
+        &cx,
+        "atexit",
+        llvm::CallConv::CCallConv,
+        llvm::UnnamedAddr::No,
+        llvm::Visibility::Default,
+        cx.type_func(&[tptr], ti32),
+    );
+
+    // NOTE(review): presumably a placeholder for the real device image bytes - confirm.
+    let unknown_txt = "11111111111111";
+    let c_entry_name = CString::new(unknown_txt).unwrap();
+    let c_val = c_entry_name.as_bytes_with_nul();
+    let initializer = crate::common::bytes_in_context(cx.llcx, c_val);
+    let llglobal =
+        add_unnamed_global(&cx, &".omp_offloading.device_image", initializer, InternalLinkage);
+    let c_section_name = CString::new(".llvm.offloading").unwrap();
+    llvm::set_section(llglobal, &c_section_name);
+    llvm::set_alignment(llglobal, Align::EIGHT);
+
+    let ir_path = CString::new("rustmagic.openmp.image.wrapper.ll").unwrap();
+    // SAFETY: `cx.llmod` is the live module owned by `m`; `ir_path` outlives the call.
+    unsafe { llvm::LLVMPrintModuleToFile(cx.llmod, ir_path.as_ptr(), std::ptr::null_mut()) };
+}
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 
ee77774c68832..2563d2e18e99d 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -159,6 +159,30 @@ fn to_llvm_tls_model(tls_model: TlsModel) -> llvm::ThreadLocalMode {
     }
 }
 
+// FIXME(offload): This method is not relying on a tcx. We might still want to try to share some of
+// the logic with create_module, e.g. the target_data_layout handling.
+/// Creates an LLVM module named `mod_name` in `llcx` and sets its data layout and target triple.
+///
+/// # Safety
+///
+/// `target_data_layout` and `target_triple` must each point to a valid NUL-terminated C string
+/// that stays alive for the duration of this call.
+pub(crate) unsafe fn create_simple_module<'ll>(
+    llcx: &'ll llvm::Context,
+    target_data_layout: *const std::ffi::c_char,
+    target_triple: *const std::ffi::c_char,
+    mod_name: &str,
+) -> &'ll llvm::Module {
+    let mod_name = SmallCStr::new(mod_name);
+    // SAFETY: `mod_name` is NUL-terminated; the caller vouches for the two raw pointers.
+    unsafe {
+        let llmod = llvm::LLVMModuleCreateWithNameInContext(mod_name.as_ptr(), llcx);
+        llvm::LLVMSetDataLayout(llmod, target_data_layout);
+        llvm::LLVMSetTarget(llmod, target_triple);
+        llmod
+    }
+}
+
 pub(crate) unsafe fn create_module<'ll>(
     tcx: TyCtxt<'_>,
     llcx: &'ll llvm::Context,
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index ca84b6de8b11a..f25f7a3106176 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -392,6 +392,27 @@ unsafe impl Send for ModuleLlvm {}
 unsafe impl Sync for ModuleLlvm {}
 
 impl ModuleLlvm {
+    /// Creates a fresh LLVM context and an empty module named `name` with the given data layout
+    /// and target triple, together with a target machine built from `cgcx`.
+    ///
+    /// `dl_cstr` and `target_cstr` must point to valid NUL-terminated C strings.
+    fn new_simple(
+        name: &str,
+        dl_cstr: *const std::ffi::c_char,
+        target_cstr: *const std::ffi::c_char,
+        cgcx: &CodegenContext<LlvmCodegenBackend>,
+    ) -> Result<Self, FatalError> {
+        // Build the target machine first: if that fails we return before any LLVM context or
+        // module has been created, so nothing is leaked on the error path.
+        let dcx = cgcx.create_dcx();
+        let tm = ModuleLlvm::tm_from_cgcx(cgcx, name, dcx.handle())?;
+        unsafe {
+            let llcx = llvm::LLVMRustContextCreate(false);
+            let llmod_raw = context::create_simple_module(llcx, dl_cstr, target_cstr, name);
+            Ok(ModuleLlvm { llmod_raw, llcx, tm: ManuallyDrop::new(tm) })
+        }
+    }
+
     fn new(tcx: TyCtxt<'_>, mod_name: &str) -> Self {
         unsafe {
             let llcx = llvm::LLVMRustContextCreate(tcx.sess.fewer_names());
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 2443194ff4832..6d35a911572f5 100644
--- 
a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1015,16 +1015,27 @@ unsafe extern "C" {
     ) -> MetadataKindId;
 
     // Create modules.
     pub(crate) fn LLVMModuleCreateWithNameInContext(
         ModuleID: *const c_char,
         C: &Context,
     ) -> &Module;
     pub(crate) safe fn LLVMCloneModule(M: &Module) -> &Module;
+    /// Prints `M` as textual IR to the file named by `Filename`. Returns 0 on success; on failure
+    /// an error string is stored through `ErrorMessage`, which the LLVM-C documentation says
+    /// must be released with `LLVMDisposeMessage`.
+    pub(crate) fn LLVMPrintModuleToFile(
+        M: &Module,
+        Filename: *const c_char,
+        ErrorMessage: *mut *mut c_char,
+    ) -> Bool;
+    pub(crate) fn LLVMDisposeModule(M: &Module);
 
     /// Data layout. See Module::getDataLayout.
     pub(crate) fn LLVMGetDataLayoutStr(M: &Module) -> *const c_char;
     pub(crate) fn LLVMSetDataLayout(M: &Module, Triple: *const c_char);
 
+    pub(crate) fn LLVMSetTarget(M: &Module, Name: *const c_char);
+
     /// Append inline assembly to a module. See `Module::appendModuleInlineAsm`.
     pub(crate) fn LLVMAppendModuleInlineAsm(
         M: &Module,
@@ -1185,6 +1196,12 @@ unsafe extern "C" {
     // Operations on global variables
     pub(crate) safe fn LLVMIsAGlobalVariable(GlobalVar: &Value) -> Option<&Value>;
     pub(crate) fn LLVMAddGlobal<'a>(M: &'a Module, Ty: &'a Type, Name: *const c_char) -> &'a Value;
+    pub(crate) fn LLVMAddGlobalInAddressSpace<'a>(
+        M: &'a Module,
+        Ty: &'a Type,
+        Name: *const c_char,
+        addrspace: c_uint,
+    ) -> &'a Value;
     pub(crate) fn LLVMGetNamedGlobal(M: &Module, Name: *const c_char) -> Option<&Value>;
     pub(crate) fn LLVMGetFirstGlobal(M: &Module) -> Option<&Value>;
     pub(crate) fn LLVMGetNextGlobal(GlobalVar: &Value) -> Option<&Value>;
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index 154ba4fd69018..a9035ef231f2d 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -251,6 +251,17 @@ pub(crate) fn add_global<'a>(llmod: &'a Module, ty: &'a Type, name_cstr: &CStr) 
     unsafe { LLVMAddGlobal(llmod, ty, name_cstr.as_ptr()) }
 }
 
+/// Adds a global of type `ty` named `name_cstr` to `llmod` in address space `addrspace`.
+pub(crate) fn add_global_in_addrspace<'a>(
+    llmod: &'a Module,
+    ty: &'a Type,
+    name_cstr: &CStr,
+    addrspace: c_uint,
+) -> &'a Value {
+    // SAFETY: `name_cstr` is a valid NUL-terminated string for the duration of the call.
+    unsafe { LLVMAddGlobalInAddressSpace(llmod, ty, name_cstr.as_ptr(), addrspace) }
+}
+
 pub(crate) fn set_initializer(llglobal: &Value, constant_val: &Value) {
     unsafe {
         LLVMSetInitializer(llglobal, constant_val);