From 497c17a3906f26c184dc530b2211d4e737b210da Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Tue, 14 Nov 2023 18:31:28 -0500 Subject: [PATCH 01/17] Add check-cfg to cranelift --- build_system/tests.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build_system/tests.rs b/build_system/tests.rs index 10736ff9a55c8..ff71a567ed3ac 100644 --- a/build_system/tests.rs +++ b/build_system/tests.rs @@ -457,6 +457,8 @@ impl<'a> TestRunner<'a> { cmd.arg("--target"); cmd.arg(&self.target_compiler.triple); cmd.arg("-Cpanic=abort"); + cmd.arg("--check-cfg=cfg(no_unstable_features)"); + cmd.arg("--check-cfg=cfg(jit)"); cmd.args(args); cmd } From dfc669b74c214ad5afeb94211ebe1fd70ac8d43a Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Thu, 16 Nov 2023 21:15:07 +0000 Subject: [PATCH 02/17] Merge commit 'def04540a4e2541b995195c752c751295606a388' into sync_cg_clif-2023-11-16 --- .github/workflows/abi-cafe.yml | 4 + .github/workflows/main.yml | 10 + .github/workflows/rustc.yml | 6 + Readme.md | 49 ++++- rust-toolchain | 2 +- scripts/rustup.sh | 2 +- scripts/setup_rust_fork.sh | 10 +- scripts/test_bootstrap.sh | 4 +- src/abi/mod.rs | 1 + src/base.rs | 2 +- src/constant.rs | 43 ++-- src/inline_asm.rs | 166 ++++++---------- src/intrinsics/llvm.rs | 2 + src/intrinsics/llvm_x86.rs | 350 ++++++++++++++++++++++++++++----- src/intrinsics/simd.rs | 14 +- 15 files changed, 473 insertions(+), 192 deletions(-) diff --git a/.github/workflows/abi-cafe.yml b/.github/workflows/abi-cafe.yml index 12aa69d3c7956..bd3b051185b46 100644 --- a/.github/workflows/abi-cafe.yml +++ b/.github/workflows/abi-cafe.yml @@ -35,6 +35,10 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + if: matrix.os == 'ubuntu-latest' + run: cat /proc/cpuinfo + - name: Cache cargo target dir uses: actions/cache@v3 with: diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 47d9a3b93f729..05dc28d074530 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -66,6 +66,10 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + if: matrix.os == 'ubuntu-latest' + run: cat /proc/cpuinfo + - name: Cache cargo target dir uses: actions/cache@v3 with: @@ -136,6 +140,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + run: cat /proc/cpuinfo + - name: Prepare dependencies run: ./y.sh prepare @@ -159,6 +166,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + run: cat /proc/cpuinfo + - name: Cache cargo target dir uses: actions/cache@v3 with: diff --git a/.github/workflows/rustc.yml b/.github/workflows/rustc.yml index b49dc3aff7aaa..cb5dd51fee310 100644 --- a/.github/workflows/rustc.yml +++ b/.github/workflows/rustc.yml @@ -11,6 +11,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + run: cat /proc/cpuinfo + - name: Cache cargo target dir uses: actions/cache@v3 with: @@ -31,6 +34,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + run: cat /proc/cpuinfo + - name: Cache cargo target dir uses: actions/cache@v3 with: diff --git a/Readme.md b/Readme.md index 1a2b2bbc58812..ca6ecdf1d0e88 100644 --- a/Readme.md +++ b/Readme.md @@ -5,8 +5,48 @@ This has the potential to improve compilation times in debug mode. If your project doesn't use any of the things listed under "Not yet supported", it should work fine. If not please open an issue. +## Download using Rustup + +The Cranelift codegen backend is distributed in nightly builds on Linux and x86_64 macOS. If you want to +install it using Rustup, you can do that by running: + +```bash +$ rustup component add rustc-codegen-cranelift-preview --toolchain nightly +``` + +Once it is installed, you can enable it with one of the following approaches: +- `CARGO_PROFILE_DEV_CODEGEN_BACKEND=cranelift cargo +nightly build -Zcodegen-backend` +- `RUSTFLAGS="-Zcodegen-backend=cranelift" cargo +nightly build` +- Add the following to `.cargo/config.toml`: + ```toml + [unstable] + codegen-backend = true + + [profile.dev] + codegen-backend = "cranelift" + ``` +- Add the following to `Cargo.toml`: + ```toml + # This line needs to come before anything else in Cargo.toml + cargo-features = ["codegen-backend"] + + [profile.dev] + codegen-backend = "cranelift" + ``` + +## Precompiled builds + +You can also download a pre-built version from the [releases] page. +Extract the `dist` directory in the archive anywhere you want. +If you want to use `cargo clif build` instead of having to specify the full path to the `cargo-clif` executable, you can add the `bin` subdirectory of the extracted `dist` directory to your `PATH`. +(tutorial [for Windows](https://stackoverflow.com/a/44272417), and [for Linux/MacOS](https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path/26059#26059)). + +[releases]: https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev + ## Building and testing +If you want to build the backend manually, you can download it from GitHub and build it yourself: + ```bash $ git clone https://github.com/rust-lang/rustc_codegen_cranelift $ cd rustc_codegen_cranelift @@ -22,15 +62,6 @@ $ ./test.sh For more docs on how to build and test see [build_system/usage.txt](build_system/usage.txt) or the help message of `./y.sh`. -## Precompiled builds - -Alternatively you can download a pre built version from the [releases] page. -Extract the `dist` directory in the archive anywhere you want. -If you want to use `cargo clif build` instead of having to specify the full path to the `cargo-clif` executable, you can add the `bin` subdirectory of the extracted `dist` directory to your `PATH`. -(tutorial [for Windows](https://stackoverflow.com/a/44272417), and [for Linux/MacOS](https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path/26059#26059)). - -[releases]: https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev - ## Usage rustc_codegen_cranelift can be used as a near-drop-in replacement for `cargo build` or `cargo run` for existing projects. diff --git a/rust-toolchain b/rust-toolchain index b832b06e0ffba..80ef1e49f2319 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2023-11-10" +channel = "nightly-2023-11-16" components = ["rust-src", "rustc-dev", "llvm-tools"] diff --git a/scripts/rustup.sh b/scripts/rustup.sh index e62788f2e507d..355282911c255 100755 --- a/scripts/rustup.sh +++ b/scripts/rustup.sh @@ -46,7 +46,7 @@ case $1 in git pull origin master branch=sync_cg_clif-$(date +%Y-%m-%d) git checkout -b "$branch" - "$cg_clif/git-fixed-subtree.sh" pull --prefix=compiler/rustc_codegen_cranelift/ https://github.com/bjorn3/rustc_codegen_cranelift.git master + "$cg_clif/git-fixed-subtree.sh" pull --prefix=compiler/rustc_codegen_cranelift/ https://github.com/rust-lang/rustc_codegen_cranelift.git master git push -u my "$branch" # immediately merge the merge commit into cg_clif to prevent merge conflicts when syncing diff --git a/scripts/setup_rust_fork.sh b/scripts/setup_rust_fork.sh index bbb8a010d965f..731828caae2c4 100644 --- a/scripts/setup_rust_fork.sh +++ b/scripts/setup_rust_fork.sh @@ -1,15 +1,17 @@ #!/usr/bin/env bash set -e +# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we +# the LLVM backend isn't compiled in here. +export CG_CLIF_FORCE_GNU_AS=1 + # Compiletest expects all standard library paths to start with /rustc/FAKE_PREFIX. # CG_CLIF_STDLIB_REMAP_PATH_PREFIX will cause cg_clif's build system to pass # --remap-path-prefix to handle this. -# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we -# the LLVM backend isn't compiled in here. -CG_CLIF_FORCE_GNU_AS=1 CG_CLIF_STDLIB_REMAP_PATH_PREFIX=/rustc/FAKE_PREFIX ./y.sh build +CG_CLIF_STDLIB_REMAP_PATH_PREFIX=/rustc/FAKE_PREFIX ./y.sh build echo "[SETUP] Rust fork" -git clone https://github.com/rust-lang/rust.git || true +git clone https://github.com/rust-lang/rust.git --filter=tree:0 || true pushd rust git fetch git checkout -- . diff --git a/scripts/test_bootstrap.sh b/scripts/test_bootstrap.sh index a8f6d7a202486..791d457993de3 100755 --- a/scripts/test_bootstrap.sh +++ b/scripts/test_bootstrap.sh @@ -11,7 +11,5 @@ rm -r compiler/rustc_codegen_cranelift/{Cargo.*,src} cp ../Cargo.* compiler/rustc_codegen_cranelift/ cp -r ../src compiler/rustc_codegen_cranelift/src -# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we -# the LLVM backend isn't compiled in here. -CG_CLIF_FORCE_GNU_AS=1 ./x.py build --stage 1 library/std +./x.py build --stage 1 library/std popd diff --git a/src/abi/mod.rs b/src/abi/mod.rs index c4572e035258d..0ff1473da4313 100644 --- a/src/abi/mod.rs +++ b/src/abi/mod.rs @@ -383,6 +383,7 @@ pub(crate) fn codegen_terminator_call<'tcx>( args, ret_place, target, + source_info.span, ); return; } diff --git a/src/base.rs b/src/base.rs index 91b1547cb6ea6..71557d49ef2c3 100644 --- a/src/base.rs +++ b/src/base.rs @@ -456,7 +456,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { ); } - crate::inline_asm::codegen_inline_asm( + crate::inline_asm::codegen_inline_asm_terminator( fx, source_info.span, template, diff --git a/src/constant.rs b/src/constant.rs index b0853d30e03b8..cf68a3857c58c 100644 --- a/src/constant.rs +++ b/src/constant.rs @@ -1,10 +1,13 @@ //! Handling of `static`s, `const`s and promoted allocations +use std::cmp::Ordering; + use cranelift_module::*; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::mir::interpret::{read_target_uint, AllocId, GlobalAlloc, Scalar}; use rustc_middle::mir::ConstValue; +use rustc_middle::ty::ScalarInt; use crate::prelude::*; @@ -430,9 +433,9 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant pub(crate) fn mir_operand_get_const_val<'tcx>( fx: &FunctionCx<'_, '_, 'tcx>, operand: &Operand<'tcx>, -) -> Option> { +) -> Option { match operand { - Operand::Constant(const_) => Some(eval_mir_constant(fx, const_).0), + Operand::Constant(const_) => eval_mir_constant(fx, const_).0.try_to_scalar_int(), // FIXME(rust-lang/rust#85105): Casts like `IMM8 as u32` result in the const being stored // inside a temporary before being passed to the intrinsic requiring the const argument. // This code tries to find a single constant defining definition of the referenced local. @@ -440,7 +443,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>( if !place.projection.is_empty() { return None; } - let mut computed_const_val = None; + let mut computed_scalar_int = None; for bb_data in fx.mir.basic_blocks.iter() { for stmt in &bb_data.statements { match &stmt.kind { @@ -456,22 +459,38 @@ pub(crate) fn mir_operand_get_const_val<'tcx>( operand, ty, ) => { - if computed_const_val.is_some() { + if computed_scalar_int.is_some() { return None; // local assigned twice } if !matches!(ty.kind(), ty::Uint(_) | ty::Int(_)) { return None; } - let const_val = mir_operand_get_const_val(fx, operand)?; - if fx.layout_of(*ty).size - != const_val.try_to_scalar_int()?.size() + let scalar_int = mir_operand_get_const_val(fx, operand)?; + let scalar_int = match fx + .layout_of(*ty) + .size + .cmp(&scalar_int.size()) { - return None; - } - computed_const_val = Some(const_val); + Ordering::Equal => scalar_int, + Ordering::Less => match ty.kind() { + ty::Uint(_) => ScalarInt::try_from_uint( + scalar_int.try_to_uint(scalar_int.size()).unwrap(), + fx.layout_of(*ty).size, + ) + .unwrap(), + ty::Int(_) => ScalarInt::try_from_int( + scalar_int.try_to_int(scalar_int.size()).unwrap(), + fx.layout_of(*ty).size, + ) + .unwrap(), + _ => unreachable!(), + }, + Ordering::Greater => return None, + }; + computed_scalar_int = Some(scalar_int); } Rvalue::Use(operand) => { - computed_const_val = mir_operand_get_const_val(fx, operand) + computed_scalar_int = mir_operand_get_const_val(fx, operand) } _ => return None, } @@ -522,7 +541,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>( TerminatorKind::Call { .. } => {} } } - computed_const_val + computed_scalar_int } } } diff --git a/src/inline_asm.rs b/src/inline_asm.rs index ce0eecca8a8ba..25d14319f5791 100644 --- a/src/inline_asm.rs +++ b/src/inline_asm.rs @@ -10,7 +10,7 @@ use target_lexicon::BinaryFormat; use crate::prelude::*; -enum CInlineAsmOperand<'tcx> { +pub(crate) enum CInlineAsmOperand<'tcx> { In { reg: InlineAsmRegOrRegClass, value: Value, @@ -34,7 +34,7 @@ enum CInlineAsmOperand<'tcx> { }, } -pub(crate) fn codegen_inline_asm<'tcx>( +pub(crate) fn codegen_inline_asm_terminator<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, span: Span, template: &[InlineAsmTemplatePiece], @@ -42,8 +42,6 @@ pub(crate) fn codegen_inline_asm<'tcx>( options: InlineAsmOptions, destination: Option, ) { - // FIXME add .eh_frame unwind info directives - // Used by panic_abort on Windows, but uses a syntax which only happens to work with // asm!() by accident and breaks with the GNU assembler as well as global_asm!() for // the LLVM backend. @@ -135,15 +133,33 @@ pub(crate) fn codegen_inline_asm<'tcx>( }) .collect::>(); - let mut inputs = Vec::new(); - let mut outputs = Vec::new(); + codegen_inline_asm_inner(fx, template, &operands, options); + + match destination { + Some(destination) => { + let destination_block = fx.get_block(destination); + fx.bcx.ins().jump(destination_block, &[]); + } + None => { + fx.bcx.ins().trap(TrapCode::UnreachableCodeReached); + } + } +} + +pub(crate) fn codegen_inline_asm_inner<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + template: &[InlineAsmTemplatePiece], + operands: &[CInlineAsmOperand<'tcx>], + options: InlineAsmOptions, +) { + // FIXME add .eh_frame unwind info directives let mut asm_gen = InlineAssemblyGenerator { tcx: fx.tcx, arch: fx.tcx.sess.asm_arch.unwrap(), enclosing_def_id: fx.instance.def_id(), template, - operands: &operands, + operands, options, registers: Vec::new(), stack_slots_clobber: Vec::new(), @@ -165,6 +181,8 @@ pub(crate) fn codegen_inline_asm<'tcx>( let generated_asm = asm_gen.generate_asm_wrapper(&asm_name); fx.cx.global_asm.push_str(&generated_asm); + let mut inputs = Vec::new(); + let mut outputs = Vec::new(); for (i, operand) in operands.iter().enumerate() { match operand { CInlineAsmOperand::In { reg: _, value } => { @@ -186,16 +204,6 @@ pub(crate) fn codegen_inline_asm<'tcx>( } call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs); - - match destination { - Some(destination) => { - let destination_block = fx.get_block(destination); - fx.bcx.ins().jump(destination_block, &[]); - } - None => { - fx.bcx.ins().trap(TrapCode::UnreachableCodeReached); - } - } } struct InlineAssemblyGenerator<'a, 'tcx> { @@ -637,8 +645,21 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { ) { match arch { InlineAsmArch::X86_64 => { - write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap(); - reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap(); + match reg { + InlineAsmReg::X86(reg) + if reg as u32 >= X86InlineAsmReg::xmm0 as u32 + && reg as u32 <= X86InlineAsmReg::xmm15 as u32 => + { + // rustc emits x0 rather than xmm0 + write!(generated_asm, " movups [rbx+0x{:x}], ", offset.bytes()).unwrap(); + write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32) + .unwrap(); + } + _ => { + write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap(); + reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap(); + } + } generated_asm.push('\n'); } InlineAsmArch::AArch64 => { @@ -663,8 +684,24 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { ) { match arch { InlineAsmArch::X86_64 => { - generated_asm.push_str(" mov "); - reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap(); + match reg { + InlineAsmReg::X86(reg) + if reg as u32 >= X86InlineAsmReg::xmm0 as u32 + && reg as u32 <= X86InlineAsmReg::xmm15 as u32 => + { + // rustc emits x0 rather than xmm0 + write!( + generated_asm, + " movups xmm{}", + reg as u32 - X86InlineAsmReg::xmm0 as u32 + ) + .unwrap(); + } + _ => { + generated_asm.push_str(" mov "); + reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap() + } + } writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap(); } InlineAsmArch::AArch64 => { @@ -720,7 +757,12 @@ fn call_inline_asm<'tcx>( fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]); for (offset, place) in outputs { - let ty = fx.clif_type(place.layout().ty).unwrap(); + let ty = if place.layout().ty.is_simd() { + let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx); + fx.clif_type(lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap() + } else { + fx.clif_type(place.layout().ty).unwrap() + }; let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load( fx, ty, @@ -729,83 +771,3 @@ fn call_inline_asm<'tcx>( place.write_cvalue(fx, CValue::by_val(value, place.layout())); } } - -pub(crate) fn codegen_xgetbv<'tcx>( - fx: &mut FunctionCx<'_, '_, 'tcx>, - xcr_no: Value, - ret: CPlace<'tcx>, -) { - // FIXME add .eh_frame unwind info directives - - let operands = vec![ - CInlineAsmOperand::In { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), - value: xcr_no, - }, - CInlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), - late: true, - place: Some(ret), - }, - CInlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), - late: true, - place: None, - }, - ]; - let options = InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM; - - let mut inputs = Vec::new(); - let mut outputs = Vec::new(); - - let mut asm_gen = InlineAssemblyGenerator { - tcx: fx.tcx, - arch: fx.tcx.sess.asm_arch.unwrap(), - enclosing_def_id: fx.instance.def_id(), - template: &[InlineAsmTemplatePiece::String( - " - xgetbv - // out = rdx << 32 | rax - shl rdx, 32 - or rax, rdx - " - .to_string(), - )], - operands: &operands, - options, - registers: Vec::new(), - stack_slots_clobber: Vec::new(), - stack_slots_input: Vec::new(), - stack_slots_output: Vec::new(), - stack_slot_size: Size::from_bytes(0), - }; - asm_gen.allocate_registers(); - asm_gen.allocate_stack_slots(); - - let inline_asm_index = fx.cx.inline_asm_index.get(); - fx.cx.inline_asm_index.set(inline_asm_index + 1); - let asm_name = format!( - "__inline_asm_{}_n{}", - fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"), - inline_asm_index - ); - - let generated_asm = asm_gen.generate_asm_wrapper(&asm_name); - fx.cx.global_asm.push_str(&generated_asm); - - for (i, operand) in operands.iter().enumerate() { - match operand { - CInlineAsmOperand::In { reg: _, value } => { - inputs.push((asm_gen.stack_slots_input[i].unwrap(), *value)); - } - CInlineAsmOperand::Out { reg: _, late: _, place } => { - if let Some(place) = place { - outputs.push((asm_gen.stack_slots_output[i].unwrap(), *place)); - } - } - _ => unreachable!(), - } - } - - call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs); -} diff --git a/src/intrinsics/llvm.rs b/src/intrinsics/llvm.rs index e9b7daf14924d..659e6c133ef5e 100644 --- a/src/intrinsics/llvm.rs +++ b/src/intrinsics/llvm.rs @@ -12,6 +12,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, target: Option, + span: Span, ) { if intrinsic.starts_with("llvm.aarch64") { return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call( @@ -31,6 +32,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( args, ret, target, + span, ); } diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 4c536048626ee..8dd2b6ed014e6 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -1,7 +1,10 @@ //! Emulate x86 LLVM intrinsics +use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece}; use rustc_middle::ty::GenericArgsRef; +use rustc_target::asm::*; +use crate::inline_asm::{codegen_inline_asm_inner, CInlineAsmOperand}; use crate::intrinsics::*; use crate::prelude::*; @@ -12,6 +15,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, target: Option, + span: Span, ) { match intrinsic { "llvm.x86.sse2.pause" | "llvm.aarch64.isb" => { @@ -24,7 +28,35 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let xcr_no = xcr_no.load_scalar(fx); - crate::inline_asm::codegen_xgetbv(fx, xcr_no, ret); + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String( + " + xgetbv + // out = rdx << 32 | rax + shl rdx, 32 + or rax, rdx + " + .to_string(), + )], + &[ + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), + value: xcr_no, + }, + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), + late: true, + place: Some(ret), + }, + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), + late: true, + place: None, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); } "llvm.x86.sse3.ldu.dq" | "llvm.x86.avx.ldu.dq.256" => { @@ -688,64 +720,278 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( "llvm.x86.pclmulqdq" => { // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772 - intrinsic_args!(fx, args => (a, b, imm8); intrinsic); + intrinsic_args!(fx, args => (a, b, _imm8); intrinsic); - assert_eq!(a.layout(), b.layout()); - let layout = a.layout(); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); - let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); - let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); - assert_eq!(lane_ty, fx.tcx.types.i64); - assert_eq!(ret_lane_ty, fx.tcx.types.i64); - assert_eq!(lane_count, 2); - assert_eq!(ret_lane_count, 2); + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[2]) + { + imm8 + } else { + fx.tcx.sess.span_fatal( + span, + "Index argument for `_mm_clmulepi64_si128` is not a constant", + ); + }; - let imm8 = imm8.load_scalar(fx); + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); - let control0 = fx.bcx.ins().band_imm(imm8, 0b0000_0001); - let a_lane0 = a.value_lane(fx, 0).load_scalar(fx); - let a_lane1 = a.value_lane(fx, 1).load_scalar(fx); - let temp1 = fx.bcx.ins().select(control0, a_lane1, a_lane0); + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("pclmulqdq xmm0, xmm1, {imm8}"))], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } - let control4 = fx.bcx.ins().band_imm(imm8, 0b0001_0000); - let b_lane0 = b.value_lane(fx, 0).load_scalar(fx); - let b_lane1 = b.value_lane(fx, 1).load_scalar(fx); - let temp2 = fx.bcx.ins().select(control4, b_lane1, b_lane0); + "llvm.x86.aesni.aeskeygenassist" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128&ig_expand=261 + intrinsic_args!(fx, args => (a, _imm8); intrinsic); - fn extract_bit(fx: &mut FunctionCx<'_, '_, '_>, val: Value, bit: i64) -> Value { - let tmp = fx.bcx.ins().ushr_imm(val, bit); - fx.bcx.ins().band_imm(tmp, 1) - } + let a = a.load_scalar(fx); - let mut res1 = fx.bcx.ins().iconst(types::I64, 0); - for i in 0..=63 { - let x = extract_bit(fx, temp1, 0); - let y = extract_bit(fx, temp2, i); - let mut temp = fx.bcx.ins().band(x, y); - for j in 1..=i { - let x = extract_bit(fx, temp1, j); - let y = extract_bit(fx, temp2, i - j); - let z = fx.bcx.ins().band(x, y); - temp = fx.bcx.ins().bxor(temp, z); - } - let temp = fx.bcx.ins().ishl_imm(temp, i); - res1 = fx.bcx.ins().bor(res1, temp); - } - ret.place_lane(fx, 0).to_ptr().store(fx, res1, MemFlags::trusted()); - - let mut res2 = fx.bcx.ins().iconst(types::I64, 0); - for i in 64..=127 { - let mut temp = fx.bcx.ins().iconst(types::I64, 0); - for j in i - 63..=63 { - let x = extract_bit(fx, temp1, j); - let y = extract_bit(fx, temp2, i - j); - let z = fx.bcx.ins().band(x, y); - temp = fx.bcx.ins().bxor(temp, z); - } - let temp = fx.bcx.ins().ishl_imm(temp, i); - res2 = fx.bcx.ins().bor(res2, temp); - } - ret.place_lane(fx, 1).to_ptr().store(fx, res2, MemFlags::trusted()); + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[1]) + { + imm8 + } else { + fx.tcx.sess.span_fatal( + span, + "Index argument for `_mm_aeskeygenassist_si128` is not a constant", + ); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("aeskeygenassist xmm0, xmm0, {imm8}"))], + &[CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesimc" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128&ig_expand=260 + intrinsic_args!(fx, args => (a); intrinsic); + + let a = a.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesimc xmm0, xmm0".to_string())], + &[CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesenc" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128&ig_expand=252 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesenc xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesenclast" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128&ig_expand=257 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesenclast xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesdec" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128&ig_expand=242 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesdec xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesdeclast" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128&ig_expand=247 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesdeclast xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sha256rnds2" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32&ig_expand=5977 + intrinsic_args!(fx, args => (a, b, k); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + let k = k.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256rnds2 xmm1, xmm2".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)), + value: b, + }, + // Implicit argument to the sha256rnds2 instruction + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + value: k, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sha256msg1" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32&ig_expand=5975 + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256msg1 xmm1, xmm2".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sha256msg2" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32&ig_expand=5976 + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256msg2 xmm1, xmm2".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); } "llvm.x86.avx.ptestz.256" => { diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index ea137c4ca1e8c..0bd211fd614f0 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -282,11 +282,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( fx.tcx.sess.span_fatal(span, "Index argument for `simd_insert` is not a constant"); }; - let idx = idx_const - .try_to_bits(Size::from_bytes(4 /* u32*/)) - .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const)); + let idx: u32 = idx_const + .try_to_u32() + .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const)); let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx); - if idx >= lane_count.into() { + if u64::from(idx) >= lane_count { fx.tcx.sess.span_fatal( fx.mir.span, format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count), @@ -331,10 +331,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; let idx = idx_const - .try_to_bits(Size::from_bytes(4 /* u32*/)) - .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const)); + .try_to_u32() + .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const)); let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx); - if idx >= lane_count.into() { + if u64::from(idx) >= lane_count { fx.tcx.sess.span_fatal( fx.mir.span, format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count), From 97ca520c448fbfb405ba3560ce521b2e31c81cc2 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Thu, 16 Nov 2023 21:24:33 +0000 Subject: [PATCH 03/17] check-cfg needs -Zunstable-options --- build_system/tests.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build_system/tests.rs b/build_system/tests.rs index ff71a567ed3ac..f6e6ac6f1e8e1 100644 --- a/build_system/tests.rs +++ b/build_system/tests.rs @@ -457,6 +457,7 @@ impl<'a> TestRunner<'a> { cmd.arg("--target"); cmd.arg(&self.target_compiler.triple); cmd.arg("-Cpanic=abort"); + cmd.arg("-Zunstable-options"); cmd.arg("--check-cfg=cfg(no_unstable_features)"); cmd.arg("--check-cfg=cfg(jit)"); cmd.args(args); From cd7218aa9f4cdbfec0bc90ca86a9caa9cd271938 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sat, 11 Nov 2023 13:26:19 +0000 Subject: [PATCH 04/17] Update Cranelift to 0.102 --- Cargo.lock | 60 +++++++++++++++++++++++----------------------- Cargo.toml | 12 +++++----- src/pretty_clif.rs | 16 ++++++++----- 3 files changed, 46 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dcb6cc57584cf..901d1dbea66b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,9 +21,9 @@ checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" [[package]] name = "arbitrary" -version = "1.3.0" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" [[package]] name = "bitflags" @@ -45,18 +45,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cranelift-bforest" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f773437307980ac0f424bf9b9a5d0cd21a0f17248c6860c9a65bec8b5975f3fe" +checksum = "76eb38f2af690b5a4411d9a8782b6d77dabff3ca939e0518453ab9f9a4392d41" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-codegen" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443c2ac50e97fb7de1a0f862753fce3f27215558811a6fcee508eb0c3747fa79" +checksum = "39526c036b92912417e8931f52c1e235796688068d3efdbbd8b164f299d19156" dependencies = [ "bumpalo", "cranelift-bforest", @@ -75,39 +75,39 @@ dependencies = [ [[package]] name = "cranelift-codegen-meta" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5b174c411480c79ce0793c55042fa51bec27e486381d103a53cab3b480cb2db" +checksum = "fdb0deedc9fccf2db53a5a3c9c9d0163e44143b0d004dca9bf6ab6a0024cd79a" dependencies = [ "cranelift-codegen-shared", ] [[package]] name = "cranelift-codegen-shared" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73fa0151a528066a369de6debeea4d4b23a32aba68b5add8c46d3dc8091ff434" +checksum = "cea2d1b274e45aa8e61e9103efa1ba82d4b5a19d12bd1fd10744c3b7380ba3ff" [[package]] name = "cranelift-control" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8adf1e6398493c9bea1190e37d28a0eb0eca5fddbc80e01e506cda34db92b1f" +checksum = "6ea5977559a71e63db79a263f0e81a89b996e8a38212c4281e37dd1dbaa8b65c" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4917e2ed3bb5fe87d0ed88395ca6d644018d119a034faedd1f3e1f2c33cd52b2" +checksum = "2f871ada808b58158d84dfc43a6a2e2d2756baaf4ed1c51fd969ca8330e6ca5c" [[package]] name = "cranelift-frontend" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aaadf1e7cf28886bbf046eaf7ef538997bc8a7e020e578ea4957b39da87d5a1" +checksum = "e8e6890f587ef59824b3debe577e68fdf9b307b3808c54b8d93a18fd0b70941b" dependencies = [ "cranelift-codegen", "log", @@ -117,15 +117,15 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a67fda31b9d69eaa1c49a2081939454c45857596a9d45af6744680541c628b4c" +checksum = "a8d5fc6d5d3b52d1917002b17a8ecce448c2621b5bf394bb4e77e2f676893537" [[package]] name = "cranelift-jit" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bf32710628e7ff298739f1ed80a0bfdafc0c6a3e284c4540b23f18e8889d4b" +checksum = "e8a2d7744f743f59d9646d7589ad22ea17ed0d71e04906eb77c31e99bc13bd8b" dependencies = [ "anyhow", "cranelift-codegen", @@ -143,9 +143,9 @@ dependencies = [ [[package]] name = "cranelift-module" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d693e93a0fbf56b4bc93cffe6b107c2e52f070e1111950505fc8c83ac440b9d" +checksum = "b96cb196334698e612c197d7d0ae59af5e07667306ec20d7be414717db400873" dependencies = [ "anyhow", "cranelift-codegen", @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "cranelift-native" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76fb52ba71be98312f35e798d9e98e45ab2586f27584231bf7c644fa9501e8af" +checksum = "3e10c2e7faa65d4ae7de9a83b44f2c31aca7dc638e17d0a79572fdf8103d720b" dependencies = [ "cranelift-codegen", "libc", @@ -165,9 +165,9 @@ dependencies = [ [[package]] name = "cranelift-object" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2551b2e185022b89e9efa5e04c0f17f679b86ef73d9f7feabc48b608ff23120d" +checksum = "83ce94e18756058af8a66e3c0ba1123ae15517c72162d8060d0cb0974642adf2" dependencies = [ "anyhow", "cranelift-codegen", @@ -295,9 +295,9 @@ checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "regalloc2" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b4dcbd3a2ae7fb94b5813fa0e957c6ab51bf5d0a8ee1b69e0c2d0f1e6eb8485" +checksum = "ad156d539c879b7a24a363a2016d77961786e71f48f2e2fc8302a92abd2429a6" dependencies = [ "hashbrown 0.13.2", "log", @@ -374,9 +374,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasmtime-jit-icache-coherence" -version = "14.0.2" +version = "15.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0980a96b16abbdaf829858d2389697b1d6cfc6a903873fd74b7e47a6b1045584" +checksum = "b73ad1395eda136baec5ece7e079e0536a82ef73488e345456cc9b89858ad0ec" dependencies = [ "cfg-if", "libc", diff --git a/Cargo.toml b/Cargo.toml index 30db10f745715..20fcd22273219 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,12 +8,12 @@ crate-type = ["dylib"] [dependencies] # These have to be in sync with each other -cranelift-codegen = { version = "0.101.2", default-features = false, features = ["std", "unwind", "all-arch"] } -cranelift-frontend = { version = "0.101.2" } -cranelift-module = { version = "0.101.2" } -cranelift-native = { version = "0.101.2" } -cranelift-jit = { version = "0.101.2", optional = true } -cranelift-object = { version = "0.101.2" } +cranelift-codegen = { version = "0.102", default-features = false, features = ["std", "unwind", "all-arch"] } +cranelift-frontend = { version = "0.102" } +cranelift-module = { version = "0.102" } +cranelift-native = { version = "0.102" } +cranelift-jit = { version = "0.102", optional = true } +cranelift-object = { version = "0.102" } target-lexicon = "0.12.0" gimli = { version = "0.28", default-features = false, features = ["write"]} object = { version = "0.32", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] } diff --git a/src/pretty_clif.rs b/src/pretty_clif.rs index da84e54a91636..02c0dcb8b1bd2 100644 --- a/src/pretty_clif.rs +++ b/src/pretty_clif.rs @@ -58,11 +58,10 @@ use std::fmt; use std::io::Write; -use cranelift_codegen::{ - entity::SecondaryMap, - ir::entities::AnyEntity, - write::{FuncWriter, PlainWriter}, -}; +use cranelift_codegen::entity::SecondaryMap; +use cranelift_codegen::ir::entities::AnyEntity; +use cranelift_codegen::ir::Fact; +use cranelift_codegen::write::{FuncWriter, PlainWriter}; use rustc_middle::ty::layout::FnAbiOf; use rustc_middle::ty::print::with_no_trimmed_paths; use rustc_session::config::{OutputFilenames, OutputType}; @@ -155,8 +154,13 @@ impl FuncWriter for &'_ CommentWriter { _func: &Function, entity: AnyEntity, value: &dyn fmt::Display, + maybe_fact: Option<&Fact>, ) -> fmt::Result { - write!(w, " {} = {}", entity, value)?; + if let Some(fact) = maybe_fact { + write!(w, " {} ! {} = {}", entity, fact, value)?; + } else { + write!(w, " {} = {}", entity, value)?; + } if let Some(comment) = self.entity_comments.get(&entity) { writeln!(w, " ; {}", comment.replace('\n', "\n; ")) From a5fcfad890b8b35aa5c4efeff0e5eae5ebc504c1 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Tue, 21 Nov 2023 17:00:43 +0100 Subject: [PATCH 05/17] Fix _mm_sad_epu8 implementation Fixes rust-lang/rustc_codegen_cranelift#1426 --- src/intrinsics/llvm_x86.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 8dd2b6ed014e6..78a0a347cfc24 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -364,9 +364,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for out_lane_idx in 0..lane_count / 8 { let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0); - for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 { + for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 { let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx); + let a_lane = fx.bcx.ins().uextend(types::I16, a_lane); let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx); + let b_lane = fx.bcx.ins().uextend(types::I16, b_lane); let lane_diff = fx.bcx.ins().isub(a_lane, b_lane); let abs_lane_diff = fx.bcx.ins().iabs(lane_diff); From 4ae658683f87b0f34ffb5c06f50ca1c51333a1a4 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Thu, 23 Nov 2023 20:02:45 +0000 Subject: [PATCH 06/17] Fix fn_sig_for_fn_abi and the coroutine transform for generators There were three issues previously: * The self argument was pinned, despite Iterator::next taking an unpinned mutable reference. * A resume argument was passed, despite Iterator::next not having one. * The return value was CoroutineState rather than Option While these things just so happened to work with the LLVM backend, cg_clif does much stricter checks when trying to assign a value to a place. In addition it can't handle the mismatch between the amount of arguments specified by the FnAbi and the FnSig. --- build_system/tests.rs | 9 +++++++++ config.txt | 1 + example/gen_block_iterate.rs | 36 ++++++++++++++++++++++++++++++++++++ rustfmt.toml | 5 ++++- 4 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 example/gen_block_iterate.rs diff --git a/build_system/tests.rs b/build_system/tests.rs index ff71a567ed3ac..aa50dbfdf35b4 100644 --- a/build_system/tests.rs +++ b/build_system/tests.rs @@ -100,6 +100,15 @@ const BASE_SYSROOT_SUITE: &[TestCase] = &[ TestCase::build_bin_and_run("aot.issue-72793", "example/issue-72793.rs", &[]), TestCase::build_bin("aot.issue-59326", "example/issue-59326.rs"), TestCase::build_bin_and_run("aot.neon", "example/neon.rs", &[]), + TestCase::custom("aot.gen_block_iterate", &|runner| { + runner.run_rustc([ + "example/gen_block_iterate.rs", + "--edition", + "2024", + "-Zunstable-options", + ]); + runner.run_out_command("gen_block_iterate", &[]); + }), ]; pub(crate) static RAND_REPO: GitRepo = GitRepo::github( diff --git a/config.txt b/config.txt index 2ccdc7d78748a..3cf295c003e42 100644 --- a/config.txt +++ b/config.txt @@ -43,6 +43,7 @@ aot.mod_bench aot.issue-72793 aot.issue-59326 aot.neon +aot.gen_block_iterate testsuite.extended_sysroot test.rust-random/rand diff --git a/example/gen_block_iterate.rs b/example/gen_block_iterate.rs new file mode 100644 index 0000000000000..14bd23e77ea01 --- /dev/null +++ b/example/gen_block_iterate.rs @@ -0,0 +1,36 @@ +// Copied from https://github.com/rust-lang/rust/blob/46455dc65069387f2dc46612f13fd45452ab301a/tests/ui/coroutine/gen_block_iterate.rs +// revisions: next old +//compile-flags: --edition 2024 -Zunstable-options +//[next] compile-flags: -Ztrait-solver=next +// run-pass +#![feature(gen_blocks)] + +fn foo() -> impl Iterator { + gen { yield 42; for x in 3..6 { yield x } } +} + +fn moved() -> impl Iterator { + let mut x = "foo".to_string(); + gen move { + yield 42; + if x == "foo" { return } + x.clear(); + for x in 3..6 { yield x } + } +} + +fn main() { + let mut iter = foo(); + assert_eq!(iter.next(), Some(42)); + assert_eq!(iter.next(), Some(3)); + assert_eq!(iter.next(), Some(4)); + assert_eq!(iter.next(), Some(5)); + assert_eq!(iter.next(), None); + // `gen` blocks are fused + assert_eq!(iter.next(), None); + + let mut iter = moved(); + assert_eq!(iter.next(), Some(42)); + assert_eq!(iter.next(), None); + +} diff --git a/rustfmt.toml b/rustfmt.toml index ebeca8662a519..0f884187adddb 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,4 +1,7 @@ -ignore = ["y.rs"] +ignore = [ + "y.rs", + "example/gen_block_iterate.rs", # uses edition 2024 +] # Matches rustfmt.toml of rustc version = "Two" From 430ab4e923a16fc46a6738aa4dfc2ff0c735ac9f Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 24 Nov 2023 10:22:51 +0100 Subject: [PATCH 07/17] Allow function pointers in CValue::const_val Fixes rust-lang/rustc_codegen_cranelift#1430 --- src/value_and_place.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/value_and_place.rs b/src/value_and_place.rs index 21ad2a835fc96..9eb8e80d3fc23 100644 --- a/src/value_and_place.rs +++ b/src/value_and_place.rs @@ -329,7 +329,13 @@ impl<'tcx> CValue<'tcx> { let msb = fx.bcx.ins().iconst(types::I64, (const_val >> 64) as u64 as i64); fx.bcx.ins().iconcat(lsb, msb) } - ty::Bool | ty::Char | ty::Uint(_) | ty::Int(_) | ty::Ref(..) | ty::RawPtr(..) => { + ty::Bool + | ty::Char + | ty::Uint(_) + | ty::Int(_) + | ty::Ref(..) + | ty::RawPtr(..) + | ty::FnPtr(..) => { let raw_val = const_val.size().truncate(const_val.to_bits(layout.size).unwrap()); fx.bcx.ins().iconst(clif_ty, raw_val as i64) } From 9c958196e717744257e40623ca6598b82e050b9f Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 24 Nov 2023 10:46:22 +0100 Subject: [PATCH 08/17] Fix polymorphization for coroutines Fixes rust-lang/rustc_codegen_cranelift#1429 --- build_system/tests.rs | 4 ++++ config.txt | 1 + example/polymorphize_coroutine.rs | 16 ++++++++++++++++ src/value_and_place.rs | 26 ++++++++++++++++++++++++++ 4 files changed, 47 insertions(+) create mode 100644 example/polymorphize_coroutine.rs diff --git a/build_system/tests.rs b/build_system/tests.rs index f6e6ac6f1e8e1..4cc572f0abaf5 100644 --- a/build_system/tests.rs +++ b/build_system/tests.rs @@ -99,6 +99,10 @@ const BASE_SYSROOT_SUITE: &[TestCase] = &[ TestCase::build_bin_and_run("aot.mod_bench", "example/mod_bench.rs", &[]), TestCase::build_bin_and_run("aot.issue-72793", "example/issue-72793.rs", &[]), TestCase::build_bin("aot.issue-59326", "example/issue-59326.rs"), + TestCase::custom("aot.polymorphize_coroutine", &|runner| { + runner.run_rustc(&["example/polymorphize_coroutine.rs", "-Zpolymorphize"]); + runner.run_out_command("polymorphize_coroutine", &[]); + }), TestCase::build_bin_and_run("aot.neon", "example/neon.rs", &[]), ]; diff --git a/config.txt b/config.txt index 2ccdc7d78748a..79284df6b1960 100644 --- a/config.txt +++ b/config.txt @@ -42,6 +42,7 @@ aot.float-minmax-pass aot.mod_bench aot.issue-72793 aot.issue-59326 +aot.polymorphize_coroutine aot.neon testsuite.extended_sysroot diff --git a/example/polymorphize_coroutine.rs b/example/polymorphize_coroutine.rs new file mode 100644 index 0000000000000..c965b34e13b90 --- /dev/null +++ b/example/polymorphize_coroutine.rs @@ -0,0 +1,16 @@ +#![feature(coroutines, coroutine_trait)] + +use std::ops::Coroutine; +use std::pin::Pin; + +fn main() { + run_coroutine::(); +} + +fn run_coroutine() { + let mut coroutine = || { + yield; + return; + }; + Pin::new(&mut coroutine).resume(()); +} diff --git a/src/value_and_place.rs b/src/value_and_place.rs index 9eb8e80d3fc23..f52f59716a8a5 100644 --- a/src/value_and_place.rs +++ b/src/value_and_place.rs @@ -977,6 +977,32 @@ pub(crate) fn assert_assignable<'tcx>( } } } + (&ty::Coroutine(def_id_a, args_a, mov_a), &ty::Coroutine(def_id_b, args_b, mov_b)) + if def_id_a == def_id_b && mov_a == mov_b => + { + let mut types_a = args_a.types(); + let mut types_b = args_b.types(); + loop { + match (types_a.next(), types_b.next()) { + (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1), + (None, None) => return, + (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty), + } + } + } + (&ty::CoroutineWitness(def_id_a, args_a), &ty::CoroutineWitness(def_id_b, args_b)) + if def_id_a == def_id_b => + { + let mut types_a = args_a.types(); + let mut types_b = args_b.types(); + loop { + match (types_a.next(), types_b.next()) { + (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1), + (None, None) => return, + (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty), + } + } + } (ty::Param(_), _) | (_, ty::Param(_)) if fx.tcx.sess.opts.unstable_opts.polymorphize => { // No way to check if it is correct or not with polymorphization enabled } From 3ec8d7aa4a05c3286189441abef0ce757d03b4b5 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sun, 12 Nov 2023 17:57:35 +0000 Subject: [PATCH 09/17] Implement _mm_cmpestri and _mm_cmpestrm using inline asm --- src/intrinsics/llvm_x86.rs | 98 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 78a0a347cfc24..0fa46b1dac266 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -720,6 +720,104 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( } } + "llvm.x86.sse42.pcmpestri128" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939 + intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic); + + let a = a.load_scalar(fx); + let la = la.load_scalar(fx); + let b = b.load_scalar(fx); + let lb = lb.load_scalar(fx); + + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[4]) + { + imm8 + } else { + fx.tcx.sess.span_fatal(span, "Index argument for `_mm_cmpestri` is not a constant"); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("pcmpestri xmm0, xmm1, {imm8}"))], + &[ + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + value: a, + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: b, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), + value: la, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), + value: lb, + }, + // Implicit result of the pcmpestri intrinsic + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), + late: true, + place: Some(ret), + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sse42.pcmpestrm128" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm&ig_expand=940 + intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic); + + let a = a.load_scalar(fx); + let la = la.load_scalar(fx); + let b = b.load_scalar(fx); + let lb = lb.load_scalar(fx); + + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[4]) + { + imm8 + } else { + fx.tcx.sess.span_fatal(span, "Index argument for `_mm_cmpestrm` is not a constant"); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("pcmpestrm xmm0, xmm1, {imm8}"))], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: b, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), + value: la, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), + value: lb, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + "llvm.x86.pclmulqdq" => { // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772 intrinsic_args!(fx, args => (a, b, _imm8); intrinsic); From 705031d0177af0b492a3224c01bda257c85505e6 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 24 Nov 2023 13:57:21 +0000 Subject: [PATCH 10/17] Implement _mm_cvttps_epi32 --- src/intrinsics/llvm_x86.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 0fa46b1dac266..85ce940999868 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -273,16 +273,31 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( ); } "llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => { - let a = match args { - [a] => a, - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); + intrinsic_args!(fx, args => (a); intrinsic); simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { fx.bcx.ins().iabs(lane) }); } + "llvm.x86.sse2.cvttps2dq" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32&ig_expand=2429 + intrinsic_args!(fx, args => (a); intrinsic); + let a = a.load_scalar(fx); + + // Using inline asm instead of fcvt_to_sint_sat as unrepresentable values are turned + // into 0x80000000 for which Cranelift doesn't have a native instruction. + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("cvttps2dq xmm0, xmm0"))], + &[CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } "llvm.x86.addcarry.32" | "llvm.x86.addcarry.64" => { intrinsic_args!(fx, args => (c_in, a, b); intrinsic); let c_in = c_in.load_scalar(fx); From 65da67169427a3b09726b64389d827aa7b2bfbe8 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 24 Nov 2023 15:30:53 +0000 Subject: [PATCH 11/17] Implement *fmaddsub_p*, *fmsubadd_p* and *fnmadd_p* vendor intrinsics --- src/intrinsics/llvm_x86.rs | 111 +++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 85ce940999868..142ca1cf5dd0f 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -735,6 +735,117 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( } } + "llvm.x86.fma.vfmaddsub.ps" + | "llvm.x86.fma.vfmaddsub.pd" + | "llvm.x86.fma.vfmaddsub.ps.256" + | "llvm.x86.fma.vfmaddsub.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185 + intrinsic_args!(fx, args => (a, b, c); intrinsic); + + assert_eq!(a.layout(), b.layout()); + assert_eq!(a.layout(), c.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert!(lane_ty.is_floating_point()); + assert!(ret_lane_ty.is_floating_point()); + assert_eq!(lane_count, ret_lane_count); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + + for idx in 0..lane_count { + let a_lane = a.value_lane(fx, idx).load_scalar(fx); + let b_lane = b.value_lane(fx, idx).load_scalar(fx); + let c_lane = c.value_lane(fx, idx).load_scalar(fx); + + let mul = fx.bcx.ins().fmul(a_lane, b_lane); + let res = if idx & 1 == 0 { + fx.bcx.ins().fsub(mul, c_lane) + } else { + fx.bcx.ins().fadd(mul, c_lane) + }; + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); + } + } + + "llvm.x86.fma.vfmsubadd.ps" + | "llvm.x86.fma.vfmsubadd.pd" + | "llvm.x86.fma.vfmsubadd.ps.256" + | "llvm.x86.fma.vfmsubadd.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305 + intrinsic_args!(fx, args => (a, b, c); intrinsic); + + assert_eq!(a.layout(), b.layout()); + assert_eq!(a.layout(), c.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert!(lane_ty.is_floating_point()); + assert!(ret_lane_ty.is_floating_point()); + assert_eq!(lane_count, ret_lane_count); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + + for idx in 0..lane_count { + let a_lane = a.value_lane(fx, idx).load_scalar(fx); + let b_lane = b.value_lane(fx, idx).load_scalar(fx); + let c_lane = c.value_lane(fx, idx).load_scalar(fx); + + let mul = fx.bcx.ins().fmul(a_lane, b_lane); + let res = if idx & 1 == 0 { + fx.bcx.ins().fadd(mul, c_lane) + } else { + fx.bcx.ins().fsub(mul, c_lane) + }; + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); + } + } + + "llvm.x86.fma.vfnmadd.ps" + | "llvm.x86.fma.vfnmadd.pd" + | "llvm.x86.fma.vfnmadd.ps.256" + | "llvm.x86.fma.vfnmadd.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371 + intrinsic_args!(fx, args => (a, b, c); intrinsic); + + assert_eq!(a.layout(), b.layout()); + assert_eq!(a.layout(), c.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert!(lane_ty.is_floating_point()); + assert!(ret_lane_ty.is_floating_point()); + assert_eq!(lane_count, ret_lane_count); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + + for idx in 0..lane_count { + let a_lane = a.value_lane(fx, idx).load_scalar(fx); + let b_lane = b.value_lane(fx, idx).load_scalar(fx); + let c_lane = c.value_lane(fx, idx).load_scalar(fx); + + let mul = fx.bcx.ins().fmul(a_lane, b_lane); + let neg_mul = fx.bcx.ins().fneg(mul); + let res = fx.bcx.ins().fadd(neg_mul, c_lane); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); + } + } + "llvm.x86.sse42.pcmpestri128" => { // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939 intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic); From c8729e9216bda2b51d8781d95cae0b907e71c6b8 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 24 Nov 2023 15:31:14 +0000 Subject: [PATCH 12/17] Implement _mm256_zeroupper vendor intrinsic --- src/intrinsics/llvm_x86.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 142ca1cf5dd0f..f85f9b8917ebd 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -22,6 +22,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( // Spin loop hint } + "llvm.x86.avx.vzeroupper" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper&ig_expand=7218 + // Do nothing. It is a perf hint anyway. + } + // Used by is_x86_feature_detected!(); "llvm.x86.xgetbv" => { intrinsic_args!(fx, args => (xcr_no); intrinsic); From d5a7ae7976ec02196ec9893f79fa06612059dfbc Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 24 Nov 2023 19:26:46 +0000 Subject: [PATCH 13/17] Implement the float part of the gather family vendor intrinsics --- src/intrinsics/llvm_x86.rs | 87 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index f85f9b8917ebd..6cccc8b8396df 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -74,6 +74,93 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( ret.write_cvalue(fx, val); } + "llvm.x86.avx2.gather.d.ps" + | "llvm.x86.avx2.gather.d.pd" + | "llvm.x86.avx2.gather.d.ps.256" + | "llvm.x86.avx2.gather.d.pd.256" + | "llvm.x86.avx2.gather.q.ps" + | "llvm.x86.avx2.gather.q.pd" + | "llvm.x86.avx2.gather.q.ps.256" + | "llvm.x86.avx2.gather.q.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd&ig_expand=3818 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd&ig_expand=3819 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd&ig_expand=3821 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd&ig_expand=3822 + // ... + + intrinsic_args!(fx, args => (src, ptr, index, mask, scale); intrinsic); + + let (src_lane_count, src_lane_ty) = src.layout().ty.simd_size_and_type(fx.tcx); + let (index_lane_count, index_lane_ty) = index.layout().ty.simd_size_and_type(fx.tcx); + let (mask_lane_count, mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert!(src_lane_ty.is_floating_point()); + assert!(index_lane_ty.is_integral()); + assert!(mask_lane_ty.is_floating_point()); + assert!(ret_lane_ty.is_floating_point()); + assert_eq!(src_lane_count, mask_lane_count); + assert_eq!(src_lane_count, ret_lane_count); + + let lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap(); + let index_lane_clif_ty = fx.clif_type(index_lane_ty).unwrap(); + let mask_lane_clif_ty = fx.clif_type(mask_lane_ty).unwrap(); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + + let ptr = ptr.load_scalar(fx); + let scale = scale.load_scalar(fx); + let scale = fx.bcx.ins().uextend(types::I64, scale); + for lane_idx in 0..std::cmp::min(src_lane_count, index_lane_count) { + let src_lane = src.value_lane(fx, lane_idx).load_scalar(fx); + let index_lane = index.value_lane(fx, lane_idx).load_scalar(fx); + let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx); + let mask_lane = + fx.bcx.ins().bitcast(mask_lane_clif_ty.as_int(), MemFlags::new(), mask_lane); + + let if_enabled = fx.bcx.create_block(); + let if_disabled = fx.bcx.create_block(); + let next = fx.bcx.create_block(); + let res_lane = fx.bcx.append_block_param(next, lane_clif_ty); + + let mask_lane = match mask_lane_clif_ty { + types::F32 => fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64), + types::F64 => fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64), + _ => unreachable!(), + }; + fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]); + fx.bcx.seal_block(if_enabled); + fx.bcx.seal_block(if_disabled); + + fx.bcx.switch_to_block(if_enabled); + let index_lane = if index_lane_clif_ty != types::I64 { + fx.bcx.ins().sextend(types::I64, index_lane) + } else { + index_lane + }; + let offset = fx.bcx.ins().imul(index_lane, scale); + let lane_ptr = fx.bcx.ins().iadd(ptr, offset); + let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), lane_ptr, 0); + fx.bcx.ins().jump(next, &[res]); + + fx.bcx.switch_to_block(if_disabled); + fx.bcx.ins().jump(next, &[src_lane]); + + fx.bcx.seal_block(next); + fx.bcx.switch_to_block(next); + + fx.bcx.ins().nop(); + + ret.place_lane(fx, lane_idx) + .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout)); + } + + for lane_idx in std::cmp::min(src_lane_count, index_lane_count)..ret_lane_count { + let zero_lane = fx.bcx.ins().iconst(mask_lane_clif_ty.as_int(), 0); + let zero_lane = fx.bcx.ins().bitcast(mask_lane_clif_ty, MemFlags::new(), zero_lane); + ret.place_lane(fx, lane_idx) + .write_cvalue(fx, CValue::by_val(zero_lane, ret_lane_layout)); + } + } + "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => { let (x, y, kind) = match args { [x, y, kind] => (x, y, kind), From 3b49b9efd5e91b3104bc377ab64ee88aaa3ce31d Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 24 Nov 2023 19:38:13 +0000 Subject: [PATCH 14/17] Implement the int part of the gather family vendor intrinsics --- src/intrinsics/llvm_x86.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 6cccc8b8396df..2108b4bb9ffe0 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -74,12 +74,20 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( ret.write_cvalue(fx, val); } - "llvm.x86.avx2.gather.d.ps" + "llvm.x86.avx2.gather.d.d" + | "llvm.x86.avx2.gather.d.q" + | "llvm.x86.avx2.gather.d.ps" | "llvm.x86.avx2.gather.d.pd" + | "llvm.x86.avx2.gather.d.d.256" + | "llvm.x86.avx2.gather.d.q.256" | "llvm.x86.avx2.gather.d.ps.256" | "llvm.x86.avx2.gather.d.pd.256" + | "llvm.x86.avx2.gather.q.d" + | "llvm.x86.avx2.gather.q.q" | "llvm.x86.avx2.gather.q.ps" | "llvm.x86.avx2.gather.q.pd" + | "llvm.x86.avx2.gather.q.d.256" + | "llvm.x86.avx2.gather.q.q.256" | "llvm.x86.avx2.gather.q.ps.256" | "llvm.x86.avx2.gather.q.pd.256" => { // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd&ig_expand=3818 @@ -94,10 +102,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let (index_lane_count, index_lane_ty) = index.layout().ty.simd_size_and_type(fx.tcx); let (mask_lane_count, mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx); let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); - assert!(src_lane_ty.is_floating_point()); + assert_eq!(src_lane_ty, ret_lane_ty); assert!(index_lane_ty.is_integral()); - assert!(mask_lane_ty.is_floating_point()); - assert!(ret_lane_ty.is_floating_point()); assert_eq!(src_lane_count, mask_lane_count); assert_eq!(src_lane_count, ret_lane_count); @@ -122,8 +128,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let res_lane = fx.bcx.append_block_param(next, lane_clif_ty); let mask_lane = match mask_lane_clif_ty { - types::F32 => fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64), - types::F64 => fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64), + types::I32 | types::F32 => { + fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64) + } + types::I64 | types::F64 => { + fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64) + } _ => unreachable!(), }; fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]); From e6e2f00d2196281bdd92c3dbfb466687708807c3 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sat, 25 Nov 2023 09:14:19 +0000 Subject: [PATCH 15/17] Fix incorrect implementation of several vendor intrinsics --- src/intrinsics/llvm_x86.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 2108b4bb9ffe0..07b95b7933d0a 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -556,12 +556,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let ret_lane_layout = fx.layout_of(fx.tcx.types.i32); for out_lane_idx in 0..lane_count / 2 { let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx); - let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0); + let a_lane0 = fx.bcx.ins().sextend(types::I32, a_lane0); let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx); let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0); let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); - let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1); + let a_lane1 = fx.bcx.ins().sextend(types::I32, a_lane1); let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1); @@ -716,14 +716,14 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( assert_eq!(ret_lane_ty, fx.tcx.types.i16); assert_eq!(lane_count * 2, ret_lane_count); - let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16)); - let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16)); + let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64); + let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64); let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); for idx in 0..lane_count { let lane = a.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -733,7 +733,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count { let lane = b.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -760,8 +760,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count { let lane = a.value_lane(fx, idx).load_scalar(fx); - let sat = fx.bcx.ins().umax(lane, min_u16); - let sat = fx.bcx.ins().umin(sat, max_u16); + let sat = fx.bcx.ins().smax(lane, min_u16); + let sat = fx.bcx.ins().smin(sat, max_u16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -770,8 +770,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count { let lane = b.value_lane(fx, idx).load_scalar(fx); - let sat = fx.bcx.ins().umax(lane, min_u16); - let sat = fx.bcx.ins().umin(sat, max_u16); + let sat = fx.bcx.ins().smax(lane, min_u16); + let sat = fx.bcx.ins().smin(sat, max_u16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -792,14 +792,14 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( assert_eq!(ret_lane_ty, fx.tcx.types.i16); assert_eq!(lane_count * 2, ret_lane_count); - let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16)); - let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16)); + let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64); + let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64); let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); for idx in 0..lane_count / 2 { let lane = a.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -809,7 +809,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count / 2 { let lane = b.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -819,7 +819,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count / 2 { let lane = a.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -829,7 +829,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count / 2 { let lane = b.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); From da3782a0b4aa6ad31a2e59a17eb712f4ea190a91 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sat, 25 Nov 2023 09:32:50 +0000 Subject: [PATCH 16/17] Rustup to rustc 1.76.0-nightly (37b2813a7 2023-11-24) --- rust-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain b/rust-toolchain index 80ef1e49f2319..2997816d96c7a 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2023-11-16" +channel = "nightly-2023-11-25" components = ["rust-src", "rustc-dev", "llvm-tools"] From 86fc533a7155179f490649571174bc1470edb97b Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sat, 25 Nov 2023 09:40:02 +0000 Subject: [PATCH 17/17] Re-enable rustc test that was disabled due to a rustc bug --- scripts/test_rustc_tests.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scripts/test_rustc_tests.sh b/scripts/test_rustc_tests.sh index cdc78adcf85e3..a299b6de6b1cd 100755 --- a/scripts/test_rustc_tests.sh +++ b/scripts/test_rustc_tests.sh @@ -146,11 +146,6 @@ rm tests/ui/process/nofile-limit.rs # TODO some AArch64 linking issue rm tests/ui/stdio-is-blocking.rs # really slow with unoptimized libstd -# rustc bugs -# ========== -# https://github.com/rust-lang/rust/pull/116447#issuecomment-1790451463 -rm tests/ui/coroutine/gen_block_*.rs - cp ../dist/bin/rustdoc-clif ../dist/bin/rustdoc # some tests expect bin/rustdoc to exist # prevent $(RUSTDOC) from picking up the sysroot built by x.py. It conflicts with the one used by