Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,66 @@ fn codegen_regular_intrinsic_call<'tcx>(
let res = fx.bcx.ins().rotr(x, y);
ret.write_cvalue(fx, CValue::by_val(res, layout));
}
sym::funnel_shl => {
    // Funnel shift left: `(x << s) | (y >> (width - s))` with `s = z % width`,
    // where the `y` term is forced to zero when `s == 0`.
    intrinsic_args!(fx, args => (x, y, z); intrinsic);
    let layout = x.layout();

    // Bit width of the operand type; only ever used as an immediate below.
    let width_bits = layout.size.bits() as i64;

    let lhs_bits = x.load_scalar(fx);
    let rhs_bits = y.load_scalar(fx);
    let raw_shift_bits = z.load_scalar(fx);

    // `zero` has the operand type and is only used as a `select` result.
    // NOTE(review): confirm `iconst` is valid for `ty` when the operands are 128-bit.
    let ty = fx.bcx.func.dfg.value_type(lhs_bits);
    let zero = fx.bcx.ins().iconst(ty, 0);

    // Reduce the shift amount modulo the bit width.
    let shift_bits = fx.bcx.ins().urem_imm(raw_shift_bits, width_bits);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can use urem_imm to avoid the width_bits = iconst(...). Also maybe prefer band_imm with width_bits - 1 as mask?


    // lhs_bits << shift_bits
    let shl = fx.bcx.ins().ishl(lhs_bits, shift_bits);

    // width_bits - shift_bits
    let inv_shift_bits = fx.bcx.ins().irsub_imm(shift_bits, width_bits);

    // rhs_bits.bounded_shr(inv_shift_bits)
    let inv_shift_bits_mod = fx.bcx.ins().urem_imm(inv_shift_bits, width_bits);
    let shr = fx.bcx.ins().ushr(rhs_bits, inv_shift_bits_mod);
    // Compare against an immediate so the comparison happens in the shift amount's
    // own type instead of against `zero`, which has the (possibly different) type `ty`.
    let is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, inv_shift_bits_mod, 0);
    let shr = fx.bcx.ins().select(is_zero, zero, shr);

    let res = fx.bcx.ins().bor(shr, shl);
    ret.write_cvalue(fx, CValue::by_val(res, layout));
}
sym::funnel_shr => {
    // Funnel shift right: `(y >> s) | (x << (width - s))` with `s = z % width`,
    // where the `x` term is forced to zero when `s == 0`.
    intrinsic_args!(fx, args => (x, y, z); intrinsic);
    let layout = x.layout();

    // Bit width of the operand type; only ever used as an immediate below.
    let width_bits = layout.size.bits() as i64;

    let lhs_bits = x.load_scalar(fx);
    let rhs_bits = y.load_scalar(fx);
    let raw_shift_bits = z.load_scalar(fx);

    // `zero` has the operand type and is only used as a `select` result.
    // NOTE(review): confirm `iconst` is valid for `ty` when the operands are 128-bit.
    let ty = fx.bcx.func.dfg.value_type(lhs_bits);
    let zero = fx.bcx.ins().iconst(ty, 0);

    // Reduce the shift amount modulo the bit width.
    let shift_bits = fx.bcx.ins().urem_imm(raw_shift_bits, width_bits);

    // rhs_bits >> shift_bits
    let shr = fx.bcx.ins().ushr(rhs_bits, shift_bits);

    // width_bits - shift_bits
    let inv_shift_bits = fx.bcx.ins().irsub_imm(shift_bits, width_bits);

    // lhs_bits.bounded_shl(inv_shift_bits)
    let inv_shift_bits_mod = fx.bcx.ins().urem_imm(inv_shift_bits, width_bits);
    let shl = fx.bcx.ins().ishl(lhs_bits, inv_shift_bits_mod);
    // Compare against an immediate so the comparison happens in the shift amount's
    // own type instead of against `zero`, which has the (possibly different) type `ty`.
    let is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, inv_shift_bits_mod, 0);
    let shl = fx.bcx.ins().select(is_zero, zero, shl);

    let res = fx.bcx.ins().bor(shr, shl);
    ret.write_cvalue(fx, CValue::by_val(res, layout));
}

// The only difference between offset and arith_offset is regarding UB. Because Cranelift
// doesn't have UB both are codegen'ed the same way
Expand Down
49 changes: 49 additions & 0 deletions compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,8 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
| sym::bitreverse
| sym::rotate_left
| sym::rotate_right
| sym::funnel_shl
| sym::funnel_shr
| sym::saturating_add
| sym::saturating_sub => {
match int_type_width_signed(args[0].layout.ty, self) {
Expand Down Expand Up @@ -505,6 +507,53 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
self.rotate_right(val, raw_shift, width)
}
}
sym::funnel_shl => {
    // Funnel shift left: `(hi << s) | (lo >> (width - s))` with `s = amount % width`;
    // the `lo` term is replaced by zero when the complementary count wraps to 0.
    let hi = args[0].immediate();
    let lo = args[1].immediate();
    let amount = args[2].immediate();

    // All shift-count arithmetic is done in the type of the shift argument.
    let count_ty = amount.get_type();
    let bit_width = self.cx.gcc_uint(count_ty, width as u64);
    let count = self.gcc_urem(amount, bit_width);

    // Contribution of the first operand: hi << count
    let hi_part = self.gcc_shl(hi, count);

    // Contribution of the second operand: lo >> (bit_width - count), or zero
    // when that complementary count reduces to 0.
    let complement = self.gcc_sub(bit_width, count);
    let complement_mod = self.gcc_urem(complement, bit_width);
    let lo_shifted = self.gcc_lshr(lo, complement_mod);
    let zero = self.cx.gcc_uint(hi.get_type(), 0);
    let complement_is_zero = self.gcc_icmp(IntPredicate::IntEQ, complement_mod, zero);
    let lo_part = self.select(complement_is_zero, zero, lo_shifted);

    self.or(hi_part, lo_part)
}
sym::funnel_shr => {
    // Funnel shift right: `(lo >> s) | (hi << (width - s))` with `s = amount % width`;
    // the `hi` term is replaced by zero when the complementary count wraps to 0.
    let hi = args[0].immediate();
    let lo = args[1].immediate();
    let amount = args[2].immediate();

    // All shift-count arithmetic is done in the type of the shift argument.
    let count_ty = amount.get_type();
    let bit_width = self.cx.gcc_uint(count_ty, width as u64);
    let count = self.gcc_urem(amount, bit_width);

    // Contribution of the second operand: lo >> count
    let lo_part = self.gcc_lshr(lo, count);

    // Contribution of the first operand: hi << (bit_width - count), or zero
    // when that complementary count reduces to 0.
    let complement = self.gcc_sub(bit_width, count);
    let complement_mod = self.gcc_urem(complement, bit_width);
    let hi_shifted = self.gcc_shl(hi, complement_mod);
    let zero = self.cx.gcc_uint(hi.get_type(), 0);
    let complement_is_zero = self.gcc_icmp(IntPredicate::IntEQ, complement_mod, zero);
    let hi_part = self.select(complement_is_zero, zero, hi_shifted);

    self.or(hi_part, lo_part)
}
sym::saturating_add => self.saturating_add(
args[0].immediate(),
args[1].immediate(),
Expand Down
17 changes: 16 additions & 1 deletion compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,9 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
| sym::rotate_left
| sym::rotate_right
| sym::saturating_add
| sym::saturating_sub => {
| sym::saturating_sub
| sym::funnel_shl
| sym::funnel_shr => {
let ty = args[0].layout.ty;
if !ty.is_integral() {
tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
Expand Down Expand Up @@ -437,6 +439,19 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {

self.call_intrinsic(llvm_name, &[llty], &[val, val, raw_shift])
}
sym::funnel_shl | sym::funnel_shr => {
    // Both directions lower to the matching `llvm.fshl` / `llvm.fshr` intrinsic.
    let hi = args[0].immediate();
    let lo = args[1].immediate();
    let amount = args[2].immediate();

    let direction = if name == sym::funnel_shl { 'l' } else { 'r' };
    let llvm_name = format!("llvm.fsh{direction}");

    // Rust always passes the shift amount as `u32`, whereas LLVM wants it in the
    // same type as the shifted values, so convert it (unsigned) first.
    let amount = self.intcast(amount, self.val_ty(hi), false);

    self.call_intrinsic(llvm_name, &[llty], &[hi, lo, amount])
}
sym::saturating_add | sym::saturating_sub => {
let is_add = name == sym::saturating_add;
let lhs = args[0].immediate();
Expand Down
27 changes: 27 additions & 0 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,33 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
let result = Scalar::from_uint(truncated_bits, layout_val.size);
self.write_scalar(result, dest)?;
}
sym::funnel_shl | sym::funnel_shr => {
    // With A = args[0], B = args[1], BW = bit width of T and S = args[2] % BW:
    //   funnel_shl: (A << S) | (B >> (BW - S))
    //   funnel_shr: (B >> S) | (A << (BW - S))
    // The `BW - S` shift is an unbounded one (it may equal BW), and the result is
    // truncated back to the size of T at the end.
    let operand_layout = self.layout_of(instance_args.type_at(0))?;
    let size = operand_layout.size;

    // Only the raw bits matter here; signedness is ignored.
    let hi = self.read_scalar(&args[0])?.to_bits(size)?;
    let lo = self.read_scalar(&args[1])?.to_bits(size)?;
    let amount = self.read_scalar(&args[2])?.to_u32()?;

    // Reduce the amount modulo T::BITS to circumvent panics/UB in the shifts.
    let bit_width = u32::try_from(size.bits()).unwrap();
    let amount = amount % bit_width;
    let complement = bit_width - amount;

    let is_left = intrinsic_name == sym::funnel_shl;
    let combined = if is_left {
        (hi << amount) | lo.unbounded_shr(complement)
    } else {
        (lo >> amount) | hi.unbounded_shl(complement)
    };

    let result = Scalar::from_uint(size.truncate(combined), size);
    self.write_scalar(result, dest)?;
}
sym::copy => {
self.copy_intrinsic(&args[0], &args[1], &args[2], /*nonoverlapping*/ false)?;
}
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi
| sym::saturating_sub
| sym::rotate_left
| sym::rotate_right
| sym::funnel_shl
| sym::funnel_shr
| sym::ctpop
| sym::ctlz
| sym::cttz
Expand Down Expand Up @@ -449,6 +451,9 @@ pub(crate) fn check_intrinsic_type(
}
sym::unchecked_shl | sym::unchecked_shr => (2, 0, vec![param(0), param(1)], param(0)),
sym::rotate_left | sym::rotate_right => (1, 0, vec![param(0), tcx.types.u32], param(0)),
sym::funnel_shl | sym::funnel_shr => {
    // Two operands of the generic type plus a `u32` shift amount; the result has
    // the same generic type as the operands.
    let inputs = vec![param(0), param(0), tcx.types.u32];
    (1, 0, inputs, param(0))
}
sym::unchecked_add | sym::unchecked_sub | sym::unchecked_mul => {
(1, 0, vec![param(0), param(0)], param(0))
}
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,8 @@ symbols! {
fsub_fast,
full,
fundamental,
funnel_shl,
funnel_shr,
fused_iterator,
future,
future_drop_poll,
Expand Down
42 changes: 42 additions & 0 deletions library/core/src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2102,6 +2102,48 @@ pub const fn saturating_add<T: Copy>(a: T, b: T) -> T;
#[rustc_intrinsic]
pub const fn saturating_sub<T: Copy>(a: T, b: T) -> T;

/// Funnel Shift left.
///
/// Concatenates `a` and `b` (with `a` in the most significant half),
/// creating an integer twice as wide. Then shift this integer left
/// by `shift` (taken modulo the bit size of `T`), and extract the
/// most significant half. If `a` and `b` are the same, this is equivalent
/// to a rotate left operation.
///
/// Note that, unlike most intrinsics, this is safe to call;
/// it does not require an `unsafe` block.
/// Therefore, implementations must not require the user to uphold
/// any safety invariants.
///
/// Safer versions of this intrinsic are available on the integer primitives
/// via the `funnel_shl` method. For example, [`u32::funnel_shl`].
#[rustc_intrinsic]
#[rustc_nounwind]
#[rustc_const_unstable(feature = "funnel_shifts", issue = "145686")]
#[unstable(feature = "funnel_shifts", issue = "145686")]
pub const fn funnel_shl<T: Copy>(a: T, b: T, shift: u32) -> T;

/// Funnel Shift right.
///
/// Concatenates `a` and `b` (with `a` in the most significant half),
/// creating an integer twice as wide. Then shift this integer right
/// by `shift` (taken modulo the bit size of `T`), and extract the
/// least significant half. If `a` and `b` are the same, this is equivalent
/// to a rotate right operation.
///
/// Note that, unlike most intrinsics, this is safe to call;
/// it does not require an `unsafe` block.
/// Therefore, implementations must not require the user to uphold
/// any safety invariants.
Comment on lines +2134 to +2137
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you update the comment/signature of the simd versions of this intrinsic to also reflect that it is safe to call?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to do that? There are other backends where making such guarantees might require more effort. For scalars, it's quite easy, so we can make the guarantee

///
/// Safer versions of this intrinsic are available on the integer primitives
/// via the `funnel_shr` method. For example, [`u32::funnel_shr`].
#[rustc_intrinsic]
#[rustc_nounwind]
#[rustc_const_unstable(feature = "funnel_shifts", issue = "145686")]
#[unstable(feature = "funnel_shifts", issue = "145686")]
pub const fn funnel_shr<T: Copy>(a: T, b: T, shift: u32) -> T;

/// This is an implementation detail of [`crate::ptr::read`] and should
/// not be used anywhere else. See its comments for why this exists.
///
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@
#![feature(f128)]
#![feature(freeze_impls)]
#![feature(fundamental)]
#![feature(funnel_shifts)]
#![feature(if_let_guard)]
#![feature(intra_doc_pointers)]
#![feature(intrinsics)]
Expand Down
24 changes: 24 additions & 0 deletions library/core/src/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,9 @@ impl u8 {
rot = 2,
rot_op = "0x82",
rot_result = "0xa",
fsh_op = "0x36",
fshl_result = "0x8",
fshr_result = "0x8d",
swap_op = "0x12",
swapped = "0x12",
reversed = "0x48",
Expand Down Expand Up @@ -1088,6 +1091,9 @@ impl u16 {
rot = 4,
rot_op = "0xa003",
rot_result = "0x3a",
fsh_op = "0x2de",
fshl_result = "0x30",
fshr_result = "0x302d",
swap_op = "0x1234",
swapped = "0x3412",
reversed = "0x2c48",
Expand Down Expand Up @@ -1135,6 +1141,9 @@ impl u32 {
rot = 8,
rot_op = "0x10000b3",
rot_result = "0xb301",
fsh_op = "0x2fe78e45",
fshl_result = "0xb32f",
fshr_result = "0xb32fe78e",
swap_op = "0x12345678",
swapped = "0x78563412",
reversed = "0x1e6a2c48",
Expand All @@ -1158,6 +1167,9 @@ impl u64 {
rot = 12,
rot_op = "0xaa00000000006e1",
rot_result = "0x6e10aa",
fsh_op = "0x2fe78e45983acd98",
fshl_result = "0x6e12fe",
fshr_result = "0x6e12fe78e45983ac",
swap_op = "0x1234567890123456",
swapped = "0x5634129078563412",
reversed = "0x6a2c48091e6a2c48",
Expand All @@ -1181,6 +1193,9 @@ impl u128 {
rot = 16,
rot_op = "0x13f40000000000000000000000004f76",
rot_result = "0x4f7613f4",
fsh_op = "0x2fe78e45983acd98039000008736273",
fshl_result = "0x4f7602fe",
fshr_result = "0x4f7602fe78e45983acd9803900000873",
swap_op = "0x12345678901234567890123456789012",
swapped = "0x12907856341290785634129078563412",
reversed = "0x48091e6a2c48091e6a2c48091e6a2c48",
Expand All @@ -1207,6 +1222,9 @@ impl usize {
rot = 4,
rot_op = "0xa003",
rot_result = "0x3a",
// Funnel-shift example values mirror the `u16` ones, matching the `u16`
// `rot` / `rot_op` used by this block (the previous values were the `u128` ones).
fsh_op = "0x2de",
fshl_result = "0x30",
fshr_result = "0x302d",
swap_op = "0x1234",
swapped = "0x3412",
reversed = "0x2c48",
Expand All @@ -1231,6 +1249,9 @@ impl usize {
rot = 8,
rot_op = "0x10000b3",
rot_result = "0xb301",
fsh_op = "0x2fe78e45",
fshl_result = "0xb32f",
fshr_result = "0xb32fe78e",
swap_op = "0x12345678",
swapped = "0x78563412",
reversed = "0x1e6a2c48",
Expand All @@ -1255,6 +1276,9 @@ impl usize {
rot = 12,
rot_op = "0xaa00000000006e1",
rot_result = "0x6e10aa",
fsh_op = "0x2fe78e45983acd98",
fshl_result = "0x6e12fe",
fshr_result = "0x6e12fe78e45983ac",
swap_op = "0x1234567890123456",
swapped = "0x5634129078563412",
reversed = "0x6a2c48091e6a2c48",
Expand Down
Loading
Loading