diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
index ed40901ac9b8b..e0d0e580a2407 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -27,8 +27,8 @@ use rustc_span::{Symbol, sym};
 pub(crate) use self::llvm::codegen_llvm_intrinsic_call;
 use crate::cast::clif_intcast;
-use crate::codegen_f16_f128;
 use crate::prelude::*;
+use crate::{codegen_f16_f128, common};
 
 fn bug_on_incorrect_arg_count(intrinsic: impl std::fmt::Display) -> ! {
     bug!("wrong number of args for intrinsic {}", intrinsic);
@@ -655,6 +655,64 @@ fn codegen_regular_intrinsic_call<'tcx>(
             let res = fx.bcx.ins().rotr(x, y);
             ret.write_cvalue(fx, CValue::by_val(res, layout));
         }
+        sym::funnel_shl => {
+            intrinsic_args!(fx, args => (x, y, z); intrinsic);
+            let layout = x.layout();
+
+            let width_bits = layout.size.bits() as i64;
+
+            let lhs_bits = x.load_scalar(fx);
+            let rhs_bits = y.load_scalar(fx);
+            let raw_shift_bits = z.load_scalar(fx);
+
+            let ty = fx.bcx.func.dfg.value_type(lhs_bits);
+            let zero = common::type_zero_value(&mut fx.bcx, ty);
+
+            let shift_bits = fx.bcx.ins().band_imm(raw_shift_bits, width_bits - 1);
+
+            // lhs_bits << shift_bits
+            let shl = fx.bcx.ins().ishl(lhs_bits, shift_bits);
+
+            let inv_shift_bits = fx.bcx.ins().irsub_imm(shift_bits, width_bits);
+
+            // rhs_bits.unbounded_shr(inv_shift_bits)
+            let inv_shift_bits_mod = fx.bcx.ins().band_imm(inv_shift_bits, width_bits - 1);
+            let shr = fx.bcx.ins().ushr(rhs_bits, inv_shift_bits_mod);
+            let is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, shift_bits, 0);
+            let shr = fx.bcx.ins().select(is_zero, zero, shr);
+
+            let res = fx.bcx.ins().bor(shr, shl);
+            ret.write_cvalue(fx, CValue::by_val(res, layout));
+        }
+        sym::funnel_shr => {
+            intrinsic_args!(fx, args => (x, y, z); intrinsic);
+            let layout = x.layout();
+
+            let width_bits = layout.size.bits() as i64;
+
+            let lhs_bits = x.load_scalar(fx);
+            let rhs_bits = y.load_scalar(fx);
+            let raw_shift_bits = z.load_scalar(fx);
+
+            let ty = fx.bcx.func.dfg.value_type(lhs_bits);
+            let zero = common::type_zero_value(&mut fx.bcx, ty);
+
+            let shift_bits = fx.bcx.ins().band_imm(raw_shift_bits, width_bits - 1);
+
+            // rhs_bits >> shift_bits
+            let shr = fx.bcx.ins().ushr(rhs_bits, shift_bits);
+
+            let inv_shift_bits = fx.bcx.ins().irsub_imm(shift_bits, width_bits);
+
+            // lhs_bits.unbounded_shl(inv_shift_bits)
+            let inv_shift_bits_mod = fx.bcx.ins().band_imm(inv_shift_bits, width_bits - 1);
+            let shl = fx.bcx.ins().ishl(lhs_bits, inv_shift_bits_mod);
+            let is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, shift_bits, 0);
+            let shl = fx.bcx.ins().select(is_zero, zero, shl);
+
+            let res = fx.bcx.ins().bor(shr, shl);
+            ret.write_cvalue(fx, CValue::by_val(res, layout));
+        }
         // The only difference between offset and arith_offset is regarding UB. Because Cranelift
         // doesn't have UB both are codegen'ed the same way
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
index eb0a5336a1f13..c3bcbadf71346 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -442,6 +442,8 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
             | sym::bitreverse
             | sym::rotate_left
             | sym::rotate_right
+            | sym::funnel_shl
+            | sym::funnel_shr
             | sym::saturating_add
             | sym::saturating_sub => {
                 match int_type_width_signed(args[0].layout.ty, self) {
@@ -505,6 +507,53 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
                                self.rotate_right(val, raw_shift, width)
                            }
                        }
+                        sym::funnel_shl => {
+                            let lhs_bits = args[0].immediate();
+                            let rhs_bits = args[1].immediate();
+                            let raw_shift_bits = args[2].immediate();
+
+                            let width_ty = raw_shift_bits.get_type();
+                            let width_bits = self.cx.gcc_uint(width_ty, width as u64);
+                            let shift_bits = self.gcc_urem(raw_shift_bits, width_bits);
+
+                            // lhs_bits << shift_bits
+                            let shl = self.gcc_shl(lhs_bits, shift_bits);
+
+                            // rhs_bits.unbounded_shr(inv_shift_bits)
+                            let inv_shift_bits = self.gcc_sub(width_bits, shift_bits);
+                            let inv_shift_bits_mod = self.gcc_urem(inv_shift_bits, width_bits);
+                            let shr = self.gcc_lshr(rhs_bits, inv_shift_bits_mod);
+                            let zero = self.cx.gcc_uint(lhs_bits.get_type(), 0);
+                            let is_zero =
+                                self.gcc_icmp(IntPredicate::IntEQ, inv_shift_bits_mod, zero);
+                            let shr = self.select(is_zero, zero, shr);
+
+                            self.or(shl, shr)
+                        }
+                        sym::funnel_shr => {
+                            let lhs_bits = args[0].immediate();
+                            let rhs_bits = args[1].immediate();
+                            let raw_shift_bits = args[2].immediate();
+
+                            let width_ty = raw_shift_bits.get_type();
+                            let width_bits = self.cx.gcc_uint(width_ty, width as u64);
+                            let shift_bits = self.gcc_urem(raw_shift_bits, width_bits);
+
+                            // rhs_bits >> shift_bits
+                            let shr = self.gcc_lshr(rhs_bits, shift_bits);
+
+                            let inv_shift_bits = self.gcc_sub(width_bits, shift_bits);
+
+                            // lhs_bits.unbounded_shl(inv_shift_bits)
+                            let inv_shift_bits_mod = self.gcc_urem(inv_shift_bits, width_bits);
+                            let shl = self.gcc_shl(lhs_bits, inv_shift_bits_mod);
+                            let zero = self.cx.gcc_uint(lhs_bits.get_type(), 0);
+                            let is_zero =
+                                self.gcc_icmp(IntPredicate::IntEQ, inv_shift_bits_mod, zero);
+                            let shl = self.select(is_zero, zero, shl);
+
+                            self.or(shl, shr)
+                        }
                        sym::saturating_add => self.saturating_add(
                            args[0].immediate(),
                            args[1].immediate(),
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 49d3dedbeabdf..12f0c1f097358 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -383,7 +383,9 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             | sym::rotate_left
             | sym::rotate_right
             | sym::saturating_add
-            | sym::saturating_sub => {
+            | sym::saturating_sub
+            | sym::funnel_shl
+            | sym::funnel_shr => {
                 let ty = args[0].layout.ty;
                 if !ty.is_integral() {
                     tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
@@ -437,6 +439,19 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     self.call_intrinsic(llvm_name, &[llty], &[val, val, raw_shift])
                 }
+                sym::funnel_shl | sym::funnel_shr => {
+                    let is_left = name == sym::funnel_shl;
+                    let lhs = args[0].immediate();
+                    let rhs = args[1].immediate();
+                    let raw_shift = args[2].immediate();
+                    let llvm_name = format!("llvm.fsh{}", if is_left { 'l' } else { 'r' });
+
+                    // LLVM expects the shift to have the same type as the values, but Rust
+                    // always uses `u32`.
+                    let raw_shift = self.intcast(raw_shift, self.val_ty(lhs), false);
+
+                    self.call_intrinsic(llvm_name, &[llty], &[lhs, rhs, raw_shift])
+                }
                 sym::saturating_add | sym::saturating_sub => {
                     let is_add = name == sym::saturating_add;
                     let lhs = args[0].immediate();
diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs
index 5e3d0a15d8bc1..27a2bdb53c2bf 100644
--- a/compiler/rustc_const_eval/src/interpret/intrinsics.rs
+++ b/compiler/rustc_const_eval/src/interpret/intrinsics.rs
@@ -302,6 +302,33 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
                 let result = Scalar::from_uint(truncated_bits, layout_val.size);
                 self.write_scalar(result, dest)?;
             }
+            sym::funnel_shl | sym::funnel_shr => {
+                // funnel_shl: (A << (S % BW)) | B.unbounded_shr(BW - (S % BW))
+                // funnel_shr: A.unbounded_shl(BW - (S % BW)) | (B >> (S % BW))
+                let layout_val = self.layout_of(instance_args.type_at(0))?;
+
+                let lhs = self.read_scalar(&args[0])?;
+                let lhs_bits = lhs.to_bits(layout_val.size)?; // sign is ignored here
+
+                let rhs = self.read_scalar(&args[1])?;
+                let rhs_bits = rhs.to_bits(layout_val.size)?; // sign is ignored here
+
+                let raw_shift = self.read_scalar(&args[2])?;
+                let raw_shift_bits = raw_shift.to_u32()?;
+
+                // The shift amount is taken modulo `T::BITS`, which avoids panics/UB.
+                let width_bits = u32::try_from(layout_val.size.bits()).unwrap();
+                let shift_bits = raw_shift_bits % width_bits;
+                let inv_shift_bits = width_bits - shift_bits;
+                let result_bits = if intrinsic_name == sym::funnel_shl {
+                    (lhs_bits << shift_bits) | rhs_bits.unbounded_shr(inv_shift_bits)
+                } else {
+                    (rhs_bits >> shift_bits) | lhs_bits.unbounded_shl(inv_shift_bits)
+                };
+                let truncated_bits = layout_val.size.truncate(result_bits);
+                let result = Scalar::from_uint(truncated_bits, layout_val.size);
+                self.write_scalar(result, dest)?;
+            }
             sym::copy => {
                 self.copy_intrinsic(&args[0], &args[1], &args[2], /*nonoverlapping*/ false)?;
             }
diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
index cfc6bc2f3a0a9..458673b4bc6bf 100644
--- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs
+++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -85,6 +85,8 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi
         | sym::saturating_sub
         | sym::rotate_left
         | sym::rotate_right
+        | sym::funnel_shl
+        | sym::funnel_shr
         | sym::ctpop
         | sym::ctlz
         | sym::cttz
@@ -449,6 +451,9 @@ pub(crate) fn check_intrinsic_type(
         }
         sym::unchecked_shl | sym::unchecked_shr => (2, 0, vec![param(0), param(1)], param(0)),
         sym::rotate_left | sym::rotate_right => (1, 0, vec![param(0), tcx.types.u32], param(0)),
+        sym::funnel_shl | sym::funnel_shr => {
+            (1, 0, vec![param(0), param(0), tcx.types.u32], param(0))
+        }
         sym::unchecked_add | sym::unchecked_sub | sym::unchecked_mul => {
             (1, 0, vec![param(0), param(0)], param(0))
         }
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index dcb1becc957db..50c95e8084c0f 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -1096,6 +1096,8 @@ symbols! {
         fsub_fast,
         full,
         fundamental,
+        funnel_shl,
+        funnel_shr,
         fused_iterator,
         future,
         future_drop_poll,
diff --git a/library/core/src/intrinsics/mod.rs b/library/core/src/intrinsics/mod.rs
index 904aa52c7845b..6bedb975314ff 100644
--- a/library/core/src/intrinsics/mod.rs
+++ b/library/core/src/intrinsics/mod.rs
@@ -2102,6 +2102,48 @@ pub const fn saturating_add<T: Copy>(a: T, b: T) -> T;
 #[rustc_intrinsic]
 pub const fn saturating_sub<T: Copy>(a: T, b: T) -> T;
 
+/// Funnel shift left.
+///
+/// Concatenates `a` and `b` (with `a` in the most significant half),
+/// creating an integer twice as wide. Then shifts this integer left
+/// by `shift` (taken modulo the bit size of `T`) and extracts the
+/// most significant half. If `a` and `b` are the same, this is equivalent
+/// to a rotate left operation.
+///
+/// Note that, unlike most intrinsics, this is safe to call;
+/// it does not require an `unsafe` block.
+/// Therefore, implementations must not require the user to uphold
+/// any safety invariants.
+///
+/// Safer versions of this intrinsic are available on the integer primitives
+/// via the `funnel_shl` method. For example, [`u32::funnel_shl`].
+#[rustc_intrinsic]
+#[rustc_nounwind]
+#[rustc_const_unstable(feature = "funnel_shifts", issue = "145686")]
+#[unstable(feature = "funnel_shifts", issue = "145686")]
+pub const fn funnel_shl<T: Copy>(a: T, b: T, shift: u32) -> T;
+
+/// Funnel shift right.
+///
+/// Concatenates `a` and `b` (with `a` in the most significant half),
+/// creating an integer twice as wide. Then shifts this integer right
+/// by `shift` (taken modulo the bit size of `T`) and extracts the
+/// least significant half. If `a` and `b` are the same, this is equivalent
+/// to a rotate right operation.
+///
+/// Note that, unlike most intrinsics, this is safe to call;
+/// it does not require an `unsafe` block.
+/// Therefore, implementations must not require the user to uphold
+/// any safety invariants.
+///
+/// Safer versions of this intrinsic are available on the integer primitives
+/// via the `funnel_shr` method. For example, [`u32::funnel_shr`].
+#[rustc_intrinsic]
+#[rustc_nounwind]
+#[rustc_const_unstable(feature = "funnel_shifts", issue = "145686")]
+#[unstable(feature = "funnel_shifts", issue = "145686")]
+pub const fn funnel_shr<T: Copy>(a: T, b: T, shift: u32) -> T;
+
 /// This is an implementation detail of [`crate::ptr::read`] and should
 /// not be used anywhere else. See its comments for why this exists.
 ///
diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
index 71abd707374cf..fb6cacb0e0076 100644
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@@ -156,6 +156,7 @@
 #![feature(f128)]
 #![feature(freeze_impls)]
 #![feature(fundamental)]
+#![feature(funnel_shifts)]
 #![feature(if_let_guard)]
 #![feature(intra_doc_pointers)]
 #![feature(intrinsics)]
diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs
index acfe38b7a37b5..e2987b0b7e59d 100644
--- a/library/core/src/num/mod.rs
+++ b/library/core/src/num/mod.rs
@@ -454,6 +454,9 @@ impl u8 {
         rot = 2,
         rot_op = "0x82",
         rot_result = "0xa",
+        fsh_op = "0x36",
+        fshl_result = "0x8",
+        fshr_result = "0x8d",
         swap_op = "0x12",
         swapped = "0x12",
         reversed = "0x48",
@@ -1088,6 +1091,9 @@ impl u16 {
         rot = 4,
         rot_op = "0xa003",
         rot_result = "0x3a",
+        fsh_op = "0x2de",
+        fshl_result = "0x30",
+        fshr_result = "0x302d",
         swap_op = "0x1234",
         swapped = "0x3412",
         reversed = "0x2c48",
@@ -1135,6 +1141,9 @@ impl u32 {
         rot = 8,
         rot_op = "0x10000b3",
         rot_result = "0xb301",
+        fsh_op = "0x2fe78e45",
+        fshl_result = "0xb32f",
+        fshr_result = "0xb32fe78e",
         swap_op = "0x12345678",
         swapped = "0x78563412",
         reversed = "0x1e6a2c48",
@@ -1158,6 +1167,9 @@ impl u64 {
         rot = 12,
         rot_op = "0xaa00000000006e1",
         rot_result = "0x6e10aa",
+        fsh_op = "0x2fe78e45983acd98",
+        fshl_result = "0x6e12fe",
+        fshr_result = "0x6e12fe78e45983ac",
         swap_op = "0x1234567890123456",
         swapped = "0x5634129078563412",
         reversed = "0x6a2c48091e6a2c48",
@@ -1181,6 +1193,9 @@ impl u128 {
         rot = 16,
         rot_op = "0x13f40000000000000000000000004f76",
         rot_result = "0x4f7613f4",
+        fsh_op = "0x2fe78e45983acd98039000008736273",
+        fshl_result = "0x4f7602fe",
+        fshr_result = "0x4f7602fe78e45983acd9803900000873",
         swap_op = "0x12345678901234567890123456789012",
         swapped = "0x12907856341290785634129078563412",
         reversed = "0x48091e6a2c48091e6a2c48091e6a2c48",
@@ -1207,6 +1222,9 @@ impl usize {
         rot = 4,
         rot_op = "0xa003",
         rot_result = "0x3a",
+        fsh_op = "0x2de",
+        fshl_result = "0x30",
+        fshr_result = "0x302d",
         swap_op = "0x1234",
         swapped = "0x3412",
         reversed = "0x2c48",
@@ -1231,6 +1249,9 @@ impl usize {
         rot = 8,
         rot_op = "0x10000b3",
         rot_result = "0xb301",
+        fsh_op = "0x2fe78e45",
+        fshl_result = "0xb32f",
+        fshr_result = "0xb32fe78e",
         swap_op = "0x12345678",
         swapped = "0x78563412",
         reversed = "0x1e6a2c48",
@@ -1255,6 +1276,9 @@ impl usize {
         rot = 12,
         rot_op = "0xaa00000000006e1",
         rot_result = "0x6e10aa",
+        fsh_op = "0x2fe78e45983acd98",
+        fshl_result = "0x6e12fe",
+        fshr_result = "0x6e12fe78e45983ac",
         swap_op = "0x1234567890123456",
         swapped = "0x5634129078563412",
         reversed = "0x6a2c48091e6a2c48",
diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs
index 10d9498d15e45..ed1ed944f10e6 100644
--- a/library/core/src/num/uint_macros.rs
+++ b/library/core/src/num/uint_macros.rs
@@ -14,6 +14,9 @@ macro_rules! uint_impl {
         rot = $rot:literal,
         rot_op = $rot_op:literal,
         rot_result = $rot_result:literal,
+        fsh_op = $fsh_op:literal,
+        fshl_result = $fshl_result:literal,
+        fshr_result = $fshr_result:literal,
         swap_op = $swap_op:literal,
         swapped = $swapped:literal,
         reversed = $reversed:literal,
@@ -375,6 +378,64 @@ macro_rules! uint_impl {
             return intrinsics::rotate_right(self, n);
         }
 
+        /// Performs a left funnel shift (concatenates `self` and `rhs`, with `self`
+        /// making up the most significant half, then shifts the combined value left
+        /// by `n` and extracts the most significant half to produce the result).
+        ///
+        /// Please note this isn't the same operation as the `<<` shifting operator or
+        /// [`rotate_left`](Self::rotate_left), although `a.funnel_shl(a, n)` is *equivalent*
+        /// to `a.rotate_left(n)`.
+        ///
+        /// # Examples
+        ///
+        /// Basic usage:
+        ///
+        /// ```
+        /// #![feature(funnel_shifts)]
+        #[doc = concat!("let a = ", $rot_op, stringify!($SelfT), ";")]
+        #[doc = concat!("let b = ", $fsh_op, stringify!($SelfT), ";")]
+        #[doc = concat!("let m = ", $fshl_result, ";")]
+        ///
+        #[doc = concat!("assert_eq!(a.funnel_shl(b, ", $rot, "), m);")]
+        /// ```
+        #[rustc_const_unstable(feature = "funnel_shifts", issue = "145686")]
+        #[unstable(feature = "funnel_shifts", issue = "145686")]
+        #[must_use = "this returns the result of the operation, \
+                      without modifying the original"]
+        #[inline(always)]
+        pub const fn funnel_shl(self, rhs: Self, n: u32) -> Self {
+            return intrinsics::funnel_shl(self, rhs, n);
+        }
+
+        /// Performs a right funnel shift (concatenates `self` and `rhs`, with `self`
+        /// making up the most significant half, then shifts the combined value right
+        /// by `n` and extracts the least significant half to produce the result).
+        ///
+        /// Please note this isn't the same operation as the `>>` shifting operator or
+        /// [`rotate_right`](Self::rotate_right), although `a.funnel_shr(a, n)` is *equivalent*
+        /// to `a.rotate_right(n)`.
+        ///
+        /// # Examples
+        ///
+        /// Basic usage:
+        ///
+        /// ```
+        /// #![feature(funnel_shifts)]
+        #[doc = concat!("let a = ", $rot_op, stringify!($SelfT), ";")]
+        #[doc = concat!("let b = ", $fsh_op, stringify!($SelfT), ";")]
+        #[doc = concat!("let m = ", $fshr_result, ";")]
+        ///
+        #[doc = concat!("assert_eq!(a.funnel_shr(b, ", $rot, "), m);")]
+        /// ```
+        #[rustc_const_unstable(feature = "funnel_shifts", issue = "145686")]
+        #[unstable(feature = "funnel_shifts", issue = "145686")]
+        #[must_use = "this returns the result of the operation, \
+                      without modifying the original"]
+        #[inline(always)]
+        pub const fn funnel_shr(self, rhs: Self, n: u32) -> Self {
+            return intrinsics::funnel_shr(self, rhs, n);
+        }
+
         /// Reverses the byte order of the integer.
         ///
         /// # Examples
diff --git a/library/coretests/tests/lib.rs b/library/coretests/tests/lib.rs
index d2281b1df2ffc..c16c344776dd0 100644
--- a/library/coretests/tests/lib.rs
+++ b/library/coretests/tests/lib.rs
@@ -50,6 +50,7 @@
 #![feature(fmt_internals)]
 #![feature(formatting_options)]
 #![feature(freeze)]
+#![feature(funnel_shifts)]
 #![feature(future_join)]
 #![feature(generic_assert_internals)]
 #![feature(hasher_prefixfree_extras)]
diff --git a/library/coretests/tests/num/uint_macros.rs b/library/coretests/tests/num/uint_macros.rs
index c7d10ea4d880a..57d6cd080c99f 100644
--- a/library/coretests/tests/num/uint_macros.rs
+++ b/library/coretests/tests/num/uint_macros.rs
@@ -104,6 +104,23 @@ macro_rules! uint_module {
             assert_eq_const_safe!($T: C.rotate_left(128), C);
         }
 
+        fn test_funnel_shift() {
+            // Shifting by 0 should have no effect
+            assert_eq_const_safe!($T: <$T>::funnel_shl(A, B, 0), A);
+            assert_eq_const_safe!($T: <$T>::funnel_shr(A, B, 0), B);
+
+            // Shifting by a multiple of `T::BITS` should also have no effect
+            assert_eq_const_safe!($T: <$T>::funnel_shl(A, B, $T::BITS), A);
+            assert_eq_const_safe!($T: <$T>::funnel_shr(A, B, $T::BITS), B);
+
+            assert_eq_const_safe!($T: <$T>::funnel_shl(_0, _1, 4), 0b1111);
+            assert_eq_const_safe!($T: <$T>::funnel_shr(_0, _1, 4), _1 >> 4);
+
+            // The shift amount is taken modulo `T::BITS`.
+            assert_eq_const_safe!($T: <$T>::funnel_shl(_0, _1, $T::BITS + 4), 0b1111);
+            assert_eq_const_safe!($T: <$T>::funnel_shr(_0, _1, $T::BITS + 4), _1 >> 4);
+        }
+
         fn test_swap_bytes() {
             assert_eq_const_safe!($T: A.swap_bytes().swap_bytes(), A);
             assert_eq_const_safe!($T: B.swap_bytes().swap_bytes(), B);
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index ab417b6c72f9b..75018424c4e92 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -296,6 +296,7 @@
 #![feature(f128)]
 #![feature(ffi_const)]
 #![feature(formatting_options)]
+#![feature(funnel_shifts)]
 #![feature(hash_map_internals)]
 #![feature(hash_map_macro)]
 #![feature(if_let_guard)]
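
For reference, the semantics that all four implementations above agree on can be written in a few lines of plain Rust. The sketch below is illustrative only and is not part of the patch: the `*_model` names are made up, the code is specialized to `u32`, and it assumes a toolchain where the stable `unbounded_shl`/`unbounded_shr` methods are available.

```rust
/// funnel_shl: (a << (s % BITS)) | b.unbounded_shr(BITS - (s % BITS))
fn funnel_shl_model(a: u32, b: u32, shift: u32) -> u32 {
    let s = shift % u32::BITS; // the shift amount is taken modulo the width
    // `unbounded_shr(32)` yields 0, which covers the `s == 0` case.
    (a << s) | b.unbounded_shr(u32::BITS - s)
}

/// funnel_shr: a.unbounded_shl(BITS - (s % BITS)) | (b >> (s % BITS))
fn funnel_shr_model(a: u32, b: u32, shift: u32) -> u32 {
    let s = shift % u32::BITS;
    (b >> s) | a.unbounded_shl(u32::BITS - s)
}

fn main() {
    // Shifting by 0 (or by a multiple of the width) returns one input
    // unchanged, matching the coretests added above.
    assert_eq!(funnel_shl_model(0xdead_beef, 0x1234_5678, 0), 0xdead_beef);
    assert_eq!(funnel_shr_model(0xdead_beef, 0x1234_5678, 32), 0x1234_5678);

    // With both inputs equal, a funnel shift degenerates into a rotate.
    let a = 0x0100_00b3_u32;
    assert_eq!(funnel_shl_model(a, a, 8), a.rotate_left(8));

    // The u32 doc-test values from num/mod.rs.
    assert_eq!(funnel_shl_model(0x0100_00b3, 0x2fe7_8e45, 8), 0xb32f);
    assert_eq!(funnel_shr_model(0x0100_00b3, 0x2fe7_8e45, 8), 0xb32f_e78e);
}
```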
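The Cranelift and GCC arms cannot call `unbounded_shr`/`unbounded_shl` directly, so they lower the potentially out-of-range inverse shift as a masked (or modulo-reduced) shift followed by a select against zero. The hypothetical helper below models that trick in plain Rust; it is a sketch of the idea, not the backend code itself.

```rust
// Models how the Cranelift/GCC arms compute `b.unbounded_shr(BITS - s)`
// using only in-range shifts: shift by `(BITS - s) % BITS`, then select 0
// when the term must vanish. `(BITS - s) % BITS == 0` holds exactly when
// `s == 0`, which is also the only case where the reduced shift amount
// would wrongly yield `b` unshifted.
fn unbounded_shr_lowering(b: u32, s: u32) -> u32 {
    debug_assert!(s < u32::BITS); // `s` has already been reduced modulo the width
    let inv = (u32::BITS - s) % u32::BITS; // in-range shift amount
    let shr = b >> inv;
    if s == 0 { 0 } else { shr }
}

fn main() {
    assert_eq!(unbounded_shr_lowering(0xffff_ffff, 0), 0);
    assert_eq!(unbounded_shr_lowering(0xffff_ffff, 4), 0xf);
}
```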