Merged
149 changes: 149 additions & 0 deletions library/core/src/hint.rs
@@ -823,3 +823,152 @@ where
        crate::intrinsics::select_unpredictable(condition, true_val, false_val).assume_init()
    }
}

/// The expected temporal locality of a memory prefetch operation.
///
/// Locality expresses how likely the prefetched data is to be reused soon,
/// and therefore which level of cache it should be brought into.
///
/// The locality is just a hint, and may be ignored on some targets or by the hardware.
///
/// Used with functions like [`prefetch_read`] and [`prefetch_write`].
///
/// [`prefetch_read`]: crate::hint::prefetch_read
/// [`prefetch_write`]: crate::hint::prefetch_write
#[unstable(feature = "hint_prefetch", issue = "146941")]
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Locality {
    /// Data is expected to be reused eventually.
    ///
    /// Typically prefetches into L3 cache (if the CPU supports it).
    L3,
    /// Data is expected to be reused in the near future.
    ///
    /// Typically prefetches into L2 cache.
    L2,
    /// Data is expected to be reused very soon.
    ///
    /// Typically prefetches into L1 cache.
    L1,
}

impl Locality {
    /// Convert to the constant that LLVM associates with a locality.
    const fn to_llvm(self) -> i32 {
        match self {
            Self::L3 => 1,
            Self::L2 => 2,
            Self::L1 => 3,
        }
    }
}

/// Prefetch the cache line containing `ptr` for a future read.
///
/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
/// soon after, but may also increase bandwidth usage or evict other cache lines.
///
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
///
/// Passing a dangling or invalid pointer is permitted: the memory will not
/// actually be dereferenced, and no faults are raised.
///
/// # Examples
///
/// ```
/// #![feature(hint_prefetch)]
/// use std::hint::{Locality, prefetch_read};
/// use std::mem::size_of_val;
///
/// // Prefetch all of `slice` into the L1 cache.
/// fn prefetch_slice<T>(slice: &[T]) {
///     // On most systems the cache line size is 64 bytes.
///     for offset in (0..size_of_val(slice)).step_by(64) {
///         // The offset is in bytes, so advance the pointer bytewise.
///         prefetch_read(slice.as_ptr().wrapping_byte_add(offset), Locality::L1);
///     }
/// }
/// ```
#[inline(always)]
#[unstable(feature = "hint_prefetch", issue = "146941")]
pub const fn prefetch_read<T>(ptr: *const T, locality: Locality) {
    match locality {
        Locality::L3 => intrinsics::prefetch_read_data::<T, { Locality::L3.to_llvm() }>(ptr),
        Locality::L2 => intrinsics::prefetch_read_data::<T, { Locality::L2.to_llvm() }>(ptr),
        Locality::L1 => intrinsics::prefetch_read_data::<T, { Locality::L1.to_llvm() }>(ptr),
    }
}

/// Prefetch the cache line containing `ptr` for a single future read, but attempt to avoid
/// polluting the cache.
///
/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
/// soon after, but may also increase bandwidth usage or evict other cache lines.
///
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
///
/// Passing a dangling or invalid pointer is permitted: the memory will not
/// actually be dereferenced, and no faults are raised.
#[inline(always)]
#[unstable(feature = "hint_prefetch", issue = "146941")]
pub const fn prefetch_read_non_temporal<T>(ptr: *const T, locality: Locality) {
    // The LLVM intrinsic does not currently support specifying the locality.
    let _ = locality;
    intrinsics::prefetch_read_data::<T, 0>(ptr)
}
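
To illustrate where the non-temporal variant fits, here is a minimal sketch of a read-once streaming pass using the signatures above; the function name `sum_bytes`, the 64-byte line size, and the prefetch distance are illustrative assumptions, not tuned values.

```rust
#![feature(hint_prefetch)]
use std::hint::{Locality, prefetch_read_non_temporal};

// Hypothetical example: sum a large buffer that is only read once, hinting
// that the prefetched lines should not displace hot data in the cache.
fn sum_bytes(data: &[u8]) -> u64 {
    const LINE: usize = 64; // assumed cache line size
    const AHEAD: usize = 8 * LINE; // assumed prefetch distance in bytes
    let mut sum = 0u64;
    for (i, &byte) in data.iter().enumerate() {
        if i % LINE == 0 && i + AHEAD < data.len() {
            // Prefetch a line we will read exactly once, a little ahead of time.
            prefetch_read_non_temporal(data.as_ptr().wrapping_add(i + AHEAD), Locality::L1);
        }
        sum += byte as u64;
    }
    sum
}
```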

/// Prefetch the cache line containing `ptr` for a future write.
///
/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
/// soon after, but may also increase bandwidth usage or evict other cache lines.
///
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
///
/// Passing a dangling or invalid pointer is permitted: the memory will not
/// actually be dereferenced, and no faults are raised.
#[inline(always)]
#[unstable(feature = "hint_prefetch", issue = "146941")]
pub const fn prefetch_write<T>(ptr: *mut T, locality: Locality) {
    match locality {
        Locality::L3 => intrinsics::prefetch_write_data::<T, { Locality::L3.to_llvm() }>(ptr),
        Locality::L2 => intrinsics::prefetch_write_data::<T, { Locality::L2.to_llvm() }>(ptr),
        Locality::L1 => intrinsics::prefetch_write_data::<T, { Locality::L1.to_llvm() }>(ptr),
    }
}
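
As a usage sketch for the write prefetch: bring the destination line into cache slightly ahead of the store. The function name `fill_squares` and the distance of 16 elements are illustrative assumptions.

```rust
#![feature(hint_prefetch)]
use std::hint::{Locality, prefetch_write};

// Hypothetical example: prefetch destination lines a few iterations before
// they are written. Real prefetch distances need measurement on the target CPU.
fn fill_squares(out: &mut [u64]) {
    const AHEAD: usize = 16; // assumed prefetch distance in elements
    for i in 0..out.len() {
        if i + AHEAD < out.len() {
            prefetch_write(out.as_mut_ptr().wrapping_add(i + AHEAD), Locality::L1);
        }
        out[i] = (i as u64) * (i as u64);
    }
}
```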

/// Prefetch the cache line containing `ptr` for a single future write, but attempt to avoid
/// polluting the cache.
///
/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
/// soon after, but may also increase bandwidth usage or evict other cache lines.
///
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
///
/// Passing a dangling or invalid pointer is permitted: the memory will not
/// actually be dereferenced, and no faults are raised.
#[inline(always)]
#[unstable(feature = "hint_prefetch", issue = "146941")]
pub const fn prefetch_write_non_temporal<T>(ptr: *mut T, locality: Locality) {
    // The LLVM intrinsic does not currently support specifying the locality.
    let _ = locality;
    intrinsics::prefetch_write_data::<T, 0>(ptr)
}

/// Prefetch the cache line containing `ptr` into the instruction cache for a future read.
///
/// A strategically placed prefetch can reduce cache miss latency if the instructions are
/// accessed soon after, but may also increase bandwidth usage or evict other cache lines.
///
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
///
/// Passing a dangling or invalid pointer is permitted: the memory will not
/// actually be dereferenced, and no faults are raised.
#[inline(always)]
#[unstable(feature = "hint_prefetch", issue = "146941")]
pub const fn prefetch_read_instruction<T>(ptr: *const T, locality: Locality) {
    match locality {
        Locality::L3 => intrinsics::prefetch_read_instruction::<T, { Locality::L3.to_llvm() }>(ptr),
        Locality::L2 => intrinsics::prefetch_read_instruction::<T, { Locality::L2.to_llvm() }>(ptr),
        Locality::L1 => intrinsics::prefetch_read_instruction::<T, { Locality::L1.to_llvm() }>(ptr),
    }
}

Review thread on the `ptr` parameter of `prefetch_read_instruction`:

Member: shouldn't this be `ptr: unsafe fn()` or something, since some platforms have different data and instruction pointer sizes?

Member: On some platforms a function pointer doesn't point directly to the instruction bytes, but rather to a function descriptor, which consists of a pointer to the first instruction and some value that needs to be loaded into a register. On those platforms using `unsafe fn()` would be incorrect. Itanium is an example, but I know there are more architectures that do this.

Member: ok, but that doesn't mean `*const T` is correct.

Contributor (author): ultimately all you need is an address, so `*const T` seemed the simplest way of achieving that.

@programmerjake (Member), Sep 24, 2025: but `*const T` may be too small, e.g. on 16-bit x86 in the medium model a data pointer is 16 bits but an instruction pointer is 32 bits. There are some AVR CPUs (not currently supported by Rust?) which need more than 16 bits for instruction addresses but not for data, so they might have the same issue: https://en.wikipedia.org/wiki/Atmel_AVR_instruction_set#:~:text=Rare)%20models%20with,zero%2Dextended%20Z.)

@folkertdev (Contributor, author), Sep 25, 2025: Does that ACP actually use the LLVM address spaces? It's not really clear from the design. Also it looks like it was never actually nominated for T-lang?

Member: LLVM address space usage is dictated by the target. That ACP doesn't use non-default address spaces because, for all existing targets, a `NonNull<Code>` is sufficient for function addresses (AVR just uses 16-bit pointers for both code and data, and AFAIK LLVM doesn't currently support >16-bit pointers). However, the plan is to add a type `BikeshedFnAddr` and switch to using that whenever we add a target where that's insufficient.

Member: AVR does use `ptr addrspace(1)` for function pointers: https://rust.godbolt.org/z/3hGPfKvfG

Contributor (author): @programmerjake do you see that ACP moving forward? Maybe I should remove the instruction prefetching for now here and add it when there is progress?

Member: if you need just `Code`, you can probably get away with just adding that extern type for now under the tracking issue I just created, #148768, for that ACP, and let whoever implements the rest of that ACP just use `Code`. You can add them all now and wait on that tracking issue for stabilization. If it takes too long, this feature can be partially stabilized and the code-prefetch stabilization left for later.
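
To make the signature question above concrete, a hedged sketch of how the function would be called as currently written; `call_later` and the fn-pointer-to-data-pointer cast are illustrative only, and, as the thread notes, such a cast is not meaningful on every target.

```rust
#![feature(hint_prefetch)]
use std::hint::{Locality, prefetch_read_instruction};

// Hypothetical example: hint that the target of an indirect call will be
// executed soon. Assumes the current `*const T` signature.
fn call_later(callback: fn(u32) -> u32, x: u32) -> u32 {
    prefetch_read_instruction(callback as *const (), Locality::L1);
    // ... unrelated work could happen here ...
    callback(x)
}
```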