From e6ce1af5101bd4a4bf0db80dcd6f7648ee210cf5 Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee@gmail.com>
Date: Wed, 9 Feb 2022 17:20:39 -0800
Subject: [PATCH 1/2] Explain unsafe contracts of core::simd

This permeate the module with remarks on safety for pub methods,
layout of the Simd type, correct use of intrinsics, et cetera.
This is mostly to help others curious about how core::simd works,
including other Rust contributors, `unsafe` library authors,
and eventually ourselves.
---
 crates/core_simd/src/intrinsics.rs | 57 ++++++++++++++++++++++++------
 crates/core_simd/src/lib.rs        |  1 +
 crates/core_simd/src/round.rs      |  5 +++
 crates/core_simd/src/select.rs     |  4 +++
 crates/core_simd/src/vector.rs     | 34 ++++++++++++++++++
 5 files changed, 90 insertions(+), 11 deletions(-)
diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
index b5d0df7548f..314959c47ba 100644
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -2,16 +2,29 @@
 //! crate.
 //!
 //! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
+//!
+//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
+//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
+//!   poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
+//!   poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
+//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
+//!   this means that division by poison or undef is like division by zero, which means it inflicts...
+//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
+//!   in particular, LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception.
+//!
+//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
+//!
+//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
 
 /// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
-/// simply lowered to the matching LLVM instructions by the compiler.  The associated instruction
-/// is documented alongside each intrinsic.
+/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
+/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
 extern "platform-intrinsic" {
     /// add/fadd
     pub(crate) fn simd_add<T>(x: T, y: T) -> T;
 
     /// sub/fsub
-    pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
 
     /// mul/fmul
     pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
@@ -20,19 +33,22 @@ extern "platform-intrinsic" {
     /// ints and uints: {s,u}div incur UB if division by zero occurs.
     /// ints: sdiv is UB for int::MIN / -1.
     /// floats: fdiv is never UB, but may create NaNs or infinities.
-    pub(crate) fn simd_div<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
 
     /// urem/srem/frem
     /// ints and uints: {s,u}rem incur UB if division by zero occurs.
     /// ints: srem is UB for int::MIN / -1.
     /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
-    pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
 
     /// shl
-    pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
+    /// for (u)ints. poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
 
-    /// lshr/ashr
-    pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
+    /// ints: ashr
+    /// uints: lshr
+    /// poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
 
     /// and
     pub(crate) fn simd_and<T>(x: T, y: T) -> T;
@@ -44,6 +60,9 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
 
     /// fptoui/fptosi/uitofp/sitofp
+    /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5
+    /// but the truncated value must fit in the target type or the result is poison.
+    /// use `simd_as` instead for a cast that performs a saturating conversion.
     pub(crate) fn simd_cast<T, U>(x: T) -> U;
     /// follows Rust's `T as U` semantics, including saturating float casts
     /// which amounts to the same as `simd_cast` for many cases
@@ -63,6 +82,7 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
     pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
 
+    // these return Simd<int, N> with the same BITS size as the inputs
     pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -71,19 +91,31 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
 
     // shufflevector
+    // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
     pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
 
+    /// llvm.masked.gather
+    /// like a loop of pointer reads
+    /// val: vector of values to select if a lane is masked
+    /// ptr: vector of pointers to read from
+    /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
+    /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
+    /// FIXME: consider reconciling this somehow if/when we fix up our mask story in general?
     pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
+    /// llvm.masked.scatter
+    /// like gather, but more spicy, as it writes instead of reads
     pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
 
     // {s,u}add.sat
     pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
 
     // {s,u}sub.sat
-    pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
 
     // reductions
+    // llvm.vector.reduce.{add,fadd}
     pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
+    // llvm.vector.reduce.{mul,fmul}
     pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
     #[allow(unused)]
     pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
@@ -100,7 +132,10 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
 
     // select
-    pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
+    // first argument is a vector of integers, -1 (all bits 1) is "true"
+    // logically equivalent to (yes & m) | (no & (m^-1),
+    // but you can use it on floats.
+    pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
     #[allow(unused)]
-    pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
+    pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
 }
diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
index 41f64e972d9..91ae34c05e0 100644
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -3,6 +3,7 @@
     const_fn_trait_bound,
     convert_float_to_int,
     decl_macro,
+    intra_doc_pointers,
     platform_intrinsics,
     repr_simd,
     simd_ffi,
diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs
index f1724cbc263..556bc2cc1fe 100644
--- a/crates/core_simd/src/round.rs
+++ b/crates/core_simd/src/round.rs
@@ -19,6 +19,11 @@ macro_rules! implement {
             /// * Not be NaN
             /// * Not be infinite
             /// * Be representable in the return type, after truncating off its fractional part
+            ///
+            /// If these requirements are infeasible or costly, consider using the safe function [cast],
+            /// which saturates on conversion.
+            ///
+            /// [cast]: Simd::cast
             #[inline]
             pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
             where
diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs
index 8d521057fbd..3acf07260e1 100644
--- a/crates/core_simd/src/select.rs
+++ b/crates/core_simd/src/select.rs
@@ -11,6 +11,7 @@ where
     /// For each lane in the mask, choose the corresponding lane from `true_values` if
     /// that lane mask is true, and `false_values` if that lane mask is false.
     ///
+    /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
@@ -31,6 +32,8 @@ where
     where
         U: SimdElement<Mask = T>,
     {
+        // Safety: The mask has been cast to a vector of integers,
+        // and the operands to select between are vectors of the same type and length.
         unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
     }
 
@@ -39,6 +42,7 @@ where
     /// For each lane in the mask, choose the corresponding lane from `true_values` if
     /// that lane mask is true, and `false_values` if that lane mask is false.
     ///
+    /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "std")] use core_simd::Mask;
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index e452fa8bfc8..81955b7a532 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -44,6 +44,39 @@ use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
 ///
 /// [`Wrapping<T>`]: core::num::Wrapping
 ///
+/// # Layout
+/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a higher alignment.
+/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
+/// It is thus sound to transmute from `Simd<T, N>` into `[T; N]` but not vice versa.
+///
+/// # ABI Bugs
+/// Due to an unfortunate bug in the Rust ABI, `Simd<T, N>` does not participate in argument-passing in SIMD registers,
+/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
+/// This may be corrected in the future.
+///
+/// # Safe SIMD with Unsafe Rust
+///
+/// `Simd<T, N>` has a higher alignment requirement than an array. It is aligned based on the size of the entire vector,
+/// not individual elements. This means it may be sound to [`transmute`] `Simd<T, N>` to `[T; N]` but **not** vice versa.
+/// When using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is usually best to use
+/// [`read_unaligned`] and [`write_unaligned`]. This is because:
+/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
+/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) or similar types, aligned to `T`
+/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
+/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
+/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
+///
+/// If neither the compiler nor the CPU correct for this, it is best to design data structures to be aligned to the `Simd<T, N>`
+/// you wish to use before using `unsafe` Rust to read or write. Otherwise, the common ways to compensate for these facts,
+/// like materializing `Simd<T, N>` to or from an array first, are handled by safe methods like [`Simd::from_array`] and
+/// [`Simd::from_slice`].
+///
+/// [`transmute`]: core::mem::transmute
+/// [raw pointers]: pointer
+/// [`read_unaligned`]: pointer::read_unaligned
+/// [`write_unaligned`]: pointer::write_unaligned
+/// [`read`]: pointer::read
+/// [`write`]: pointer::write
 #[repr(simd)]
 pub struct Simd<T, const LANES: usize>([T; LANES])
 where
@@ -133,6 +166,7 @@ where
     #[inline]
     #[cfg(not(bootstrap))]
     pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
+        // Safety: The input argument is a vector of a known SIMD type.
         unsafe { intrinsics::simd_as(self) }
     }
 

From 45b2f92bc430eabf10d696e738539cb39d1295f5 Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee@gmail.com>
Date: Wed, 9 Feb 2022 22:46:24 -0800
Subject: [PATCH 2/2] Revise safety explanations under review

---
 crates/core_simd/src/intrinsics.rs |  6 ++++--
 crates/core_simd/src/vector.rs     | 34 ++++++++++++++++++------------
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
index 314959c47ba..e150946c705 100644
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -10,7 +10,9 @@
 //! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
 //!   this means that division by poison or undef is like division by zero, which means it inflicts...
 //! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
-//!   in particular, LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception.
+//!   LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
+//!   The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
+//!   and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
 //!
 //! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
 //!
@@ -100,7 +102,7 @@ extern "platform-intrinsic" {
     /// ptr: vector of pointers to read from
     /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
     /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
-    /// FIXME: consider reconciling this somehow if/when we fix up our mask story in general?
+    /// FIXME: review this if/when we fix up our mask story in general?
     pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
     /// llvm.masked.scatter
     /// like gather, but more spicy, as it writes instead of reads
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 81955b7a532..ff1b2c756ad 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -45,31 +45,38 @@ use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
 /// [`Wrapping<T>`]: core::num::Wrapping
 ///
 /// # Layout
-/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a higher alignment.
+/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
 /// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
-/// It is thus sound to transmute from `Simd<T, N>` into `[T; N]` but not vice versa.
+/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and will typically optimize to zero cost,
+/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
 ///
-/// # ABI Bugs
-/// Due to an unfortunate bug in the Rust ABI, `Simd<T, N>` does not participate in argument-passing in SIMD registers,
+/// # ABI "Features"
+/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
 /// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
-/// This may be corrected in the future.
+/// The need for this may be corrected in the future.
 ///
 /// # Safe SIMD with Unsafe Rust
 ///
-/// `Simd<T, N>` has a higher alignment requirement than an array. It is aligned based on the size of the entire vector,
-/// not individual elements. This means it may be sound to [`transmute`] `Simd<T, N>` to `[T; N]` but **not** vice versa.
-/// When using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is usually best to use
+/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
+/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
+/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
+/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
+/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
 /// [`read_unaligned`] and [`write_unaligned`]. This is because:
 /// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
-/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) or similar types, aligned to `T`
+/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
 /// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
 /// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
 /// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
 ///
-/// If neither the compiler nor the CPU correct for this, it is best to design data structures to be aligned to the `Simd<T, N>`
-/// you wish to use before using `unsafe` Rust to read or write. Otherwise, the common ways to compensate for these facts,
-/// like materializing `Simd<T, N>` to or from an array first, are handled by safe methods like [`Simd::from_array`] and
-/// [`Simd::from_slice`].
+/// By imposing less obligations, unaligned functions are less likely to make the program unsound,
+/// and may be just as fast as stricter alternatives.
+/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
+/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
+/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
+/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
+/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
+/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
 ///
 /// [`transmute`]: core::mem::transmute
 /// [raw pointers]: pointer
@@ -77,6 +84,7 @@ use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
 /// [`write_unaligned`]: pointer::write_unaligned
 /// [`read`]: pointer::read
 /// [`write`]: pointer::write
+/// [as_simd]: slice::as_simd
 #[repr(simd)]
 pub struct Simd<T, const LANES: usize>([T; LANES])
 where