Rollup merge of rust-lang#121223 - RalfJung:simd-intrinsics, r=Amanieu

intrinsics::simd: add missing functions Turns out stdarch declares a bunch more SIMD intrinsics that are still missing from libcore. I hope I got the docs and in particular the safety requirements right for these "unordered" and "nanless" intrinsics. Many of these are unused even in stdarch, but they are implemented in the codegen backend, so we may as well list them here. r? `@Amanieu` Cc `@calebzulawski` `@workingjubilee`
Noratrieb · Feb 20, 2024 · 37046c3 · 37046c3
2 parents 66ee1ac + f70538c
commit 37046c3
Show file tree

Hide file tree

Showing 7 changed files with 116 additions and 11 deletions.
diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -1727,7 +1727,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
     }
 
-    pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
+    pub fn vector_reduce_fadd_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
         unimplemented!();
     }
 
@@ -1747,7 +1747,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         unimplemented!();
     }
 
-    pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
+    pub fn vector_reduce_fmul_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
         unimplemented!();
     }
 

diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@@ -989,14 +989,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
     arith_red!(
         simd_reduce_add_unordered: BinaryOp::Plus,
-        vector_reduce_fadd_fast,
+        vector_reduce_fadd_reassoc,
         false,
         add,
         0.0 // TODO: Use this argument.
     );
     arith_red!(
         simd_reduce_mul_unordered: BinaryOp::Mult,
-        vector_reduce_fmul_fast,
+        vector_reduce_fmul_reassoc,
         false,
         mul,
         1.0

diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1327,17 +1327,17 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     pub fn vector_reduce_fmul(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe { llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src) }
     }
-    pub fn vector_reduce_fadd_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
+    pub fn vector_reduce_fadd_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);
-            llvm::LLVMRustSetFastMath(instr);
+            llvm::LLVMRustSetAllowReassoc(instr);
             instr
         }
     }
-    pub fn vector_reduce_fmul_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
+    pub fn vector_reduce_fmul_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);
-            llvm::LLVMRustSetFastMath(instr);
+            llvm::LLVMRustSetAllowReassoc(instr);
             instr
         }
     }

diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1880,14 +1880,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
     arith_red!(
         simd_reduce_add_unordered: vector_reduce_add,
-        vector_reduce_fadd_fast,
+        vector_reduce_fadd_reassoc,
         false,
         add,
         0.0
     );
     arith_red!(
         simd_reduce_mul_unordered: vector_reduce_mul,
-        vector_reduce_fmul_fast,
+        vector_reduce_fmul_reassoc,
         false,
         mul,
         1.0

diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1618,6 +1618,7 @@ extern "C" {
     ) -> &'a Value;
 
     pub fn LLVMRustSetFastMath(Instr: &Value);
+    pub fn LLVMRustSetAllowReassoc(Instr: &Value);
 
     // Miscellaneous instructions
     pub fn LLVMRustGetInstrProfIncrementIntrinsic(M: &Module) -> &Value;

diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
@@ -418,7 +418,7 @@ extern "C" LLVMAttributeRef LLVMRustCreateMemoryEffectsAttr(LLVMContextRef C,
   }
 }
 
-// Enable a fast-math flag
+// Enable all fast-math flags
 //
 // https://llvm.org/docs/LangRef.html#fast-math-flags
 extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
@@ -427,6 +427,15 @@ extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
   }
 }
 
+// Enable the reassoc fast-math flag
+//
+// https://llvm.org/docs/LangRef.html#fast-math-flags
+extern "C" void LLVMRustSetAllowReassoc(LLVMValueRef V) {
+  if (auto I = dyn_cast<Instruction>(unwrap<Value>(V))) {
+    I->setHasAllowReassoc(true);
+  }
+}
+
 extern "C" LLVMValueRef
 LLVMRustBuildAtomicLoad(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Source,
                         const char *Name, LLVMAtomicOrdering Order) {

diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd.rs
@@ -3,6 +3,24 @@
 //! In this module, a "vector" is any `repr(simd)` type.
 
 extern "platform-intrinsic" {
+    /// Insert an element into a vector, returning the updated vector.
+    ///
+    /// `T` must be a vector with element type `U`.
+    ///
+    /// # Safety
+    ///
+    /// `idx` must be in-bounds of the vector.
+    pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
+
+    /// Extract an element from a vector.
+    ///
+    /// `T` must be a vector with element type `U`.
+    ///
+    /// # Safety
+    ///
+    /// `idx` must be in-bounds of the vector.
+    pub fn simd_extract<T, U>(x: T, idx: u32) -> U;
+
     /// Add two simd vectors elementwise.
     ///
     /// `T` must be a vector of integer or floating point primitive types.
@@ -315,6 +333,14 @@ extern "platform-intrinsic" {
     /// Starting with the value `y`, add the elements of `x` and accumulate.
     pub fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
 
+    /// Add elements within a vector in arbitrary order. May also be re-associated with
+    /// unordered additions on the inputs/outputs.
+    ///
+    /// `T` must be a vector of integer or floating-point primitive types.
+    ///
+    /// `U` must be the element type of `T`.
+    pub fn simd_reduce_add_unordered<T, U>(x: T) -> U;
+
     /// Multiply elements within a vector from left to right.
     ///
     /// `T` must be a vector of integer or floating-point primitive types.
@@ -324,6 +350,14 @@ extern "platform-intrinsic" {
     /// Starting with the value `y`, multiply the elements of `x` and accumulate.
     pub fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
 
+    /// Multiply elements within a vector in arbitrary order. May also be re-associated with
+    /// unordered multiplications on the inputs/outputs.
+    ///
+    /// `T` must be a vector of integer or floating-point primitive types.
+    ///
+    /// `U` must be the element type of `T`.
+    pub fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
+
     /// Check if all mask values are true.
     ///
     /// `T` must be a vector of integer primitive types.
@@ -349,6 +383,19 @@ extern "platform-intrinsic" {
     /// For floating-point values, uses IEEE-754 `maxNum`.
     pub fn simd_reduce_max<T, U>(x: T) -> U;
 
+    /// Return the maximum element of a vector.
+    ///
+    /// `T` must be a vector of integer or floating-point primitive types.
+    ///
+    /// `U` must be the element type of `T`.
+    ///
+    /// For floating-point values, uses IEEE-754 `maxNum`.
+    ///
+    /// # Safety
+    ///
+    /// All input elements must be finite (i.e., not NAN and not +/- INF).
+    pub fn simd_reduce_max_nanless<T, U>(x: T) -> U;
+
     /// Return the minimum element of a vector.
     ///
     /// `T` must be a vector of integer or floating-point primitive types.
@@ -358,6 +405,19 @@ extern "platform-intrinsic" {
     /// For floating-point values, uses IEEE-754 `minNum`.
     pub fn simd_reduce_min<T, U>(x: T) -> U;
 
+    /// Return the minimum element of a vector.
+    ///
+    /// `T` must be a vector of integer or floating-point primitive types.
+    ///
+    /// `U` must be the element type of `T`.
+    ///
+    /// For floating-point values, uses IEEE-754 `minNum`.
+    ///
+    /// # Safety
+    ///
+    /// All input elements must be finite (i.e., not NAN and not +/- INF).
+    pub fn simd_reduce_min_nanless<T, U>(x: T) -> U;
+
     /// Logical "and" all elements together.
     ///
     /// `T` must be a vector of integer or floating-point primitive types.
@@ -516,4 +576,39 @@ extern "platform-intrinsic" {
     ///
     /// `T` must be a vector of floats.
     pub fn simd_fma<T>(x: T, y: T, z: T) -> T;
+
+    /// Computes the sine of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_fsin<T>(a: T) -> T;
+
+    /// Computes the cosine of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_fcos<T>(a: T) -> T;
+
+    /// Computes the exponential function of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_fexp<T>(a: T) -> T;
+
+    /// Computes 2 raised to the power of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_fexp2<T>(a: T) -> T;
+
+    /// Computes the base 10 logarithm of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_flog10<T>(a: T) -> T;
+
+    /// Computes the base 2 logarithm of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_flog2<T>(a: T) -> T;
+
+    /// Computes the natural logarithm of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_flog<T>(a: T) -> T;
 }