diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
index 42e61b3ccb5ad..d5da5c845a758 100644
--- a/compiler/rustc_codegen_gcc/src/builder.rs
+++ b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -1727,7 +1727,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
     }
 
-    pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
+    pub fn vector_reduce_fadd_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
         unimplemented!();
     }
 
@@ -1747,7 +1747,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         unimplemented!();
     }
 
-    pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
+    pub fn vector_reduce_fmul_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
         unimplemented!();
     }
 
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
index 9fa978cd2ef7a..dedd46538589d 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@@ -989,14 +989,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
     arith_red!(
         simd_reduce_add_unordered: BinaryOp::Plus,
-        vector_reduce_fadd_fast,
+        vector_reduce_fadd_reassoc,
         false,
         add,
         0.0 // TODO: Use this argument.
     );
     arith_red!(
         simd_reduce_mul_unordered: BinaryOp::Mult,
-        vector_reduce_fmul_fast,
+        vector_reduce_fmul_reassoc,
         false,
         mul,
         1.0
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 7ed27b33dceaa..27dadb7c51106 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1327,17 +1327,17 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     pub fn vector_reduce_fmul(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe { llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src) }
     }
-    pub fn vector_reduce_fadd_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
+    pub fn vector_reduce_fadd_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);
-            llvm::LLVMRustSetFastMath(instr);
+            llvm::LLVMRustSetAllowReassoc(instr);
             instr
         }
     }
-    pub fn vector_reduce_fmul_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
+    pub fn vector_reduce_fmul_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);
-            llvm::LLVMRustSetFastMath(instr);
+            llvm::LLVMRustSetAllowReassoc(instr);
             instr
         }
     }
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 4415c51acf684..b7782410012e2 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1880,14 +1880,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
     arith_red!(
         simd_reduce_add_unordered: vector_reduce_add,
-        vector_reduce_fadd_fast,
+        vector_reduce_fadd_reassoc,
         false,
         add,
         0.0
     );
     arith_red!(
         simd_reduce_mul_unordered: vector_reduce_mul,
-        vector_reduce_fmul_fast,
+        vector_reduce_fmul_reassoc,
         false,
         mul,
         1.0
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index d0044086c616e..c51a2a8770149 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1618,6 +1618,7 @@ extern "C" {
     ) -> &'a Value;
 
     pub fn LLVMRustSetFastMath(Instr: &Value);
+    pub fn LLVMRustSetAllowReassoc(Instr: &Value);
 
     // Miscellaneous instructions
     pub fn LLVMRustGetInstrProfIncrementIntrinsic(M: &Module) -> &Value;
diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
index b45706fd1e5b2..8ae4542076a2d 100644
--- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
+++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
@@ -418,7 +418,7 @@ extern "C" LLVMAttributeRef LLVMRustCreateMemoryEffectsAttr(LLVMContextRef C,
   }
 }
 
-// Enable a fast-math flag
+// Enable all fast-math flags
 //
 // https://llvm.org/docs/LangRef.html#fast-math-flags
 extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
@@ -427,6 +427,15 @@ extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
   }
 }
 
+// Enable the reassoc fast-math flag
+//
+// https://llvm.org/docs/LangRef.html#fast-math-flags
+extern "C" void LLVMRustSetAllowReassoc(LLVMValueRef V) {
+  if (auto I = dyn_cast<Instruction>(unwrap<Value>(V))) {
+    I->setHasAllowReassoc(true);
+  }
+}
+
 extern "C" LLVMValueRef
 LLVMRustBuildAtomicLoad(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Source,
                         const char *Name, LLVMAtomicOrdering Order) {
diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd.rs
index ef4c65639eb4d..0ccdd74fa6269 100644
--- a/library/core/src/intrinsics/simd.rs
+++ b/library/core/src/intrinsics/simd.rs
@@ -3,6 +3,24 @@
 //! In this module, a "vector" is any `repr(simd)` type.
 
 extern "platform-intrinsic" {
+    /// Insert an element into a vector, returning the updated vector.
+    ///
+    /// `T` must be a vector with element type `U`.
+    ///
+    /// # Safety
+    ///
+    /// `idx` must be in-bounds of the vector.
+    pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
+
+    /// Extract an element from a vector.
+    ///
+    /// `T` must be a vector with element type `U`.
+    ///
+    /// # Safety
+    ///
+    /// `idx` must be in-bounds of the vector.
+    pub fn simd_extract<T, U>(x: T, idx: u32) -> U;
+
     /// Add two simd vectors elementwise.
     ///
     /// `T` must be a vector of integer or floating point primitive types.
@@ -315,6 +333,14 @@ extern "platform-intrinsic" {
     /// Starting with the value `y`, add the elements of `x` and accumulate.
     pub fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
 
+    /// Add elements within a vector in arbitrary order. May also be re-associated with
+    /// unordered additions on the inputs/outputs.
+    ///
+    /// `T` must be a vector of integer or floating-point primitive types.
+    ///
+    /// `U` must be the element type of `T`.
+    pub fn simd_reduce_add_unordered<T, U>(x: T) -> U;
+
     /// Multiply elements within a vector from left to right.
     ///
     /// `T` must be a vector of integer or floating-point primitive types.
@@ -324,6 +350,14 @@ extern "platform-intrinsic" {
     /// Starting with the value `y`, multiply the elements of `x` and accumulate.
     pub fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
 
+    /// Add elements within a vector in arbitrary order. May also be re-associated with
+    /// unordered additions on the inputs/outputs.
+    ///
+    /// `T` must be a vector of integer or floating-point primitive types.
+    ///
+    /// `U` must be the element type of `T`.
+    pub fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
+
     /// Check if all mask values are true.
     ///
     /// `T` must be a vector of integer primitive types.
@@ -349,6 +383,19 @@ extern "platform-intrinsic" {
     /// For floating-point values, uses IEEE-754 `maxNum`.
     pub fn simd_reduce_max<T, U>(x: T) -> U;
 
+    /// Return the maximum element of a vector.
+    ///
+    /// `T` must be a vector of integer or floating-point primitive types.
+    ///
+    /// `U` must be the element type of `T`.
+    ///
+    /// For floating-point values, uses IEEE-754 `maxNum`.
+    ///
+    /// # Safety
+    ///
+    /// All input elements must be finite (i.e., not NAN and not +/- INF).
+    pub fn simd_reduce_max_nanless<T, U>(x: T) -> U;
+
     /// Return the minimum element of a vector.
     ///
     /// `T` must be a vector of integer or floating-point primitive types.
@@ -358,6 +405,19 @@ extern "platform-intrinsic" {
     /// For floating-point values, uses IEEE-754 `minNum`.
     pub fn simd_reduce_min<T, U>(x: T) -> U;
 
+    /// Return the minimum element of a vector.
+    ///
+    /// `T` must be a vector of integer or floating-point primitive types.
+    ///
+    /// `U` must be the element type of `T`.
+    ///
+    /// For floating-point values, uses IEEE-754 `minNum`.
+    ///
+    /// # Safety
+    ///
+    /// All input elements must be finite (i.e., not NAN and not +/- INF).
+    pub fn simd_reduce_min_nanless<T, U>(x: T) -> U;
+
     /// Logical "and" all elements together.
     ///
     /// `T` must be a vector of integer or floating-point primitive types.
@@ -516,4 +576,39 @@ extern "platform-intrinsic" {
     ///
     /// `T` must be a vector of floats.
     pub fn simd_fma<T>(x: T, y: T, z: T) -> T;
+
+    // Computes the sine of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_fsin<T>(a: T) -> T;
+
+    // Computes the cosine of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_fcos<T>(a: T) -> T;
+
+    // Computes the exponential function of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_fexp<T>(a: T) -> T;
+
+    // Computes 2 raised to the power of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_fexp2<T>(a: T) -> T;
+
+    // Computes the base 10 logarithm of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_flog10<T>(a: T) -> T;
+
+    // Computes the base 2 logarithm of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_flog2<T>(a: T) -> T;
+
+    // Computes the natural logarithm of each element.
+    ///
+    /// `T` must be a vector of floats.
+    pub fn simd_flog<T>(a: T) -> T;
 }