diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs index 42e61b3ccb5ad..d5da5c845a758 100644 --- a/compiler/rustc_codegen_gcc/src/builder.rs +++ b/compiler/rustc_codegen_gcc/src/builder.rs @@ -1727,7 +1727,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b)) } - pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> { + pub fn vector_reduce_fadd_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> { unimplemented!(); } @@ -1747,7 +1747,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { unimplemented!(); } - pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> { + pub fn vector_reduce_fmul_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> { unimplemented!(); } diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs index 9fa978cd2ef7a..dedd46538589d 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs @@ -989,14 +989,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( arith_red!( simd_reduce_add_unordered: BinaryOp::Plus, - vector_reduce_fadd_fast, + vector_reduce_fadd_reassoc, false, add, 0.0 // TODO: Use this argument. ); arith_red!( simd_reduce_mul_unordered: BinaryOp::Mult, - vector_reduce_fmul_fast, + vector_reduce_fmul_reassoc, false, mul, 1.0 diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 7ed27b33dceaa..27dadb7c51106 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -1327,17 +1327,17 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> { pub fn vector_reduce_fmul(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value { unsafe { llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src) } } - pub fn vector_reduce_fadd_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value { + pub fn vector_reduce_fadd_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value { unsafe { let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src); - llvm::LLVMRustSetFastMath(instr); + llvm::LLVMRustSetAllowReassoc(instr); instr } } - pub fn vector_reduce_fmul_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value { + pub fn vector_reduce_fmul_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value { unsafe { let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src); - llvm::LLVMRustSetFastMath(instr); + llvm::LLVMRustSetAllowReassoc(instr); instr } } diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 4415c51acf684..b7782410012e2 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -1880,14 +1880,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>( arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0); arith_red!( simd_reduce_add_unordered: vector_reduce_add, - vector_reduce_fadd_fast, + vector_reduce_fadd_reassoc, false, add, 0.0 ); arith_red!( simd_reduce_mul_unordered: vector_reduce_mul, - vector_reduce_fmul_fast, + vector_reduce_fmul_reassoc, false, mul, 1.0 diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index d0044086c616e..c51a2a8770149 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1618,6 +1618,7 @@ extern "C" { ) -> &'a Value; pub fn LLVMRustSetFastMath(Instr: &Value); + pub fn LLVMRustSetAllowReassoc(Instr: &Value); // Miscellaneous instructions pub fn LLVMRustGetInstrProfIncrementIntrinsic(M: &Module) -> &Value; diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index b45706fd1e5b2..8ae4542076a2d 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -418,7 +418,7 @@ extern "C" LLVMAttributeRef LLVMRustCreateMemoryEffectsAttr(LLVMContextRef C, } } -// Enable a fast-math flag +// Enable all fast-math flags // // https://llvm.org/docs/LangRef.html#fast-math-flags extern "C" void LLVMRustSetFastMath(LLVMValueRef V) { @@ -427,6 +427,15 @@ extern "C" void LLVMRustSetFastMath(LLVMValueRef V) { } } +// Enable the reassoc fast-math flag +// +// https://llvm.org/docs/LangRef.html#fast-math-flags +extern "C" void LLVMRustSetAllowReassoc(LLVMValueRef V) { + if (auto I = dyn_cast(unwrap(V))) { + I->setHasAllowReassoc(true); + } +} + extern "C" LLVMValueRef LLVMRustBuildAtomicLoad(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Source, const char *Name, LLVMAtomicOrdering Order) { diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd.rs index ef4c65639eb4d..0ccdd74fa6269 100644 --- a/library/core/src/intrinsics/simd.rs +++ b/library/core/src/intrinsics/simd.rs @@ -3,6 +3,24 @@ //! In this module, a "vector" is any `repr(simd)` type. extern "platform-intrinsic" { + /// Insert an element into a vector, returning the updated vector. + /// + /// `T` must be a vector with element type `U`. + /// + /// # Safety + /// + /// `idx` must be in-bounds of the vector. + pub fn simd_insert(x: T, idx: u32, val: U) -> T; + + /// Extract an element from a vector. + /// + /// `T` must be a vector with element type `U`. + /// + /// # Safety + /// + /// `idx` must be in-bounds of the vector. + pub fn simd_extract(x: T, idx: u32) -> U; + /// Add two simd vectors elementwise. /// /// `T` must be a vector of integer or floating point primitive types. @@ -315,6 +333,14 @@ extern "platform-intrinsic" { /// Starting with the value `y`, add the elements of `x` and accumulate. pub fn simd_reduce_add_ordered(x: T, y: U) -> U; + /// Add elements within a vector in arbitrary order. May also be re-associated with + /// unordered additions on the inputs/outputs. + /// + /// `T` must be a vector of integer or floating-point primitive types. + /// + /// `U` must be the element type of `T`. + pub fn simd_reduce_add_unordered(x: T) -> U; + /// Multiply elements within a vector from left to right. /// /// `T` must be a vector of integer or floating-point primitive types. @@ -324,6 +350,14 @@ extern "platform-intrinsic" { /// Starting with the value `y`, multiply the elements of `x` and accumulate. pub fn simd_reduce_mul_ordered(x: T, y: U) -> U; + /// Add elements within a vector in arbitrary order. May also be re-associated with + /// unordered additions on the inputs/outputs. + /// + /// `T` must be a vector of integer or floating-point primitive types. + /// + /// `U` must be the element type of `T`. + pub fn simd_reduce_mul_unordered(x: T) -> U; + /// Check if all mask values are true. /// /// `T` must be a vector of integer primitive types. @@ -349,6 +383,19 @@ extern "platform-intrinsic" { /// For floating-point values, uses IEEE-754 `maxNum`. pub fn simd_reduce_max(x: T) -> U; + /// Return the maximum element of a vector. + /// + /// `T` must be a vector of integer or floating-point primitive types. + /// + /// `U` must be the element type of `T`. + /// + /// For floating-point values, uses IEEE-754 `maxNum`. + /// + /// # Safety + /// + /// All input elements must be finite (i.e., not NAN and not +/- INF). + pub fn simd_reduce_max_nanless(x: T) -> U; + /// Return the minimum element of a vector. /// /// `T` must be a vector of integer or floating-point primitive types. @@ -358,6 +405,19 @@ extern "platform-intrinsic" { /// For floating-point values, uses IEEE-754 `minNum`. pub fn simd_reduce_min(x: T) -> U; + /// Return the minimum element of a vector. + /// + /// `T` must be a vector of integer or floating-point primitive types. + /// + /// `U` must be the element type of `T`. + /// + /// For floating-point values, uses IEEE-754 `minNum`. + /// + /// # Safety + /// + /// All input elements must be finite (i.e., not NAN and not +/- INF). + pub fn simd_reduce_min_nanless(x: T) -> U; + /// Logical "and" all elements together. /// /// `T` must be a vector of integer or floating-point primitive types. @@ -516,4 +576,39 @@ extern "platform-intrinsic" { /// /// `T` must be a vector of floats. pub fn simd_fma(x: T, y: T, z: T) -> T; + + // Computes the sine of each element. + /// + /// `T` must be a vector of floats. + pub fn simd_fsin(a: T) -> T; + + // Computes the cosine of each element. + /// + /// `T` must be a vector of floats. + pub fn simd_fcos(a: T) -> T; + + // Computes the exponential function of each element. + /// + /// `T` must be a vector of floats. + pub fn simd_fexp(a: T) -> T; + + // Computes 2 raised to the power of each element. + /// + /// `T` must be a vector of floats. + pub fn simd_fexp2(a: T) -> T; + + // Computes the base 10 logarithm of each element. + /// + /// `T` must be a vector of floats. + pub fn simd_flog10(a: T) -> T; + + // Computes the base 2 logarithm of each element. + /// + /// `T` must be a vector of floats. + pub fn simd_flog2(a: T) -> T; + + // Computes the natural logarithm of each element. + /// + /// `T` must be a vector of floats. + pub fn simd_flog(a: T) -> T; }