Skip to content

Commit

Permalink
Rollup merge of rust-lang#121223 - RalfJung:simd-intrinsics, r=Amanieu
Browse files Browse the repository at this point in the history
intrinsics::simd: add missing functions

Turns out stdarch declares a bunch more SIMD intrinsics that are still missing from libcore.
I hope I got the docs and in particular the safety requirements right for these "unordered" and "nanless" intrinsics.

Many of these are unused even in stdarch, but they are implemented in the codegen backend, so we may as well list them here.

r? `@Amanieu`
Cc `@calebzulawski` `@workingjubilee`
  • Loading branch information
Noratrieb authored Feb 20, 2024
2 parents 66ee1ac + f70538c commit 37046c3
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 11 deletions.
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_gcc/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1727,7 +1727,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
}

pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
pub fn vector_reduce_fadd_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
unimplemented!();
}

Expand All @@ -1747,7 +1747,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
unimplemented!();
}

pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
pub fn vector_reduce_fmul_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
unimplemented!();
}

Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -989,14 +989,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(

arith_red!(
simd_reduce_add_unordered: BinaryOp::Plus,
vector_reduce_fadd_fast,
vector_reduce_fadd_reassoc,
false,
add,
0.0 // TODO: Use this argument.
);
arith_red!(
simd_reduce_mul_unordered: BinaryOp::Mult,
vector_reduce_fmul_fast,
vector_reduce_fmul_reassoc,
false,
mul,
1.0
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_codegen_llvm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1327,17 +1327,17 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
pub fn vector_reduce_fmul(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
unsafe { llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src) }
}
pub fn vector_reduce_fadd_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
pub fn vector_reduce_fadd_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
unsafe {
let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);
llvm::LLVMRustSetFastMath(instr);
llvm::LLVMRustSetAllowReassoc(instr);
instr
}
}
pub fn vector_reduce_fmul_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
pub fn vector_reduce_fmul_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
unsafe {
let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);
llvm::LLVMRustSetFastMath(instr);
llvm::LLVMRustSetAllowReassoc(instr);
instr
}
}
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1880,14 +1880,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
arith_red!(
simd_reduce_add_unordered: vector_reduce_add,
vector_reduce_fadd_fast,
vector_reduce_fadd_reassoc,
false,
add,
0.0
);
arith_red!(
simd_reduce_mul_unordered: vector_reduce_mul,
vector_reduce_fmul_fast,
vector_reduce_fmul_reassoc,
false,
mul,
1.0
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_codegen_llvm/src/llvm/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1618,6 +1618,7 @@ extern "C" {
) -> &'a Value;

pub fn LLVMRustSetFastMath(Instr: &Value);
pub fn LLVMRustSetAllowReassoc(Instr: &Value);

// Miscellaneous instructions
pub fn LLVMRustGetInstrProfIncrementIntrinsic(M: &Module) -> &Value;
Expand Down
11 changes: 10 additions & 1 deletion compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ extern "C" LLVMAttributeRef LLVMRustCreateMemoryEffectsAttr(LLVMContextRef C,
}
}

// Enable a fast-math flag
// Enable all fast-math flags
//
// https://llvm.org/docs/LangRef.html#fast-math-flags
extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
Expand All @@ -427,6 +427,15 @@ extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
}
}

// Enable the reassoc fast-math flag
//
// Sets only the `reassoc` flag on the given instruction; no other fast-math
// flag is touched. If the value is not an instruction, this is a no-op.
//
// https://llvm.org/docs/LangRef.html#fast-math-flags
extern "C" void LLVMRustSetAllowReassoc(LLVMValueRef V) {
if (auto I = dyn_cast<Instruction>(unwrap<Value>(V))) {
I->setHasAllowReassoc(true);
}
}

extern "C" LLVMValueRef
LLVMRustBuildAtomicLoad(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Source,
const char *Name, LLVMAtomicOrdering Order) {
Expand Down
95 changes: 95 additions & 0 deletions library/core/src/intrinsics/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,24 @@
//! In this module, a "vector" is any `repr(simd)` type.
extern "platform-intrinsic" {
/// Insert an element into a vector, returning the updated vector.
///
/// `T` must be a vector with element type `U`.
///
/// # Safety
///
/// `idx` must be in-bounds of the vector.
pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;

/// Extract an element from a vector.
///
/// `T` must be a vector with element type `U`.
///
/// # Safety
///
/// `idx` must be in-bounds of the vector.
pub fn simd_extract<T, U>(x: T, idx: u32) -> U;

/// Add two simd vectors elementwise.
///
/// `T` must be a vector of integer or floating point primitive types.
Expand Down Expand Up @@ -315,6 +333,14 @@ extern "platform-intrinsic" {
/// Starting with the value `y`, add the elements of `x` and accumulate.
pub fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;

/// Add elements within a vector in arbitrary order. May also be re-associated with
/// unordered additions on the inputs/outputs.
///
/// `T` must be a vector of integer or floating-point primitive types.
///
/// `U` must be the element type of `T`.
pub fn simd_reduce_add_unordered<T, U>(x: T) -> U;

/// Multiply elements within a vector from left to right.
///
/// `T` must be a vector of integer or floating-point primitive types.
Expand All @@ -324,6 +350,14 @@ extern "platform-intrinsic" {
/// Starting with the value `y`, multiply the elements of `x` and accumulate.
pub fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;

/// Multiply elements within a vector in arbitrary order. May also be re-associated with
/// unordered multiplications on the inputs/outputs.
///
/// `T` must be a vector of integer or floating-point primitive types.
///
/// `U` must be the element type of `T`.
pub fn simd_reduce_mul_unordered<T, U>(x: T) -> U;

/// Check if all mask values are true.
///
/// `T` must be a vector of integer primitive types.
Expand All @@ -349,6 +383,19 @@ extern "platform-intrinsic" {
/// For floating-point values, uses IEEE-754 `maxNum`.
pub fn simd_reduce_max<T, U>(x: T) -> U;

/// Return the maximum element of a vector.
///
/// `T` must be a vector of integer or floating-point primitive types.
///
/// `U` must be the element type of `T`.
///
/// For floating-point values, uses IEEE-754 `maxNum`.
///
/// # Safety
///
/// All input elements must be finite (i.e., not NAN and not +/- INF).
pub fn simd_reduce_max_nanless<T, U>(x: T) -> U;

/// Return the minimum element of a vector.
///
/// `T` must be a vector of integer or floating-point primitive types.
Expand All @@ -358,6 +405,19 @@ extern "platform-intrinsic" {
/// For floating-point values, uses IEEE-754 `minNum`.
pub fn simd_reduce_min<T, U>(x: T) -> U;

/// Return the minimum element of a vector.
///
/// `T` must be a vector of integer or floating-point primitive types.
///
/// `U` must be the element type of `T`.
///
/// For floating-point values, uses IEEE-754 `minNum`.
///
/// # Safety
///
/// All input elements must be finite (i.e., not NAN and not +/- INF).
pub fn simd_reduce_min_nanless<T, U>(x: T) -> U;

/// Logical "and" all elements together.
///
/// `T` must be a vector of integer or floating-point primitive types.
Expand Down Expand Up @@ -516,4 +576,39 @@ extern "platform-intrinsic" {
///
/// `T` must be a vector of floats.
pub fn simd_fma<T>(x: T, y: T, z: T) -> T;

/// Computes the sine of each element.
///
/// `T` must be a vector of floats.
pub fn simd_fsin<T>(a: T) -> T;

/// Computes the cosine of each element.
///
/// `T` must be a vector of floats.
pub fn simd_fcos<T>(a: T) -> T;

/// Computes the exponential function of each element.
///
/// `T` must be a vector of floats.
pub fn simd_fexp<T>(a: T) -> T;

/// Computes 2 raised to the power of each element.
///
/// `T` must be a vector of floats.
pub fn simd_fexp2<T>(a: T) -> T;

/// Computes the base 10 logarithm of each element.
///
/// `T` must be a vector of floats.
pub fn simd_flog10<T>(a: T) -> T;

/// Computes the base 2 logarithm of each element.
///
/// `T` must be a vector of floats.
pub fn simd_flog2<T>(a: T) -> T;

/// Computes the natural logarithm of each element.
///
/// `T` must be a vector of floats.
pub fn simd_flog<T>(a: T) -> T;
}

0 comments on commit 37046c3

Please sign in to comment.