Skip to content

Commit

Permalink
Implement RVV backend
Browse files Browse the repository at this point in the history
  • Loading branch information
silvanshade committed Jan 18, 2024
1 parent 2041dd0 commit af4a32f
Show file tree
Hide file tree
Showing 11 changed files with 533 additions and 1 deletion.
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ default = ["std"]
# implementation uses C intrinsics and requires a C compiler.
neon = []

# The RVV implementation does not participate in dynamic feature detection,
# which is currently x86-only. If "rvv" is on, RVV support is assumed. The
# RVV implementation uses C intrinsics and requires a C compiler.
rvv = []

# This crate uses libstd for std::io trait implementations, and also for
# runtime CPU feature detection. This feature is enabled by default. If you use
# --no-default-features, the only way to use the SIMD implementations in this
Expand Down
24 changes: 24 additions & 0 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ fn bench_single_compression_avx512(b: &mut Bencher) {
}
}

/// Benchmarks a single compression call on the RVV platform.
///
/// The benchmark body only runs when `Platform::rvv()` returns `Some`.
#[bench]
#[cfg(blake3_rvv)]
fn bench_single_compression_rvv(b: &mut Bencher) {
    if let Some(platform) = Platform::rvv() {
        bench_single_compression_fn(b, platform);
    }
}

fn bench_many_chunks_fn(b: &mut Bencher, platform: Platform) {
let degree = platform.simd_degree();
let mut inputs = Vec::new();
Expand Down Expand Up @@ -151,6 +159,14 @@ fn bench_many_chunks_neon(b: &mut Bencher) {
}
}

/// Benchmarks hashing many chunks at once on the RVV platform.
///
/// The benchmark body only runs when `Platform::rvv()` returns `Some`.
#[bench]
#[cfg(feature = "rvv")]
fn bench_many_chunks_rvv(b: &mut Bencher) {
    if let Some(platform) = Platform::rvv() {
        bench_many_chunks_fn(b, platform);
    }
}

// TODO: When we get const generics we can unify this with the chunks code.
fn bench_many_parents_fn(b: &mut Bencher, platform: Platform) {
let degree = platform.simd_degree();
Expand Down Expand Up @@ -218,6 +234,14 @@ fn bench_many_parents_neon(b: &mut Bencher) {
}
}

/// Benchmarks hashing many parent nodes at once on the RVV platform.
///
/// Does nothing when `Platform::rvv()` reports no RVV platform.
#[bench]
#[cfg(feature = "rvv")]
fn bench_many_parents_rvv(b: &mut Bencher) {
    match Platform::rvv() {
        Some(rvv) => {
            bench_many_parents_fn(b, rvv);
        }
        None => {}
    }
}

fn bench_atonce(b: &mut Bencher, len: usize) {
let mut input = RandomInput::new(b, len);
b.iter(|| blake3::hash(input.get()));
Expand Down
46 changes: 45 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use core::panic;
use std::env;

fn defined(var: &str) -> bool {
Expand All @@ -21,6 +22,14 @@ fn is_no_neon() -> bool {
defined("CARGO_FEATURE_NO_NEON")
}

/// Reports whether the "rvv" Cargo feature is enabled for this build.
///
/// Cargo exports a `CARGO_FEATURE_<NAME>` environment variable to build
/// scripts for every enabled feature. Probing that variable keeps this check
/// in line with the other `CARGO_FEATURE_*` probes in this script instead of
/// relying on `cfg!`.
fn is_rvv() -> bool {
    std::env::var_os("CARGO_FEATURE_RVV").is_some()
}

/// Reports whether a "no_rvv" Cargo feature is enabled for this build.
///
/// This must be an independent probe rather than the negation of `is_rvv()`:
/// a negation can never be true at the same time as `is_rvv()`, which would
/// make any "both `no_rvv` and `rvv` are enabled" conflict check unreachable.
///
/// NOTE(review): this only ever returns `true` if a `no_rvv` feature is
/// declared in Cargo.toml; confirm that feature exists or drop the check.
fn is_no_rvv() -> bool {
    std::env::var_os("CARGO_FEATURE_NO_RVV").is_some()
}

/// Returns the result of `defined("BLAKE3_CI")`.
///
/// NOTE(review): presumably true when the `BLAKE3_CI` environment variable is
/// set; confirm against the `defined` helper.
fn is_ci() -> bool {
defined("BLAKE3_CI")
}
Expand Down Expand Up @@ -60,6 +69,18 @@ fn is_armv7() -> bool {
target_components()[0] == "armv7"
}

/// Returns `true` when the `CARGO_CFG_TARGET_ARCH` environment variable is
/// exactly `"riscv32"`.
///
/// An unset or unreadable variable yields `false`.
fn is_riscv32() -> bool {
    matches!(
        std::env::var("CARGO_CFG_TARGET_ARCH").as_deref(),
        Ok("riscv32")
    )
}

/// Returns `true` when the `CARGO_CFG_TARGET_ARCH` environment variable is
/// exactly `"riscv64"`.
///
/// An unset or unreadable variable yields `false`.
fn is_riscv64() -> bool {
    matches!(
        std::env::var("CARGO_CFG_TARGET_ARCH").as_deref(),
        Ok("riscv64")
    )
}

fn endianness() -> String {
let endianness = env::var("CARGO_CFG_TARGET_ENDIAN").unwrap();
assert!(endianness == "little" || endianness == "big");
Expand Down Expand Up @@ -239,15 +260,33 @@ fn build_neon_c_intrinsics() {
build.compile("blake3_neon");
}

/// Builds `c/blake3_rvv.c` with the builder returned by `new_build()` and
/// names the compiled output `blake3_rvv`.
///
/// A `-march` flag enabling the vector extension is added for whichever
/// RISC-V width the target reports; no flag is added for other targets.
fn build_rvv_c_intrinsics() {
    let mut rvv_build = new_build();
    rvv_build.file("c/blake3_rvv.c");

    // Each entry pairs an architecture probe with the flag it selects.
    let march_flags = [
        (is_riscv32(), "-march=rv32gcv1p0"),
        (is_riscv64(), "-march=rv64gcv1p0"),
    ];
    for (applies, march) in march_flags {
        if applies {
            rvv_build.flag(march);
        }
    }

    rvv_build.compile("blake3_rvv");
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
if is_pure() && is_neon() {
panic!("It doesn't make sense to enable both \"pure\" and \"neon\".");
}

if is_no_neon() && is_neon() {
panic!("It doesn't make sense to enable both \"no_neon\" and \"neon\".");
}

if is_pure() && is_rvv() {
panic!("It doesn't make sense to enable both \"pure\" and \"rvv\".");
}
if is_no_rvv() && is_rvv() {
panic!("It doesn't make sense to enable both \"no_rvv\" and \"rvv\".");
}

if is_x86_64() || is_x86_32() {
let support = c_compiler_support();
if is_x86_32() || should_prefer_intrinsics() || is_pure() || support == NoCompiler {
Expand Down Expand Up @@ -278,6 +317,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
build_neon_c_intrinsics();
}

if (is_riscv32() || is_riscv64()) && is_rvv() {
println!("cargo:rustc-cfg=blake3_rvv");
build_rvv_c_intrinsics();
}

// The `cc` crate doesn't automatically emit rerun-if directives for the
// environment variables it supports, in particular for $CC. We expect to
// do a lot of benchmarking across different compilers, so we explicitly
Expand Down
12 changes: 12 additions & 0 deletions c/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ include(GNUInstallDirs)
set(BLAKE3_AMD64_NAMES amd64 AMD64 x86_64)
set(BLAKE3_X86_NAMES i686 x86 X86)
set(BLAKE3_ARMv8_NAMES aarch64 AArch64 arm64 ARM64 armv8 armv8a)
set(BLAKE3_RISCV64_NAMES riscv64)
# default SIMD compiler flag configuration (can be overridden by toolchains or CLI)
if(MSVC)
set(BLAKE3_CFLAGS_SSE2 "/arch:SSE2" CACHE STRING "the compiler flags to enable SSE2")
Expand All @@ -48,6 +49,7 @@ elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
set(BLAKE3_CFLAGS_SSE4.1 "-msse4.1" CACHE STRING "the compiler flags to enable SSE4.1")
set(BLAKE3_CFLAGS_AVX2 "-mavx2" CACHE STRING "the compiler flags to enable AVX2")
set(BLAKE3_CFLAGS_AVX512 "-mavx512f -mavx512vl" CACHE STRING "the compiler flags to enable AVX512")
set(BLAKE3_CFLAGS_RVV_RISCV64 "-march=rv64gcv1p0" CACHE STRING "the compiler flags to enable RVV for riscv64")

if (CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
Expand Down Expand Up @@ -95,8 +97,10 @@ macro(BLAKE3_DISABLE_SIMD)
set(BLAKE3_SIMD_AMD64_ASM OFF)
set(BLAKE3_SIMD_X86_INTRINSICS OFF)
set(BLAKE3_SIMD_NEON_INTRINSICS OFF)
set(BLAKE3_VLA_RVV_INTRINSICS OFF)
target_compile_definitions(blake3 PRIVATE
BLAKE3_USE_NEON=0
# Explicitly 0: a bare BLAKE3_USE_RVV would define the macro as 1 and enable RVV.
BLAKE3_USE_RVV=0
BLAKE3_NO_SSE2
BLAKE3_NO_SSE41
BLAKE3_NO_AVX2
Expand Down Expand Up @@ -179,6 +183,13 @@ elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
set_source_files_properties(blake3_neon.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_NEON}")
endif()

elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_RISCV64_NAMES
      OR BLAKE3_USE_RVV_INTRINSICS))
  # riscv64 target, or RVV intrinsics explicitly requested: compile the RVV
  # source with the RVV-enabling compiler flags.
  # NOTE(review): nothing in this branch defines BLAKE3_USE_RVV=1, which the
  # `#if BLAKE3_USE_RVV == 1` guards in the C sources test; confirm it is
  # defined elsewhere.
  set(BLAKE3_VLA_RVV_INTRINSICS ON)
  target_sources(blake3 PRIVATE
    blake3_rvv.c
  )
  set_source_files_properties(blake3_rvv.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_RVV_RISCV64}")
else()
BLAKE3_DISABLE_SIMD()
endif()
Expand Down Expand Up @@ -216,4 +227,5 @@ install(FILES "${CMAKE_BINARY_DIR}/libblake3.pc"
add_feature_info("AMD64 assembly" BLAKE3_SIMD_AMD64_ASM "The library uses hand written amd64 SIMD assembly.")
add_feature_info("x86 SIMD intrinsics" BLAKE3_SIMD_X86_INTRINSICS "The library uses x86 SIMD intrinsics.")
add_feature_info("NEON SIMD intrinsics" BLAKE3_SIMD_NEON_INTRINSICS "The library uses NEON SIMD intrinsics.")
add_feature_info("RISC-V RVV intrinsics" BLAKE3_VLA_RVV_INTRINSICS "The library uses RISC-V RVV intrinsics.")
feature_summary(WHAT ENABLED_FEATURES)
15 changes: 15 additions & 0 deletions c/blake3_c_rust_bindings/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,15 @@ fn test_compress_avx512() {
);
}

/// Runs the shared compression test against the RVV FFI entry points.
///
/// The functions live in the `ffi::rvv` module, the same module the RVV
/// `hash_many` test uses, not in `ffi::x86`.
#[test]
#[cfg(feature = "rvv")]
fn test_compress_rvv() {
    test_compress_fn(
        crate::ffi::rvv::blake3_compress_in_place_rvv,
        crate::ffi::rvv::blake3_compress_xof_rvv,
    );
}

type HashManyFn = unsafe extern "C" fn(
inputs: *const *const u8,
num_inputs: usize,
Expand Down Expand Up @@ -359,6 +368,12 @@ fn test_hash_many_neon() {
test_hash_many_fn(crate::ffi::neon::blake3_hash_many_neon);
}

/// Runs the shared `hash_many` test against the RVV FFI entry point.
#[test]
#[cfg(feature = "rvv")]
fn test_hash_many_rvv() {
    let rvv_hash_many = crate::ffi::rvv::blake3_hash_many_rvv;
    test_hash_many_fn(rvv_hash_many);
}

#[test]
fn test_compare_reference_impl() {
const OUT: usize = 303; // more than 64, not a multiple of 4
Expand Down
5 changes: 5 additions & 0 deletions c/blake3_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,11 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
return;
#endif

#if BLAKE3_USE_RVV == 1
  // The RVV backend handles the whole batch. Return here so the portable
  // fallback below does not hash the same inputs a second time.
  blake3_hash_many_rvv(inputs, num_inputs, blocks, key, counter,
                       increment_counter, flags, flags_start, flags_end, out);
  return;
#endif

blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
Expand Down
17 changes: 17 additions & 0 deletions c/blake3_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,5 +281,22 @@ void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
uint8_t flags_end, uint8_t *out);
#endif

// Prototypes for the RVV backend. They are only declared when
// BLAKE3_USE_RVV is defined as 1.
#if BLAKE3_USE_RVV == 1
// Compression variant taking a non-const `cv` and no separate output buffer.
void blake3_compress_in_place_rvv(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter,
uint8_t flags);

// Compression variant taking a const `cv` and a 64-byte `out` buffer.
void blake3_compress_xof_rvv(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter, uint8_t flags,
uint8_t out[64]);

// Multi-input entry point; takes the same argument list that
// blake3_hash_many passes to the portable implementation.
void blake3_hash_many_rvv(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start, uint8_t flags_end,
uint8_t *out);
#endif

#endif /* BLAKE3_IMPL_H */
Loading

0 comments on commit af4a32f

Please sign in to comment.