Skip to content

Commit

Permalink
s/riscv64gcv/riscv_rva23u64/ everywhere
Browse files Browse the repository at this point in the history
  • Loading branch information
oconnor663 committed Jan 18, 2024
1 parent 3ddab49 commit 999312e
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 70 deletions.
8 changes: 4 additions & 4 deletions rust/guts/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,15 +229,15 @@ fn build_neon_c_intrinsics() {
build.compile("blake3_neon");
}

fn build_riscv64gcv_assembly() {
println!("cargo:rustc-cfg=blake3_riscv64gcv_ffi");
fn build_riscv_rva23u64_assembly() {
println!("cargo:rustc-cfg=blake3_riscv_rva23u64_ffi");
let mut build = new_build();
let asm_path = "src/riscv_rva23u64.S";
build.file(asm_path);
build.flag("--target=riscv64");
build.flag("-march=rv64gcv_zbb_zvbb1p0");
build.flag("-menable-experimental-extensions");
build.compile("blake3_riscv64gcv_assembly");
build.compile("blake3_riscv_rva23u64_assembly");
println!("cargo:rerun-if-changed={asm_path}");
}

Expand Down Expand Up @@ -277,7 +277,7 @@ fn main() {
// TODO: This implementation assumes some bleeding-edge extensions, and it should probably be
// gated by a Cargo feature.
if is_riscv64gc() && !is_pure() {
build_riscv64gcv_assembly();
build_riscv_rva23u64_assembly();
}

// The `cc` crate doesn't automatically emit rerun-if directives for the
Expand Down
6 changes: 3 additions & 3 deletions rust/guts/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use core::sync::atomic::{AtomicPtr, Ordering::Relaxed};
pub mod avx512;
pub mod portable;
#[cfg(any(target_arch = "riscv64"))]
pub mod riscv64gcv;
pub mod riscv_rva23u64;

#[cfg(test)]
mod test;
Expand Down Expand Up @@ -46,7 +46,7 @@ cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
pub const MAX_SIMD_DEGREE: usize = avx512::DEGREE;
} else if #[cfg(target_arch = "riscv64")] {
pub const MAX_SIMD_DEGREE: usize = riscv64gcv::MAX_SIMD_DEGREE;
pub const MAX_SIMD_DEGREE: usize = riscv_rva23u64::MAX_SIMD_DEGREE;
} else if #[cfg(blake3_neon)] {
pub const MAX_SIMD_DEGREE: usize = 4;
} else {
Expand Down Expand Up @@ -79,7 +79,7 @@ fn detect() -> Implementation {
}
#[cfg(target_arch = "riscv64")]
{
return riscv64gcv::implementation();
return riscv_rva23u64::implementation();
}
#[allow(unreachable_code)]
portable::implementation()
Expand Down
96 changes: 48 additions & 48 deletions rust/guts/src/riscv_rva23u64.S
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ MSG_PERMUTE:
// a3: counter
// a4: flags
// a5: out_ptr
.global blake3_guts_riscv64gcv_compress
blake3_guts_riscv64gcv_compress:
.global blake3_guts_riscv_rva23u64_compress
blake3_guts_riscv_rva23u64_compress:
// Load the message load and message permutation indexes.
vsetivli zero, 16, e16, m2, ta, ma
la t0, MSG_LOAD
Expand Down Expand Up @@ -110,7 +110,7 @@ blake3_guts_riscv64gcv_compress:
vslide1down.vx v3, v3, a1
vslide1down.vx v3, v3, a4
li t0, 7 // round counter
blake3_guts_riscv64gcv_compress_round_loop:
blake3_guts_riscv_rva23u64_compress_round_loop:
vadd.vv v0, v0, v4
vadd.vv v0, v0, v1
vxor.vv v3, v3, v0
Expand Down Expand Up @@ -148,7 +148,7 @@ blake3_guts_riscv64gcv_compress_round_loop:
vrgather.vv v3, v23, v13
vrgather.vv v2, v22, v12
addi t0, t0, -1
beqz t0, blake3_guts_riscv64gcv_compress_end
beqz t0, blake3_guts_riscv_rva23u64_compress_end
// Shuffle message words.
// TODO: Find a way to do this without so much movement?
vmv.v.v v16, v4
Expand All @@ -169,8 +169,8 @@ blake3_guts_riscv64gcv_compress_round_loop:
vmv.v.v v5, v16
vmv.v.v v6, v20
vmv.v.v v7, v24
j blake3_guts_riscv64gcv_compress_round_loop
blake3_guts_riscv64gcv_compress_end:
j blake3_guts_riscv_rva23u64_compress_round_loop
blake3_guts_riscv_rva23u64_compress_end:
vxor.vv v0, v0, v2
vxor.vv v1, v1, v3
vsetivli zero, 16, e8, m1, ta, ma
Expand All @@ -180,16 +180,16 @@ blake3_guts_riscv64gcv_compress_end:
ret


.global blake3_guts_riscv64gcv_degree
blake3_guts_riscv64gcv_degree:
.global blake3_guts_riscv_rva23u64_degree
blake3_guts_riscv_rva23u64_degree:
csrr t0, vlenb
srli t0, t0, 2
li t1, MAX_SIMD_DEGREE
minu a0, t0, t1
ret

// clobbers: t0
blake3_guts_riscv64gcv_kernel:
blake3_guts_riscv_rva23u64_kernel:
li t0, IV0
vmv.v.x v8, t0
li t0, IV1
Expand Down Expand Up @@ -993,7 +993,7 @@ blake3_guts_riscv64gcv_kernel:
// a5: aligned+transposed output [unused]
// a6: total chunks [unused]
// a7: remaining_bytes_in_last_chunk
blake3_guts_riscv64gcv_hash_blocks:
blake3_guts_riscv_rva23u64_hash_blocks:
// t0 := full_blocks := (input_len + 1024 - 64) / 1024
addi t0, a1, 1024 - 64
srli t0, t0, 10
Expand Down Expand Up @@ -1056,7 +1056,7 @@ partial_block_finished:
vslide1down.vx v15, v15, t1
// execute the kernel
mv t6, ra
call blake3_guts_riscv64gcv_kernel
call blake3_guts_riscv_rva23u64_kernel
mv ra, t6
// xor the two halves of the state
vxor.vv v0, v0, v8
Expand Down Expand Up @@ -1148,15 +1148,15 @@ vlenb_less_than_32:
// a3: counter
// a4: flags
// a5: aligned+transposed output
.global blake3_guts_riscv64gcv_hash_chunks
blake3_guts_riscv64gcv_hash_chunks:
.global blake3_guts_riscv_rva23u64_hash_chunks
blake3_guts_riscv_rva23u64_hash_chunks:
// Save the original num_chunks = (input_len+1023)/1024 in a6.
addi a6, a1, 1023
srli a6, a6, 10
// Track the bytes remaining in the last chunk in a7. The initial value
// of this is ((input_len - 1) % 1024) + 1. (The input to this function
// is never empty.) It decrements by 64 with each call to
// blake3_guts_riscv64gcv_hash_chunks, but not below 0.
// blake3_guts_riscv_rva23u64_hash_blocks, but not below 0.
addi a7, a1, -1
andi a7, a7, 1023
addi a7, a7, 1
Expand All @@ -1183,24 +1183,24 @@ blake3_guts_riscv64gcv_hash_chunks:
// the input length.
mv t5, ra
ori a4, a4, 1 // set CHUNK_START
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
andi a4, a4, -2 // unset CHUNK_START
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
ori a4, a4, 2 // set CHUNK_END
call blake3_guts_riscv64gcv_hash_blocks
call blake3_guts_riscv_rva23u64_hash_blocks
mv ra, t5
// If the final chunk is short, we need to set vl back to the total
// number of chunks.
Expand Down Expand Up @@ -1228,8 +1228,8 @@ blake3_guts_riscv64gcv_hash_chunks:
// a2: key
// a3: flags
// a4: out pointer
.global blake3_guts_riscv64gcv_hash_parents
blake3_guts_riscv64gcv_hash_parents:
.global blake3_guts_riscv_rva23u64_hash_parents
blake3_guts_riscv_rva23u64_hash_parents:
// load the transposed CVs and split alternating words into the low and
// high halves of the input vectors
vsetvli zero, a1, e32, m1, ta, ma
Expand Down Expand Up @@ -1285,7 +1285,7 @@ blake3_guts_riscv64gcv_hash_parents:

// execute the kernel
mv t6, ra
call blake3_guts_riscv64gcv_kernel
call blake3_guts_riscv_rva23u64_kernel
mv ra, t6

// xor the two halves of the state
Expand Down Expand Up @@ -1322,7 +1322,7 @@ blake3_guts_riscv64gcv_hash_parents:
// a4: flags
// a5: out_ptr
// a6: out_len
blake3_guts_riscv64gcv_xof_inner:
blake3_guts_riscv_rva23u64_xof_inner:
// t1 := total_blocks := (out_len + 63) / 64
addi t1, a6, 63
srli t1, t1, 6
Expand Down Expand Up @@ -1395,7 +1395,7 @@ blake3_guts_riscv64gcv_xof_inner:

// execute the kernel
mv t6, ra
call blake3_guts_riscv64gcv_kernel
call blake3_guts_riscv_rva23u64_kernel
mv ra, t6

// reload the CV, this time into v16-23
Expand Down Expand Up @@ -1441,10 +1441,10 @@ blake3_guts_riscv64gcv_xof_inner:
// a4: flags
// a5: out_ptr
// a6: out_len
.global blake3_guts_riscv64gcv_xof
blake3_guts_riscv64gcv_xof:
.global blake3_guts_riscv_rva23u64_xof
blake3_guts_riscv_rva23u64_xof:
mv t5, ra
call blake3_guts_riscv64gcv_xof_inner
call blake3_guts_riscv_rva23u64_xof_inner
mv ra, t5

// t1 is now total_blocks, and t2 is full_blocks. Set vl to t2 and the
Expand All @@ -1466,9 +1466,9 @@ blake3_guts_riscv64gcv_xof:

// If total_blocks != full_blocks, we need to handle the final
// partial block. Otherwise, we're done.
bne t1, t2, blake3_guts_riscv64gcv_xof_partial_block
bne t1, t2, blake3_guts_riscv_rva23u64_xof_partial_block
ret
blake3_guts_riscv64gcv_xof_partial_block:
blake3_guts_riscv_rva23u64_xof_partial_block:
// Collect groups of 4 words in v0, v4, v8, and v12.
vsetivli zero, 4, e32, m1, ta, ma
vslidedown.vx v0, v0, t2
Expand Down Expand Up @@ -1520,10 +1520,10 @@ blake3_guts_riscv64gcv_xof_partial_block:
// a4: flags
// a5: out_ptr
// a6: out_len
.global blake3_guts_riscv64gcv_xof_xor
blake3_guts_riscv64gcv_xof_xor:
.global blake3_guts_riscv_rva23u64_xof_xor
blake3_guts_riscv_rva23u64_xof_xor:
mv t5, ra
call blake3_guts_riscv64gcv_xof_inner
call blake3_guts_riscv_rva23u64_xof_inner
mv ra, t5

// t1 is now total_blocks, and t2 is full_blocks. Set vl to t2 and the
Expand Down Expand Up @@ -1564,9 +1564,9 @@ blake3_guts_riscv64gcv_xof_xor:

// If total_blocks != full_blocks, we need to handle the final
// partial block. Otherwise, we're done.
bne t1, t2, blake3_guts_riscv64gcv_xof_xor_partial_block
bne t1, t2, blake3_guts_riscv_rva23u64_xof_xor_partial_block
ret
blake3_guts_riscv64gcv_xof_xor_partial_block:
blake3_guts_riscv_rva23u64_xof_xor_partial_block:
// Collect groups of 4 words in v0, v4, v8, and v12.
vsetivli zero, 4, e32, m1, ta, ma
vslidedown.vx v0, v0, t2
Expand Down Expand Up @@ -1618,8 +1618,8 @@ blake3_guts_riscv64gcv_xof_xor_partial_block:
// a2: key
// a3: counter
// a4: out_ptr
.global blake3_guts_riscv64gcv_universal_hash
blake3_guts_riscv64gcv_universal_hash:
.global blake3_guts_riscv_rva23u64_universal_hash
blake3_guts_riscv_rva23u64_universal_hash:
// t0 := full_blocks := input_len / 64
srli t0, a1, 6
// Load and transpose full message blocks. These are "strided segment
Expand Down Expand Up @@ -1675,7 +1675,7 @@ universal_hash_partial_block_finished:
vmv.v.x v15, t1
// Execute the kernel.
mv t6, ra
call blake3_guts_riscv64gcv_kernel
call blake3_guts_riscv_rva23u64_kernel
mv ra, t6
// Finish the first four state vectors. The rest are dropped.
vxor.vv v0, v0, v8
Expand Down
30 changes: 15 additions & 15 deletions rust/guts/src/riscv64gcv.rs → rust/guts/src/riscv_rva23u64.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! This implementation currently assumes riscv64gcv_zbb_zvbb. Zvbb in particular ("Vector
//! This implementation currently assumes rv64gcv_zbb_zvbb. Zvbb in particular ("Vector
//! Bit-manipulation used in Cryptography") is a bleeding-edge extension that was only frozen a few
//! weeks ago at the time I'm writing this comment. Compiling and testing this code currently
//! requires quite a lot of effort, including building Clang from master and building QEMU from a
Expand All @@ -10,31 +10,31 @@ use crate::{BlockBytes, CVBytes, Implementation};
pub(crate) const MAX_SIMD_DEGREE: usize = 16;

extern "C" {
fn blake3_guts_riscv64gcv_degree() -> usize;
fn blake3_guts_riscv64gcv_compress(
fn blake3_guts_riscv_rva23u64_degree() -> usize;
fn blake3_guts_riscv_rva23u64_compress(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut CVBytes,
);
fn blake3_guts_riscv64gcv_hash_chunks(
fn blake3_guts_riscv_rva23u64_hash_chunks(
input: *const u8,
input_len: usize,
key: *const CVBytes,
counter: u64,
flags: u32,
transposed_output: *mut u32,
);
fn blake3_guts_riscv64gcv_hash_parents(
fn blake3_guts_riscv_rva23u64_hash_parents(
transposed_input: *const u32,
num_parents: usize,
key: *const CVBytes,
flags: u32,
transposed_output: *mut u32,
);
fn blake3_guts_riscv64gcv_xof(
fn blake3_guts_riscv_rva23u64_xof(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
Expand All @@ -43,7 +43,7 @@ extern "C" {
out: *mut u8,
out_len: usize,
);
fn blake3_guts_riscv64gcv_xof_xor(
fn blake3_guts_riscv_rva23u64_xof_xor(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
Expand All @@ -52,7 +52,7 @@ extern "C" {
out: *mut u8,
out_len: usize,
);
fn blake3_guts_riscv64gcv_universal_hash(
fn blake3_guts_riscv_rva23u64_universal_hash(
input: *const u8,
input_len: usize,
key: *const CVBytes,
Expand All @@ -63,13 +63,13 @@ extern "C" {

pub fn implementation() -> Implementation {
Implementation::new(
blake3_guts_riscv64gcv_degree,
blake3_guts_riscv64gcv_compress,
blake3_guts_riscv64gcv_hash_chunks,
blake3_guts_riscv64gcv_hash_parents,
blake3_guts_riscv64gcv_xof,
blake3_guts_riscv64gcv_xof_xor,
blake3_guts_riscv64gcv_universal_hash,
blake3_guts_riscv_rva23u64_degree,
blake3_guts_riscv_rva23u64_compress,
blake3_guts_riscv_rva23u64_hash_chunks,
blake3_guts_riscv_rva23u64_hash_parents,
blake3_guts_riscv_rva23u64_xof,
blake3_guts_riscv_rva23u64_xof_xor,
blake3_guts_riscv_rva23u64_universal_hash,
)
}

Expand Down

0 comments on commit 999312e

Please sign in to comment.