Skip to content

Commit

Permalink
feat: add x64 simd for sparse vector operations (#417)
Browse files (browse the repository at this point in the history)
* feat: support sparse vector with 1000000 dims

Signed-off-by: Mingzhuo Yin <[email protected]>

* change max dims to 1_048_576

Signed-off-by: Mingzhuo Yin <[email protected]>

* change max dims to 1_048_575

Signed-off-by: Mingzhuo Yin <[email protected]>

* update PQ dims to u32

Signed-off-by: Mingzhuo Yin <[email protected]>

* cargo clippy

Signed-off-by: Mingzhuo Yin <[email protected]>

* update parse function to previous impl

Signed-off-by: Mingzhuo Yin <[email protected]>

* feat: add simd for sparse dot distance

Signed-off-by: Mingzhuo Yin <[email protected]>

* update sparse dot

Signed-off-by: Mingzhuo Yin <[email protected]>

* add simd for cosine and sl2 of sparse vector

Signed-off-by: Mingzhuo Yin <[email protected]>

* update cosine to previous impl

Signed-off-by: Mingzhuo Yin <[email protected]>

* support aarch64

Signed-off-by: Mingzhuo Yin <[email protected]>

* typo

Signed-off-by: Mingzhuo Yin <[email protected]>

* change std_detect to tensorchord/stdarch

Signed-off-by: Mingzhuo Yin <[email protected]>

* remove native vp2intersect

Signed-off-by: Mingzhuo Yin <[email protected]>

---------

Signed-off-by: Mingzhuo Yin <[email protected]>
  • Loading branch information
silver-ymz authored Mar 13, 2024
1 parent 71e1815 commit bd7a0a6
Show file tree
Hide file tree
Showing 8 changed files with 619 additions and 31 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 9 additions & 4 deletions crates/base/src/vector/bvecf32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ pub fn cosine<'a>(lhs: BVecf32Borrowed<'a>, rhs: BVecf32Borrowed<'a>) -> F32 {
"x86_64/x86-64-v2",
"aarch64+neon"
))]
pub fn cosine(lhs: &[usize], rhs: &[usize]) -> F32 {
fn cosine(lhs: &[usize], rhs: &[usize]) -> F32 {
let mut xy = 0;
let mut xx = 0;
let mut yy = 0;
Expand All @@ -191,6 +191,7 @@ pub fn cosine<'a>(lhs: BVecf32Borrowed<'a>, rhs: BVecf32Borrowed<'a>) -> F32 {
F32(rxy / (rxx * ryy).sqrt())
}

#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512vpopcntdq,avx512bw,avx512f,bmi2")]
unsafe fn cosine_avx512vpopcntdq(lhs: &[usize], rhs: &[usize]) -> F32 {
Expand Down Expand Up @@ -266,14 +267,15 @@ pub fn dot<'a>(lhs: BVecf32Borrowed<'a>, rhs: BVecf32Borrowed<'a>) -> F32 {
"x86_64/x86-64-v2",
"aarch64+neon"
))]
pub fn dot(lhs: &[usize], rhs: &[usize]) -> F32 {
fn dot(lhs: &[usize], rhs: &[usize]) -> F32 {
let mut xy = 0;
for i in 0..lhs.len() {
xy += (lhs[i] & rhs[i]).count_ones();
}
F32(xy as f32)
}

#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512vpopcntdq,avx512bw,avx512f,bmi2")]
unsafe fn dot_avx512vpopcntdq(lhs: &[usize], rhs: &[usize]) -> F32 {
Expand Down Expand Up @@ -341,14 +343,15 @@ pub fn sl2<'a>(lhs: BVecf32Borrowed<'a>, rhs: BVecf32Borrowed<'a>) -> F32 {
"x86_64/x86-64-v2",
"aarch64+neon"
))]
pub fn sl2(lhs: &[usize], rhs: &[usize]) -> F32 {
fn sl2(lhs: &[usize], rhs: &[usize]) -> F32 {
let mut dd = 0;
for i in 0..lhs.len() {
dd += (lhs[i] ^ rhs[i]).count_ones();
}
F32(dd as f32)
}

#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512vpopcntdq,avx512bw,avx512f,bmi2")]
unsafe fn sl2_avx512vpopcntdq(lhs: &[usize], rhs: &[usize]) -> F32 {
Expand Down Expand Up @@ -416,7 +419,7 @@ pub fn jaccard<'a>(lhs: BVecf32Borrowed<'a>, rhs: BVecf32Borrowed<'a>) -> F32 {
"x86_64/x86-64-v2",
"aarch64+neon"
))]
pub fn jaccard(lhs: &[usize], rhs: &[usize]) -> F32 {
fn jaccard(lhs: &[usize], rhs: &[usize]) -> F32 {
let mut inter = 0;
let mut union = 0;
for i in 0..lhs.len() {
Expand All @@ -426,6 +429,7 @@ pub fn jaccard<'a>(lhs: BVecf32Borrowed<'a>, rhs: BVecf32Borrowed<'a>) -> F32 {
F32(inter as f32 / union as f32)
}

#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512vpopcntdq,avx512bw,avx512f,bmi2")]
unsafe fn jaccard_avx512vpopcntdq(lhs: &[usize], rhs: &[usize]) -> F32 {
Expand Down Expand Up @@ -503,6 +507,7 @@ pub fn length(vector: BVecf32Borrowed<'_>) -> F32 {
F32(l as f32).sqrt()
}

#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512vpopcntdq,avx512bw,avx512f,bmi2")]
unsafe fn length_avx512vpopcntdq(lhs: &[usize]) -> F32 {
Expand Down
Loading

0 comments on commit bd7a0a6

Please sign in to comment.