zhenfeizhang · zhenfeizhang · Feb 26, 2024 · Feb 26, 2024 · Feb 26, 2024
diff --git a/README.md b/README.md
@@ -5,10 +5,17 @@ This repo implements
 - Goldilocks Field mod `2^64 - 2^32 + 1`
 - Goldilocks quadratic extension over `x^2 - 7`
 - Goldilocks cubic extension over `x^3 - x - 1`
+- AVX2 acceleration for core operations in MLE
 
 Traits are compatible with `ff 0.13.0`.
 
 ### Benchmark
+Without AVX2
 ```
 cargo bench
+```
+
+With AVX2
+```
+RUSTFLAGS='-C target-feature=+avx2' cargo bench
 ```
diff --git a/benches/bench.rs b/benches/bench.rs
@@ -1,6 +1,9 @@
-use criterion::{criterion_group, criterion_main, Criterion};
+use ark_std::test_rng;
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use ff::Field;
-use goldilocks::{ExtensionField, Goldilocks, GoldilocksExt2, GoldilocksExt3, SmallField};
+use goldilocks::{
+    EvalHelper, ExtensionField, Goldilocks, GoldilocksExt2, GoldilocksExt3, SmallField,
+};
 use halo2curves::bn256::Fr;
 use rand_core::SeedableRng;
 use rand_xorshift::XorShiftRng;
@@ -9,7 +12,107 @@ const SIZE: usize = 1000;
 
 criterion_main!(bench);
 
-criterion_group!(bench, bench_fields);
+criterion_group!(bench, bench_avx2, bench_sum_5, bench_fields);
+
+fn bench_sum_5(crit: &mut Criterion) {
+    let mut rng = test_rng();
+    let a = (0..SIZE)
+        .map(|_| Goldilocks::random(&mut rng))
+        .collect::<Vec<_>>();
+    let b = (0..SIZE)
+        .map(|_| Goldilocks::random(&mut rng))
+        .collect::<Vec<_>>();
+    let c = (0..SIZE)
+        .map(|_| Goldilocks::random(&mut rng))
+        .collect::<Vec<_>>();
+    let d = (0..SIZE)
+        .map(|_| Goldilocks::random(&mut rng))
+        .collect::<Vec<_>>();
+    let e = (0..SIZE)
+        .map(|_| Goldilocks::random(&mut rng))
+        .collect::<Vec<_>>();
+
+    let bench_str = format!("{} sum 5", SIZE);
+    crit.bench_function(&bench_str, |bencher| {
+        bencher.iter(|| {
+            a.iter()
+                .zip(b.iter().zip(c.iter().zip(d.iter().zip(e.iter()))))
+                .map(|(ai, (bi, (ci, (di, ei))))| Goldilocks::sum_5(ai, bi, ci, di, ei))
+                .collect::<Vec<_>>()
+        })
+    });
+}
+
+fn bench_avx2(c: &mut Criterion) {
+    let mut rng = test_rng();
+
+    {
+        let x_and_y = (0..SIZE << 1)
+            .map(|_| GoldilocksExt2::random(&mut rng))
+            .collect::<Vec<_>>();
+        let z = (0..SIZE)
+            .map(|_| GoldilocksExt2::random(&mut rng))
+            .collect::<Vec<_>>();
+
+        let id = "eval single non-avx2";
+        c.bench_function(id, |b| {
+            b.iter(|| {
+                black_box(x_and_y.chunks(2).zip(z.iter()).for_each(|(x_and_yi, zi)| {
+                    let _ = *zi * (x_and_yi[1] - x_and_yi[0]) + x_and_yi[0];
+                }))
+            })
+        });
+
+        let id = "eval single avx2";
+        c.bench_function(id, |b| {
+            b.iter(|| {
+                black_box(x_and_y.chunks(2).zip(z.iter()).for_each(|(x_and_yi, zi)| {
+                    let _ = <GoldilocksExt2 as EvalHelper>::eval_helper(x_and_yi, zi);
+                }))
+            })
+        });
+    }
+
+    {
+        let x_and_y = (0..SIZE << 1)
+            .map(|_| {
+                (0..8)
+                    .map(|_| GoldilocksExt2::random(&mut rng))
+                    .collect::<Vec<_>>()
+            })
+            .collect::<Vec<_>>();
+        let z = (0..SIZE)
+            .map(|_| {
+                (0..4)
+                    .map(|_| GoldilocksExt2::random(&mut rng))
+                    .collect::<Vec<_>>()
+            })
+            .collect::<Vec<_>>();
+
+        let id = "eval batch 4 non-avx2";
+        c.bench_function(id, |b| {
+            b.iter(|| {
+                black_box(x_and_y.iter().zip(z.iter()).for_each(|(x_and_ys, zis)| {
+                    x_and_ys
+                        .chunks(2)
+                        .zip(zis.iter())
+                        .for_each(|(x_and_y, &p)| {
+                            let _ = p * (x_and_y[1] - x_and_y[0]) + x_and_y[0];
+                        });
+                }))
+            })
+        });
+
+        let id = "eval batch 4  avx2";
+        c.bench_function(id, |b| {
+            b.iter(|| {
+                black_box(x_and_y.iter().zip(z.iter()).for_each(|(x_and_yi, zi)| {
+                    let _ = <GoldilocksExt2 as EvalHelper>::eval_helper_4(x_and_yi, zi);
+                }))
+            })
+        });
+    }
+}
 
 fn bench_fields(c: &mut Criterion) {
     bench_field::<Goldilocks>(c, <Goldilocks as SmallField>::NAME);

diff --git a/src/lib.rs b/src/lib.rs
@@ -15,6 +15,7 @@ mod util;
 pub use extfield::ExtensionField;
 pub use smallfield::SmallField;
 pub use structs::{Goldilocks, GoldilocksExt2, GoldilocksExt3, EPSILON, MODULUS};
+pub use util::EvalHelper;
 
 #[cfg(test)]
 mod tests;
diff --git a/src/primefield/fp.rs b/src/primefield/fp.rs
@@ -549,3 +549,36 @@ impl Goldilocks {
         }
     }
 }
+const MULTIPLE_OF_EPSILON: [u64; 8] = [
+    0,
+    0xffffffff,
+    0x1fffffffe,
+    0x2fffffffd,
+    0x3fffffffc,
+    0x4fffffffb,
+    0x5fffffffa,
+    0x6fffffff9,
+];
+
+impl Goldilocks {
+    #[inline]
+    pub fn sum_5(
+        a: &Goldilocks,
+        b: &Goldilocks,
+        c: &Goldilocks,
+        d: &Goldilocks,
+        e: &Goldilocks,
+    ) -> Self {
+        let (sum, over_b) = a.0.overflowing_add(b.0);
+        let (sum, over_c) = sum.overflowing_add(c.0);
+        let (sum, over_d) = sum.overflowing_add(d.0);
+        let (sum, over_e) = sum.overflowing_add(e.0);
+        let over = over_b as u8 + over_c as u8 + over_d as u8 + over_e as u8;
+
+        let (mut sum, over) = sum.overflowing_add(MULTIPLE_OF_EPSILON[over as usize]);
+        if over {
+            sum += EPSILON; // Cannot overflow.
+        }
+        Self(sum)
+    }
+}
diff --git a/src/util.rs b/src/util.rs
@@ -6,6 +6,11 @@ use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption};
 
 use crate::{Goldilocks, MODULUS};
 
+mod avx2;
+mod mle_helper;
+
+pub use mle_helper::EvalHelper;
+
 #[inline(always)]
 pub fn assume(p: bool) {
     debug_assert!(p);