Skip to content

Commit

Permalink
WideFib test with AVX Backend
Browse files Browse the repository at this point in the history
  • Loading branch information
spapinistarkware committed Mar 28, 2024
1 parent 0a61546 commit 34a5a50
Show file tree
Hide file tree
Showing 8 changed files with 209 additions and 34 deletions.
1 change: 1 addition & 0 deletions src/commitment_scheme/blake2_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::fmt;
use blake2::{Blake2s256, Digest};

// Wrapper for the blake2s hash type.
#[repr(align(32))]
#[derive(Clone, Copy, PartialEq, Default, Eq)]
pub struct Blake2sHash([u8; 32]);

Expand Down
7 changes: 5 additions & 2 deletions src/core/air/accumulation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ impl<B: Backend> DomainEvaluationAccumulator<B> {
.zip(self.n_cols_per_size.iter())
.skip(1)
{
if *n_cols == 0 {
continue;
}
let coeffs = SecureColumn::<B> {
columns: values.columns.map(|c| {
CircleEvaluation::<B, BaseField, BitReversedOrder>::new(
Expand All @@ -162,8 +165,8 @@ impl<B: Backend> DomainEvaluationAccumulator<B> {

/// An domain accumulator for polynomials of a single size.
pub struct ColumnAccumulator<'a, B: Backend> {
random_coeff_pow: SecureField,
col: &'a mut SecureColumn<B>,
pub random_coeff_pow: SecureField,
pub col: &'a mut SecureColumn<B>,
}
impl<'a> ColumnAccumulator<'a, CPUBackend> {
pub fn accumulate(&mut self, index: usize, evaluation: SecureField) {
Expand Down
2 changes: 1 addition & 1 deletion src/core/backend/avx512/accumulation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::core::fields::secure_column::SecureColumn;
impl AccumulationOps for AVX512Backend {
fn accumulate(column: &mut SecureColumn<Self>, alpha: SecureField, other: &SecureColumn<Self>) {
let alpha = PackedQM31::broadcast(alpha);
for i in 0..column.len() {
for i in 0..column.n_packs() {
let res_coeff = column.packed_at(i) * alpha + other.packed_at(i);
column.set_packed(i, res_coeff);
}
Expand Down
37 changes: 20 additions & 17 deletions src/core/backend/avx512/blake2s.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use std::arch::x86_64::__m512i;
use std::arch::x86_64::{__m512i, _mm512_loadu_si512};

use itertools::Itertools;

use super::blake2s_avx::{compress16, set1, transpose_msgs, untranspose_states};
use super::{AVX512Backend, VECS_LOG_SIZE};
use crate::commitment_scheme::blake2_hash::Blake2sHash;
use crate::commitment_scheme::blake2_merkle::Blake2sMerkleHasher;
use crate::commitment_scheme::ops::MerkleOps;
use crate::core::backend::{Col, ColumnOps};
use crate::commitment_scheme::ops::{MerkleHasher, MerkleOps};
use crate::core::backend::{Col, Column, ColumnOps};
use crate::core::fields::m31::BaseField;

impl ColumnOps<Blake2sHash> for AVX512Backend {
Expand All @@ -25,19 +25,20 @@ impl MerkleOps<Blake2sMerkleHasher> for AVX512Backend {
columns: &[&Col<AVX512Backend, BaseField>],
) -> Vec<Blake2sHash> {
// Pad prev_layer if too small.
let mut padded_buffer = vec![];
let prev_layer = if log_size < 4 {
prev_layer.map(|prev_layer| {
padded_buffer = prev_layer
.iter()
.copied()
.chain(std::iter::repeat(Blake2sHash::default()))
.collect_vec();
&padded_buffer
})
} else {
prev_layer
};
if log_size < 4 {
return (0..(1 << log_size))
.map(|i| {
Blake2sMerkleHasher::hash_node(
prev_layer.map(|prev_layer| (prev_layer[2 * i], prev_layer[2 * i + 1])),
&columns.iter().map(|column| column.at(i)).collect_vec(),
)
})
.collect();
}

if let Some(prev_layer) = prev_layer {
assert_eq!(prev_layer.len(), 1 << (log_size + 1));
}

// Commit to columns.
let mut res = Vec::with_capacity(1 << log_size);
Expand All @@ -46,7 +47,9 @@ impl MerkleOps<Blake2sMerkleHasher> for AVX512Backend {
// Hash prev_layer.
if let Some(prev_layer) = prev_layer {
let ptr = prev_layer[(i << 5)..(i << 5) + 32].as_ptr() as *const __m512i;
let msgs: [__m512i; 16] = std::array::from_fn(|j| unsafe { *ptr.add(j) });
let msgs: [__m512i; 16] = std::array::from_fn(|j| unsafe {
_mm512_loadu_si512(ptr.add(j) as *const i32)
});
state = unsafe {
compress16(
state,
Expand Down
4 changes: 4 additions & 0 deletions src/core/backend/avx512/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ impl FromIterator<BaseField> for BaseFieldVec {
}

impl SecureColumn<AVX512Backend> {
pub fn n_packs(&self) -> usize {
self.columns[0].data.len()
}

pub fn packed_at(&self, vec_index: usize) -> PackedQM31 {
unsafe {
PackedQM31([
Expand Down
167 changes: 167 additions & 0 deletions src/examples/wide_fibonacci/avx.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
use itertools::Itertools;
use num_traits::One;

use super::structs::WideFibComponent;
use crate::core::air::accumulation::{DomainEvaluationAccumulator, PointEvaluationAccumulator};
use crate::core::air::{Air, Component, ComponentTrace, Mask};
use crate::core::backend::avx512::qm31::PackedQM31;
use crate::core::backend::avx512::{AVX512Backend, PackedBaseField, VECS_LOG_SIZE};
use crate::core::backend::{Col, Column};
use crate::core::circle::{CirclePoint, Coset};
use crate::core::constraints::coset_vanishing;
use crate::core::fields::m31::BaseField;
use crate::core::fields::qm31::SecureField;
use crate::core::fields::FieldExpOps;
use crate::core::poly::circle::{CanonicCoset, CircleEvaluation};
use crate::core::poly::BitReversedOrder;
use crate::core::utils::bit_reverse_index;
use crate::core::ColumnVec;

const N_COLS: usize = 1 << 8;

pub struct WideFibAir {
component: WideFibComponent,
}
impl Air<AVX512Backend> for WideFibAir {
fn components(&self) -> Vec<&dyn Component<AVX512Backend>> {
vec![&self.component]
}
}

pub fn gen_trace(
log_size: usize,
) -> ColumnVec<CircleEvaluation<AVX512Backend, BaseField, BitReversedOrder>> {
assert!(log_size >= VECS_LOG_SIZE);
let mut trace = (0..N_COLS)
.map(|_| Col::<AVX512Backend, BaseField>::zeros(1 << log_size))
.collect_vec();
for vec_index in 0..(1 << (log_size - VECS_LOG_SIZE)) {
let mut a = PackedBaseField::one();
let mut b = PackedBaseField::one();
trace[0].data[vec_index] = a;
trace[1].data[vec_index] = b;
trace.iter_mut().take(log_size).skip(2).for_each(|col| {
(a, b) = (b, a.square() + b.square());
col.data[vec_index] = b;
});
}
let domain = CanonicCoset::new(log_size as u32).circle_domain();
trace
.into_iter()
.map(|eval| CircleEvaluation::<AVX512Backend, _, BitReversedOrder>::new(domain, eval))
.collect_vec()
}

impl Component<AVX512Backend> for WideFibComponent {
fn max_constraint_log_degree_bound(&self) -> u32 {
self.log_size + 1
}

fn trace_log_degree_bounds(&self) -> Vec<u32> {
vec![self.log_size; N_COLS]
}

fn evaluate_constraint_quotients_on_domain(
&self,
trace: &ComponentTrace<'_, AVX512Backend>,
evaluation_accumulator: &mut DomainEvaluationAccumulator<AVX512Backend>,
) {
assert_eq!(trace.columns.len(), N_COLS);
// TODO(spapini): Steal evaluation from commitment.
let eval_domain = CanonicCoset::new(self.log_size + 1).circle_domain();
let trace_eval = trace
.columns
.iter()
.map(|poly| poly.evaluate(eval_domain))
.collect_vec();
let random_coeff = PackedQM31::broadcast(evaluation_accumulator.random_coeff);
let column_coeffs = (0..N_COLS)
.scan(PackedQM31::one(), |state, _| {
let res = *state;
*state *= random_coeff;
Some(res)
})
.collect_vec();

let constraint_log_degree_bound = self.log_size + 1;
let [accum] = evaluation_accumulator.columns([(constraint_log_degree_bound, N_COLS - 2)]);

for vec_row in 0..(1 << (eval_domain.log_size() - VECS_LOG_SIZE as u32)) {
// Numerator.
let mut row_res = PackedQM31::zero();
let mut a = trace_eval[0].data[vec_row];
let mut b = trace_eval[1].data[vec_row];
#[allow(clippy::needless_range_loop)]
for i in 0..(N_COLS - 2) {
unsafe {
let c = *trace_eval.get_unchecked(i + 2).data.get_unchecked(vec_row);
row_res = row_res + column_coeffs[i] * (a.square() + b.square() - c);
(a, b) = (b, c);
}
}

// Denominator.
// TODO(spapini): Optimized this, for the small number of columns case.
let points = std::array::from_fn(|i| {
eval_domain.at(bit_reverse_index(
(vec_row << VECS_LOG_SIZE) + i,
eval_domain.log_size(),
) + 1)
});
let mut shifted_xs = PackedBaseField::from_array(points.map(|p| p.x));
for _ in 1..self.log_size {
shifted_xs = shifted_xs.square() - PackedBaseField::one();
}

accum.col.set_packed(
vec_row,
accum.col.packed_at(vec_row) * PackedQM31::broadcast(accum.random_coeff_pow)
+ row_res,
)
}
}

fn mask(&self) -> Mask {
Mask(vec![vec![0]; N_COLS])
}

fn evaluate_constraint_quotients_at_point(
&self,
point: CirclePoint<SecureField>,
mask: &ColumnVec<Vec<SecureField>>,
evaluation_accumulator: &mut PointEvaluationAccumulator,
) {
let constraint_zero_domain = Coset::subgroup(self.log_size);
let constraint_log_degree_bound = self.log_size + 1;
for i in 0..(N_COLS - 2) {
let numerator = mask[i][0].square() + mask[i + 1][0].square() - mask[i + 2][0];
let denominator = coset_vanishing(constraint_zero_domain, point);
evaluation_accumulator.accumulate(constraint_log_degree_bound, numerator / denominator);
}
}
}

#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
#[cfg(test)]
mod tests {
use crate::commitment_scheme::blake2_hash::Blake2sHasher;
use crate::commitment_scheme::hasher::Hasher;
use crate::core::channel::{Blake2sChannel, Channel};
use crate::core::fields::m31::BaseField;
use crate::core::fields::IntoSlice;
use crate::core::prover::prove;
use crate::examples::wide_fibonacci::avx::{gen_trace, WideFibAir};
use crate::examples::wide_fibonacci::structs::WideFibComponent;

#[test]
fn test_avx_wide_fib_prove() {
// TODO(spapini): Increase to 20, to get 1GB of trace.
const LOG_SIZE: u32 = 12;
let component = WideFibComponent { log_size: LOG_SIZE };
let air = WideFibAir { component };
let trace = gen_trace(LOG_SIZE as usize);
let channel = &mut Blake2sChannel::new(Blake2sHasher::hash(BaseField::into_slice(&[])));
// TODO(spapini): Fix the constraints.
prove(&air, channel, trace).unwrap_err();
}
}
18 changes: 6 additions & 12 deletions src/examples/wide_fibonacci/constraint_eval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ impl Component<CPUBackend> for WideFibComponent {
trace: &ComponentTrace<'_, CPUBackend>,
evaluation_accumulator: &mut DomainEvaluationAccumulator<CPUBackend>,
) {
let constraint_log_degree = Component::<CPUBackend>::max_constraint_log_degree_bound(self);
let mut trace_evals = vec![];
// TODO(ShaharS), Share this LDE with the commitment LDE.
for poly_index in 0..64 {
let poly = &trace.columns[poly_index];
let trace_eval_domain =
CanonicCoset::new(self.max_constraint_log_degree_bound()).circle_domain();
let trace_eval_domain = CanonicCoset::new(constraint_log_degree).circle_domain();
trace_evals.push(poly.evaluate(trace_eval_domain).bit_reverse());
}
let zero_domain = CanonicCoset::new(self.log_size).coset;
Expand All @@ -47,14 +47,11 @@ impl Component<CPUBackend> for WideFibComponent {
for point in eval_domain.iter() {
denoms.push(coset_vanishing(zero_domain, point));
}
let mut denom_inverses =
vec![BaseField::zero(); 1 << (self.max_constraint_log_degree_bound())];
let mut denom_inverses = vec![BaseField::zero(); 1 << (constraint_log_degree)];
BaseField::batch_inverse(&denoms, &mut denom_inverses);
let mut numerators =
vec![SecureField::zero(); 1 << (self.max_constraint_log_degree_bound())];
let mut numerators = vec![SecureField::zero(); 1 << (constraint_log_degree)];
let random_coeff = evaluation_accumulator.random_coeff;
let [mut accum] =
evaluation_accumulator.columns([(self.max_constraint_log_degree_bound(), 64)]);
let [mut accum] = evaluation_accumulator.columns([(constraint_log_degree, 64)]);
for (i, point_index) in eval_domain.iter_indices().enumerate() {
numerators[i] = numerators[i] * random_coeff
+ (trace_evals[2].get_at(point_index)
Expand Down Expand Up @@ -420,10 +417,7 @@ impl Component<CPUBackend> for WideFibComponent {
* trace_evals[62].get_at(point_index))));
}
for (i, (num, denom)) in numerators.iter().zip(denom_inverses.iter()).enumerate() {
accum.accumulate(
bit_reverse_index(i, self.max_constraint_log_degree_bound()),
*num * *denom,
);
accum.accumulate(bit_reverse_index(i, constraint_log_degree), *num * *denom);
}
}

Expand Down
7 changes: 5 additions & 2 deletions src/examples/wide_fibonacci/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod avx;
pub mod constraint_eval;
pub mod structs;
pub mod trace_asserts;
Expand All @@ -14,6 +15,7 @@ mod tests {
use crate::core::air::accumulation::DomainEvaluationAccumulator;
use crate::core::air::{Component, ComponentTrace};
use crate::core::backend::cpu::CPUCircleEvaluation;
use crate::core::backend::CPUBackend;
use crate::core::fields::m31::BaseField;
use crate::core::fields::qm31::QM31;
use crate::core::poly::circle::CanonicCoset;
Expand Down Expand Up @@ -73,8 +75,9 @@ mod tests {

let res = acc.finalize();
let poly = res.0[0].clone();
for coeff in
poly.coeffs[(1 << (wide_fib.max_constraint_log_degree_bound() - 1)) + 1..].iter()
for coeff in poly.coeffs
[(1 << (Component::<CPUBackend>::max_constraint_log_degree_bound(&wide_fib) - 1)) + 1..]
.iter()
{
assert_eq!(*coeff, BaseField::zero());
}
Expand Down

0 comments on commit 34a5a50

Please sign in to comment.