Skip to content

Commit

Permalink
Precompute twiddles
Browse files Browse the repository at this point in the history
  • Loading branch information
spapinistarkware committed Mar 13, 2024
1 parent 39b9c6c commit b0988f6
Show file tree
Hide file tree
Showing 9 changed files with 209 additions and 51 deletions.
20 changes: 18 additions & 2 deletions src/core/backend/avx512/circle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,21 @@ use super::{as_cpu_vec, AVX512Backend, VECS_LOG_SIZE};
use crate::core::backend::avx512::fft::rfft;
use crate::core::backend::avx512::BaseFieldVec;
use crate::core::backend::CPUBackend;
use crate::core::circle::CirclePoint;
use crate::core::circle::{CirclePoint, Coset};
use crate::core::fields::m31::BaseField;
use crate::core::fields::{Col, ExtensionOf, FieldExpOps};
use crate::core::poly::circle::{
CanonicCoset, CircleDomain, CircleEvaluation, CirclePoly, PolyOps,
};
use crate::core::poly::twiddles::TwiddleTree;
use crate::core::poly::utils::fold;
use crate::core::poly::BitReversedOrder;

// TODO(spapini): Everything is returned in redundant representation, where values can also be P.
// Decide if and when it's ok and what to do if it's not.
impl PolyOps for AVX512Backend {
type Twiddles = ();

fn new_canonical_ordered(
coset: CanonicCoset,
values: Col<Self, BaseField>,
Expand All @@ -27,7 +30,10 @@ impl PolyOps for AVX512Backend {
CircleEvaluation::new(eval.domain, Col::<AVX512Backend, _>::from_iter(eval.values))
}

fn interpolate(eval: CircleEvaluation<Self, BaseField, BitReversedOrder>) -> CirclePoly<Self> {
fn interpolate(
eval: CircleEvaluation<Self, BaseField, BitReversedOrder>,
_itwiddles: &TwiddleTree<Self>,
) -> CirclePoly<Self> {
let mut values = eval.values;
let log_size = values.length.ilog2();

Expand Down Expand Up @@ -70,6 +76,7 @@ impl PolyOps for AVX512Backend {
}
mappings.reverse();

// If the polynomial is large, the fft does a transpose in the middle.
if poly.log_size() as usize > CACHED_FFT_LOG_SIZE {
let n = mappings.len();
let n0 = (n - VECS_LOG_SIZE) / 2;
Expand All @@ -91,6 +98,7 @@ impl PolyOps for AVX512Backend {
fn evaluate(
poly: &CirclePoly<Self>,
domain: CircleDomain,
_twiddles: &TwiddleTree<Self>,
) -> CircleEvaluation<Self, BaseField, BitReversedOrder> {
// TODO(spapini): Precompute twiddles.
// TODO(spapini): Handle small cases.
Expand Down Expand Up @@ -140,6 +148,14 @@ impl PolyOps for AVX512Backend {
},
)
}

fn precompute_twiddles(coset: Coset) -> TwiddleTree<Self> {
TwiddleTree {
root_coset: coset,
twiddles: (),
itwiddles: (),
}
}
}

#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
Expand Down
6 changes: 4 additions & 2 deletions src/core/backend/avx512/fft/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,13 @@ unsafe fn compute_first_twiddles(twiddle1_dbl: [i32; 8]) -> (__m512i, __m512i) {
// 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
let t1 = _mm512_broadcast_i64x4(std::mem::transmute(twiddle1_dbl));

// The twiddles for layer 0 can be computed from the twiddles for layer 1:
// The twiddles for layer 0 can be computed from the twiddles for layer 1.
// Since the twiddles are bit reversed, we consider the circle domain in bit reversed order.
// Each consecutive 4 points in the bit reversed order of a coset form a circle coset of size 4.
// A circle coset of size 4 in bit reversed order looks like this:
// [(x, y), (-x, -y), (y, -x), (-y, x)]
// Note: This is related to the choice of M31_CIRCLE_GEN, and the fact the a quarter rotation
// is (0,-1) and not (0,1). This would cause another relation.
// is (0,-1) and not (0,1). (0,1) would yield another relation.
// The twiddles for layer 0 are the y coordinates:
// [y, -y, -x, x]
// The twiddles for layer 1 in bit reversed order are the x coordinates:
Expand Down
173 changes: 130 additions & 43 deletions src/core/backend/cpu/circle.rs
Original file line number Diff line number Diff line change
@@ -1,39 +1,21 @@
use num_traits::Zero;

use super::CPUBackend;
use crate::core::circle::CirclePoint;
use crate::core::circle::{CirclePoint, Coset};
use crate::core::fft::{butterfly, ibutterfly};
use crate::core::fields::m31::BaseField;
use crate::core::fields::{Col, ExtensionOf, FieldExpOps, FieldOps};
use crate::core::poly::circle::{
CanonicCoset, CircleDomain, CircleEvaluation, CirclePoly, PolyOps,
};
use crate::core::poly::twiddles::TwiddleTree;
use crate::core::poly::utils::fold;
use crate::core::poly::BitReversedOrder;
use crate::core::utils::bit_reverse;

fn get_twiddles(domain: CircleDomain) -> Vec<Vec<BaseField>> {
let mut coset = domain.half_coset;

let mut res = vec![];
res.push(coset.iter().map(|p| (p.y)).collect::<Vec<_>>());
bit_reverse(res.last_mut().unwrap());
for _ in 0..coset.log_size() {
res.push(
coset
.iter()
.take(coset.size() / 2)
.map(|p| (p.x))
.collect::<Vec<_>>(),
);
bit_reverse(res.last_mut().unwrap());
coset = coset.double();
}

res
}

impl PolyOps for CPUBackend {
type Twiddles = Vec<BaseField>;

fn new_canonical_ordered(
coset: CanonicCoset,
values: Col<Self, BaseField>,
Expand All @@ -52,19 +34,35 @@ impl PolyOps for CPUBackend {
CircleEvaluation::new(domain, new_values)
}

fn interpolate(eval: CircleEvaluation<Self, BaseField, BitReversedOrder>) -> CirclePoly<Self> {
let twiddles = get_twiddles(eval.domain);

fn interpolate(
eval: CircleEvaluation<Self, BaseField, BitReversedOrder>,
twiddles: &TwiddleTree<Self>,
) -> CirclePoly<Self> {
let mut values = eval.values;
for (i, layer_twiddles) in twiddles.iter().enumerate() {

assert!(eval.domain.half_coset.is_doubling_of(twiddles.root_coset));
let line_twiddles = domain_line_twiddles_from_tree(eval.domain, &twiddles.itwiddles);

if eval.domain.log_size() == 1 {
let (mut val0, mut val1) = (values[0], values[1]);
ibutterfly(
&mut val0,
&mut val1,
eval.domain.half_coset.initial.y.inverse(),
);
let inv = BaseField::from_u32_unchecked(2).inverse();
(values[0], values[1]) = (val0 * inv, val1 * inv);
return CirclePoly::new(values);
};

let circle_twiddles = circle_twiddles_from_line_twiddles(line_twiddles[0]);

for (h, t) in circle_twiddles.enumerate() {
fft_layer_loop(&mut values, 0, h, t, ibutterfly);
}
for (layer, layer_twiddles) in line_twiddles.into_iter().enumerate() {
for (h, &t) in layer_twiddles.iter().enumerate() {
for l in 0..(1 << i) {
let idx0 = (h << (i + 1)) + l;
let idx1 = idx0 + (1 << i);
let (mut val0, mut val1) = (values[idx0], values[idx1]);
ibutterfly(&mut val0, &mut val1, t.inverse());
(values[idx0], values[idx1]) = (val0, val1);
}
fft_layer_loop(&mut values, layer + 1, h, t, ibutterfly);
}
}

Expand Down Expand Up @@ -103,23 +101,112 @@ impl PolyOps for CPUBackend {
fn evaluate(
poly: &CirclePoly<Self>,
domain: CircleDomain,
twiddles: &TwiddleTree<Self>,
) -> CircleEvaluation<Self, BaseField, BitReversedOrder> {
let twiddles = get_twiddles(domain);

let mut values = poly.extend(domain.log_size()).coeffs;
for (i, layer_twiddles) in twiddles.iter().enumerate().rev() {

assert!(domain.half_coset.is_doubling_of(twiddles.root_coset));
let line_twiddles = domain_line_twiddles_from_tree(domain, &twiddles.twiddles);

if domain.log_size() == 1 {
let (mut val0, mut val1) = (values[0], values[1]);
butterfly(&mut val0, &mut val1, domain.half_coset.initial.y.inverse());
return CircleEvaluation::new(domain, values);
};

let circle_twiddles = circle_twiddles_from_line_twiddles(line_twiddles[0]);

for (layer, layer_twiddles) in line_twiddles.iter().enumerate().rev() {
for (h, &t) in layer_twiddles.iter().enumerate() {
for l in 0..(1 << i) {
let idx0 = (h << (i + 1)) + l;
let idx1 = idx0 + (1 << i);
let (mut val0, mut val1) = (values[idx0], values[idx1]);
butterfly(&mut val0, &mut val1, t);
(values[idx0], values[idx1]) = (val0, val1);
}
fft_layer_loop(&mut values, layer + 1, h, t, butterfly);
}
}
for (h, t) in circle_twiddles.enumerate() {
fft_layer_loop(&mut values, 0, h, t, butterfly);
}

CircleEvaluation::new(domain, values)
}

fn precompute_twiddles(mut coset: Coset) -> TwiddleTree<Self> {
let root_coset = coset;
let mut twiddles = Vec::with_capacity(coset.size());
for _ in 0..coset.log_size() {
let i0 = twiddles.len();
twiddles.extend(
coset
.iter()
.take(coset.size() / 2)
.map(|p| p.x)
.collect::<Vec<_>>(),
);
bit_reverse(&mut twiddles[i0..]);
coset = coset.double();
}
twiddles.push(1.into());

// TODO(spapini): Batch inverse.
let itwiddles = twiddles.iter().map(|&t| t.inverse()).collect();

TwiddleTree {
root_coset,
twiddles,
itwiddles,
}
}
}

/// Computes the line twiddles for a [CircleDomain] from the precomputed twiddles tree.
fn domain_line_twiddles_from_tree(
domain: CircleDomain,
twiddle_buffer: &[BaseField],
) -> Vec<&[BaseField]> {
(0..domain.half_coset.log_size())
.map(|i| {
let len = 1 << i;
&twiddle_buffer[twiddle_buffer.len() - len * 2..twiddle_buffer.len() - len]
})
.rev()
.collect()
}

fn fft_layer_loop(
values: &mut [BaseField],
i: usize,
h: usize,
t: BaseField,
butterfly_fn: impl Fn(&mut BaseField, &mut BaseField, BaseField),
) {
for l in 0..(1 << i) {
let idx0 = (h << (i + 1)) + l;
let idx1 = idx0 + (1 << i);
let (mut val0, mut val1) = (values[idx0], values[idx1]);
butterfly_fn(&mut val0, &mut val1, t);
(values[idx0], values[idx1]) = (val0, val1);
}
}

/// Computes the circle twiddles layer (layer 0) from the first line twiddles layer (layer 1).
fn circle_twiddles_from_line_twiddles(
first_line_twiddles: &[BaseField],
) -> impl Iterator<Item = BaseField> + '_ {
// The twiddles for layer 0 can be computed from the twiddles for layer 1.
// Since the twiddles are bit reversed, we consider the circle domain in bit reversed order.
// Each consecutive 4 points in the bit reversed order of a coset form a circle coset of size 4.
// A circle coset of size 4 in bit reversed order looks like this:
// [(x, y), (-x, -y), (y, -x), (-y, x)]
// Note: This relation is derived from the fact that `M31_CIRCLE_GEN`.repeated_double(ORDER / 4)
// == (-1,0), and not (0,1). (0,1) would yield another relation.
// The twiddles for layer 0 are the y coordinates:
// [y, -y, -x, x]
// The twiddles for layer 1 in bit reversed order are the x coordinates of the even indices
// points:
// [x, y]
// Works also for inverse of the twiddles.
first_line_twiddles
.iter()
.array_chunks()
.flat_map(|[&x, &y]| [y, -y, -x, x])
}

impl<F: ExtensionOf<BaseField>, EvalOrder> IntoIterator
Expand Down
9 changes: 9 additions & 0 deletions src/core/circle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,15 @@ impl Coset {
}
}

pub fn repeated_double(&self, n_doubles: u32) -> Self {
(0..n_doubles).fold(*self, |coset, _| coset.double())
}

pub fn is_doubling_of(&self, other: Self) -> bool {
self.log_size <= other.log_size
&& *self == other.repeated_double(other.log_size - self.log_size)
}

pub fn initial(&self) -> CirclePoint<M31> {
self.initial
}
Expand Down
10 changes: 9 additions & 1 deletion src/core/poly/circle/evaluation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::core::backend::cpu::CPUCircleEvaluation;
use crate::core::circle::{CirclePointIndex, Coset};
use crate::core::fields::m31::BaseField;
use crate::core::fields::{Col, Column, ExtensionOf, FieldOps};
use crate::core::poly::twiddles::TwiddleTree;
use crate::core::poly::{BitReversedOrder, NaturalOrder};
use crate::core::utils::bit_reverse_index;

Expand Down Expand Up @@ -76,7 +77,14 @@ impl<B: PolyOps> CircleEvaluation<B, BaseField, BitReversedOrder> {

/// Computes a minimal [CirclePoly] that evaluates to the same values as this evaluation.
pub fn interpolate(self) -> CirclePoly<B> {
B::interpolate(self)
let coset = self.domain.half_coset;
B::interpolate(self, &B::precompute_twiddles(coset))
}

/// Computes a minimal [CirclePoly] that evaluates to the same values as this evaluation, using
/// precomputed twiddles.
pub fn interpolate_with_twiddles(self, twiddles: &TwiddleTree<B>) -> CirclePoly<B> {
B::interpolate(self, twiddles)
}
}

Expand Down
16 changes: 14 additions & 2 deletions src/core/poly/circle/ops.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
use super::{CanonicCoset, CircleDomain, CircleEvaluation, CirclePoly};
use crate::core::circle::CirclePoint;
use crate::core::circle::{CirclePoint, Coset};
use crate::core::fields::m31::BaseField;
use crate::core::fields::{Col, ExtensionOf, FieldOps};
use crate::core::poly::twiddles::TwiddleTree;
use crate::core::poly::BitReversedOrder;

/// Operations on BaseField polynomials.
pub trait PolyOps: FieldOps<BaseField> + Sized {
// TODO(spapini): Use a column instead of this type.
/// The type for precomputed twiddles.
type Twiddles;

/// Creates a [CircleEvaluation] from values ordered according to [CanonicCoset].
/// Used by the [`CircleEvaluation::new_canonical_ordered()`] function.
fn new_canonical_ordered(
Expand All @@ -15,7 +20,10 @@ pub trait PolyOps: FieldOps<BaseField> + Sized {

/// Computes a minimal [CirclePoly] that evaluates to the same values as this evaluation.
/// Used by the [`CircleEvaluation::interpolate()`] function.
fn interpolate(eval: CircleEvaluation<Self, BaseField, BitReversedOrder>) -> CirclePoly<Self>;
fn interpolate(
eval: CircleEvaluation<Self, BaseField, BitReversedOrder>,
itwiddles: &TwiddleTree<Self>,
) -> CirclePoly<Self>;

/// Evaluates the polynomial at a single point.
/// Used by the [`CirclePoly::eval_at_point()`] function.
Expand All @@ -33,5 +41,9 @@ pub trait PolyOps: FieldOps<BaseField> + Sized {
fn evaluate(
poly: &CirclePoly<Self>,
domain: CircleDomain,
twiddles: &TwiddleTree<Self>,
) -> CircleEvaluation<Self, BaseField, BitReversedOrder>;

/// Precomputes twiddles for a given coset.
fn precompute_twiddles(coset: Coset) -> TwiddleTree<Self>;
}
Loading

0 comments on commit b0988f6

Please sign in to comment.