Merge pull request #2 from valida-xyz/morgan/parallelism

Enable parallelism
valida-xyz · May 2, 2024 · bdd338d · bdd338d
2 parents 3c89583 + 01f6715
commit bdd338d
Show file tree

Hide file tree

Showing 6 changed files with 62 additions and 50 deletions.
diff --git a/dft/src/radix_2_bowers.rs b/dft/src/radix_2_bowers.rs
@@ -4,7 +4,6 @@ use p3_field::{Field, Powers, TwoAdicField};
 use p3_matrix::dense::{RowMajorMatrix, RowMajorMatrixViewMut};
 use p3_matrix::util::reverse_matrix_index_bits;
 use p3_matrix::Matrix;
-use p3_maybe_rayon::prelude::*;
 use p3_util::{log2_strict_usize, reverse_bits, reverse_slice_index_bits};
 
 use crate::butterflies::{

diff --git a/fri/src/prover.rs b/fri/src/prover.rs
@@ -10,7 +10,7 @@ use p3_maybe_rayon::prelude::*;
 use tracing::{info_span, instrument};
 
 use crate::fold_even_odd::fold_even_odd;
-use crate::{CommitPhaseProofStep, FriConfig, FriProof, TwoAdicFriPcsGenericConfig, QueryProof};
+use crate::{CommitPhaseProofStep, FriConfig, FriProof, QueryProof};
 
 #[instrument(name = "FRI prover", skip_all)]
 pub fn prove<F, M, Challenger>(

diff --git a/fri/src/two_adic_pcs.rs b/fri/src/two_adic_pcs.rs
@@ -113,12 +113,13 @@ pub struct BatchOpening<C: TwoAdicFriPcsGenericConfig> {
     pub(crate) opening_proof: <C::InputMmcs as Mmcs<C::Val>>::Proof,
 }
 
-impl<C: TwoAdicFriPcsGenericConfig, In: MatrixRows<C::Val> + Sized + Sync + Clone>
-    Pcs<C::Val, In> for TwoAdicFriPcs<C> 
-    where C::FriMmcs: Send,
-          <C::FriMmcs as Mmcs<C::Challenge>>::Proof: Send,
-          <C::FriMmcs as Mmcs<C::Challenge>>::ProverData: Send + Sync,
-          <C::InputMmcs as Mmcs<C::Val>>::ProverData: Send + Sync + Sized,
+impl<C: TwoAdicFriPcsGenericConfig, In: MatrixRows<C::Val> + Sized + Sync + Clone> Pcs<C::Val, In>
+    for TwoAdicFriPcs<C>
+where
+    C::FriMmcs: Send,
+    <C::FriMmcs as Mmcs<C::Challenge>>::Proof: Send,
+    <C::FriMmcs as Mmcs<C::Challenge>>::ProverData: Send + Sync,
+    <C::InputMmcs as Mmcs<C::Val>>::ProverData: Send + Sync + Sized,
 {
     type Commitment = <C::InputMmcs as Mmcs<C::Val>>::Commitment;
     type ProverData = <C::InputMmcs as Mmcs<C::Val>>::ProverData;
@@ -133,10 +134,11 @@ impl<C: TwoAdicFriPcsGenericConfig, In: MatrixRows<C::Val> + Sized + Sync + Clon
 
 impl<C: TwoAdicFriPcsGenericConfig, In: MatrixRows<C::Val> + Sized + Sync + Clone>
     UnivariatePcsWithLde<C::Val, C::Challenge, In, C::Challenger> for TwoAdicFriPcs<C>
-    where C::FriMmcs: Send,
-          <C::FriMmcs as Mmcs<C::Challenge>>::Proof: Send,
-          <C::FriMmcs as Mmcs<C::Challenge>>::ProverData: Send + Sync,
-          <C::InputMmcs as Mmcs<C::Val>>::ProverData: Send + Sync + Sized,
+where
+    C::FriMmcs: Send,
+    <C::FriMmcs as Mmcs<C::Challenge>>::Proof: Send,
+    <C::FriMmcs as Mmcs<C::Challenge>>::ProverData: Send + Sync,
+    <C::InputMmcs as Mmcs<C::Val>>::ProverData: Send + Sync + Sized,
 {
     type Lde<'a> = BitReversedMatrixView<<C::InputMmcs as Mmcs<C::Val>>::Mat<'a>> where Self: 'a;
 
@@ -180,18 +182,19 @@ impl<C: TwoAdicFriPcsGenericConfig, In: MatrixRows<C::Val> + Sized + Sync + Clon
                 })
                 .collect()
         });
-        let commitment = self.mmcs.commit(ldes);
-        commitment
+
+        self.mmcs.commit(ldes)
     }
 }
 
 impl<C: TwoAdicFriPcsGenericConfig, In: MatrixRows<C::Val> + Sync + Clone>
     UnivariatePcs<C::Val, C::Challenge, In, C::Challenger> for TwoAdicFriPcs<C>
-    where C::FriMmcs: Send,
-          <C::FriMmcs as Mmcs<C::Challenge>>::Proof: Send,
-          <C::FriMmcs as Mmcs<C::Challenge>>::ProverData: Send + Sync,
-          <C::InputMmcs as Mmcs<C::Val>>::ProverData: Send + Sync + Sized,
-          C::Challenge: Send + Sync + Sized,
+where
+    C::FriMmcs: Send,
+    <C::FriMmcs as Mmcs<C::Challenge>>::Proof: Send,
+    <C::FriMmcs as Mmcs<C::Challenge>>::ProverData: Send + Sync,
+    <C::InputMmcs as Mmcs<C::Val>>::ProverData: Send + Sync + Sized,
+    C::Challenge: Send + Sync + Sized,
 {
     #[instrument(name = "open_multi_batches", skip_all)]
     fn open_multi_batches(
@@ -261,17 +264,23 @@ impl<C: TwoAdicFriPcsGenericConfig, In: MatrixRows<C::Val> + Sync + Clone>
         let mut reduced_openings: [_; 32] = core::array::from_fn(|_| None);
         let mut num_reduced = [0; 32];
 
-        let ys_outer: Vec::<(&Self::ProverData, Vec<&Vec<C::Challenge>>)> = (*prover_data_and_points)
-            .into_iter()
-            .map(|(pd, cs)| { (*pd, (*cs).into_iter().collect::<Vec<&Vec<C::Challenge>>>()) })
+        #[allow(clippy::type_complexity)]
+        let ys_outer: Vec<(&Self::ProverData, Vec<&Vec<C::Challenge>>)> = (*prover_data_and_points)
+            .iter()
+            .map(|(pd, cs)| (*pd, (*cs).iter().collect::<Vec<&Vec<C::Challenge>>>()))
             .collect();
 
         let ys_outer: Vec<Vec<Vec<Vec<C::Challenge>>>> = ys_outer
             .par_iter()
             .map(|(data, points)| {
                 let mats = self.mmcs.get_matrices(data);
-                izip!(mats, (*points).clone()).collect::<Vec<_>>().par_iter().map(|(mat, points_for_mat)| {
-                        points_for_mat.par_iter().map(|&point| {
+                izip!(mats, (*points).clone())
+                    .collect::<Vec<_>>()
+                    .par_iter()
+                    .map(|(mat, points_for_mat)| {
+                        points_for_mat
+                            .par_iter()
+                            .map(|&point| {
                                 // Use Barycentric interpolation to evaluate the matrix at the given point.
                                 info_span!("compute opened values with Lagrange interpolation")
                                     .in_scope(|| {
@@ -283,11 +292,14 @@ impl<C: TwoAdicFriPcsGenericConfig, In: MatrixRows<C::Val> + Sync + Clone>
                                             point,
                                         )
                                     })
-                            }).collect()
-                    }).collect()
-            }).collect();
+                            })
+                            .collect()
+                    })
+                    .collect()
+            })
+            .collect();
 
-        for (i, (data, points)) in prover_data_and_points.into_iter().enumerate() {
+        for (i, (data, points)) in prover_data_and_points.iter().enumerate() {
             let mats = self.mmcs.get_matrices(data);
             let opened_values_for_round = all_opened_values.pushed_mut(vec![]);
             for (j, (mat, points_for_mat)) in izip!(mats, *points).enumerate() {
@@ -297,7 +309,7 @@ impl<C: TwoAdicFriPcsGenericConfig, In: MatrixRows<C::Val> + Sync + Clone>
                 debug_assert_eq!(reduced_opening_for_log_height.len(), mat.height());
 
                 let opened_values_for_mat = opened_values_for_round.pushed_mut(vec![]);
-                for (k, &point) in points_for_mat.into_iter().enumerate() {
+                for (k, &point) in points_for_mat.iter().enumerate() {
                     let _guard =
                         info_span!("reduce matrix quotient", dims = %mat.dimensions()).entered();
 

diff --git a/interpolation/src/lib.rs b/interpolation/src/lib.rs
@@ -12,8 +12,8 @@ use p3_field::{
     two_adic_coset_zerofier, ExtensionField, Field, TwoAdicField,
 };
 use p3_matrix::MatrixRows;
-use p3_util::log2_strict_usize;
 use p3_maybe_rayon::prelude::*;
+use p3_util::log2_strict_usize;
 
 /// Given evaluations of a batch of polynomials over the canonical power-of-two subgroup, evaluate
 /// the polynomials at `point`.
@@ -54,19 +54,24 @@ where
         z
     };
 
-    let sum: Vec<EF> =
-        g.powers().zip(diff_invs).enumerate()
-            .map(|(i, (subgroup_i, diff_inv))| {
-                (coset_evals.row(i).into_iter().collect::<Vec<_>>(), (subgroup_i, diff_inv))
-            })
-            .collect::<Vec<_>>()
-            .into_par_iter()
-            .map(|(row_i, (subgroup_i, diff_inv))| {
-                let s = diff_inv * subgroup_i;
-                row_i.into_iter().map(|y_i| s * y_i).collect()
-            })
-            .reduce(sum_vecs)
-            .expect("Expected nonempty sum");
+    let sum: Vec<EF> = g
+        .powers()
+        .zip(diff_invs)
+        .enumerate()
+        .map(|(i, (subgroup_i, diff_inv))| {
+            (
+                coset_evals.row(i).into_iter().collect::<Vec<_>>(),
+                (subgroup_i, diff_inv),
+            )
+        })
+        .collect::<Vec<_>>()
+        .into_par_iter()
+        .map(|(row_i, (subgroup_i, diff_inv))| {
+            let s = diff_inv * subgroup_i;
+            row_i.into_iter().map(|y_i| s * y_i).collect()
+        })
+        .fold_chunks(64, || vec![EF::zero(); width], sum_vecs)
+        .reduce(|| vec![EF::zero(); width], sum_vecs);
 
     let zerofier = two_adic_coset_zerofier::<EF>(log_height, EF::from_base(shift), point);
     let denominator = F::from_canonical_usize(height) * shift.exp_u64(height as u64 - 1);

diff --git a/matrix/src/dense.rs b/matrix/src/dense.rs
@@ -68,8 +68,7 @@ impl<T> RowMajorMatrix<T> {
     pub fn row_chunks_mut(
         &mut self,
         chunk_rows: usize,
-    ) -> impl Iterator<Item = RowMajorMatrixViewMut<T>>
-    {
+    ) -> impl Iterator<Item = RowMajorMatrixViewMut<T>> {
         self.values
             .chunks_exact_mut(self.width * chunk_rows)
             .map(|slice| RowMajorMatrixViewMut::new(slice, self.width))
@@ -341,11 +340,7 @@ impl<'a, T> RowMajorMatrixViewMut<'a, T> {
         self.values.par_chunks_exact_mut(size * self.width)
     }
 
-    pub fn row_chunks_mut(
-        &mut self,
-        size: usize,
-    ) -> impl Iterator<Item = &mut [T]>
-    {
+    pub fn row_chunks_mut(&mut self, size: usize) -> impl Iterator<Item = &mut [T]> {
         self.values.chunks_exact_mut(size * self.width)
     }
 

diff --git a/maybe-rayon/Cargo.toml b/maybe-rayon/Cargo.toml
@@ -6,6 +6,7 @@ version = "0.1.0"
 edition = "2021"
 
 [features]
+default = ["parallel"]
 parallel = ["rayon"]
 
 [dependencies]