From e5f93920104a3bcbb504f784a8f0d2624a551e77 Mon Sep 17 00:00:00 2001
From: chriseth
Date: Mon, 16 Dec 2024 11:49:30 +0100
Subject: [PATCH] Use padded bitvec. (#2230)

---
 .../data_structures/finalizable_data.rs       |  38 ++---
 executor/src/witgen/data_structures/mod.rs    |   1 +
 .../witgen/data_structures/padded_bitvec.rs   | 130 ++++++++++++++++++
 3 files changed, 153 insertions(+), 16 deletions(-)
 create mode 100644 executor/src/witgen/data_structures/padded_bitvec.rs

diff --git a/executor/src/witgen/data_structures/finalizable_data.rs b/executor/src/witgen/data_structures/finalizable_data.rs
index ed5d53032..5b595c7fc 100644
--- a/executor/src/witgen/data_structures/finalizable_data.rs
+++ b/executor/src/witgen/data_structures/finalizable_data.rs
@@ -11,6 +11,8 @@ use powdr_number::FieldElement;
 
 use crate::witgen::rows::Row;
 
+use super::padded_bitvec::PaddedBitVec;
+
 /// Sequence of rows of field elements, stored in a compact form.
 /// Optimized for contiguous column IDs, but works with any combination.
 #[derive(Clone)]
@@ -22,7 +24,9 @@ pub struct CompactData<T> {
     /// The cell values, stored in row-major order.
     data: Vec<T>,
     /// Bit vector of known cells, stored in row-major order.
-    known_cells: BitVec,
+    /// We use PaddedBitVec so that the row access is uniform and we can
+    /// combine setting the same bits in each row to setting a full word.
+    known_cells: PaddedBitVec,
 }
 
 impl<T: FieldElement> CompactData<T> {
@@ -30,11 +34,12 @@ impl<T: FieldElement> CompactData<T> {
     pub fn new(column_ids: &[PolyID]) -> Self {
         let col_id_range = column_ids.iter().map(|id| id.id).minmax();
         let (first_column_id, last_column_id) = col_id_range.into_option().unwrap();
+        let column_count = (last_column_id - first_column_id + 1) as usize;
         Self {
             first_column_id,
-            column_count: (last_column_id - first_column_id + 1) as usize,
+            column_count,
             data: Vec::new(),
-            known_cells: BitVec::new(),
+            known_cells: PaddedBitVec::new(column_count),
         }
     }
 
@@ -50,7 +55,7 @@ impl<T: FieldElement> CompactData<T> {
     /// Truncates the data to `len` rows.
     pub fn truncate(&mut self, len: usize) {
         self.data.truncate(len * self.column_count);
-        self.known_cells.truncate(len * self.column_count);
+        self.known_cells.truncate_to_rows(len);
    }
 
     pub fn clear(&mut self) {
@@ -61,7 +66,7 @@ impl<T: FieldElement> CompactData<T> {
     /// Appends a non-finalized row to the data, turning it into a finalized row.
     pub fn push(&mut self, row: Row<T>) {
         self.data.reserve(self.column_count);
-        self.known_cells.reserve(self.column_count);
+        self.known_cells.reserve_rows(1);
         for col_id in self.first_column_id..(self.first_column_id + self.column_count as u64) {
             if let Some(v) = row.value(&PolyID {
                 id: col_id,
@@ -79,7 +84,7 @@ impl<T: FieldElement> CompactData<T> {
     pub fn append_new_rows(&mut self, count: usize) {
         self.data
             .resize(self.data.len() + count * self.column_count, T::zero());
-        self.known_cells.grow(count * self.column_count, false);
+        self.known_cells.append_empty_rows(count);
     }
 
     fn index(&self, row: usize, col: u64) -> usize {
@@ -89,25 +94,26 @@ impl<T: FieldElement> CompactData<T> {
 
     pub fn get(&self, row: usize, col: u64) -> (T, bool) {
         let idx = self.index(row, col);
-        (self.data[idx], self.known_cells[idx])
+        let relative_col = col - self.first_column_id;
+        (self.data[idx], self.known_cells.get(row, relative_col))
     }
 
     pub fn set(&mut self, row: usize, col: u64, value: T) {
         let idx = self.index(row, col);
-        assert!(!self.known_cells[idx] || self.data[idx] == value);
+        let relative_col = col - self.first_column_id;
+        assert!(!self.known_cells.get(row, relative_col) || self.data[idx] == value);
         self.data[idx] = value;
-        self.known_cells.set(idx, true);
+        self.known_cells.set(row, relative_col, true);
     }
 
     pub fn known_values_in_row(&self, row: usize) -> impl Iterator<Item = (u64, &T)> {
-        (0..self.column_count).filter_map(move |i| {
-            let col = self.first_column_id + i as u64;
-            let idx = self.index(row, col);
-            self.known_cells[idx].then(|| {
-                let col_id = self.first_column_id + i as u64;
-                (col_id, &self.data[idx])
+        (0..self.column_count)
+            .filter(move |i| self.known_cells.get(row, *i as u64))
+            .map(move |i| {
+                let col = self.first_column_id + i as u64;
+                let idx = self.index(row, col);
+                (col, &self.data[idx])
             })
-        })
     }
 }
 
diff --git a/executor/src/witgen/data_structures/mod.rs b/executor/src/witgen/data_structures/mod.rs
index b376530b7..4fead8e14 100644
--- a/executor/src/witgen/data_structures/mod.rs
+++ b/executor/src/witgen/data_structures/mod.rs
@@ -4,3 +4,4 @@ pub mod copy_constraints;
 pub mod finalizable_data;
 pub mod multiplicity_counter;
 pub mod mutable_state;
+pub mod padded_bitvec;
diff --git a/executor/src/witgen/data_structures/padded_bitvec.rs b/executor/src/witgen/data_structures/padded_bitvec.rs
new file mode 100644
index 000000000..c07ead455
--- /dev/null
+++ b/executor/src/witgen/data_structures/padded_bitvec.rs
@@ -0,0 +1,130 @@
+/// A bit vector tuned to be used as flags for a trace matrix.
+/// The benefit of this bit vector is that each row starts
+/// at a new word, so the access to the bit vector is uniform
+/// for each row and thus setting the same bits in each row
+/// can be optimized to setting a full word.
+#[derive(Clone)]
+pub struct PaddedBitVec {
+    data: Vec<u32>,
+    bits_per_row: usize,
+    words_per_row: usize,
+    rows: usize,
+    bits_in_last_row: usize,
+}
+
+impl PaddedBitVec {
+    pub fn new(bits_per_row: usize) -> Self {
+        let words_per_row = (bits_per_row + 31) / 32;
+        Self {
+            data: Vec::new(),
+            bits_per_row,
+            words_per_row,
+            rows: 0,
+            bits_in_last_row: bits_per_row,
+        }
+    }
+
+    pub fn truncate_to_rows(&mut self, len: usize) {
+        assert!(len <= self.rows);
+        if len < self.rows {
+            self.data.truncate(len * self.words_per_row);
+            self.bits_in_last_row = self.bits_per_row;
+            self.rows = len;
+        }
+    }
+
+    pub fn clear(&mut self) {
+        self.data.clear();
+        self.rows = 0;
+        self.bits_in_last_row = self.bits_per_row
+    }
+
+    pub fn reserve_rows(&mut self, count: usize) {
+        self.data.reserve(count * self.words_per_row);
+    }
+
+    /// Append a single bit.
+    pub fn push(&mut self, value: bool) {
+        if self.bits_in_last_row == self.bits_per_row {
+            self.data.push(value as u32);
+            self.rows += 1;
+            self.bits_in_last_row = 1;
+        } else {
+            if self.bits_in_last_row % 32 == 0 {
+                self.data.push(value as u32);
+            } else if value {
+                let last_word = self.data.last_mut().unwrap();
+                let bit_in_last_word = self.bits_in_last_row % 32;
+                *last_word |= 1 << bit_in_last_word;
+            }
+            self.bits_in_last_row += 1;
+        }
+    }
+
+    /// Append a number of new empty rows.
+    pub fn append_empty_rows(&mut self, count: usize) {
+        assert!(self.bits_in_last_row == self.bits_per_row);
+        self.data
+            .resize(self.data.len() + count * self.words_per_row, 0);
+        self.rows += count;
+    }
+
+    pub fn get(&self, row: usize, col: u64) -> bool {
+        if row >= self.rows || (row + 1 == self.rows && col >= self.bits_in_last_row as u64) {
+            panic!("Out of bounds");
+        }
+        let word = &self.data[row * self.words_per_row + (col / 32) as usize];
+        (word & (1 << (col % 32))) != 0
+    }
+
+    pub fn set(&mut self, row: usize, col: u64, value: bool) {
+        let word = &mut self.data[row * self.words_per_row + (col / 32) as usize];
+        if value {
+            *word |= 1 << (col % 32);
+        } else {
+            *word &= !(1 << (col % 32));
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_push() {
+        let mut vec = PaddedBitVec::new(38);
+        let v = 0x20500600a0u64;
+        for i in 0..38 {
+            vec.push((v & (1 << i)) != 0);
+        }
+        assert_eq!(vec.data, vec![0x500600a0, 0x20]);
+
+        assert_eq!(v, (0..38).map(|i| (vec.get(0, i) as u64) << i).sum::<u64>());
+    }
+
+    #[test]
+    #[should_panic = "Out of bounds"]
+    fn test_out_of_bounds() {
+        let mut vec = PaddedBitVec::new(38);
+        let v = 0x20500600a0u64;
+        for i in 0..38 {
+            vec.push((v & (1 << i)) != 0);
+        }
+        assert!(vec.get(0, 38));
+    }
+
+    #[test]
+    fn test_multirow() {
+        let mut vec = PaddedBitVec::new(3);
+        vec.push(true);
+        vec.push(false);
+        vec.push(true);
+        vec.push(true);
+        vec.push(false);
+        vec.push(false);
+        vec.push(true);
+        vec.push(false);
+        assert_eq!(vec.data, vec![5, 1, 1]);
+    }
+}
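
Illustrative usage (not part of the commit): a hypothetical extra test, written against the
PaddedBitVec API added above, showing how the padded layout maps a (row, col) flag to a word.
With 38 flags per row, words_per_row = ceil(38 / 32) = 2, so every row starts at a word
boundary and a given column always lands at the same word offset within its row.

    #[test]
    fn known_flags_per_row() {
        // 38 flags per row -> 2 words per row; every row starts at a word boundary.
        let mut known = PaddedBitVec::new(38);
        // Four empty rows: all flags start out unset.
        known.append_empty_rows(4);
        // Row 2, column 37 lives in word 2 * 2 + 37 / 32 = 5, at bit 37 % 32 = 5.
        known.set(2, 37, true);
        assert!(known.get(2, 37));
        assert!(!known.get(2, 36));
        // Keep only rows 0..3; the dropped row disappears as a whole block of words.
        known.truncate_to_rows(3);
    }

The padding trades up to 31 unused bits per row for uniform row offsets: since the word offset
of a column is identical in every row, code that marks the same set of columns as known in each
row can batch those updates into full-word writes instead of per-bit ones.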