diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml index ba2ad1bd9b..a0ac3ec727 100644 --- a/.github/workflows/continuous-integration-workflow.yml +++ b/.github/workflows/continuous-integration-workflow.yml @@ -124,5 +124,5 @@ jobs: command: clippy args: --all-features --all-targets -- -D warnings -A incomplete-features env: - CARGO_INCREMENTAL: 1 - + # Seems necessary until https://github.com/rust-lang/rust/pull/115819 is merged. + CARGO_INCREMENTAL: 0 diff --git a/evm/src/arithmetic/arithmetic_stark.rs b/evm/src/arithmetic/arithmetic_stark.rs index 4695798af5..5441cf2760 100644 --- a/evm/src/arithmetic/arithmetic_stark.rs +++ b/evm/src/arithmetic/arithmetic_stark.rs @@ -27,10 +27,17 @@ use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; /// This is done by taking pairs of columns (x, y) of the arithmetic /// table and combining them as x + y*2^16 to ensure they equal the /// corresponding 32-bit number in the CPU table. -fn cpu_arith_data_link(ops: &[usize], regs: &[Range]) -> Vec> { +fn cpu_arith_data_link( + combined_ops: &[(usize, u8)], + regs: &[Range], +) -> Vec> { let limb_base = F::from_canonical_u64(1 << columns::LIMB_BITS); - let mut res = Column::singles(ops).collect_vec(); + let mut res = vec![Column::linear_combination( + combined_ops + .iter() + .map(|&(col, code)| (col, F::from_canonical_u8(code))), + )]; // The inner for loop below assumes N_LIMBS is even. const_assert!(columns::N_LIMBS % 2 == 0); @@ -49,21 +56,27 @@ fn cpu_arith_data_link(ops: &[usize], regs: &[Range]) -> Vec() -> TableWithColumns { - const ARITH_OPS: [usize; 14] = [ - columns::IS_ADD, - columns::IS_SUB, - columns::IS_MUL, - columns::IS_LT, - columns::IS_GT, - columns::IS_ADDFP254, - columns::IS_MULFP254, - columns::IS_SUBFP254, - columns::IS_ADDMOD, - columns::IS_MULMOD, - columns::IS_SUBMOD, - columns::IS_DIV, - columns::IS_MOD, - columns::IS_BYTE, + // We scale each filter flag with the associated opcode value. + // If an arithmetic operation is happening on the CPU side, + // the CTL will enforce that the reconstructed opcode value + // from the opcode bits matches. + const COMBINED_OPS: [(usize, u8); 16] = [ + (columns::IS_ADD, 0x01), + (columns::IS_MUL, 0x02), + (columns::IS_SUB, 0x03), + (columns::IS_DIV, 0x04), + (columns::IS_MOD, 0x06), + (columns::IS_ADDMOD, 0x08), + (columns::IS_MULMOD, 0x09), + (columns::IS_ADDFP254, 0x0c), + (columns::IS_MULFP254, 0x0d), + (columns::IS_SUBFP254, 0x0e), + (columns::IS_SUBMOD, 0x0f), + (columns::IS_LT, 0x10), + (columns::IS_GT, 0x11), + (columns::IS_BYTE, 0x1a), + (columns::IS_SHL, 0x1b), + (columns::IS_SHR, 0x1c), ]; const REGISTER_MAP: [Range; 4] = [ @@ -73,6 +86,8 @@ pub fn ctl_arithmetic_rows() -> TableWithColumns { columns::OUTPUT_REGISTER, ]; + let filter_column = Some(Column::sum(COMBINED_OPS.iter().map(|(c, _v)| *c))); + // Create the Arithmetic Table whose columns are those of the // operations listed in `ops` whose inputs and outputs are given // by `regs`, where each element of `regs` is a range of columns @@ -80,8 +95,8 @@ pub fn ctl_arithmetic_rows() -> TableWithColumns { // is used as the operation filter). TableWithColumns::new( Table::Arithmetic, - cpu_arith_data_link(&ARITH_OPS, ®ISTER_MAP), - Some(Column::sum(ARITH_OPS)), + cpu_arith_data_link(&COMBINED_OPS, ®ISTER_MAP), + filter_column, ) } diff --git a/evm/src/arithmetic/columns.rs b/evm/src/arithmetic/columns.rs index afdd583261..48e00f8e11 100644 --- a/evm/src/arithmetic/columns.rs +++ b/evm/src/arithmetic/columns.rs @@ -36,8 +36,10 @@ pub(crate) const IS_SUBMOD: usize = IS_SUBFP254 + 1; pub(crate) const IS_LT: usize = IS_SUBMOD + 1; pub(crate) const IS_GT: usize = IS_LT + 1; pub(crate) const IS_BYTE: usize = IS_GT + 1; +pub(crate) const IS_SHL: usize = IS_BYTE + 1; +pub(crate) const IS_SHR: usize = IS_SHL + 1; -pub(crate) const START_SHARED_COLS: usize = IS_BYTE + 1; +pub(crate) const START_SHARED_COLS: usize = IS_SHR + 1; /// Within the Arithmetic Unit, there are shared columns which can be /// used by any arithmetic circuit, depending on which one is active diff --git a/evm/src/arithmetic/divmod.rs b/evm/src/arithmetic/divmod.rs index 4f2dd748ec..258c131f32 100644 --- a/evm/src/arithmetic/divmod.rs +++ b/evm/src/arithmetic/divmod.rs @@ -45,7 +45,7 @@ pub(crate) fn generate( } match filter { - IS_DIV => { + IS_DIV | IS_SHR => { debug_assert!( lv[OUTPUT_REGISTER] .iter() @@ -104,11 +104,14 @@ pub(crate) fn eval_packed( nv: &[P; NUM_ARITH_COLUMNS], yield_constr: &mut ConstraintConsumer

, ) { + // Constrain IS_SHR independently, so that it doesn't impact the + // constraints when combining the flag with IS_DIV. + yield_constr.constraint_last_row(lv[IS_SHR]); eval_packed_divmod_helper( lv, nv, yield_constr, - lv[IS_DIV], + lv[IS_DIV] + lv[IS_SHR], OUTPUT_REGISTER, AUX_INPUT_REGISTER_0, ); @@ -161,12 +164,14 @@ pub(crate) fn eval_ext_circuit, const D: usize>( nv: &[ExtensionTarget; NUM_ARITH_COLUMNS], yield_constr: &mut RecursiveConstraintConsumer, ) { + yield_constr.constraint_last_row(builder, lv[IS_SHR]); + let div_shr_flag = builder.add_extension(lv[IS_DIV], lv[IS_SHR]); eval_ext_circuit_divmod_helper( builder, lv, nv, yield_constr, - lv[IS_DIV], + div_shr_flag, OUTPUT_REGISTER, AUX_INPUT_REGISTER_0, ); @@ -209,6 +214,8 @@ mod tests { for op in MODULAR_OPS { lv[op] = F::ZERO; } + // Deactivate the SHR flag so that a DIV operation is not triggered. + lv[IS_SHR] = F::ZERO; let mut constraint_consumer = ConstraintConsumer::new( vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)], @@ -240,6 +247,7 @@ mod tests { for op in MODULAR_OPS { lv[op] = F::ZERO; } + lv[IS_SHR] = F::ZERO; lv[op_filter] = F::ONE; let input0 = U256::from(rng.gen::<[u8; 32]>()); @@ -300,6 +308,7 @@ mod tests { for op in MODULAR_OPS { lv[op] = F::ZERO; } + lv[IS_SHR] = F::ZERO; lv[op_filter] = F::ONE; let input0 = U256::from(rng.gen::<[u8; 32]>()); diff --git a/evm/src/arithmetic/mod.rs b/evm/src/arithmetic/mod.rs index d9d63a0b82..bd6d56e8cb 100644 --- a/evm/src/arithmetic/mod.rs +++ b/evm/src/arithmetic/mod.rs @@ -27,15 +27,17 @@ pub(crate) enum BinaryOperator { MulFp254, SubFp254, Byte, + Shl, // simulated with MUL + Shr, // simulated with DIV } impl BinaryOperator { pub(crate) fn result(&self, input0: U256, input1: U256) -> U256 { match self { BinaryOperator::Add => input0.overflowing_add(input1).0, - BinaryOperator::Mul => input0.overflowing_mul(input1).0, + BinaryOperator::Mul | BinaryOperator::Shl => input0.overflowing_mul(input1).0, BinaryOperator::Sub => input0.overflowing_sub(input1).0, - BinaryOperator::Div => { + BinaryOperator::Div | BinaryOperator::Shr => { if input1.is_zero() { U256::zero() } else { @@ -77,6 +79,8 @@ impl BinaryOperator { BinaryOperator::MulFp254 => columns::IS_MULFP254, BinaryOperator::SubFp254 => columns::IS_SUBFP254, BinaryOperator::Byte => columns::IS_BYTE, + BinaryOperator::Shl => columns::IS_SHL, + BinaryOperator::Shr => columns::IS_SHR, } } } @@ -107,6 +111,7 @@ impl TernaryOperator { } } +/// An enum representing arithmetic operations that can be either binary or ternary. #[derive(Debug)] pub(crate) enum Operation { BinaryOperation { @@ -125,6 +130,21 @@ pub(crate) enum Operation { } impl Operation { + /// Create a binary operator with given inputs. + /// + /// NB: This works as you would expect, EXCEPT for SHL and SHR, + /// whose inputs need a small amount of preprocessing. Specifically, + /// to create `SHL(shift, value)`, call (note the reversal of + /// argument order): + /// + /// `Operation::binary(BinaryOperator::Shl, value, 1 << shift)` + /// + /// Similarly, to create `SHR(shift, value)`, call + /// + /// `Operation::binary(BinaryOperator::Shr, value, 1 << shift)` + /// + /// See witness/operation.rs::append_shift() for an example (indeed + /// the only call site for such inputs). pub(crate) fn binary(operator: BinaryOperator, input0: U256, input1: U256) -> Self { let result = operator.result(input0, input1); Self::BinaryOperation { @@ -164,6 +184,10 @@ impl Operation { /// use vectors because that's what utils::transpose (who consumes /// the result of this function as part of the range check code) /// expects. + /// + /// The `is_simulated` bool indicates whether we use a native arithmetic + /// operation or simulate one with another. This is used to distinguish + /// SHL and SHR operations that are simulated through MUL and DIV respectively. fn to_rows(&self) -> (Vec, Option>) { match *self { Operation::BinaryOperation { @@ -214,11 +238,11 @@ fn binary_op_to_rows( addcy::generate(&mut row, op.row_filter(), input0, input1); (row, None) } - BinaryOperator::Mul => { + BinaryOperator::Mul | BinaryOperator::Shl => { mul::generate(&mut row, input0, input1); (row, None) } - BinaryOperator::Div | BinaryOperator::Mod => { + BinaryOperator::Div | BinaryOperator::Mod | BinaryOperator::Shr => { let mut nv = vec![F::ZERO; columns::NUM_ARITH_COLUMNS]; divmod::generate(&mut row, &mut nv, op.row_filter(), input0, input1, result); (row, Some(nv)) diff --git a/evm/src/arithmetic/mul.rs b/evm/src/arithmetic/mul.rs index 597d405192..efb4d82247 100644 --- a/evm/src/arithmetic/mul.rs +++ b/evm/src/arithmetic/mul.rs @@ -121,7 +121,7 @@ pub fn eval_packed_generic( ) { let base = P::Scalar::from_canonical_u64(1 << LIMB_BITS); - let is_mul = lv[IS_MUL]; + let is_mul = lv[IS_MUL] + lv[IS_SHL]; let input0_limbs = read_value::(lv, INPUT_REGISTER_0); let input1_limbs = read_value::(lv, INPUT_REGISTER_1); let output_limbs = read_value::(lv, OUTPUT_REGISTER); @@ -173,7 +173,7 @@ pub fn eval_ext_circuit, const D: usize>( lv: &[ExtensionTarget; NUM_ARITH_COLUMNS], yield_constr: &mut RecursiveConstraintConsumer, ) { - let is_mul = lv[IS_MUL]; + let is_mul = builder.add_extension(lv[IS_MUL], lv[IS_SHL]); let input0_limbs = read_value::(lv, INPUT_REGISTER_0); let input1_limbs = read_value::(lv, INPUT_REGISTER_1); let output_limbs = read_value::(lv, OUTPUT_REGISTER); @@ -229,6 +229,8 @@ mod tests { // if `IS_MUL == 0`, then the constraints should be met even // if all values are garbage. lv[IS_MUL] = F::ZERO; + // Deactivate the SHL flag so that a MUL operation is not triggered. + lv[IS_SHL] = F::ZERO; let mut constraint_consumer = ConstraintConsumer::new( vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)], diff --git a/evm/src/cpu/bootstrap_kernel.rs b/evm/src/cpu/bootstrap_kernel.rs index 66f88d3ae1..4aee617c53 100644 --- a/evm/src/cpu/bootstrap_kernel.rs +++ b/evm/src/cpu/bootstrap_kernel.rs @@ -25,6 +25,7 @@ pub(crate) fn generate_bootstrap_kernel(state: &mut GenerationState for chunk in &KERNEL.code.iter().enumerate().chunks(NUM_GP_CHANNELS) { let mut cpu_row = CpuColumnsView::default(); cpu_row.clock = F::from_canonical_usize(state.traces.clock()); + cpu_row.is_bootstrap_kernel = F::ONE; // Write this chunk to memory, while simultaneously packing its bytes into a u32 word. for (channel, (addr, &byte)) in chunk.enumerate() { @@ -39,6 +40,7 @@ pub(crate) fn generate_bootstrap_kernel(state: &mut GenerationState let mut final_cpu_row = CpuColumnsView::default(); final_cpu_row.clock = F::from_canonical_usize(state.traces.clock()); + final_cpu_row.is_bootstrap_kernel = F::ONE; final_cpu_row.is_keccak_sponge = F::ONE; // The Keccak sponge CTL uses memory value columns for its inputs and outputs. final_cpu_row.mem_channels[0].value[0] = F::ZERO; // context @@ -64,8 +66,8 @@ pub(crate) fn eval_bootstrap_kernel>( let next_values: &CpuColumnsView<_> = vars.next_values.borrow(); // IS_BOOTSTRAP_KERNEL must have an init value of 1, a final value of 0, and a delta in {0, -1}. - let local_is_bootstrap = P::ONES - local_values.op.into_iter().sum::

(); - let next_is_bootstrap = P::ONES - next_values.op.into_iter().sum::

(); + let local_is_bootstrap = local_values.is_bootstrap_kernel; + let next_is_bootstrap = next_values.is_bootstrap_kernel; yield_constr.constraint_first_row(local_is_bootstrap - P::ONES); yield_constr.constraint_last_row(local_is_bootstrap); let delta_is_bootstrap = next_is_bootstrap - local_is_bootstrap; @@ -111,10 +113,8 @@ pub(crate) fn eval_bootstrap_kernel_circuit, const let one = builder.one_extension(); // IS_BOOTSTRAP_KERNEL must have an init value of 1, a final value of 0, and a delta in {0, -1}. - let local_is_bootstrap = builder.add_many_extension(local_values.op.iter()); - let local_is_bootstrap = builder.sub_extension(one, local_is_bootstrap); - let next_is_bootstrap = builder.add_many_extension(next_values.op.iter()); - let next_is_bootstrap = builder.sub_extension(one, next_is_bootstrap); + let local_is_bootstrap = local_values.is_bootstrap_kernel; + let next_is_bootstrap = next_values.is_bootstrap_kernel; let constraint = builder.sub_extension(local_is_bootstrap, one); yield_constr.constraint_first_row(builder, constraint); yield_constr.constraint_last_row(builder, local_is_bootstrap); diff --git a/evm/src/cpu/columns/mod.rs b/evm/src/cpu/columns/mod.rs index 134ab02b49..fecc8df986 100644 --- a/evm/src/cpu/columns/mod.rs +++ b/evm/src/cpu/columns/mod.rs @@ -35,6 +35,9 @@ pub struct MemoryChannelView { #[repr(C)] #[derive(Clone, Copy, Eq, PartialEq, Debug)] pub struct CpuColumnsView { + /// Filter. 1 if the row is part of bootstrapping the kernel code, 0 otherwise. + pub is_bootstrap_kernel: T, + /// If CPU cycle: Current context. // TODO: this is currently unconstrained pub context: T, diff --git a/evm/src/cpu/columns/ops.rs b/evm/src/cpu/columns/ops.rs index 6c68a18305..d4d753f7cf 100644 --- a/evm/src/cpu/columns/ops.rs +++ b/evm/src/cpu/columns/ops.rs @@ -7,33 +7,17 @@ use crate::util::{indices_arr, transmute_no_compile_time_size_checks}; #[repr(C)] #[derive(Clone, Copy, Eq, PartialEq, Debug)] pub struct OpsColumnsView { - // TODO: combine ADD, MUL, SUB, DIV, MOD, ADDFP254, MULFP254, SUBFP254, LT, and GT into one flag - pub add: T, - pub mul: T, - pub sub: T, - pub div: T, - pub mod_: T, - // TODO: combine ADDMOD, MULMOD and SUBMOD into one flag - pub addmod: T, - pub mulmod: T, - pub addfp254: T, - pub mulfp254: T, - pub subfp254: T, - pub submod: T, - pub lt: T, - pub gt: T, - pub eq_iszero: T, // Combines EQ and ISZERO flags. - pub logic_op: T, // Combines AND, OR and XOR flags. + pub binary_op: T, // Combines ADD, MUL, SUB, DIV, MOD, LT, GT and BYTE flags. + pub ternary_op: T, // Combines ADDMOD, MULMOD and SUBMOD flags. + pub fp254_op: T, // Combines ADD_FP254, MUL_FP254 and SUB_FP254 flags. + pub eq_iszero: T, // Combines EQ and ISZERO flags. + pub logic_op: T, // Combines AND, OR and XOR flags. pub not: T, - pub byte: T, - // TODO: combine SHL and SHR into one flag - pub shl: T, - pub shr: T, + pub shift: T, // Combines SHL and SHR flags. pub keccak_general: T, pub prover_input: T, pub pop: T, - // TODO: combine JUMP and JUMPI into one flag - pub jumps: T, // Note: This column must be 0 when is_cpu_cycle = 0. + pub jumps: T, // Combines JUMP and JUMPI flags. pub pc: T, pub jumpdest: T, pub push0: T, @@ -44,9 +28,7 @@ pub struct OpsColumnsView { pub mstore_32bytes: T, pub mload_32bytes: T, pub exit_kernel: T, - // TODO: combine MLOAD_GENERAL and MSTORE_GENERAL into one flag - pub mload_general: T, - pub mstore_general: T, + pub m_op_general: T, pub syscall: T, pub exception: T, diff --git a/evm/src/cpu/control_flow.rs b/evm/src/cpu/control_flow.rs index 0bea5c7c70..9c17367aa2 100644 --- a/evm/src/cpu/control_flow.rs +++ b/evm/src/cpu/control_flow.rs @@ -8,24 +8,14 @@ use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer use crate::cpu::columns::{CpuColumnsView, COL_MAP}; use crate::cpu::kernel::aggregator::KERNEL; -const NATIVE_INSTRUCTIONS: [usize; 28] = [ - COL_MAP.op.add, - COL_MAP.op.mul, - COL_MAP.op.sub, - COL_MAP.op.div, - COL_MAP.op.mod_, - COL_MAP.op.addmod, - COL_MAP.op.mulmod, - COL_MAP.op.addfp254, - COL_MAP.op.mulfp254, - COL_MAP.op.subfp254, - COL_MAP.op.lt, - COL_MAP.op.gt, +const NATIVE_INSTRUCTIONS: [usize; 17] = [ + COL_MAP.op.binary_op, + COL_MAP.op.ternary_op, + COL_MAP.op.fp254_op, COL_MAP.op.eq_iszero, COL_MAP.op.logic_op, COL_MAP.op.not, - COL_MAP.op.shl, - COL_MAP.op.shr, + COL_MAP.op.shift, COL_MAP.op.keccak_general, COL_MAP.op.prover_input, COL_MAP.op.pop, @@ -39,20 +29,14 @@ const NATIVE_INSTRUCTIONS: [usize; 28] = [ COL_MAP.op.swap, COL_MAP.op.context_op, // not EXIT_KERNEL (performs a jump) - COL_MAP.op.mload_general, - COL_MAP.op.mstore_general, + COL_MAP.op.m_op_general, // not SYSCALL (performs a jump) // not exceptions (also jump) ]; -pub(crate) fn get_halt_pcs() -> (F, F) { - let halt_pc0 = KERNEL.global_labels["halt_pc0"]; - let halt_pc1 = KERNEL.global_labels["halt_pc1"]; - - ( - F::from_canonical_usize(halt_pc0), - F::from_canonical_usize(halt_pc1), - ) +pub(crate) fn get_halt_pc() -> F { + let halt_pc = KERNEL.global_labels["halt"]; + F::from_canonical_usize(halt_pc) } pub(crate) fn get_start_pc() -> F { @@ -68,8 +52,15 @@ pub fn eval_packed_generic( ) { let is_cpu_cycle: P = COL_MAP.op.iter().map(|&col_i| lv[col_i]).sum(); let is_cpu_cycle_next: P = COL_MAP.op.iter().map(|&col_i| nv[col_i]).sum(); - // Once we start executing instructions, then we continue until the end of the table. - yield_constr.constraint_transition(is_cpu_cycle * (is_cpu_cycle_next - P::ONES)); + + let next_halt_state = P::ONES - nv.is_bootstrap_kernel - is_cpu_cycle_next; + + // Once we start executing instructions, then we continue until the end of the table + // or we reach dummy padding rows. This, along with the constraints on the first row, + // enforces that operation flags and the halt flag are mutually exclusive over the entire + // CPU trace. + yield_constr + .constraint_transition(is_cpu_cycle * (is_cpu_cycle_next + next_halt_state - P::ONES)); // If a row is a CPU cycle and executing a native instruction (implemented as a table row; not // microcoded) then the program counter is incremented by 1 to obtain the next row's program @@ -90,16 +81,6 @@ pub fn eval_packed_generic( yield_constr.constraint_transition(is_last_noncpu_cycle * pc_diff); yield_constr.constraint_transition(is_last_noncpu_cycle * (nv.is_kernel_mode - P::ONES)); yield_constr.constraint_transition(is_last_noncpu_cycle * nv.stack_len); - - // The last row must be a CPU cycle row. - yield_constr.constraint_last_row(is_cpu_cycle - P::ONES); - // Also, the last row's `program_counter` must be inside the `halt` infinite loop. Note that - // that loop consists of two instructions, so we must check for `halt` and `halt_inner` labels. - let (halt_pc0, halt_pc1) = get_halt_pcs::(); - yield_constr - .constraint_last_row((lv.program_counter - halt_pc0) * (lv.program_counter - halt_pc1)); - // Finally, the last row must be in kernel mode. - yield_constr.constraint_last_row(lv.is_kernel_mode - P::ONES); } pub fn eval_ext_circuit, const D: usize>( @@ -108,11 +89,21 @@ pub fn eval_ext_circuit, const D: usize>( nv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { + let one = builder.one_extension(); + let is_cpu_cycle = builder.add_many_extension(COL_MAP.op.iter().map(|&col_i| lv[col_i])); let is_cpu_cycle_next = builder.add_many_extension(COL_MAP.op.iter().map(|&col_i| nv[col_i])); - // Once we start executing instructions, then we continue until the end of the table. + + let next_halt_state = builder.add_extension(nv.is_bootstrap_kernel, is_cpu_cycle_next); + let next_halt_state = builder.sub_extension(one, next_halt_state); + + // Once we start executing instructions, then we continue until the end of the table + // or we reach dummy padding rows. This, along with the constraints on the first row, + // enforces that operation flags and the halt flag are mutually exclusive over the entire + // CPU trace. { - let constr = builder.mul_sub_extension(is_cpu_cycle, is_cpu_cycle_next, is_cpu_cycle); + let constr = builder.add_extension(is_cpu_cycle_next, next_halt_state); + let constr = builder.mul_sub_extension(is_cpu_cycle, constr, is_cpu_cycle); yield_constr.constraint_transition(builder, constr); } @@ -155,30 +146,4 @@ pub fn eval_ext_circuit, const D: usize>( let kernel_constr = builder.mul_extension(is_last_noncpu_cycle, nv.stack_len); yield_constr.constraint_transition(builder, kernel_constr); } - - // The last row must be a CPU cycle row. - { - let one = builder.one_extension(); - let constr = builder.sub_extension(is_cpu_cycle, one); - yield_constr.constraint_last_row(builder, constr); - } - // Also, the last row's `program_counter` must be inside the `halt` infinite loop. Note that - // that loop consists of two instructions, so we must check for `halt` and `halt_inner` labels. - { - let (halt_pc0, halt_pc1) = get_halt_pcs(); - let halt_pc0_target = builder.constant_extension(halt_pc0); - let halt_pc1_target = builder.constant_extension(halt_pc1); - - let halt_pc0_offset = builder.sub_extension(lv.program_counter, halt_pc0_target); - let halt_pc1_offset = builder.sub_extension(lv.program_counter, halt_pc1_target); - let constr = builder.mul_extension(halt_pc0_offset, halt_pc1_offset); - - yield_constr.constraint_last_row(builder, constr); - } - // Finally, the last row must be in kernel mode. - { - let one = builder.one_extension(); - let constr = builder.sub_extension(lv.is_kernel_mode, one); - yield_constr.constraint_last_row(builder, constr); - } } diff --git a/evm/src/cpu/cpu_stark.rs b/evm/src/cpu/cpu_stark.rs index 25e7cc6ba0..bd2fcf193b 100644 --- a/evm/src/cpu/cpu_stark.rs +++ b/evm/src/cpu/cpu_stark.rs @@ -8,6 +8,7 @@ use plonky2::field::packed::PackedField; use plonky2::field::types::Field; use plonky2::hash::hash_types::RichField; +use super::halt; use crate::all_stark::Table; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cpu::columns::{CpuColumnsView, COL_MAP, NUM_CPU_COLUMNS}; @@ -48,9 +49,8 @@ pub fn ctl_filter_keccak_sponge() -> Column { /// Create the vector of Columns corresponding to the two inputs and /// one output of a binary operation. -fn ctl_data_binops(ops: &[usize]) -> Vec> { - let mut res = Column::singles(ops).collect_vec(); - res.extend(Column::singles(COL_MAP.mem_channels[0].value)); +fn ctl_data_binops() -> Vec> { + let mut res = Column::singles(COL_MAP.mem_channels[0].value).collect_vec(); res.extend(Column::singles(COL_MAP.mem_channels[1].value)); res.extend(Column::singles( COL_MAP.mem_channels[NUM_GP_CHANNELS - 1].value, @@ -70,10 +70,9 @@ fn ctl_data_binops(ops: &[usize]) -> Vec> { /// case of shift operations, which will skip the first memory channel and use the /// next three as ternary inputs. Because both `MUL` and `DIV` are binary operations, /// the last memory channel used for the inputs will be safely ignored. -fn ctl_data_ternops(ops: &[usize], is_shift: bool) -> Vec> { +fn ctl_data_ternops(is_shift: bool) -> Vec> { let offset = is_shift as usize; - let mut res = Column::singles(ops).collect_vec(); - res.extend(Column::singles(COL_MAP.mem_channels[offset].value)); + let mut res = Column::singles(COL_MAP.mem_channels[offset].value).collect_vec(); res.extend(Column::singles(COL_MAP.mem_channels[offset + 1].value)); res.extend(Column::singles(COL_MAP.mem_channels[offset + 2].value)); res.extend(Column::singles( @@ -85,7 +84,7 @@ fn ctl_data_ternops(ops: &[usize], is_shift: bool) -> Vec> { pub fn ctl_data_logic() -> Vec> { // Instead of taking single columns, we reconstruct the entire opcode value directly. let mut res = vec![Column::le_bits(COL_MAP.opcode_bits)]; - res.extend(ctl_data_binops(&[])); + res.extend(ctl_data_binops()); res } @@ -94,22 +93,9 @@ pub fn ctl_filter_logic() -> Column { } pub fn ctl_arithmetic_base_rows() -> TableWithColumns { - const OPS: [usize; 14] = [ - COL_MAP.op.add, - COL_MAP.op.sub, - COL_MAP.op.mul, - COL_MAP.op.lt, - COL_MAP.op.gt, - COL_MAP.op.addfp254, - COL_MAP.op.mulfp254, - COL_MAP.op.subfp254, - COL_MAP.op.addmod, - COL_MAP.op.mulmod, - COL_MAP.op.submod, - COL_MAP.op.div, - COL_MAP.op.mod_, - COL_MAP.op.byte, - ]; + // Instead of taking single columns, we reconstruct the entire opcode value directly. + let mut columns = vec![Column::le_bits(COL_MAP.opcode_bits)]; + columns.extend(ctl_data_ternops(false)); // Create the CPU Table whose columns are those with the three // inputs and one output of the ternary operations listed in `ops` // (also `ops` is used as the operation filter). The list of @@ -117,40 +103,25 @@ pub fn ctl_arithmetic_base_rows() -> TableWithColumns { // the third input. TableWithColumns::new( Table::Cpu, - ctl_data_ternops(&OPS, false), - Some(Column::sum(OPS)), + columns, + Some(Column::sum([ + COL_MAP.op.binary_op, + COL_MAP.op.fp254_op, + COL_MAP.op.ternary_op, + ])), ) } pub fn ctl_arithmetic_shift_rows() -> TableWithColumns { - const OPS: [usize; 14] = [ - COL_MAP.op.add, - COL_MAP.op.sub, - // SHL is interpreted as MUL on the arithmetic side - COL_MAP.op.shl, - COL_MAP.op.lt, - COL_MAP.op.gt, - COL_MAP.op.addfp254, - COL_MAP.op.mulfp254, - COL_MAP.op.subfp254, - COL_MAP.op.addmod, - COL_MAP.op.mulmod, - COL_MAP.op.submod, - // SHR is interpreted as DIV on the arithmetic side - COL_MAP.op.shr, - COL_MAP.op.mod_, - COL_MAP.op.byte, - ]; + // Instead of taking single columns, we reconstruct the entire opcode value directly. + let mut columns = vec![Column::le_bits(COL_MAP.opcode_bits)]; + columns.extend(ctl_data_ternops(true)); // Create the CPU Table whose columns are those with the three // inputs and one output of the ternary operations listed in `ops` // (also `ops` is used as the operation filter). The list of // operations includes binary operations which will simply ignore // the third input. - TableWithColumns::new( - Table::Cpu, - ctl_data_ternops(&OPS, true), - Some(Column::sum([COL_MAP.op.shl, COL_MAP.op.shr])), - ) + TableWithColumns::new(Table::Cpu, columns, Some(Column::single(COL_MAP.op.shift))) } pub fn ctl_data_byte_packing() -> Vec> { @@ -274,15 +245,16 @@ impl, const D: usize> Stark for CpuStark, const D: usize> Stark for CpuStark(lv: &mut CpuColumnsView) { @@ -99,6 +100,10 @@ pub fn generate(lv: &mut CpuColumnsView) { let flag = available && opcode_match; lv[col] = F::from_bool(flag); } + + if opcode == 0xfb || opcode == 0xfc { + lv.op.m_op_general = F::from_bool(kernel); + } } /// Break up an opcode (which is 8 bits long) into its eight bits. @@ -134,17 +139,17 @@ pub fn eval_packed_generic( let flag = lv[flag_col]; yield_constr.constraint(flag * (flag - P::ONES)); } - // Manually check the logic_op flag combining AND, OR and XOR. - let flag = lv.op.logic_op; - yield_constr.constraint(flag * (flag - P::ONES)); + // Also check that the combined instruction flags are valid. + for flag_idx in COMBINED_OPCODES { + yield_constr.constraint(lv[flag_idx] * (lv[flag_idx] - P::ONES)); + } - // Now check that they sum to 0 or 1. - // Includes the logic_op flag encompassing AND, OR and XOR opcodes. + // Now check that they sum to 0 or 1, including the combined flags. let flag_sum: P = OPCODES .into_iter() .map(|(_, _, _, flag_col)| lv[flag_col]) - .sum::

() - + lv.op.logic_op; + .chain(COMBINED_OPCODES.map(|op| lv[op])) + .sum::

(); yield_constr.constraint(flag_sum * (flag_sum - P::ONES)); // Finally, classify all opcodes, together with the kernel flag, into blocks @@ -173,6 +178,20 @@ pub fn eval_packed_generic( // correct mode. yield_constr.constraint(lv[col] * (unavailable + opcode_mismatch)); } + + // Manually check lv.op.m_op_constr + let opcode: P = lv + .opcode_bits + .into_iter() + .enumerate() + .map(|(i, bit)| bit * P::Scalar::from_canonical_u64(1 << i)) + .sum(); + yield_constr.constraint((P::ONES - kernel_mode) * lv.op.m_op_general); + + let m_op_constr = (opcode - P::Scalar::from_canonical_usize(0xfb_usize)) + * (opcode - P::Scalar::from_canonical_usize(0xfc_usize)) + * lv.op.m_op_general; + yield_constr.constraint(m_op_constr); } pub fn eval_ext_circuit, const D: usize>( @@ -204,15 +223,16 @@ pub fn eval_ext_circuit, const D: usize>( let constr = builder.mul_sub_extension(flag, flag, flag); yield_constr.constraint(builder, constr); } - // Manually check the logic_op flag combining AND, OR and XOR. - let flag = lv.op.logic_op; - let constr = builder.mul_sub_extension(flag, flag, flag); - yield_constr.constraint(builder, constr); + // Also check that the combined instruction flags are valid. + for flag_idx in COMBINED_OPCODES { + let constr = builder.mul_sub_extension(lv[flag_idx], lv[flag_idx], lv[flag_idx]); + yield_constr.constraint(builder, constr); + } - // Now check that they sum to 0 or 1. - // Includes the logic_op flag encompassing AND, OR and XOR opcodes. + // Now check that they sum to 0 or 1, including the combined flags. { - let mut flag_sum = lv.op.logic_op; + let mut flag_sum = + builder.add_many_extension(COMBINED_OPCODES.into_iter().map(|idx| lv[idx])); for (_, _, _, flag_col) in OPCODES { let flag = lv[flag_col]; flag_sum = builder.add_extension(flag_sum, flag); @@ -250,4 +270,28 @@ pub fn eval_ext_circuit, const D: usize>( let constr = builder.mul_extension(lv[col], constr); yield_constr.constraint(builder, constr); } + + // Manually check lv.op.m_op_constr + let opcode = lv + .opcode_bits + .into_iter() + .rev() + .fold(builder.zero_extension(), |cumul, bit| { + builder.mul_const_add_extension(F::TWO, cumul, bit) + }); + + let mload_opcode = builder.constant_extension(F::Extension::from_canonical_usize(0xfb_usize)); + let mstore_opcode = builder.constant_extension(F::Extension::from_canonical_usize(0xfc_usize)); + + let one_extension = builder.constant_extension(F::Extension::ONE); + let is_not_kernel_mode = builder.sub_extension(one_extension, kernel_mode); + let constr = builder.mul_extension(is_not_kernel_mode, lv.op.m_op_general); + yield_constr.constraint(builder, constr); + + let mload_constr = builder.sub_extension(opcode, mload_opcode); + let mstore_constr = builder.sub_extension(opcode, mstore_opcode); + let mut m_op_constr = builder.mul_extension(mload_constr, mstore_constr); + m_op_constr = builder.mul_extension(m_op_constr, lv.op.m_op_general); + + yield_constr.constraint(builder, m_op_constr); } diff --git a/evm/src/cpu/gas.rs b/evm/src/cpu/gas.rs index e967c07ece..51f375c056 100644 --- a/evm/src/cpu/gas.rs +++ b/evm/src/cpu/gas.rs @@ -19,25 +19,13 @@ const G_MID: Option = Some(8); const G_HIGH: Option = Some(10); const SIMPLE_OPCODES: OpsColumnsView> = OpsColumnsView { - add: G_VERYLOW, - mul: G_LOW, - sub: G_VERYLOW, - div: G_LOW, - mod_: G_LOW, - addmod: G_MID, - mulmod: G_MID, - addfp254: KERNEL_ONLY_INSTR, - mulfp254: KERNEL_ONLY_INSTR, - subfp254: KERNEL_ONLY_INSTR, - submod: KERNEL_ONLY_INSTR, - lt: G_VERYLOW, - gt: G_VERYLOW, + binary_op: None, // This is handled manually below + ternary_op: None, // This is handled manually below + fp254_op: KERNEL_ONLY_INSTR, eq_iszero: G_VERYLOW, logic_op: G_VERYLOW, not: G_VERYLOW, - byte: G_VERYLOW, - shl: G_VERYLOW, - shr: G_VERYLOW, + shift: G_VERYLOW, keccak_general: KERNEL_ONLY_INSTR, prover_input: KERNEL_ONLY_INSTR, pop: G_BASE, @@ -52,8 +40,7 @@ const SIMPLE_OPCODES: OpsColumnsView> = OpsColumnsView { mstore_32bytes: KERNEL_ONLY_INSTR, mload_32bytes: KERNEL_ONLY_INSTR, exit_kernel: None, - mload_general: KERNEL_ONLY_INSTR, - mstore_general: KERNEL_ONLY_INSTR, + m_op_general: KERNEL_ONLY_INSTR, syscall: None, exception: None, }; @@ -97,6 +84,21 @@ fn eval_packed_accumulate( let jump_gas_cost = P::Scalar::from_canonical_u32(G_MID.unwrap()) + lv.opcode_bits[0] * P::Scalar::from_canonical_u32(G_HIGH.unwrap() - G_MID.unwrap()); yield_constr.constraint_transition(lv.op.jumps * (nv.gas - lv.gas - jump_gas_cost)); + + // For binary_ops. + // MUL, DIV and MOD are differentiated from ADD, SUB, LT, GT and BYTE by their first and fifth bits set to 0. + let cost_filter = lv.opcode_bits[0] + lv.opcode_bits[4] - lv.opcode_bits[0] * lv.opcode_bits[4]; + let binary_op_cost = P::Scalar::from_canonical_u32(G_LOW.unwrap()) + + cost_filter + * (P::Scalar::from_canonical_u32(G_VERYLOW.unwrap()) + - P::Scalar::from_canonical_u32(G_LOW.unwrap())); + yield_constr.constraint_transition(lv.op.binary_op * (nv.gas - lv.gas - binary_op_cost)); + + // For ternary_ops. + // SUBMOD is differentiated by its second bit set to 1. + let ternary_op_cost = P::Scalar::from_canonical_u32(G_MID.unwrap()) + - lv.opcode_bits[1] * P::Scalar::from_canonical_u32(G_MID.unwrap()); + yield_constr.constraint_transition(lv.op.ternary_op * (nv.gas - lv.gas - ternary_op_cost)); } fn eval_packed_init( @@ -186,6 +188,41 @@ fn eval_ext_circuit_accumulate, const D: usize>( let gas_diff = builder.sub_extension(nv_lv_diff, jump_gas_cost); let constr = builder.mul_extension(filter, gas_diff); yield_constr.constraint_transition(builder, constr); + + // For binary_ops. + // MUL, DIV and MOD are differentiated from ADD, SUB, LT, GT and BYTE by their first and fifth bits set to 0. + let filter = lv.op.binary_op; + let cost_filter = { + let a = builder.add_extension(lv.opcode_bits[0], lv.opcode_bits[4]); + let b = builder.mul_extension(lv.opcode_bits[0], lv.opcode_bits[4]); + builder.sub_extension(a, b) + }; + let binary_op_cost = builder.mul_const_extension( + F::from_canonical_u32(G_VERYLOW.unwrap()) - F::from_canonical_u32(G_LOW.unwrap()), + cost_filter, + ); + let binary_op_cost = + builder.add_const_extension(binary_op_cost, F::from_canonical_u32(G_LOW.unwrap())); + + let nv_lv_diff = builder.sub_extension(nv.gas, lv.gas); + let gas_diff = builder.sub_extension(nv_lv_diff, binary_op_cost); + let constr = builder.mul_extension(filter, gas_diff); + yield_constr.constraint_transition(builder, constr); + + // For ternary_ops. + // SUBMOD is differentiated by its second bit set to 1. + let filter = lv.op.ternary_op; + let ternary_op_cost = builder.mul_const_extension( + F::from_canonical_u32(G_MID.unwrap()).neg(), + lv.opcode_bits[1], + ); + let ternary_op_cost = + builder.add_const_extension(ternary_op_cost, F::from_canonical_u32(G_MID.unwrap())); + + let nv_lv_diff = builder.sub_extension(nv.gas, lv.gas); + let gas_diff = builder.sub_extension(nv_lv_diff, ternary_op_cost); + let constr = builder.mul_extension(filter, gas_diff); + yield_constr.constraint_transition(builder, constr); } fn eval_ext_circuit_init, const D: usize>( diff --git a/evm/src/cpu/halt.rs b/evm/src/cpu/halt.rs new file mode 100644 index 0000000000..9ad34344ea --- /dev/null +++ b/evm/src/cpu/halt.rs @@ -0,0 +1,98 @@ +//! Once the CPU execution is over (i.e. reached the `halt` label in the kernel), +//! the CPU trace will be padded with special dummy rows, incurring no memory overhead. + +use plonky2::field::extension::Extendable; +use plonky2::field::packed::PackedField; +use plonky2::hash::hash_types::RichField; +use plonky2::iop::ext_target::ExtensionTarget; + +use super::control_flow::get_halt_pc; +use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; +use crate::cpu::columns::{CpuColumnsView, COL_MAP}; +use crate::cpu::membus::NUM_GP_CHANNELS; + +pub fn eval_packed( + lv: &CpuColumnsView

, + nv: &CpuColumnsView

, + yield_constr: &mut ConstraintConsumer

, +) { + let is_cpu_cycle: P = COL_MAP.op.iter().map(|&col_i| lv[col_i]).sum(); + let is_cpu_cycle_next: P = COL_MAP.op.iter().map(|&col_i| nv[col_i]).sum(); + + let halt_state = P::ONES - lv.is_bootstrap_kernel - is_cpu_cycle; + let next_halt_state = P::ONES - nv.is_bootstrap_kernel - is_cpu_cycle_next; + + // The halt flag must be boolean. + yield_constr.constraint(halt_state * (halt_state - P::ONES)); + // Once we reach a padding row, there must be only padding rows. + yield_constr.constraint_transition(halt_state * (next_halt_state - P::ONES)); + + // Padding rows should have their memory channels disabled. + for i in 0..NUM_GP_CHANNELS { + let channel = lv.mem_channels[i]; + yield_constr.constraint(halt_state * channel.used); + } + + // The last row must be a dummy padding row. + yield_constr.constraint_last_row(halt_state - P::ONES); + + // Also, a padding row's `program_counter` must be at the `halt` label. + // In particular, it ensures that the first padding row may only be added + // after we jumped to the `halt` function. Subsequent padding rows may set + // the `program_counter` to arbitrary values (there's no transition + // constraints) so we can place this requirement on them too. + let halt_pc = get_halt_pc::(); + yield_constr.constraint(halt_state * (lv.program_counter - halt_pc)); +} + +pub fn eval_ext_circuit, const D: usize>( + builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, + lv: &CpuColumnsView>, + nv: &CpuColumnsView>, + yield_constr: &mut RecursiveConstraintConsumer, +) { + let one = builder.one_extension(); + + let is_cpu_cycle = builder.add_many_extension(COL_MAP.op.iter().map(|&col_i| lv[col_i])); + let is_cpu_cycle_next = builder.add_many_extension(COL_MAP.op.iter().map(|&col_i| nv[col_i])); + + let halt_state = builder.add_extension(lv.is_bootstrap_kernel, is_cpu_cycle); + let halt_state = builder.sub_extension(one, halt_state); + let next_halt_state = builder.add_extension(nv.is_bootstrap_kernel, is_cpu_cycle_next); + let next_halt_state = builder.sub_extension(one, next_halt_state); + + // The halt flag must be boolean. + let constr = builder.mul_sub_extension(halt_state, halt_state, halt_state); + yield_constr.constraint(builder, constr); + // Once we reach a padding row, there must be only padding rows. + let constr = builder.mul_sub_extension(halt_state, next_halt_state, halt_state); + yield_constr.constraint_transition(builder, constr); + + // Padding rows should have their memory channels disabled. + for i in 0..NUM_GP_CHANNELS { + let channel = lv.mem_channels[i]; + let constr = builder.mul_extension(halt_state, channel.used); + yield_constr.constraint(builder, constr); + } + + // The last row must be a dummy padding row. + { + let one = builder.one_extension(); + let constr = builder.sub_extension(halt_state, one); + yield_constr.constraint_last_row(builder, constr); + } + + // Also, a padding row's `program_counter` must be at the `halt` label. + // In particular, it ensures that the first padding row may only be added + // after we jumped to the `halt` function. Subsequent padding rows may set + // the `program_counter` to arbitrary values (there's no transition + // constraints) so we can place this requirement on them too. + { + let halt_pc = get_halt_pc(); + let halt_pc_target = builder.constant_extension(halt_pc); + let constr = builder.sub_extension(lv.program_counter, halt_pc_target); + let constr = builder.mul_extension(halt_state, constr); + + yield_constr.constraint(builder, constr); + } +} diff --git a/evm/src/cpu/jumps.rs b/evm/src/cpu/jumps.rs index a3c38a90a6..62d9bdfd25 100644 --- a/evm/src/cpu/jumps.rs +++ b/evm/src/cpu/jumps.rs @@ -75,8 +75,8 @@ pub fn eval_packed_jump_jumpi( let is_jumpi = filter * lv.opcode_bits[0]; // Stack constraints. - stack::eval_packed_one(lv, is_jump, stack::JUMP_OP.unwrap(), yield_constr); - stack::eval_packed_one(lv, is_jumpi, stack::JUMPI_OP.unwrap(), yield_constr); + stack::eval_packed_one(lv, nv, is_jump, stack::JUMP_OP.unwrap(), yield_constr); + stack::eval_packed_one(lv, nv, is_jumpi, stack::JUMPI_OP.unwrap(), yield_constr); // If `JUMP`, re-use the `JUMPI` logic, but setting the second input (the predicate) to be 1. // In other words, we implement `JUMP(dst)` as `JUMPI(dst, cond=1)`. @@ -151,10 +151,18 @@ pub fn eval_ext_circuit_jump_jumpi, const D: usize> let is_jumpi = builder.mul_extension(filter, lv.opcode_bits[0]); // Stack constraints. - stack::eval_ext_circuit_one(builder, lv, is_jump, stack::JUMP_OP.unwrap(), yield_constr); stack::eval_ext_circuit_one( builder, lv, + nv, + is_jump, + stack::JUMP_OP.unwrap(), + yield_constr, + ); + stack::eval_ext_circuit_one( + builder, + lv, + nv, is_jumpi, stack::JUMPI_OP.unwrap(), yield_constr, diff --git a/evm/src/cpu/kernel/asm/halt.asm b/evm/src/cpu/kernel/asm/halt.asm index 906ce51aaa..49561fd660 100644 --- a/evm/src/cpu/kernel/asm/halt.asm +++ b/evm/src/cpu/kernel/asm/halt.asm @@ -1,6 +1,2 @@ global halt: - PUSH halt_pc0 -global halt_pc0: - DUP1 -global halt_pc1: - JUMP + PANIC diff --git a/evm/src/cpu/kernel/asm/memory/packing.asm b/evm/src/cpu/kernel/asm/memory/packing.asm index 81ab31236e..1dbbf39362 100644 --- a/evm/src/cpu/kernel/asm/memory/packing.asm +++ b/evm/src/cpu/kernel/asm/memory/packing.asm @@ -42,10 +42,10 @@ global mload_packing_u64_LE: // Post stack: offset' global mstore_unpacking: // stack: context, segment, offset, value, len, retdest - %stack(context, segment, offset, value, len, retdest) -> (context, segment, offset, value, len, len, offset, retdest) - // stack: context, segment, offset, value, len, len, offset, retdest + %stack(context, segment, offset, value, len, retdest) -> (context, segment, offset, value, len, offset, len, retdest) + // stack: context, segment, offset, value, len, offset, len, retdest MSTORE_32BYTES - // stack: len, offset, retdest + // stack: offset, len, retdest ADD SWAP1 // stack: retdest, offset' JUMP diff --git a/evm/src/cpu/memio.rs b/evm/src/cpu/memio.rs index 09490e87e4..aa3749cab2 100644 --- a/evm/src/cpu/memio.rs +++ b/evm/src/cpu/memio.rs @@ -7,6 +7,7 @@ use plonky2::iop::ext_target::ExtensionTarget; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cpu::columns::CpuColumnsView; use crate::cpu::membus::NUM_GP_CHANNELS; +use crate::cpu::stack; fn get_addr(lv: &CpuColumnsView) -> (T, T, T) { let addr_context = lv.mem_channels[0].value[0]; @@ -17,9 +18,11 @@ fn get_addr(lv: &CpuColumnsView) -> (T, T, T) { fn eval_packed_load( lv: &CpuColumnsView

, + nv: &CpuColumnsView

, yield_constr: &mut ConstraintConsumer

, ) { - let filter = lv.op.mload_general; + // The opcode for MLOAD_GENERAL is 0xfb. If the operation is MLOAD_GENERAL, lv.opcode_bits[0] = 1 + let filter = lv.op.m_op_general * lv.opcode_bits[0]; let (addr_context, addr_segment, addr_virtual) = get_addr(lv); @@ -38,14 +41,25 @@ fn eval_packed_load( for &channel in &lv.mem_channels[4..NUM_GP_CHANNELS - 1] { yield_constr.constraint(filter * channel.used); } + + // Stack constraints + stack::eval_packed_one( + lv, + nv, + filter, + stack::MLOAD_GENERAL_OP.unwrap(), + yield_constr, + ); } fn eval_ext_circuit_load, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, + nv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { - let filter = lv.op.mload_general; + let mut filter = lv.op.m_op_general; + filter = builder.mul_extension(filter, lv.opcode_bits[0]); let (addr_context, addr_segment, addr_virtual) = get_addr(lv); @@ -82,13 +96,24 @@ fn eval_ext_circuit_load, const D: usize>( let constr = builder.mul_extension(filter, channel.used); yield_constr.constraint(builder, constr); } + + // Stack constraints + stack::eval_ext_circuit_one( + builder, + lv, + nv, + filter, + stack::MLOAD_GENERAL_OP.unwrap(), + yield_constr, + ); } fn eval_packed_store( lv: &CpuColumnsView

, + nv: &CpuColumnsView

, yield_constr: &mut ConstraintConsumer

, ) { - let filter = lv.op.mstore_general; + let filter = lv.op.m_op_general * (P::ONES - lv.opcode_bits[0]); let (addr_context, addr_segment, addr_virtual) = get_addr(lv); @@ -107,14 +132,27 @@ fn eval_packed_store( for &channel in &lv.mem_channels[5..] { yield_constr.constraint(filter * channel.used); } + + // Stack constraints + stack::eval_packed_one( + lv, + nv, + filter, + stack::MSTORE_GENERAL_OP.unwrap(), + yield_constr, + ); } fn eval_ext_circuit_store, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, + nv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { - let filter = lv.op.mstore_general; + let mut filter = lv.op.m_op_general; + let one = builder.one_extension(); + let minus = builder.sub_extension(one, lv.opcode_bits[0]); + filter = builder.mul_extension(filter, minus); let (addr_context, addr_segment, addr_virtual) = get_addr(lv); @@ -151,21 +189,33 @@ fn eval_ext_circuit_store, const D: usize>( let constr = builder.mul_extension(filter, channel.used); yield_constr.constraint(builder, constr); } + + // Stack constraints + stack::eval_ext_circuit_one( + builder, + lv, + nv, + filter, + stack::MSTORE_GENERAL_OP.unwrap(), + yield_constr, + ); } pub fn eval_packed( lv: &CpuColumnsView

, + nv: &CpuColumnsView

, yield_constr: &mut ConstraintConsumer

, ) { - eval_packed_load(lv, yield_constr); - eval_packed_store(lv, yield_constr); + eval_packed_load(lv, nv, yield_constr); + eval_packed_store(lv, nv, yield_constr); } pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, + nv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { - eval_ext_circuit_load(builder, lv, yield_constr); - eval_ext_circuit_store(builder, lv, yield_constr); + eval_ext_circuit_load(builder, lv, nv, yield_constr); + eval_ext_circuit_store(builder, lv, nv, yield_constr); } diff --git a/evm/src/cpu/mod.rs b/evm/src/cpu/mod.rs index 91b04cf487..b7312147b4 100644 --- a/evm/src/cpu/mod.rs +++ b/evm/src/cpu/mod.rs @@ -6,6 +6,7 @@ pub mod cpu_stark; pub(crate) mod decode; mod dup_swap; mod gas; +mod halt; mod jumps; pub mod kernel; pub(crate) mod membus; diff --git a/evm/src/cpu/modfp254.rs b/evm/src/cpu/modfp254.rs index e6a2815d19..86f08052ef 100644 --- a/evm/src/cpu/modfp254.rs +++ b/evm/src/cpu/modfp254.rs @@ -19,7 +19,7 @@ pub fn eval_packed( lv: &CpuColumnsView

, yield_constr: &mut ConstraintConsumer

, ) { - let filter = lv.op.addfp254 + lv.op.mulfp254 + lv.op.subfp254; + let filter = lv.op.fp254_op; // We want to use all the same logic as the usual mod operations, but without needing to read // the modulus from the stack. We simply constrain `mem_channels[2]` to be our prime (that's @@ -36,7 +36,7 @@ pub fn eval_ext_circuit, const D: usize>( lv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { - let filter = builder.add_many_extension([lv.op.addfp254, lv.op.mulfp254, lv.op.subfp254]); + let filter = lv.op.fp254_op; // We want to use all the same logic as the usual mod operations, but without needing to read // the modulus from the stack. We simply constrain `mem_channels[2]` to be our prime (that's diff --git a/evm/src/cpu/shift.rs b/evm/src/cpu/shift.rs index a8acf5d482..a424929798 100644 --- a/evm/src/cpu/shift.rs +++ b/evm/src/cpu/shift.rs @@ -13,7 +13,7 @@ pub(crate) fn eval_packed( lv: &CpuColumnsView

, yield_constr: &mut ConstraintConsumer

, ) { - let is_shift = lv.op.shl + lv.op.shr; + let is_shift = lv.op.shift; let displacement = lv.mem_channels[0]; // holds the shift displacement d let two_exp = lv.mem_channels[2]; // holds 2^d @@ -64,7 +64,7 @@ pub(crate) fn eval_ext_circuit, const D: usize>( lv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { - let is_shift = builder.add_extension(lv.op.shl, lv.op.shr); + let is_shift = lv.op.shift; let displacement = lv.mem_channels[0]; let two_exp = lv.mem_channels[2]; diff --git a/evm/src/cpu/simple_logic/eq_iszero.rs b/evm/src/cpu/simple_logic/eq_iszero.rs index f16901f58f..7be021caa6 100644 --- a/evm/src/cpu/simple_logic/eq_iszero.rs +++ b/evm/src/cpu/simple_logic/eq_iszero.rs @@ -51,6 +51,7 @@ pub fn generate_pinv_diff(val0: U256, val1: U256, lv: &mut CpuColumnsV pub fn eval_packed( lv: &CpuColumnsView

, + nv: &CpuColumnsView

, yield_constr: &mut ConstraintConsumer

, ) { let logic = lv.general.logic(); @@ -94,9 +95,10 @@ pub fn eval_packed( yield_constr.constraint(eq_or_iszero_filter * (dot - unequal)); // Stack constraints. - stack::eval_packed_one(lv, eq_filter, EQ_STACK_BEHAVIOR.unwrap(), yield_constr); + stack::eval_packed_one(lv, nv, eq_filter, EQ_STACK_BEHAVIOR.unwrap(), yield_constr); stack::eval_packed_one( lv, + nv, iszero_filter, IS_ZERO_STACK_BEHAVIOR.unwrap(), yield_constr, @@ -106,6 +108,7 @@ pub fn eval_packed( pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, + nv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { let zero = builder.zero_extension(); @@ -173,6 +176,7 @@ pub fn eval_ext_circuit, const D: usize>( stack::eval_ext_circuit_one( builder, lv, + nv, eq_filter, EQ_STACK_BEHAVIOR.unwrap(), yield_constr, @@ -180,6 +184,7 @@ pub fn eval_ext_circuit, const D: usize>( stack::eval_ext_circuit_one( builder, lv, + nv, iszero_filter, IS_ZERO_STACK_BEHAVIOR.unwrap(), yield_constr, diff --git a/evm/src/cpu/simple_logic/mod.rs b/evm/src/cpu/simple_logic/mod.rs index 03d2dd1584..9b4e60b016 100644 --- a/evm/src/cpu/simple_logic/mod.rs +++ b/evm/src/cpu/simple_logic/mod.rs @@ -11,17 +11,19 @@ use crate::cpu::columns::CpuColumnsView; pub fn eval_packed( lv: &CpuColumnsView

, + nv: &CpuColumnsView

, yield_constr: &mut ConstraintConsumer

, ) { not::eval_packed(lv, yield_constr); - eq_iszero::eval_packed(lv, yield_constr); + eq_iszero::eval_packed(lv, nv, yield_constr); } pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, + nv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { not::eval_ext_circuit(builder, lv, yield_constr); - eq_iszero::eval_ext_circuit(builder, lv, yield_constr); + eq_iszero::eval_ext_circuit(builder, lv, nv, yield_constr); } diff --git a/evm/src/cpu/stack.rs b/evm/src/cpu/stack.rs index cfeaa1b0b5..28abf077cb 100644 --- a/evm/src/cpu/stack.rs +++ b/evm/src/cpu/stack.rs @@ -44,35 +44,31 @@ pub(crate) const JUMPI_OP: Option = Some(StackBehavior { disable_other_channels: false, }); +pub(crate) const MLOAD_GENERAL_OP: Option = Some(StackBehavior { + num_pops: 3, + pushes: true, + disable_other_channels: false, +}); + +pub(crate) const MSTORE_GENERAL_OP: Option = Some(StackBehavior { + num_pops: 4, + pushes: false, + disable_other_channels: false, +}); + // AUDITORS: If the value below is `None`, then the operation must be manually checked to ensure // that every general-purpose memory channel is either disabled or has its read flag and address // propertly constrained. The same applies when `disable_other_channels` is set to `false`, // except the first `num_pops` and the last `pushes as usize` channels have their read flag and // address constrained automatically in this file. const STACK_BEHAVIORS: OpsColumnsView> = OpsColumnsView { - add: BASIC_BINARY_OP, - mul: BASIC_BINARY_OP, - sub: BASIC_BINARY_OP, - div: BASIC_BINARY_OP, - mod_: BASIC_BINARY_OP, - addmod: BASIC_TERNARY_OP, - mulmod: BASIC_TERNARY_OP, - addfp254: BASIC_BINARY_OP, - mulfp254: BASIC_BINARY_OP, - subfp254: BASIC_BINARY_OP, - submod: BASIC_TERNARY_OP, - lt: BASIC_BINARY_OP, - gt: BASIC_BINARY_OP, + binary_op: BASIC_BINARY_OP, + ternary_op: BASIC_TERNARY_OP, + fp254_op: BASIC_BINARY_OP, eq_iszero: None, // EQ is binary, IS_ZERO is unary. logic_op: BASIC_BINARY_OP, not: BASIC_UNARY_OP, - byte: BASIC_BINARY_OP, - shl: Some(StackBehavior { - num_pops: 2, - pushes: true, - disable_other_channels: false, - }), - shr: Some(StackBehavior { + shift: Some(StackBehavior { num_pops: 2, pushes: true, disable_other_channels: false, @@ -123,16 +119,7 @@ const STACK_BEHAVIORS: OpsColumnsView> = OpsColumnsView { pushes: false, disable_other_channels: true, }), - mload_general: Some(StackBehavior { - num_pops: 3, - pushes: true, - disable_other_channels: false, - }), - mstore_general: Some(StackBehavior { - num_pops: 4, - pushes: false, - disable_other_channels: false, - }), + m_op_general: None, syscall: Some(StackBehavior { num_pops: 0, pushes: true, @@ -150,6 +137,7 @@ pub(crate) const IS_ZERO_STACK_BEHAVIOR: Option = BASIC_UNARY_OP; pub(crate) fn eval_packed_one( lv: &CpuColumnsView

, + nv: &CpuColumnsView

, filter: P, stack_behavior: StackBehavior, yield_constr: &mut ConstraintConsumer

, @@ -195,15 +183,21 @@ pub(crate) fn eval_packed_one( yield_constr.constraint(filter * channel.used); } } + + // Constrain new stack length. + let num_pops = P::Scalar::from_canonical_usize(stack_behavior.num_pops); + let push = P::Scalar::from_canonical_usize(stack_behavior.pushes as usize); + yield_constr.constraint_transition(filter * (nv.stack_len - (lv.stack_len - num_pops + push))); } pub fn eval_packed( lv: &CpuColumnsView

, + nv: &CpuColumnsView

, yield_constr: &mut ConstraintConsumer

, ) { for (op, stack_behavior) in izip!(lv.op.into_iter(), STACK_BEHAVIORS.into_iter()) { if let Some(stack_behavior) = stack_behavior { - eval_packed_one(lv, op, stack_behavior, yield_constr); + eval_packed_one(lv, nv, op, stack_behavior, yield_constr); } } } @@ -211,6 +205,7 @@ pub fn eval_packed( pub(crate) fn eval_ext_circuit_one, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, + nv: &CpuColumnsView>, filter: ExtensionTarget, stack_behavior: StackBehavior, yield_constr: &mut RecursiveConstraintConsumer, @@ -308,16 +303,27 @@ pub(crate) fn eval_ext_circuit_one, const D: usize> yield_constr.constraint(builder, constr); } } + + // Constrain new stack length. + let diff = builder.constant_extension( + F::Extension::from_canonical_usize(stack_behavior.num_pops) + - F::Extension::from_canonical_usize(stack_behavior.pushes as usize), + ); + let diff = builder.sub_extension(lv.stack_len, diff); + let diff = builder.sub_extension(nv.stack_len, diff); + let constr = builder.mul_extension(filter, diff); + yield_constr.constraint_transition(builder, constr); } pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, + nv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { for (op, stack_behavior) in izip!(lv.op.into_iter(), STACK_BEHAVIORS.into_iter()) { if let Some(stack_behavior) = stack_behavior { - eval_ext_circuit_one(builder, lv, op, stack_behavior, yield_constr); + eval_ext_circuit_one(builder, lv, nv, op, stack_behavior, yield_constr); } } } diff --git a/evm/src/cross_table_lookup.rs b/evm/src/cross_table_lookup.rs index a9b90428ca..315bf42f7b 100644 --- a/evm/src/cross_table_lookup.rs +++ b/evm/src/cross_table_lookup.rs @@ -25,6 +25,7 @@ use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; #[derive(Clone, Debug)] pub struct Column { linear_combination: Vec<(usize, F)>, + next_row_linear_combination: Vec<(usize, F)>, constant: F, } @@ -32,6 +33,7 @@ impl Column { pub fn single(c: usize) -> Self { Self { linear_combination: vec![(c, F::ONE)], + next_row_linear_combination: vec![], constant: F::ZERO, } } @@ -42,9 +44,24 @@ impl Column { cs.into_iter().map(|c| Self::single(*c.borrow())) } + pub fn single_next_row(c: usize) -> Self { + Self { + linear_combination: vec![], + next_row_linear_combination: vec![(c, F::ONE)], + constant: F::ZERO, + } + } + + pub fn singles_next_row>>( + cs: I, + ) -> impl Iterator { + cs.into_iter().map(|c| Self::single_next_row(*c.borrow())) + } + pub fn constant(constant: F) -> Self { Self { linear_combination: vec![], + next_row_linear_combination: vec![], constant, } } @@ -70,6 +87,34 @@ impl Column { ); Self { linear_combination: v, + next_row_linear_combination: vec![], + constant, + } + } + + pub fn linear_combination_and_next_row_with_constant>( + iter: I, + next_row_iter: I, + constant: F, + ) -> Self { + let v = iter.into_iter().collect::>(); + let next_row_v = next_row_iter.into_iter().collect::>(); + + assert!(!v.is_empty() || !next_row_v.is_empty()); + debug_assert_eq!( + v.iter().map(|(c, _)| c).unique().count(), + v.len(), + "Duplicate columns." + ); + debug_assert_eq!( + next_row_v.iter().map(|(c, _)| c).unique().count(), + next_row_v.len(), + "Duplicate columns." + ); + + Self { + linear_combination: v, + next_row_linear_combination: next_row_v, constant, } } @@ -106,13 +151,43 @@ impl Column { + FE::from_basefield(self.constant) } + pub fn eval_with_next(&self, v: &[P], next_v: &[P]) -> P + where + FE: FieldExtension, + P: PackedField, + { + self.linear_combination + .iter() + .map(|&(c, f)| v[c] * FE::from_basefield(f)) + .sum::

() + + self + .next_row_linear_combination + .iter() + .map(|&(c, f)| next_v[c] * FE::from_basefield(f)) + .sum::

() + + FE::from_basefield(self.constant) + } + /// Evaluate on an row of a table given in column-major form. pub fn eval_table(&self, table: &[PolynomialValues], row: usize) -> F { - self.linear_combination + let mut res = self + .linear_combination .iter() .map(|&(c, f)| table[c].values[row] * f) .sum::() - + self.constant + + self.constant; + + // If we access the next row at the last row, for sanity, we consider the next row's values to be 0. + // If CTLs are correctly written, the filter should be 0 in that case anyway. + if !self.next_row_linear_combination.is_empty() && row < table.len() - 1 { + res += self + .next_row_linear_combination + .iter() + .map(|&(c, f)| table[c].values[row + 1] * f) + .sum::(); + } + + res } pub fn eval_circuit( @@ -136,6 +211,36 @@ impl Column { let constant = builder.constant_extension(F::Extension::from_basefield(self.constant)); builder.inner_product_extension(F::ONE, constant, pairs) } + + pub fn eval_with_next_circuit( + &self, + builder: &mut CircuitBuilder, + v: &[ExtensionTarget], + next_v: &[ExtensionTarget], + ) -> ExtensionTarget + where + F: RichField + Extendable, + { + let mut pairs = self + .linear_combination + .iter() + .map(|&(c, f)| { + ( + v[c], + builder.constant_extension(F::Extension::from_basefield(f)), + ) + }) + .collect::>(); + let next_row_pairs = self.next_row_linear_combination.iter().map(|&(c, f)| { + ( + next_v[c], + builder.constant_extension(F::Extension::from_basefield(f)), + ) + }); + pairs.extend(next_row_pairs); + let constant = builder.constant_extension(F::Extension::from_basefield(self.constant)); + builder.inner_product_extension(F::ONE, constant, pairs) + } } #[derive(Clone, Debug)] @@ -276,7 +381,7 @@ fn partial_products( let mut partial_prod = F::ONE; let degree = trace[0].len(); let mut res = Vec::with_capacity(degree); - for i in 0..degree { + for i in (0..degree).rev() { let filter = if let Some(column) = filter_column { column.eval_table(trace, i) } else { @@ -293,6 +398,7 @@ fn partial_products( }; res.push(partial_prod); } + res.reverse(); res.into() } @@ -362,6 +468,10 @@ impl<'a, F: RichField + Extendable, const D: usize> } } +/// CTL Z partial products are upside down: the complete product is on the first row, and +/// the first term is on the last row. This allows the transition constraint to be: +/// Z(w) = Z(gw) * combine(w) where combine is called on the local row +/// and not the next. This enables CTLs across two rows. pub(crate) fn eval_cross_table_lookup_checks( vars: StarkEvaluationVars, ctl_vars: &[CtlCheckVars], @@ -380,27 +490,23 @@ pub(crate) fn eval_cross_table_lookup_checks P { - let evals = columns.iter().map(|c| c.eval(v)).collect::>(); - challenges.combine(evals.iter()) - }; - let filter = |v: &[P]| -> P { - if let Some(column) = filter_column { - column.eval(v) - } else { - P::ONES - } + + let evals = columns + .iter() + .map(|c| c.eval_with_next(vars.local_values, vars.next_values)) + .collect::>(); + let combined = challenges.combine(evals.iter()); + let local_filter = if let Some(column) = filter_column { + column.eval(vars.local_values) + } else { + P::ONES }; - let local_filter = filter(vars.local_values); - let next_filter = filter(vars.next_values); - let select = |filter, x| filter * x + P::ONES - filter; - - // Check value of `Z(1)` - consumer.constraint_first_row(*local_z - select(local_filter, combine(vars.local_values))); - // Check `Z(gw) = combination * Z(w)` - consumer.constraint_transition( - *local_z * select(next_filter, combine(vars.next_values)) - *next_z, - ); + let select = local_filter * combined + P::ONES - local_filter; + + // Check value of `Z(g^(n-1))` + consumer.constraint_last_row(*local_z - select); + // Check `Z(w) = combination * Z(gw)` + consumer.constraint_transition(*next_z * select - *local_z); } } @@ -493,11 +599,6 @@ pub(crate) fn eval_cross_table_lookup_checks_circuit< } else { one }; - let next_filter = if let Some(column) = filter_column { - column.eval_circuit(builder, vars.next_values) - } else { - one - }; fn select, const D: usize>( builder: &mut CircuitBuilder, filter: ExtensionTarget, @@ -508,34 +609,30 @@ pub(crate) fn eval_cross_table_lookup_checks_circuit< builder.mul_add_extension(filter, x, tmp) // filter * x + 1 - filter } - // Check value of `Z(1)` - let local_columns_eval = columns - .iter() - .map(|c| c.eval_circuit(builder, vars.local_values)) - .collect::>(); - let combined_local = challenges.combine_circuit(builder, &local_columns_eval); - let selected_local = select(builder, local_filter, combined_local); - let first_row = builder.sub_extension(*local_z, selected_local); - consumer.constraint_first_row(builder, first_row); - // Check `Z(gw) = combination * Z(w)` - let next_columns_eval = columns + let evals = columns .iter() - .map(|c| c.eval_circuit(builder, vars.next_values)) + .map(|c| c.eval_with_next_circuit(builder, vars.local_values, vars.next_values)) .collect::>(); - let combined_next = challenges.combine_circuit(builder, &next_columns_eval); - let selected_next = select(builder, next_filter, combined_next); - let transition = builder.mul_sub_extension(*local_z, selected_next, *next_z); + + let combined = challenges.combine_circuit(builder, &evals); + let select = select(builder, local_filter, combined); + + // Check value of `Z(g^(n-1))` + let last_row = builder.sub_extension(*local_z, select); + consumer.constraint_last_row(builder, last_row); + // Check `Z(w) = combination * Z(gw)` + let transition = builder.mul_sub_extension(*next_z, select, *local_z); consumer.constraint_transition(builder, transition); } } pub(crate) fn verify_cross_table_lookups, const D: usize>( cross_table_lookups: &[CrossTableLookup], - ctl_zs_lasts: [Vec; NUM_TABLES], + ctl_zs_first: [Vec; NUM_TABLES], ctl_extra_looking_products: Vec>, config: &StarkConfig, ) -> Result<()> { - let mut ctl_zs_openings = ctl_zs_lasts.iter().map(|v| v.iter()).collect::>(); + let mut ctl_zs_openings = ctl_zs_first.iter().map(|v| v.iter()).collect::>(); for ( index, CrossTableLookup { @@ -568,11 +665,11 @@ pub(crate) fn verify_cross_table_lookups, const D: pub(crate) fn verify_cross_table_lookups_circuit, const D: usize>( builder: &mut CircuitBuilder, cross_table_lookups: Vec>, - ctl_zs_lasts: [Vec; NUM_TABLES], + ctl_zs_first: [Vec; NUM_TABLES], ctl_extra_looking_products: Vec>, inner_config: &StarkConfig, ) { - let mut ctl_zs_openings = ctl_zs_lasts.iter().map(|v| v.iter()).collect::>(); + let mut ctl_zs_openings = ctl_zs_first.iter().map(|v| v.iter()).collect::>(); for CrossTableLookup { looking_tables, looked_table, diff --git a/evm/src/fixed_recursive_verifier.rs b/evm/src/fixed_recursive_verifier.rs index 8b15cde7b1..02887dd939 100644 --- a/evm/src/fixed_recursive_verifier.rs +++ b/evm/src/fixed_recursive_verifier.rs @@ -525,7 +525,7 @@ where verify_cross_table_lookups_circuit::( &mut builder, all_cross_table_lookups(), - pis.map(|p| p.ctl_zs_last), + pis.map(|p| p.ctl_zs_first), extra_looking_products, stark_config, ); diff --git a/evm/src/generation/mod.rs b/evm/src/generation/mod.rs index 13c6670ba6..35078e0784 100644 --- a/evm/src/generation/mod.rs +++ b/evm/src/generation/mod.rs @@ -16,6 +16,7 @@ use GlobalMetadata::{ use crate::all_stark::{AllStark, NUM_TABLES}; use crate::config::StarkConfig; use crate::cpu::bootstrap_kernel::generate_bootstrap_kernel; +use crate::cpu::columns::CpuColumnsView; use crate::cpu::kernel::aggregator::KERNEL; use crate::cpu::kernel::constants::global_metadata::GlobalMetadata; use crate::generation::outputs::{get_outputs, GenerationOutputs}; @@ -278,26 +279,36 @@ pub fn generate_traces, const D: usize>( fn simulate_cpu, const D: usize>( state: &mut GenerationState, ) -> anyhow::Result<()> { - let halt_pc0 = KERNEL.global_labels["halt_pc0"]; - let halt_pc1 = KERNEL.global_labels["halt_pc1"]; + let halt_pc = KERNEL.global_labels["halt"]; - let mut already_in_halt_loop = false; loop { // If we've reached the kernel's halt routine, and our trace length is a power of 2, stop. let pc = state.registers.program_counter; - let in_halt_loop = state.registers.is_kernel && (pc == halt_pc0 || pc == halt_pc1); - if in_halt_loop && !already_in_halt_loop { + let halt = state.registers.is_kernel && pc == halt_pc; + if halt { log::info!("CPU halted after {} cycles", state.traces.clock()); - } - already_in_halt_loop |= in_halt_loop; - - transition(state)?; - if already_in_halt_loop && state.traces.clock().is_power_of_two() { + // Padding + let mut row = CpuColumnsView::::default(); + row.clock = F::from_canonical_usize(state.traces.clock()); + row.context = F::from_canonical_usize(state.registers.context); + row.program_counter = F::from_canonical_usize(pc); + row.is_kernel_mode = F::ONE; + row.gas = F::from_canonical_u64(state.registers.gas_used); + row.stack_len = F::from_canonical_usize(state.registers.stack_len); + + loop { + state.traces.push_cpu(row); + row.clock += F::ONE; + if state.traces.clock().is_power_of_two() { + break; + } + } log::info!("CPU trace padded to {} cycles", state.traces.clock()); - break; + + return Ok(()); } - } - Ok(()) + transition(state)?; + } } diff --git a/evm/src/keccak_sponge/columns.rs b/evm/src/keccak_sponge/columns.rs index 44f66a5d92..431c09e092 100644 --- a/evm/src/keccak_sponge/columns.rs +++ b/evm/src/keccak_sponge/columns.rs @@ -5,11 +5,14 @@ use crate::util::{indices_arr, transmute_no_compile_time_size_checks}; pub(crate) const KECCAK_WIDTH_BYTES: usize = 200; pub(crate) const KECCAK_WIDTH_U32S: usize = KECCAK_WIDTH_BYTES / 4; +pub(crate) const KECCAK_WIDTH_MINUS_DIGEST_U32S: usize = + (KECCAK_WIDTH_BYTES - KECCAK_DIGEST_BYTES) / 4; pub(crate) const KECCAK_RATE_BYTES: usize = 136; pub(crate) const KECCAK_RATE_U32S: usize = KECCAK_RATE_BYTES / 4; pub(crate) const KECCAK_CAPACITY_BYTES: usize = 64; pub(crate) const KECCAK_CAPACITY_U32S: usize = KECCAK_CAPACITY_BYTES / 4; pub(crate) const KECCAK_DIGEST_BYTES: usize = 32; +pub(crate) const KECCAK_DIGEST_U32S: usize = KECCAK_DIGEST_BYTES / 4; #[repr(C)] #[derive(Eq, PartialEq, Debug)] @@ -52,10 +55,14 @@ pub(crate) struct KeccakSpongeColumnsView { pub xored_rate_u32s: [T; KECCAK_RATE_U32S], /// The entire state (rate + capacity) of the sponge, encoded as 32-bit chunks, after the - /// permutation is applied. - pub updated_state_u32s: [T; KECCAK_WIDTH_U32S], - - pub updated_state_bytes: [T; KECCAK_DIGEST_BYTES], + /// permutation is applied, minus the first limbs where the digest is extracted from. + /// Those missing limbs can be recomputed from their corresponding bytes stored in + /// `updated_digest_state_bytes`. + pub partial_updated_state_u32s: [T; KECCAK_WIDTH_MINUS_DIGEST_U32S], + + /// The first part of the state of the sponge, seen as bytes, after the permutation is applied. + /// This also represents the output digest of the Keccak sponge during the squeezing phase. + pub updated_digest_state_bytes: [T; KECCAK_DIGEST_BYTES], } // `u8` is guaranteed to have a `size_of` of 1. diff --git a/evm/src/keccak_sponge/keccak_sponge_stark.rs b/evm/src/keccak_sponge/keccak_sponge_stark.rs index 5f1a49ccc2..d78e965141 100644 --- a/evm/src/keccak_sponge/keccak_sponge_stark.rs +++ b/evm/src/keccak_sponge/keccak_sponge_stark.rs @@ -28,7 +28,7 @@ pub(crate) fn ctl_looked_data() -> Vec> { let mut outputs = Vec::with_capacity(8); for i in (0..8).rev() { let cur_col = Column::linear_combination( - cols.updated_state_bytes[i * 4..(i + 1) * 4] + cols.updated_digest_state_bytes[i * 4..(i + 1) * 4] .iter() .enumerate() .map(|(j, &c)| (c, F::from_canonical_u64(1 << (24 - 8 * j)))), @@ -49,15 +49,30 @@ pub(crate) fn ctl_looked_data() -> Vec> { pub(crate) fn ctl_looking_keccak() -> Vec> { let cols = KECCAK_SPONGE_COL_MAP; - Column::singles( + let mut res: Vec<_> = Column::singles( [ cols.xored_rate_u32s.as_slice(), &cols.original_capacity_u32s, - &cols.updated_state_u32s, ] .concat(), ) - .collect() + .collect(); + + // We recover the 32-bit digest limbs from their corresponding bytes, + // and then append them to the rest of the updated state limbs. + let digest_u32s = cols.updated_digest_state_bytes.chunks_exact(4).map(|c| { + Column::linear_combination( + c.iter() + .enumerate() + .map(|(i, &b)| (b, F::from_canonical_usize(1 << (8 * i)))), + ) + }); + + res.extend(digest_u32s); + + res.extend(Column::singles(&cols.partial_updated_state_u32s)); + + res } pub(crate) fn ctl_looking_memory(i: usize) -> Vec> { @@ -239,7 +254,21 @@ impl, const D: usize> KeccakSpongeStark { block.try_into().unwrap(), ); - sponge_state = row.updated_state_u32s.map(|f| f.to_canonical_u64() as u32); + sponge_state[..KECCAK_DIGEST_U32S] + .iter_mut() + .zip(row.updated_digest_state_bytes.chunks_exact(4)) + .for_each(|(s, bs)| { + *s = bs + .iter() + .enumerate() + .map(|(i, b)| (b.to_canonical_u64() as u32) << (8 * i)) + .sum(); + }); + + sponge_state[KECCAK_DIGEST_U32S..] + .iter_mut() + .zip(row.partial_updated_state_u32s) + .for_each(|(s, x)| *s = x.to_canonical_u64() as u32); rows.push(row.into()); already_absorbed_bytes += KECCAK_RATE_BYTES; @@ -357,24 +386,33 @@ impl, const D: usize> KeccakSpongeStark { row.xored_rate_u32s = xored_rate_u32s.map(F::from_canonical_u32); keccakf_u32s(&mut sponge_state); - row.updated_state_u32s = sponge_state.map(F::from_canonical_u32); - let is_final_block = row.is_final_input_len.iter().copied().sum::() == F::ONE; - if is_final_block { - for (l, &elt) in row.updated_state_u32s[..8].iter().enumerate() { + // Store all but the first `KECCAK_DIGEST_U32S` limbs in the updated state. + // Those missing limbs will be broken down into bytes and stored separately. + row.partial_updated_state_u32s.copy_from_slice( + &sponge_state[KECCAK_DIGEST_U32S..] + .iter() + .copied() + .map(|i| F::from_canonical_u32(i)) + .collect::>(), + ); + sponge_state[..KECCAK_DIGEST_U32S] + .iter() + .enumerate() + .for_each(|(l, &elt)| { let mut cur_elt = elt; (0..4).for_each(|i| { - row.updated_state_bytes[l * 4 + i] = - F::from_canonical_u32((cur_elt.to_canonical_u64() & 0xFF) as u32); - cur_elt = F::from_canonical_u64(cur_elt.to_canonical_u64() >> 8); + row.updated_digest_state_bytes[l * 4 + i] = + F::from_canonical_u32(cur_elt & 0xFF); + cur_elt >>= 8; }); - let mut s = row.updated_state_bytes[l * 4].to_canonical_u64(); + // 32-bit limb reconstruction consistency check. + let mut s = row.updated_digest_state_bytes[l * 4].to_canonical_u64(); for i in 1..4 { - s += row.updated_state_bytes[l * 4 + i].to_canonical_u64() << (8 * i); + s += row.updated_digest_state_bytes[l * 4 + i].to_canonical_u64() << (8 * i); } - assert_eq!(elt, F::from_canonical_u64(s), "not equal"); - } - } + assert_eq!(elt as u64, s, "not equal"); + }) } fn generate_padding_row(&self) -> [F; NUM_KECCAK_SPONGE_COLUMNS] { @@ -445,26 +483,39 @@ impl, const D: usize> Stark for KeccakSpongeS ); // If this is a full-input block, the next row's "before" should match our "after" state. + for (current_bytes_after, next_before) in local_values + .updated_digest_state_bytes + .chunks_exact(4) + .zip(&next_values.original_rate_u32s[..KECCAK_DIGEST_U32S]) + { + let mut current_after = current_bytes_after[0]; + for i in 1..4 { + current_after += + current_bytes_after[i] * P::from(FE::from_canonical_usize(1 << (8 * i))); + } + yield_constr + .constraint_transition(is_full_input_block * (*next_before - current_after)); + } for (¤t_after, &next_before) in local_values - .updated_state_u32s + .partial_updated_state_u32s .iter() - .zip(next_values.original_rate_u32s.iter()) + .zip(next_values.original_rate_u32s[KECCAK_DIGEST_U32S..].iter()) { yield_constr.constraint_transition(is_full_input_block * (next_before - current_after)); } for (¤t_after, &next_before) in local_values - .updated_state_u32s + .partial_updated_state_u32s .iter() - .skip(KECCAK_RATE_U32S) + .skip(KECCAK_RATE_U32S - KECCAK_DIGEST_U32S) .zip(next_values.original_capacity_u32s.iter()) { yield_constr.constraint_transition(is_full_input_block * (next_before - current_after)); } - // If this is a full-input block, the next row's already_absorbed_bytes should be ours plus 136. + // If this is a full-input block, the next row's already_absorbed_bytes should be ours plus `KECCAK_RATE_BYTES`. yield_constr.constraint_transition( is_full_input_block - * (already_absorbed_bytes + P::from(FE::from_canonical_u64(136)) + * (already_absorbed_bytes + P::from(FE::from_canonical_usize(KECCAK_RATE_BYTES)) - next_values.already_absorbed_bytes), ); @@ -481,16 +532,6 @@ impl, const D: usize> Stark for KeccakSpongeS let entry_match = offset - P::from(FE::from_canonical_usize(i)); yield_constr.constraint(is_final_len * entry_match); } - - // Adding constraints for byte columns. - for (l, &elt) in local_values.updated_state_u32s[..8].iter().enumerate() { - let mut s = local_values.updated_state_bytes[l * 4]; - for i in 1..4 { - s += local_values.updated_state_bytes[l * 4 + i] - * P::from(FE::from_canonical_usize(1 << (8 * i))); - } - yield_constr.constraint(is_final_block * (s - elt)); - } } fn eval_ext_circuit( @@ -566,19 +607,36 @@ impl, const D: usize> Stark for KeccakSpongeS yield_constr.constraint_transition(builder, constraint); // If this is a full-input block, the next row's "before" should match our "after" state. + for (current_bytes_after, next_before) in local_values + .updated_digest_state_bytes + .chunks_exact(4) + .zip(&next_values.original_rate_u32s[..KECCAK_DIGEST_U32S]) + { + let mut current_after = current_bytes_after[0]; + for i in 1..4 { + current_after = builder.mul_const_add_extension( + F::from_canonical_usize(1 << (8 * i)), + current_bytes_after[i], + current_after, + ); + } + let diff = builder.sub_extension(*next_before, current_after); + let constraint = builder.mul_extension(is_full_input_block, diff); + yield_constr.constraint_transition(builder, constraint); + } for (¤t_after, &next_before) in local_values - .updated_state_u32s + .partial_updated_state_u32s .iter() - .zip(next_values.original_rate_u32s.iter()) + .zip(next_values.original_rate_u32s[KECCAK_DIGEST_U32S..].iter()) { let diff = builder.sub_extension(next_before, current_after); let constraint = builder.mul_extension(is_full_input_block, diff); yield_constr.constraint_transition(builder, constraint); } for (¤t_after, &next_before) in local_values - .updated_state_u32s + .partial_updated_state_u32s .iter() - .skip(KECCAK_RATE_U32S) + .skip(KECCAK_RATE_U32S - KECCAK_DIGEST_U32S) .zip(next_values.original_capacity_u32s.iter()) { let diff = builder.sub_extension(next_before, current_after); @@ -586,9 +644,11 @@ impl, const D: usize> Stark for KeccakSpongeS yield_constr.constraint_transition(builder, constraint); } - // If this is a full-input block, the next row's already_absorbed_bytes should be ours plus 136. - let absorbed_bytes = - builder.add_const_extension(already_absorbed_bytes, F::from_canonical_u64(136)); + // If this is a full-input block, the next row's already_absorbed_bytes should be ours plus `KECCAK_RATE_BYTES`. + let absorbed_bytes = builder.add_const_extension( + already_absorbed_bytes, + F::from_canonical_usize(KECCAK_RATE_BYTES), + ); let absorbed_diff = builder.sub_extension(absorbed_bytes, next_values.already_absorbed_bytes); let constraint = builder.mul_extension(is_full_input_block, absorbed_diff); @@ -615,21 +675,6 @@ impl, const D: usize> Stark for KeccakSpongeS let constraint = builder.mul_extension(is_final_len, entry_match); yield_constr.constraint(builder, constraint); } - - // Adding constraints for byte columns. - for (l, &elt) in local_values.updated_state_u32s[..8].iter().enumerate() { - let mut s = local_values.updated_state_bytes[l * 4]; - for i in 1..4 { - s = builder.mul_const_add_extension( - F::from_canonical_usize(1 << (8 * i)), - local_values.updated_state_bytes[l * 4 + i], - s, - ); - } - let constraint = builder.sub_extension(s, elt); - let constraint = builder.mul_extension(is_final_block, constraint); - yield_constr.constraint(builder, constraint); - } } fn constraint_degree(&self) -> usize { @@ -698,9 +743,10 @@ mod tests { let rows = stark.generate_rows_for_op(op); assert_eq!(rows.len(), 1); let last_row: &KeccakSpongeColumnsView = rows.last().unwrap().borrow(); - let output = last_row.updated_state_u32s[..8] + let output = last_row + .updated_digest_state_bytes .iter() - .flat_map(|x| (x.to_canonical_u64() as u32).to_le_bytes()) + .map(|x| x.to_canonical_u64() as u8) .collect_vec(); assert_eq!(output, expected_output.0); diff --git a/evm/src/proof.rs b/evm/src/proof.rs index 14f22b6791..76f3af32f0 100644 --- a/evm/src/proof.rs +++ b/evm/src/proof.rs @@ -623,7 +623,7 @@ impl, C: GenericConfig, const D: usize> S } pub fn num_ctl_zs(&self) -> usize { - self.openings.ctl_zs_last.len() + self.openings.ctl_zs_first.len() } } @@ -704,8 +704,8 @@ pub struct StarkOpeningSet, const D: usize> { pub permutation_ctl_zs: Vec, /// Openings of permutations and cross-table lookups `Z` polynomials at `g * zeta`. pub permutation_ctl_zs_next: Vec, - /// Openings of cross-table lookups `Z` polynomials at `g^-1`. - pub ctl_zs_last: Vec, + /// Openings of cross-table lookups `Z` polynomials at `1`. + pub ctl_zs_first: Vec, /// Openings of quotient polynomials at `zeta`. pub quotient_polys: Vec, } @@ -717,7 +717,6 @@ impl, const D: usize> StarkOpeningSet { trace_commitment: &PolynomialBatch, permutation_ctl_zs_commitment: &PolynomialBatch, quotient_commitment: &PolynomialBatch, - degree_bits: usize, num_permutation_zs: usize, ) -> Self { let eval_commitment = |z: F::Extension, c: &PolynomialBatch| { @@ -738,10 +737,8 @@ impl, const D: usize> StarkOpeningSet { next_values: eval_commitment(zeta_next, trace_commitment), permutation_ctl_zs: eval_commitment(zeta, permutation_ctl_zs_commitment), permutation_ctl_zs_next: eval_commitment(zeta_next, permutation_ctl_zs_commitment), - ctl_zs_last: eval_commitment_base( - F::primitive_root_of_unity(degree_bits).inverse(), - permutation_ctl_zs_commitment, - )[num_permutation_zs..] + ctl_zs_first: eval_commitment_base(F::ONE, permutation_ctl_zs_commitment) + [num_permutation_zs..] .to_vec(), quotient_polys: eval_commitment(zeta, quotient_commitment), } @@ -765,10 +762,10 @@ impl, const D: usize> StarkOpeningSet { .copied() .collect_vec(), }; - debug_assert!(!self.ctl_zs_last.is_empty()); - let ctl_last_batch = FriOpeningBatch { + debug_assert!(!self.ctl_zs_first.is_empty()); + let ctl_first_batch = FriOpeningBatch { values: self - .ctl_zs_last + .ctl_zs_first .iter() .copied() .map(F::Extension::from_basefield) @@ -776,7 +773,7 @@ impl, const D: usize> StarkOpeningSet { }; FriOpenings { - batches: vec![zeta_batch, zeta_next_batch, ctl_last_batch], + batches: vec![zeta_batch, zeta_next_batch, ctl_first_batch], } } } @@ -787,7 +784,7 @@ pub struct StarkOpeningSetTarget { pub next_values: Vec>, pub permutation_ctl_zs: Vec>, pub permutation_ctl_zs_next: Vec>, - pub ctl_zs_last: Vec, + pub ctl_zs_first: Vec, pub quotient_polys: Vec>, } @@ -797,7 +794,7 @@ impl StarkOpeningSetTarget { buffer.write_target_ext_vec(&self.next_values)?; buffer.write_target_ext_vec(&self.permutation_ctl_zs)?; buffer.write_target_ext_vec(&self.permutation_ctl_zs_next)?; - buffer.write_target_vec(&self.ctl_zs_last)?; + buffer.write_target_vec(&self.ctl_zs_first)?; buffer.write_target_ext_vec(&self.quotient_polys)?; Ok(()) } @@ -807,7 +804,7 @@ impl StarkOpeningSetTarget { let next_values = buffer.read_target_ext_vec::()?; let permutation_ctl_zs = buffer.read_target_ext_vec::()?; let permutation_ctl_zs_next = buffer.read_target_ext_vec::()?; - let ctl_zs_last = buffer.read_target_vec()?; + let ctl_zs_first = buffer.read_target_vec()?; let quotient_polys = buffer.read_target_ext_vec::()?; Ok(Self { @@ -815,7 +812,7 @@ impl StarkOpeningSetTarget { next_values, permutation_ctl_zs, permutation_ctl_zs_next, - ctl_zs_last, + ctl_zs_first, quotient_polys, }) } @@ -838,10 +835,10 @@ impl StarkOpeningSetTarget { .copied() .collect_vec(), }; - debug_assert!(!self.ctl_zs_last.is_empty()); - let ctl_last_batch = FriOpeningBatchTarget { + debug_assert!(!self.ctl_zs_first.is_empty()); + let ctl_first_batch = FriOpeningBatchTarget { values: self - .ctl_zs_last + .ctl_zs_first .iter() .copied() .map(|t| t.to_ext_target(zero)) @@ -849,7 +846,7 @@ impl StarkOpeningSetTarget { }; FriOpeningsTarget { - batches: vec![zeta_batch, zeta_next_batch, ctl_last_batch], + batches: vec![zeta_batch, zeta_next_batch, ctl_first_batch], } } } diff --git a/evm/src/prover.rs b/evm/src/prover.rs index 425634943e..7b960c95a2 100644 --- a/evm/src/prover.rs +++ b/evm/src/prover.rs @@ -454,7 +454,6 @@ where trace_commitment, &permutation_ctl_zs_commitment, "ient_commitment, - degree_bits, stark.num_permutation_batches(config), ); challenger.observe_openings(&openings.to_fri_openings()); @@ -469,7 +468,7 @@ where timing, "compute openings proof", PolynomialBatch::prove_openings( - &stark.fri_instance(zeta, g, degree_bits, ctl_data.len(), config), + &stark.fri_instance(zeta, g, ctl_data.len(), config), &initial_merkle_trees, challenger, &fri_params, diff --git a/evm/src/recursive_verifier.rs b/evm/src/recursive_verifier.rs index 531669c03e..d58344bbf7 100644 --- a/evm/src/recursive_verifier.rs +++ b/evm/src/recursive_verifier.rs @@ -60,7 +60,7 @@ pub struct RecursiveAllProof< pub(crate) struct PublicInputs> { pub(crate) trace_cap: Vec>, - pub(crate) ctl_zs_last: Vec, + pub(crate) ctl_zs_first: Vec, pub(crate) ctl_challenges: GrandProductChallengeSet, pub(crate) challenger_state_before: P, pub(crate) challenger_state_after: P, @@ -86,11 +86,11 @@ impl> Public }; let challenger_state_before = P::new(&mut iter); let challenger_state_after = P::new(&mut iter); - let ctl_zs_last: Vec<_> = iter.collect(); + let ctl_zs_first: Vec<_> = iter.collect(); Self { trace_cap, - ctl_zs_last, + ctl_zs_first, ctl_challenges, challenger_state_before, challenger_state_after, @@ -151,7 +151,7 @@ impl, C: GenericConfig, const D: usize> // Verify the CTL checks. verify_cross_table_lookups::( &cross_table_lookups, - pis.map(|p| p.ctl_zs_last), + pis.map(|p| p.ctl_zs_first), extra_looking_products, inner_config, )?; @@ -351,7 +351,7 @@ where let challenger_state = challenger.compact(&mut builder); builder.register_public_inputs(challenger_state.as_ref()); - builder.register_public_inputs(&proof_target.openings.ctl_zs_last); + builder.register_public_inputs(&proof_target.openings.ctl_zs_first); verify_stark_proof_with_challenges_circuit::( &mut builder, @@ -415,7 +415,7 @@ fn verify_stark_proof_with_challenges_circuit< next_values, permutation_ctl_zs, permutation_ctl_zs_next, - ctl_zs_last, + ctl_zs_first, quotient_polys, } = &proof.openings; let vars = StarkEvaluationTargets { @@ -485,8 +485,7 @@ fn verify_stark_proof_with_challenges_circuit< builder, challenges.stark_zeta, F::primitive_root_of_unity(degree_bits), - degree_bits, - ctl_zs_last.len(), + ctl_zs_first.len(), inner_config, ); builder.verify_fri_proof::( @@ -870,7 +869,7 @@ fn add_virtual_stark_opening_set, S: Stark, c .add_virtual_extension_targets(stark.num_permutation_batches(config) + num_ctl_zs), permutation_ctl_zs_next: builder .add_virtual_extension_targets(stark.num_permutation_batches(config) + num_ctl_zs), - ctl_zs_last: builder.add_virtual_targets(num_ctl_zs), + ctl_zs_first: builder.add_virtual_targets(num_ctl_zs), quotient_polys: builder .add_virtual_extension_targets(stark.quotient_degree_factor() * num_challenges), } diff --git a/evm/src/stark.rs b/evm/src/stark.rs index 72cee0ad60..73b51ada41 100644 --- a/evm/src/stark.rs +++ b/evm/src/stark.rs @@ -84,7 +84,6 @@ pub trait Stark, const D: usize>: Sync { &self, zeta: F::Extension, g: F, - degree_bits: usize, num_ctl_zs: usize, config: &StarkConfig, ) -> FriInstanceInfo { @@ -131,13 +130,13 @@ pub trait Stark, const D: usize>: Sync { point: zeta.scalar_mul(g), polynomials: [trace_info, permutation_ctl_zs_info].concat(), }; - let ctl_last_batch = FriBatchInfo { - point: F::Extension::primitive_root_of_unity(degree_bits).inverse(), + let ctl_first_batch = FriBatchInfo { + point: F::Extension::ONE, polynomials: ctl_zs_info, }; FriInstanceInfo { oracles: vec![trace_oracle, permutation_ctl_oracle, quotient_oracle], - batches: vec![zeta_batch, zeta_next_batch, ctl_last_batch], + batches: vec![zeta_batch, zeta_next_batch, ctl_first_batch], } } @@ -147,7 +146,6 @@ pub trait Stark, const D: usize>: Sync { builder: &mut CircuitBuilder, zeta: ExtensionTarget, g: F, - degree_bits: usize, num_ctl_zs: usize, inner_config: &StarkConfig, ) -> FriInstanceInfoTarget { @@ -195,14 +193,13 @@ pub trait Stark, const D: usize>: Sync { point: zeta_next, polynomials: [trace_info, permutation_ctl_zs_info].concat(), }; - let ctl_last_batch = FriBatchInfoTarget { - point: builder - .constant_extension(F::Extension::primitive_root_of_unity(degree_bits).inverse()), + let ctl_first_batch = FriBatchInfoTarget { + point: builder.one_extension(), polynomials: ctl_zs_info, }; FriInstanceInfoTarget { oracles: vec![trace_oracle, permutation_ctl_oracle, quotient_oracle], - batches: vec![zeta_batch, zeta_next_batch, ctl_last_batch], + batches: vec![zeta_batch, zeta_next_batch, ctl_first_batch], } } diff --git a/evm/src/verifier.rs b/evm/src/verifier.rs index 297d9276a6..11f8155d29 100644 --- a/evm/src/verifier.rs +++ b/evm/src/verifier.rs @@ -137,7 +137,9 @@ where verify_cross_table_lookups::( cross_table_lookups, - all_proof.stark_proofs.map(|p| p.proof.openings.ctl_zs_last), + all_proof + .stark_proofs + .map(|p| p.proof.openings.ctl_zs_first), extra_looking_products, config, ) @@ -310,7 +312,7 @@ where next_values, permutation_ctl_zs, permutation_ctl_zs_next, - ctl_zs_last, + ctl_zs_first, quotient_polys, } = &proof.openings; let vars = StarkEvaluationVars { @@ -376,8 +378,7 @@ where &stark.fri_instance( challenges.stark_zeta, F::primitive_root_of_unity(degree_bits), - degree_bits, - ctl_zs_last.len(), + ctl_zs_first.len(), config, ), &proof.openings.to_fri_openings(), @@ -417,7 +418,7 @@ where next_values, permutation_ctl_zs, permutation_ctl_zs_next, - ctl_zs_last, + ctl_zs_first, quotient_polys, } = openings; @@ -434,7 +435,7 @@ where ensure!(next_values.len() == S::COLUMNS); ensure!(permutation_ctl_zs.len() == num_zs); ensure!(permutation_ctl_zs_next.len() == num_zs); - ensure!(ctl_zs_last.len() == num_ctl_zs); + ensure!(ctl_zs_first.len() == num_ctl_zs); ensure!(quotient_polys.len() == stark.num_quotient_polys(config)); Ok(()) diff --git a/evm/src/witness/gas.rs b/evm/src/witness/gas.rs index 3a46c04439..aa312078a5 100644 --- a/evm/src/witness/gas.rs +++ b/evm/src/witness/gas.rs @@ -25,8 +25,8 @@ pub(crate) fn gas_to_charge(op: Operation) -> u64 { BinaryArithmetic(Lt) => G_VERYLOW, BinaryArithmetic(Gt) => G_VERYLOW, BinaryArithmetic(Byte) => G_VERYLOW, - Shl => G_VERYLOW, - Shr => G_VERYLOW, + BinaryArithmetic(Shl) => G_VERYLOW, + BinaryArithmetic(Shr) => G_VERYLOW, BinaryArithmetic(AddFp254) => KERNEL_ONLY_INSTR, BinaryArithmetic(MulFp254) => KERNEL_ONLY_INSTR, BinaryArithmetic(SubFp254) => KERNEL_ONLY_INSTR, diff --git a/evm/src/witness/operation.rs b/evm/src/witness/operation.rs index b1339d0cee..8349d56dfd 100644 --- a/evm/src/witness/operation.rs +++ b/evm/src/witness/operation.rs @@ -29,8 +29,6 @@ use crate::{arithmetic, logic}; pub(crate) enum Operation { Iszero, Not, - Shl, - Shr, Syscall(u8, usize, bool), // (syscall number, minimum stack length, increases stack length) Eq, BinaryLogic(logic::Op), @@ -473,6 +471,7 @@ pub(crate) fn generate_iszero( fn append_shift( state: &mut GenerationState, mut row: CpuColumnsView, + is_shl: bool, input0: U256, input1: U256, log_in0: MemoryOp, @@ -500,10 +499,10 @@ fn append_shift( } else { U256::one() << input0 }; - let operator = if row.op.shl.is_one() { - BinaryOperator::Mul + let operator = if is_shl { + BinaryOperator::Shl } else { - BinaryOperator::Div + BinaryOperator::Shr }; let operation = arithmetic::Operation::binary(operator, input1, input0); @@ -527,7 +526,7 @@ pub(crate) fn generate_shl( } else { input1 << input0 }; - append_shift(state, row, input0, input1, log_in0, log_in1, result) + append_shift(state, row, true, input0, input1, log_in0, log_in1, result) } pub(crate) fn generate_shr( @@ -542,7 +541,7 @@ pub(crate) fn generate_shr( } else { input1 >> input0 }; - append_shift(state, row, input0, input1, log_in0, log_in1, result) + append_shift(state, row, false, input0, input1, log_in0, log_in1, result) } pub(crate) fn generate_syscall( diff --git a/evm/src/witness/transition.rs b/evm/src/witness/transition.rs index 6e279cdf7a..1418beba8d 100644 --- a/evm/src/witness/transition.rs +++ b/evm/src/witness/transition.rs @@ -70,8 +70,8 @@ fn decode(registers: RegistersState, opcode: u8) -> Result Ok(Operation::BinaryArithmetic( arithmetic::BinaryOperator::Byte, )), - (0x1b, _) => Ok(Operation::Shl), - (0x1c, _) => Ok(Operation::Shr), + (0x1b, _) => Ok(Operation::BinaryArithmetic(arithmetic::BinaryOperator::Shl)), + (0x1c, _) => Ok(Operation::BinaryArithmetic(arithmetic::BinaryOperator::Shr)), (0x1d, _) => Ok(Operation::Syscall(opcode, 2, false)), // SAR (0x20, _) => Ok(Operation::Syscall(opcode, 2, false)), // KECCAK256 (0x21, true) => Ok(Operation::KeccakGeneral), @@ -162,22 +162,13 @@ fn fill_op_flag(op: Operation, row: &mut CpuColumnsView) { Operation::Not => &mut flags.not, Operation::Syscall(_, _, _) => &mut flags.syscall, Operation::BinaryLogic(_) => &mut flags.logic_op, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::Add) => &mut flags.add, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::Mul) => &mut flags.mul, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::Sub) => &mut flags.sub, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::Div) => &mut flags.div, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::Mod) => &mut flags.mod_, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::Lt) => &mut flags.lt, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::Gt) => &mut flags.gt, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::Byte) => &mut flags.byte, - Operation::Shl => &mut flags.shl, - Operation::Shr => &mut flags.shr, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::AddFp254) => &mut flags.addfp254, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::MulFp254) => &mut flags.mulfp254, - Operation::BinaryArithmetic(arithmetic::BinaryOperator::SubFp254) => &mut flags.subfp254, - Operation::TernaryArithmetic(arithmetic::TernaryOperator::AddMod) => &mut flags.addmod, - Operation::TernaryArithmetic(arithmetic::TernaryOperator::MulMod) => &mut flags.mulmod, - Operation::TernaryArithmetic(arithmetic::TernaryOperator::SubMod) => &mut flags.submod, + Operation::BinaryArithmetic(arithmetic::BinaryOperator::AddFp254) + | Operation::BinaryArithmetic(arithmetic::BinaryOperator::MulFp254) + | Operation::BinaryArithmetic(arithmetic::BinaryOperator::SubFp254) => &mut flags.fp254_op, + Operation::BinaryArithmetic(arithmetic::BinaryOperator::Shl) + | Operation::BinaryArithmetic(arithmetic::BinaryOperator::Shr) => &mut flags.shift, + Operation::BinaryArithmetic(_) => &mut flags.binary_op, + Operation::TernaryArithmetic(_) => &mut flags.ternary_op, Operation::KeccakGeneral => &mut flags.keccak_general, Operation::ProverInput => &mut flags.prover_input, Operation::Pop => &mut flags.pop, @@ -188,8 +179,7 @@ fn fill_op_flag(op: Operation, row: &mut CpuColumnsView) { Operation::Mload32Bytes => &mut flags.mload_32bytes, Operation::Mstore32Bytes => &mut flags.mstore_32bytes, Operation::ExitKernel => &mut flags.exit_kernel, - Operation::MloadGeneral => &mut flags.mload_general, - Operation::MstoreGeneral => &mut flags.mstore_general, + Operation::MloadGeneral | Operation::MstoreGeneral => &mut flags.m_op_general, } = F::ONE; } @@ -204,8 +194,8 @@ fn perform_op( Operation::Swap(n) => generate_swap(n, state, row)?, Operation::Iszero => generate_iszero(state, row)?, Operation::Not => generate_not(state, row)?, - Operation::Shl => generate_shl(state, row)?, - Operation::Shr => generate_shr(state, row)?, + Operation::BinaryArithmetic(arithmetic::BinaryOperator::Shl) => generate_shl(state, row)?, + Operation::BinaryArithmetic(arithmetic::BinaryOperator::Shr) => generate_shr(state, row)?, Operation::Syscall(opcode, stack_values_read, stack_len_increased) => { generate_syscall(opcode, stack_values_read, stack_len_increased, state, row)? } @@ -296,7 +286,7 @@ fn log_kernel_instruction(state: &GenerationState, op: Operation) { let pc = state.registers.program_counter; let is_interesting_offset = KERNEL .offset_label(pc) - .filter(|label| !label.starts_with("halt_pc")) + .filter(|label| !label.starts_with("halt")) .is_some(); let level = if is_interesting_offset { log::Level::Debug diff --git a/evm/tests/basic_smart_contract.rs b/evm/tests/basic_smart_contract.rs index 4d0a2090b6..2cd549ff9e 100644 --- a/evm/tests/basic_smart_contract.rs +++ b/evm/tests/basic_smart_contract.rs @@ -53,7 +53,10 @@ fn test_basic_smart_contract() -> anyhow::Result<()> { let code_gas = 3 + 3 + 3; let code_hash = keccak(code); - let beneficiary_account_before = AccountRlp::default(); + let beneficiary_account_before = AccountRlp { + nonce: 1.into(), + ..AccountRlp::default() + }; let sender_account_before = AccountRlp { nonce: 5.into(), balance: eth_to_wei(100_000.into()), @@ -66,6 +69,11 @@ fn test_basic_smart_contract() -> anyhow::Result<()> { let state_trie_before = { let mut children = core::array::from_fn(|_| Node::Empty.into()); + children[beneficiary_nibbles.get_nibble(0) as usize] = Node::Leaf { + nibbles: beneficiary_nibbles.truncate_n_nibbles_front(1), + value: rlp::encode(&beneficiary_account_before).to_vec(), + } + .into(); children[sender_nibbles.get_nibble(0) as usize] = Node::Leaf { nibbles: sender_nibbles.truncate_n_nibbles_front(1), value: rlp::encode(&sender_account_before).to_vec(), @@ -90,25 +98,33 @@ fn test_basic_smart_contract() -> anyhow::Result<()> { storage_tries: vec![], }; + let txdata_gas = 2 * 16; + let gas_used = 21_000 + code_gas + txdata_gas; + // Generated using a little py-evm script. let txn = hex!("f861050a8255f094a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0648242421ba02c89eb757d9deeb1f5b3859a9d4d679951ef610ac47ad4608dc142beb1b7e313a05af7e9fbab825455d36c36c7f4cfcafbeafa9a77bdff936b52afb36d4fe4bcdd"); let value = U256::from(100u32); let block_metadata = BlockMetadata { block_beneficiary: Address::from(beneficiary), - ..BlockMetadata::default() + block_difficulty: 0x20000.into(), + block_number: 1.into(), + block_chain_id: 1.into(), + block_timestamp: 0x03e8.into(), + block_gaslimit: 0xff112233u32.into(), + block_gas_used: gas_used.into(), + block_bloom: [0.into(); 8], + block_base_fee: 0xa.into(), }; let mut contract_code = HashMap::new(); contract_code.insert(keccak(vec![]), vec![]); contract_code.insert(code_hash, code.to_vec()); - let txdata_gas = 2 * 16; - let gas_used = 21_000 + code_gas + txdata_gas; let expected_state_trie_after: HashedPartialTrie = { let beneficiary_account_after = AccountRlp { - balance: beneficiary_account_before.balance + gas_used * 10, - ..beneficiary_account_before + nonce: 1.into(), + ..AccountRlp::default() }; let sender_account_after = AccountRlp { balance: sender_account_before.balance - value - gas_used * 10, diff --git a/evm/tests/self_balance_gas_cost.rs b/evm/tests/self_balance_gas_cost.rs index d346164725..9ba1ac5497 100644 --- a/evm/tests/self_balance_gas_cost.rs +++ b/evm/tests/self_balance_gas_cost.rs @@ -5,7 +5,7 @@ use std::time::Duration; use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV}; use eth_trie_utils::nibbles::Nibbles; use eth_trie_utils::partial_trie::{HashedPartialTrie, PartialTrie}; -use ethereum_types::{Address, H256}; +use ethereum_types::{Address, H256, U256}; use hex_literal::hex; use keccak_hash::keccak; use plonky2::field::goldilocks_field::GoldilocksField; @@ -62,7 +62,10 @@ fn self_balance_gas_cost() -> anyhow::Result<()> { + 22100; // SSTORE let code_hash = keccak(code); - let beneficiary_account_before = AccountRlp::default(); + let beneficiary_account_before = AccountRlp { + nonce: 1.into(), + ..AccountRlp::default() + }; let sender_account_before = AccountRlp { balance: 0x3635c9adc5dea00000u128.into(), ..AccountRlp::default() @@ -89,10 +92,18 @@ fn self_balance_gas_cost() -> anyhow::Result<()> { let txn = hex!("f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509b"); + let gas_used = 21_000 + code_gas; + let block_metadata = BlockMetadata { block_beneficiary: Address::from(beneficiary), + block_difficulty: 0x20000.into(), + block_number: 1.into(), + block_chain_id: 1.into(), + block_timestamp: 0x03e8.into(), + block_gaslimit: 0xff112233u32.into(), + block_gas_used: gas_used.into(), + block_bloom: [0.into(); 8], block_base_fee: 0xa.into(), - ..BlockMetadata::default() }; let mut contract_code = HashMap::new(); @@ -100,9 +111,12 @@ fn self_balance_gas_cost() -> anyhow::Result<()> { contract_code.insert(code_hash, code.to_vec()); let expected_state_trie_after = { - let beneficiary_account_after = AccountRlp::default(); + let beneficiary_account_after = AccountRlp { + nonce: 1.into(), + ..AccountRlp::default() + }; let sender_account_after = AccountRlp { - balance: 999999999999999568680u128.into(), + balance: sender_account_before.balance - U256::from(gas_used) * U256::from(10), nonce: 1.into(), ..AccountRlp::default() }; @@ -132,7 +146,6 @@ fn self_balance_gas_cost() -> anyhow::Result<()> { expected_state_trie_after }; - let gas_used = 21_000 + code_gas; let receipt_0 = LegacyReceiptRlp { status: true, cum_gas_used: gas_used.into(),