From 8fe3394ec7437c40cfd0ae491bf1f7348371cf45 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Sat, 9 Nov 2024 16:29:36 -0500 Subject: [PATCH 01/41] test: add test for tenure-extend upon failed miner --- testnet/stacks-node/src/tests/signer/v0.rs | 422 ++++++++++++++++++++- 1 file changed, 406 insertions(+), 16 deletions(-) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 5ac25f97bb..79f55d7e7b 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -420,6 +420,22 @@ impl SignerTest { } } +fn verify_last_block_contains_tenure_change_tx(cause: TenureChangeCause) { + let blocks = test_observer::get_blocks(); + let tenure_change_tx = &blocks.last().unwrap(); + let transactions = tenure_change_tx["transactions"].as_array().unwrap(); + let tx = transactions.first().expect("No transactions in block"); + let raw_tx = tx["raw_tx"].as_str().unwrap(); + let tx_bytes = hex_bytes(&raw_tx[2..]).unwrap(); + let parsed = StacksTransaction::consensus_deserialize(&mut &tx_bytes[..]).unwrap(); + match &parsed.payload { + TransactionPayload::TenureChange(payload) => { + assert_eq!(payload.cause, cause); + } + _ => panic!("Expected tenure change transaction, got {parsed:?}"), + }; +} + #[test] #[ignore] /// Test that a signer can respond to an invalid block proposal @@ -5841,22 +5857,6 @@ fn continue_after_fast_block_no_sortition() { let blocks_mined1 = signer_test.running_nodes.nakamoto_blocks_mined.clone(); // Some helper functions for verifying the blocks contain their expected transactions - let verify_last_block_contains_tenure_change_tx = |cause: TenureChangeCause| { - let blocks = test_observer::get_blocks(); - let tenure_change_tx = &blocks.last().unwrap(); - let transactions = tenure_change_tx["transactions"].as_array().unwrap(); - let tx = transactions.first().expect("No transactions in block"); - let raw_tx = tx["raw_tx"].as_str().unwrap(); - let tx_bytes = 
hex_bytes(&raw_tx[2..]).unwrap(); - let parsed = StacksTransaction::consensus_deserialize(&mut &tx_bytes[..]).unwrap(); - match &parsed.payload { - TransactionPayload::TenureChange(payload) => { - assert_eq!(payload.cause, cause); - } - _ => panic!("Expected tenure change transaction, got {parsed:?}"), - }; - }; - let verify_last_block_contains_transfer_tx = || { let blocks = test_observer::get_blocks(); let tenure_change_tx = &blocks.last().unwrap(); @@ -6868,3 +6868,393 @@ fn block_commit_delay() { signer_test.shutdown(); } + +#[test] +#[ignore] +/// Test that a miner will extend its tenure after the proceeding miner fails to mine a block. +/// - Miner 1 wins a tenure and mines normally +/// - Miner 2 wins a tenure but fails to mine a block +/// - Miner 1 extends its tenure +fn tenure_extend_after_failed_miner() { + if env::var("BITCOIND_TEST") != Ok("1".into()) { + return; + } + + let num_signers = 5; + let recipient = PrincipalData::from(StacksAddress::burn_address(false)); + let sender_sk = Secp256k1PrivateKey::new(); + let sender_addr = tests::to_addr(&sender_sk); + let send_amt = 100; + let send_fee = 180; + let num_txs = 1; + let sender_nonce = 0; + + let btc_miner_1_seed = vec![1, 1, 1, 1]; + let btc_miner_2_seed = vec![2, 2, 2, 2]; + let btc_miner_1_pk = Keychain::default(btc_miner_1_seed.clone()).get_pub_key(); + let btc_miner_2_pk = Keychain::default(btc_miner_2_seed.clone()).get_pub_key(); + + let node_1_rpc = gen_random_port(); + let node_1_p2p = gen_random_port(); + let node_2_rpc = gen_random_port(); + let node_2_p2p = gen_random_port(); + + let localhost = "127.0.0.1"; + let node_1_rpc_bind = format!("{localhost}:{node_1_rpc}"); + let node_2_rpc_bind = format!("{localhost}:{node_2_rpc}"); + let mut node_2_listeners = Vec::new(); + + let max_nakamoto_tenures = 30; + + info!("------------------------- Test Setup -------------------------"); + // partition the signer set so that ~half are listening and using node 1 for RPC and events, + // and the 
rest are using node 2 + + let mut signer_test: SignerTest = SignerTest::new_with_config_modifications( + num_signers, + vec![(sender_addr, (send_amt + send_fee) * num_txs)], + |signer_config| { + let node_host = if signer_config.endpoint.port() % 2 == 0 { + &node_1_rpc_bind + } else { + &node_2_rpc_bind + }; + signer_config.node_host = node_host.to_string(); + signer_config.block_proposal_timeout = Duration::from_secs(30); + }, + |config| { + config.node.rpc_bind = format!("{localhost}:{node_1_rpc}"); + config.node.p2p_bind = format!("{localhost}:{node_1_p2p}"); + config.node.data_url = format!("http://{localhost}:{node_1_rpc}"); + config.node.p2p_address = format!("{localhost}:{node_1_p2p}"); + config.miner.wait_on_interim_blocks = Duration::from_secs(5); + config.node.pox_sync_sample_secs = 30; + config.burnchain.pox_reward_length = Some(max_nakamoto_tenures); + + config.node.seed = btc_miner_1_seed.clone(); + config.node.local_peer_seed = btc_miner_1_seed.clone(); + config.burnchain.local_mining_public_key = Some(btc_miner_1_pk.to_hex()); + config.miner.mining_key = Some(Secp256k1PrivateKey::from_seed(&[1])); + + config.events_observers.retain(|listener| { + let Ok(addr) = std::net::SocketAddr::from_str(&listener.endpoint) else { + warn!( + "Cannot parse {} to a socket, assuming it isn't a signer-listener binding", + listener.endpoint + ); + return true; + }; + if addr.port() % 2 == 0 || addr.port() == test_observer::EVENT_OBSERVER_PORT { + return true; + } + node_2_listeners.push(listener.clone()); + false + }) + }, + Some(vec![btc_miner_1_pk, btc_miner_2_pk]), + None, + ); + let conf = signer_test.running_nodes.conf.clone(); + let mut conf_node_2 = conf.clone(); + conf_node_2.node.rpc_bind = format!("{localhost}:{node_2_rpc}"); + conf_node_2.node.p2p_bind = format!("{localhost}:{node_2_p2p}"); + conf_node_2.node.data_url = format!("http://{localhost}:{node_2_rpc}"); + conf_node_2.node.p2p_address = format!("{localhost}:{node_2_p2p}"); + conf_node_2.node.seed = 
btc_miner_2_seed.clone(); + conf_node_2.burnchain.local_mining_public_key = Some(btc_miner_2_pk.to_hex()); + conf_node_2.node.local_peer_seed = btc_miner_2_seed.clone(); + conf_node_2.miner.mining_key = Some(Secp256k1PrivateKey::from_seed(&[2])); + conf_node_2.node.miner = true; + conf_node_2.events_observers.clear(); + conf_node_2.events_observers.extend(node_2_listeners); + assert!(!conf_node_2.events_observers.is_empty()); + + let node_1_sk = Secp256k1PrivateKey::from_seed(&conf.node.local_peer_seed); + let node_1_pk = StacksPublicKey::from_private(&node_1_sk); + + conf_node_2.node.working_dir = format!("{}-1", conf_node_2.node.working_dir); + + conf_node_2.node.set_bootstrap_nodes( + format!("{}@{}", &node_1_pk.to_hex(), conf.node.p2p_bind), + conf.burnchain.chain_id, + conf.burnchain.peer_version, + ); + let http_origin = format!("http://{}", &signer_test.running_nodes.conf.node.rpc_bind); + + let mut run_loop_2 = boot_nakamoto::BootRunLoop::new(conf_node_2.clone()).unwrap(); + let run_loop_stopper_2 = run_loop_2.get_termination_switch(); + let rl2_coord_channels = run_loop_2.coordinator_channels(); + let Counters { + naka_submitted_commits: rl2_commits, + naka_skip_commit_op: rl2_skip_commit_op, + .. + } = run_loop_2.counters(); + + let blocks_mined1 = signer_test.running_nodes.nakamoto_blocks_mined.clone(); + + info!("------------------------- Pause Miner 2's Block Commits -------------------------"); + + // Make sure Miner 2 cannot win a sortition at first. 
+ rl2_skip_commit_op.set(true); + + info!("------------------------- Boot to Epoch 3.0 -------------------------"); + + let run_loop_2_thread = thread::Builder::new() + .name("run_loop_2".into()) + .spawn(move || run_loop_2.start(None, 0)) + .unwrap(); + + signer_test.boot_to_epoch_3(); + + wait_for(120, || { + let Some(node_1_info) = get_chain_info_opt(&conf) else { + return Ok(false); + }; + let Some(node_2_info) = get_chain_info_opt(&conf_node_2) else { + return Ok(false); + }; + Ok(node_1_info.stacks_tip_height == node_2_info.stacks_tip_height) + }) + .expect("Timed out waiting for boostrapped node to catch up to the miner"); + + let mining_pkh_1 = Hash160::from_node_public_key(&StacksPublicKey::from_private( + &conf.miner.mining_key.unwrap(), + )); + let mining_pkh_2 = Hash160::from_node_public_key(&StacksPublicKey::from_private( + &conf_node_2.miner.mining_key.unwrap(), + )); + debug!("The mining key for miner 1 is {mining_pkh_1}"); + debug!("The mining key for miner 2 is {mining_pkh_2}"); + + info!("------------------------- Reached Epoch 3.0 -------------------------"); + + let burnchain = signer_test.running_nodes.conf.get_burnchain(); + let sortdb = burnchain.open_sortition_db(true).unwrap(); + + let get_burn_height = || { + SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) + .unwrap() + .block_height + }; + + info!("------------------------- Pause Miner 1's Block Commit -------------------------"); + // Make sure miner 1 doesn't submit any further block commits for the next tenure BEFORE mining the bitcoin block + signer_test + .running_nodes + .nakamoto_test_skip_commit_op + .set(true); + + info!("------------------------- Miner 1 Wins Normal Tenure A -------------------------"); + let blocks_processed_before_1 = blocks_mined1.load(Ordering::SeqCst); + let nmb_old_blocks = test_observer::get_blocks().len(); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + 
signer_test + .running_nodes + .btc_regtest_controller + .build_next_block(1); + + // assure we have a successful sortition that miner A won + let tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()).unwrap(); + assert!(tip.sortition); + assert_eq!(tip.miner_pk_hash.unwrap(), mining_pkh_1); + + // wait for the new block to be processed + wait_for(60, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok( + blocks_mined1.load(Ordering::SeqCst) > blocks_processed_before_1 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > nmb_old_blocks, + ) + }) + .unwrap(); + + verify_last_block_contains_tenure_change_tx(TenureChangeCause::BlockFound); + + info!("------------------------- Miner 1 Mines Another Block -------------------------"); + + let blocks_processed_before_1 = blocks_mined1.load(Ordering::SeqCst); + let nmb_old_blocks = test_observer::get_blocks().len(); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + // submit a tx so that the miner will mine an extra block + let transfer_tx = make_stacks_transfer( + &sender_sk, + sender_nonce, + send_fee, + signer_test.running_nodes.conf.burnchain.chain_id, + &recipient, + send_amt, + ); + submit_tx(&http_origin, &transfer_tx); + + // wait for the new block to be processed + wait_for(30, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok( + blocks_mined1.load(Ordering::SeqCst) > blocks_processed_before_1 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > nmb_old_blocks, + ) + }) + .expect("Timed out waiting for block to be mined and processed"); + + info!("------------------------- Pause Block Proposals -------------------------"); + TEST_MINE_STALL.lock().unwrap().replace(true); + + // Unpause 
miner 2's block commits + let rl2_commits_before = rl2_commits.load(Ordering::SeqCst); + rl2_skip_commit_op.set(false); + + // Ensure miner 2 submits a block commit before mining the bitcoin block + wait_for(30, || { + Ok(rl2_commits.load(Ordering::SeqCst) > rl2_commits_before) + }) + .unwrap(); + + info!("------------------------- Miner 2 Wins Tenure B, Mines No Blocks -------------------------"); + + let blocks_processed_before_1 = blocks_mined1.load(Ordering::SeqCst); + let nmb_old_blocks = test_observer::get_blocks().len(); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + let burn_height_before = get_burn_height(); + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 60, + || Ok(get_burn_height() > burn_height_before), + ) + .unwrap(); + + // assure we have a successful sortition that miner B won + let tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()).unwrap(); + assert!(tip.sortition); + assert_eq!(tip.miner_pk_hash.unwrap(), mining_pkh_2); + + info!("------------------------- Wait for Block Proposal Timeout -------------------------"); + sleep_ms( + signer_test.signer_configs[0] + .block_proposal_timeout + .as_millis() as u64 + * 2, + ); + + info!("------------------------- Miner 1 Extends Tenure A -------------------------"); + + // Re-enable block mining + TEST_MINE_STALL.lock().unwrap().replace(false); + + // wait for a tenure extend block from miner 1 to be processed + wait_for(60, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok( + blocks_mined1.load(Ordering::SeqCst) > blocks_processed_before_1 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > nmb_old_blocks, + ) + }) + .expect("Timed out waiting for tenure extend block to be mined and processed"); + + 
verify_last_block_contains_tenure_change_tx(TenureChangeCause::Extended); + + info!("------------------------- Miner 1 Mines Another Block -------------------------"); + + let blocks_processed_before_1 = blocks_mined1.load(Ordering::SeqCst); + let nmb_old_blocks = test_observer::get_blocks().len(); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + // submit a tx so that the miner will mine an extra block + let transfer_tx = make_stacks_transfer( + &sender_sk, + sender_nonce, + send_fee, + signer_test.running_nodes.conf.burnchain.chain_id, + &recipient, + send_amt, + ); + submit_tx(&http_origin, &transfer_tx); + + // wait for the new block to be processed + wait_for(30, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok( + blocks_mined1.load(Ordering::SeqCst) > blocks_processed_before_1 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > nmb_old_blocks, + ) + }) + .expect("Timed out waiting for block to be mined and processed"); + + // Re-enable block commits for miner 2 + let rl2_commits_before = rl2_commits.load(Ordering::SeqCst); + rl2_skip_commit_op.set(true); + + // Wait for block commit from miner 2 + wait_for(30, || { + Ok(rl2_commits.load(Ordering::SeqCst) > rl2_commits_before) + }) + .expect("Timed out waiting for block commit from miner 2"); + + info!("------------------------- Miner 2 Mines the Next Tenure -------------------------"); + + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 60, + || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok(stacks_height > stacks_height_before) + }, + ) + 
.expect("Timed out waiting for final block to be mined and processed"); + + info!("------------------------- Shutdown -------------------------"); + rl2_coord_channels + .lock() + .expect("Mutex poisoned") + .stop_chains_coordinator(); + run_loop_stopper_2.store(false, Ordering::SeqCst); + run_loop_2_thread.join().unwrap(); + signer_test.shutdown(); +} From 311ad504dd3449638a4ad5680e9a043e5fc9bc2f Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 12 Nov 2024 15:49:06 -0500 Subject: [PATCH 02/41] feat: implement tenure-extend after bad sortition winner See #5361 --- .../stacks-node/src/nakamoto_node/relayer.rs | 125 ++++- testnet/stacks-node/src/tests/signer/v0.rs | 439 +++++++++++++++++- 2 files changed, 540 insertions(+), 24 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 7c8dc6f2c5..805703264e 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -382,20 +382,50 @@ impl RelayerThread { /// parent block could be an epoch 2 block. In this case, the right thing to do is to wait for /// the next block-commit. 
pub(crate) fn choose_miner_directive( - config: &Config, - sortdb: &SortitionDB, + &self, sn: BlockSnapshot, won_sortition: bool, committed_index_hash: StacksBlockId, ) -> Option { + let (cur_stacks_tip_ch, cur_stacks_tip_bh) = + SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()) + .expect("FATAL: failed to query sortition DB for stacks tip"); + + let stacks_tip = StacksBlockId::new(&cur_stacks_tip_ch, &cur_stacks_tip_bh); + let highest_tenure_start_block_header = NakamotoChainState::get_tenure_start_block_header( + &mut self.chainstate.index_conn(), + &stacks_tip, + &cur_stacks_tip_ch, + ) + .expect( + "Relayer: Failed to get tenure-start block header for stacks tip {stacks_tip}: {e:?}", + ) + .expect("Relayer: Failed to find tenure-start block header for stacks tip {stacks_tip}"); + let directive = if sn.sortition { Some( - if won_sortition || config.get_node_config(false).mock_mining { + if won_sortition || self.config.get_node_config(false).mock_mining { + info!("Relayer: Won sortition; begin tenure."); MinerDirective::BeginTenure { parent_tenure_start: committed_index_hash, burnchain_tip: sn, } + } else if committed_index_hash + != highest_tenure_start_block_header.index_block_hash() + { + info!( + "Relayer: Winner of sortition {} did not commit to the correct parent tenure. Attempt to continue tenure.", + &sn.consensus_hash + ); + // We didn't win the sortition, but the miner that did win + // did not commit to the correct parent tenure. This means + // it will be unable to produce a valid block, so we should + // continue our tenure. + MinerDirective::ContinueTenure { + new_burn_view: sn.consensus_hash, + } } else { + info!("Relayer: Stop tenure"); MinerDirective::StopTenure }, ) @@ -404,16 +434,16 @@ impl RelayerThread { // If it's in epoch 2.x, then we must always begin a new tenure, but we can't do so // right now since this sortition has no winner. 
let (cur_stacks_tip_ch, _cur_stacks_tip_bh) = - SortitionDB::get_canonical_stacks_chain_tip_hash(sortdb.conn()) + SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()) .expect("FATAL: failed to query sortition DB for stacks tip"); let stacks_tip_sn = - SortitionDB::get_block_snapshot_consensus(sortdb.conn(), &cur_stacks_tip_ch) + SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), &cur_stacks_tip_ch) .expect("FATAL: failed to query sortiiton DB for epoch") .expect("FATAL: no sortition for canonical stacks tip"); let cur_epoch = - SortitionDB::get_stacks_epoch(sortdb.conn(), stacks_tip_sn.block_height) + SortitionDB::get_stacks_epoch(self.sortdb.conn(), stacks_tip_sn.block_height) .expect("FATAL: failed to query sortition DB for epoch") .expect("FATAL: no epoch defined for existing sortition"); @@ -424,6 +454,7 @@ impl RelayerThread { ); None } else { + info!("Relayer: No sortition; continue tenure."); Some(MinerDirective::ContinueTenure { new_burn_view: sn.consensus_hash, }) @@ -480,13 +511,7 @@ impl RelayerThread { return Ok(None); } - let directive_opt = Self::choose_miner_directive( - &self.config, - &self.sortdb, - sn, - won_sortition, - committed_index_hash, - ); + let directive_opt = self.choose_miner_directive(sn, won_sortition, committed_index_hash); Ok(directive_opt) } @@ -880,29 +905,79 @@ impl RelayerThread { }; let mining_pkh = Hash160::from_node_public_key(&StacksPublicKey::from_private(mining_key)); - // If we won the last sortition, then we should start a new tenure off of it. - let last_block_election_snapshot = { + let highest_tenure_start_block_header = NakamotoChainState::get_tenure_start_block_header( + &mut self.chainstate.index_conn(), + &canonical_stacks_tip, + &canonical_stacks_tip_ch, + ) + .map_err(|e| { + error!( + "Relayer: Failed to get tenure-start block header for stacks tip {canonical_stacks_tip}: {e:?}" + ); + NakamotoNodeError::ParentNotFound + })? 
+ .ok_or_else(|| { + error!( + "Relayer: Failed to find tenure-start block header for stacks tip {canonical_stacks_tip}" + ); + NakamotoNodeError::ParentNotFound + })?; + let highest_tenure_bhh = + BlockHeaderHash(highest_tenure_start_block_header.index_block_hash().0); + + // If we won the last good sortition, then we should extend off of it. + let last_good_block_election_snapshot = { let ih = self.sortdb.index_handle(&burn_tip.sortition_id); - ih.get_last_snapshot_with_sortition(burn_tip.block_height) + info!( + "Relayer: Getting last snapshot with sortition for {}", + burn_tip.block_height + ); + + let sn = ih + .get_last_snapshot_with_sortition(burn_tip.block_height) .map_err(|e| { error!("Relayer: failed to get last snapshot with sortition: {e:?}"); NakamotoNodeError::SnapshotNotFoundForChainTip + })?; + if sn.winning_stacks_block_hash != highest_tenure_bhh { + info!( + "Relayer: Sortition winner is not committed to the canonical tip; allowing last miner to extend"; + "burn_block_height" => burn_tip.block_height, + "consensus_hash" => %burn_tip.consensus_hash, + ); + + SortitionDB::get_block_snapshot_consensus( + self.sortdb.conn(), + &canonical_stacks_tip_ch, + ) + .map_err(|e| { + error!("Relayer: failed to get block snapshot for canonical tip: {e:?}"); + NakamotoNodeError::SnapshotNotFoundForChainTip })? + .ok_or_else(|| { + error!("Relayer: failed to get block snapshot for canonical tip"); + NakamotoNodeError::SnapshotNotFoundForChainTip + })? + } else { + sn + } }; - let won_last_sortition = last_block_election_snapshot.miner_pk_hash == Some(mining_pkh); - debug!( - "Relayer: Current burn block had no sortition. Checking for tenure continuation."; + let won_last_sortition = + last_good_block_election_snapshot.miner_pk_hash == Some(mining_pkh); + info!( + "Relayer: Current burn block had no sortition or a bad sortition. 
Checking for tenure continuation."; "won_last_sortition" => won_last_sortition, "current_mining_pkh" => %mining_pkh, - "last_block_election_snapshot.consensus_hash" => %last_block_election_snapshot.consensus_hash, - "last_block_election_snapshot.miner_pk_hash" => ?last_block_election_snapshot.miner_pk_hash, + "last_good_block_election_snapshot.consensus_hash" => %last_good_block_election_snapshot.consensus_hash, + "last_good_block_election_snapshot.miner_pk_hash" => ?last_good_block_election_snapshot.miner_pk_hash, "canonical_stacks_tip_id" => %canonical_stacks_tip, "canonical_stacks_tip_ch" => %canonical_stacks_tip_ch, "burn_view_ch" => %new_burn_view, ); if !won_last_sortition { + info!("Relayer: Did not win the last sortition. Cannot continue tenure."); return Ok(()); } @@ -924,8 +999,12 @@ impl RelayerThread { if !won_canonical_block_snapshot { debug!("Relayer: Failed to issue a tenure change payload in our last tenure. Issue a new tenure change payload."); ( - StacksBlockId(last_block_election_snapshot.winning_stacks_block_hash.0), - last_block_election_snapshot, + StacksBlockId( + last_good_block_election_snapshot + .winning_stacks_block_hash + .0, + ), + last_good_block_election_snapshot, MinerReason::EmptyTenure, ) } else { diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 79f55d7e7b..318c642a46 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -6871,7 +6871,7 @@ fn block_commit_delay() { #[test] #[ignore] -/// Test that a miner will extend its tenure after the proceeding miner fails to mine a block. +/// Test that a miner will extend its tenure after the succeding miner fails to mine a block. 
/// - Miner 1 wins a tenure and mines normally /// - Miner 2 wins a tenure but fails to mine a block /// - Miner 1 extends its tenure @@ -7258,3 +7258,440 @@ fn tenure_extend_after_failed_miner() { run_loop_2_thread.join().unwrap(); signer_test.shutdown(); } + +#[test] +#[ignore] +/// Test that a miner will extend its tenure after the succeding miner commits to the wrong block. +/// - Miner 1 wins a tenure and mines normally +/// - Miner 1 wins another tenure and mines normally, but miner 2 does not see any blocks from this tenure +/// - Miner 2 wins a tenure and is unable to mine a block +/// - Miner 1 extends its tenure and mines an additional block +/// - Miner 2 wins the next tenure and mines normally +fn tenure_extend_after_bad_commit() { + if env::var("BITCOIND_TEST") != Ok("1".into()) { + return; + } + + let num_signers = 5; + let recipient = PrincipalData::from(StacksAddress::burn_address(false)); + let sender_sk = Secp256k1PrivateKey::new(); + let sender_addr = tests::to_addr(&sender_sk); + let send_amt = 100; + let send_fee = 180; + let num_txs = 1; + let sender_nonce = 0; + + let btc_miner_1_seed = vec![1, 1, 1, 1]; + let btc_miner_2_seed = vec![2, 2, 2, 2]; + let btc_miner_1_pk = Keychain::default(btc_miner_1_seed.clone()).get_pub_key(); + let btc_miner_2_pk = Keychain::default(btc_miner_2_seed.clone()).get_pub_key(); + + let node_1_rpc = gen_random_port(); + let node_1_p2p = gen_random_port(); + let node_2_rpc = gen_random_port(); + let node_2_p2p = gen_random_port(); + + let localhost = "127.0.0.1"; + let node_1_rpc_bind = format!("{localhost}:{node_1_rpc}"); + let node_2_rpc_bind = format!("{localhost}:{node_2_rpc}"); + let mut node_2_listeners = Vec::new(); + + let max_nakamoto_tenures = 30; + + info!("------------------------- Test Setup -------------------------"); + // partition the signer set so that ~half are listening and using node 1 for RPC and events, + // and the rest are using node 2 + + let mut signer_test: SignerTest = 
SignerTest::new_with_config_modifications( + num_signers, + vec![(sender_addr, (send_amt + send_fee) * num_txs)], + |signer_config| { + let node_host = if signer_config.endpoint.port() % 2 == 0 { + &node_1_rpc_bind + } else { + &node_2_rpc_bind + }; + signer_config.node_host = node_host.to_string(); + signer_config.block_proposal_timeout = Duration::from_secs(30); + }, + |config| { + config.node.rpc_bind = format!("{localhost}:{node_1_rpc}"); + config.node.p2p_bind = format!("{localhost}:{node_1_p2p}"); + config.node.data_url = format!("http://{localhost}:{node_1_rpc}"); + config.node.p2p_address = format!("{localhost}:{node_1_p2p}"); + config.miner.wait_on_interim_blocks = Duration::from_secs(5); + config.node.pox_sync_sample_secs = 30; + config.burnchain.pox_reward_length = Some(max_nakamoto_tenures); + + config.node.seed = btc_miner_1_seed.clone(); + config.node.local_peer_seed = btc_miner_1_seed.clone(); + config.burnchain.local_mining_public_key = Some(btc_miner_1_pk.to_hex()); + config.miner.mining_key = Some(Secp256k1PrivateKey::from_seed(&[1])); + + config.events_observers.retain(|listener| { + let Ok(addr) = std::net::SocketAddr::from_str(&listener.endpoint) else { + warn!( + "Cannot parse {} to a socket, assuming it isn't a signer-listener binding", + listener.endpoint + ); + return true; + }; + if addr.port() % 2 == 0 || addr.port() == test_observer::EVENT_OBSERVER_PORT { + return true; + } + node_2_listeners.push(listener.clone()); + false + }) + }, + Some(vec![btc_miner_1_pk, btc_miner_2_pk]), + None, + ); + + let rl1_commits = signer_test.running_nodes.commits_submitted.clone(); + let rl1_skip_commit_op = signer_test + .running_nodes + .nakamoto_test_skip_commit_op + .clone(); + + let conf = signer_test.running_nodes.conf.clone(); + let mut conf_node_2 = conf.clone(); + conf_node_2.node.rpc_bind = format!("{localhost}:{node_2_rpc}"); + conf_node_2.node.p2p_bind = format!("{localhost}:{node_2_p2p}"); + conf_node_2.node.data_url = 
format!("http://{localhost}:{node_2_rpc}"); + conf_node_2.node.p2p_address = format!("{localhost}:{node_2_p2p}"); + conf_node_2.node.seed = btc_miner_2_seed.clone(); + conf_node_2.burnchain.local_mining_public_key = Some(btc_miner_2_pk.to_hex()); + conf_node_2.node.local_peer_seed = btc_miner_2_seed.clone(); + conf_node_2.miner.mining_key = Some(Secp256k1PrivateKey::from_seed(&[2])); + conf_node_2.node.miner = true; + conf_node_2.events_observers.clear(); + conf_node_2.events_observers.extend(node_2_listeners); + assert!(!conf_node_2.events_observers.is_empty()); + + let node_1_sk = Secp256k1PrivateKey::from_seed(&conf.node.local_peer_seed); + let node_1_pk = StacksPublicKey::from_private(&node_1_sk); + + conf_node_2.node.working_dir = format!("{}-1", conf_node_2.node.working_dir); + + conf_node_2.node.set_bootstrap_nodes( + format!("{}@{}", &node_1_pk.to_hex(), conf.node.p2p_bind), + conf.burnchain.chain_id, + conf.burnchain.peer_version, + ); + let http_origin = format!("http://{}", &signer_test.running_nodes.conf.node.rpc_bind); + + let mut run_loop_2 = boot_nakamoto::BootRunLoop::new(conf_node_2.clone()).unwrap(); + let run_loop_stopper_2 = run_loop_2.get_termination_switch(); + let rl2_coord_channels = run_loop_2.coordinator_channels(); + let Counters { + naka_submitted_commits: rl2_commits, + naka_skip_commit_op: rl2_skip_commit_op, + .. + } = run_loop_2.counters(); + + let blocks_mined1 = signer_test.running_nodes.nakamoto_blocks_mined.clone(); + + info!("------------------------- Pause Miner 2's Block Commits -------------------------"); + + // Make sure Miner 2 cannot win a sortition at first. 
+ rl2_skip_commit_op.set(true); + + info!("------------------------- Boot to Epoch 3.0 -------------------------"); + + let run_loop_2_thread = thread::Builder::new() + .name("run_loop_2".into()) + .spawn(move || run_loop_2.start(None, 0)) + .unwrap(); + + signer_test.boot_to_epoch_3(); + + wait_for(120, || { + let Some(node_1_info) = get_chain_info_opt(&conf) else { + return Ok(false); + }; + let Some(node_2_info) = get_chain_info_opt(&conf_node_2) else { + return Ok(false); + }; + Ok(node_1_info.stacks_tip_height == node_2_info.stacks_tip_height) + }) + .expect("Timed out waiting for boostrapped node to catch up to the miner"); + + let mining_pkh_1 = Hash160::from_node_public_key(&StacksPublicKey::from_private( + &conf.miner.mining_key.unwrap(), + )); + let mining_pkh_2 = Hash160::from_node_public_key(&StacksPublicKey::from_private( + &conf_node_2.miner.mining_key.unwrap(), + )); + debug!("The mining key for miner 1 is {mining_pkh_1}"); + debug!("The mining key for miner 2 is {mining_pkh_2}"); + + info!("------------------------- Reached Epoch 3.0 -------------------------"); + + let burnchain = signer_test.running_nodes.conf.get_burnchain(); + let sortdb = burnchain.open_sortition_db(true).unwrap(); + + let get_burn_height = || { + SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) + .unwrap() + .block_height + }; + + info!("------------------------- Pause Miner 1's Block Commit -------------------------"); + // Make sure miner 1 doesn't submit any further block commits for the next tenure BEFORE mining the bitcoin block + rl1_skip_commit_op.set(true); + + info!("------------------------- Miner 1 Wins Normal Tenure A -------------------------"); + let blocks_processed_before_1 = blocks_mined1.load(Ordering::SeqCst); + let nmb_old_blocks = test_observer::get_blocks().len(); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + signer_test + .running_nodes + 
.btc_regtest_controller + .build_next_block(1); + + // assure we have a successful sortition that miner A won + let tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()).unwrap(); + assert!(tip.sortition); + assert_eq!(tip.miner_pk_hash.unwrap(), mining_pkh_1); + + // wait for the new block to be processed + wait_for(60, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok( + blocks_mined1.load(Ordering::SeqCst) > blocks_processed_before_1 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > nmb_old_blocks, + ) + }) + .unwrap(); + + verify_last_block_contains_tenure_change_tx(TenureChangeCause::BlockFound); + + info!("------------------------- Miner 1 Mines Another Block -------------------------"); + + let blocks_processed_before_1 = blocks_mined1.load(Ordering::SeqCst); + let nmb_old_blocks = test_observer::get_blocks().len(); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + // submit a tx so that the miner will mine an extra block + let transfer_tx = make_stacks_transfer( + &sender_sk, + sender_nonce, + send_fee, + signer_test.running_nodes.conf.burnchain.chain_id, + &recipient, + send_amt, + ); + submit_tx(&http_origin, &transfer_tx); + + // wait for the new block to be processed + wait_for(30, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok( + blocks_mined1.load(Ordering::SeqCst) > blocks_processed_before_1 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > nmb_old_blocks, + ) + }) + .expect("Timed out waiting for block to be mined and processed"); + + info!("------------------------- Pause Block Proposals -------------------------"); + TEST_MINE_STALL.lock().unwrap().replace(true); + + // Unpause miner 1's block commits + let 
rl1_commits_before = rl1_commits.load(Ordering::SeqCst); + rl1_skip_commit_op.set(false); + + // Ensure miner 1 submits a block commit before mining the bitcoin block + wait_for(30, || { + Ok(rl1_commits.load(Ordering::SeqCst) > rl1_commits_before) + }) + .unwrap(); + + rl1_skip_commit_op.set(true); + + info!("------------------------- Miner 1 Wins Tenure B -------------------------"); + + let blocks_processed_before_1 = blocks_mined1.load(Ordering::SeqCst); + let nmb_old_blocks = test_observer::get_blocks().len(); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + let burn_height_before = get_burn_height(); + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 60, + || Ok(get_burn_height() > burn_height_before), + ) + .unwrap(); + + // assure we have a successful sortition that miner B won + let tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()).unwrap(); + assert!(tip.sortition); + assert_eq!(tip.miner_pk_hash.unwrap(), mining_pkh_1); + + info!("----------------- Miner 2 Submits Block Commit Before Any Blocks ------------------"); + + let rl2_commits_before = rl2_commits.load(Ordering::SeqCst); + rl2_skip_commit_op.set(false); + + wait_for(30, || { + Ok(rl2_commits.load(Ordering::SeqCst) > rl2_commits_before) + }) + .expect("Timed out waiting for block commit from miner 2"); + + // Re-pause block commits for miner 2 so that it cannot RBF its original commit + rl2_skip_commit_op.set(true); + + info!("----------------------------- Resume Block Production -----------------------------"); + + TEST_MINE_STALL.lock().unwrap().replace(false); + + wait_for(60, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok( + blocks_mined1.load(Ordering::SeqCst) > blocks_processed_before_1 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > 
nmb_old_blocks, + ) + }) + .expect("Timed out waiting for block to be mined and processed"); + + info!("--------------- Miner 2 Wins Tenure C With Old Block Commit ----------------"); + + let blocks_processed_before_1 = blocks_mined1.load(Ordering::SeqCst); + let nmb_old_blocks = test_observer::get_blocks().len(); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + let burn_height_before = get_burn_height(); + + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 60, + || Ok(get_burn_height() > burn_height_before), + ) + .expect("Timed out waiting for burn block to be processed"); + + info!("------------------------- Miner 1 Extends Tenure B -------------------------"); + + // wait for a tenure extend block from miner 1 to be processed + // (miner 2's proposals will be rejected) + wait_for(60, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok( + blocks_mined1.load(Ordering::SeqCst) > blocks_processed_before_1 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > nmb_old_blocks, + ) + }) + .expect("Timed out waiting for tenure extend block to be mined and processed"); + + verify_last_block_contains_tenure_change_tx(TenureChangeCause::Extended); + + info!("------------------------- Miner 1 Mines Another Block -------------------------"); + + let blocks_processed_before_1 = blocks_mined1.load(Ordering::SeqCst); + let nmb_old_blocks = test_observer::get_blocks().len(); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + // submit a tx so that the miner will mine an extra block + let transfer_tx = make_stacks_transfer( + &sender_sk, + sender_nonce, + send_fee, + signer_test.running_nodes.conf.burnchain.chain_id, + &recipient, + send_amt, + ); + 
submit_tx(&http_origin, &transfer_tx); + + // wait for the new block to be processed + wait_for(30, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok( + blocks_mined1.load(Ordering::SeqCst) > blocks_processed_before_1 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > nmb_old_blocks, + ) + }) + .expect("Timed out waiting for block to be mined and processed"); + + // Re-enable block commits for miner 2 + let rl2_commits_before = rl2_commits.load(Ordering::SeqCst); + rl2_skip_commit_op.set(true); + + // Wait for block commit from miner 2 + wait_for(30, || { + Ok(rl2_commits.load(Ordering::SeqCst) > rl2_commits_before) + }) + .expect("Timed out waiting for block commit from miner 2"); + + info!("------------------------- Miner 2 Mines the Next Tenure -------------------------"); + + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 60, + || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok(stacks_height > stacks_height_before) + }, + ) + .expect("Timed out waiting for final block to be mined and processed"); + + info!("------------------------- Shutdown -------------------------"); + rl2_coord_channels + .lock() + .expect("Mutex poisoned") + .stop_chains_coordinator(); + run_loop_stopper_2.store(false, Ordering::SeqCst); + run_loop_2_thread.join().unwrap(); + signer_test.shutdown(); +} From 2a4a09b13e8bd66f1f1ae1ae000dd51e45038944 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Wed, 13 Nov 2024 00:26:21 -0500 Subject: [PATCH 03/41] feat: make signer accept tenure extend on bad sortition With this change, the signer will accept a tenure extend from miner N-1 when miner N wins a sortition but commits to 
the wrong parent tenure. --- stacks-signer/src/chainstate.rs | 15 +++++++++++++++ stacks-signer/src/signerdb.rs | 9 +++++++++ testnet/stacks-node/src/tests/signer/v0.rs | 16 +++++++++------- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/stacks-signer/src/chainstate.rs b/stacks-signer/src/chainstate.rs index fa24c8b22e..aea2d93ef0 100644 --- a/stacks-signer/src/chainstate.rs +++ b/stacks-signer/src/chainstate.rs @@ -203,7 +203,22 @@ impl SortitionsView { "current_sortition_consensus_hash" => ?self.cur_sortition.consensus_hash, ); self.cur_sortition.miner_status = SortitionMinerStatus::InvalidatedBeforeFirstBlock; + } else if let Some(tip) = signer_db.get_canonical_tip()? { + // If this is a tenure change block, then the current sortition's parent tenure must be + // the canonical tip's tenure. If it's not, then the current tip may already be in this + // tenure. + if self.cur_sortition.parent_tenure_id != tip.block.header.consensus_hash + && self.cur_sortition.consensus_hash != tip.block.header.consensus_hash + { + warn!( + "Current sortition does not build off of canonical tip tenure, marking as invalid"; + "current_sortition_parent" => ?self.cur_sortition.parent_tenure_id, + "tip_consensus_hash" => ?tip.block.header.consensus_hash, + ); + self.cur_sortition.miner_status = SortitionMinerStatus::InvalidatedBeforeFirstBlock; + } } + if let Some(last_sortition) = self.last_sortition.as_mut() { if last_sortition.is_timed_out(self.config.block_proposal_timeout, signer_db)? { info!( diff --git a/stacks-signer/src/signerdb.rs b/stacks-signer/src/signerdb.rs index 1d2e650207..b65880f24e 100644 --- a/stacks-signer/src/signerdb.rs +++ b/stacks-signer/src/signerdb.rs @@ -600,6 +600,15 @@ impl SignerDb { try_deserialize(result) } + /// Return the canonical tip -- the last globally accepted block. 
+    pub fn get_canonical_tip(&self) -> Result<Option<BlockInfo>, DBError> {
+        let query = "SELECT block_info FROM blocks WHERE json_extract(block_info, '$.state') = ?1 ORDER BY stacks_height DESC LIMIT 1";
+        let args = params![&BlockState::GloballyAccepted.to_string()];
+        let result: Option<String> = query_row(&self.db, query, args)?;
+
+        try_deserialize(result)
+    }
+
     /// Insert or replace a burn block into the database
     pub fn insert_burn_block(
         &mut self,
diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs
index 318c642a46..a2eabfa875 100644
--- a/testnet/stacks-node/src/tests/signer/v0.rs
+++ b/testnet/stacks-node/src/tests/signer/v0.rs
@@ -6886,8 +6886,8 @@ fn tenure_extend_after_failed_miner() {
     let sender_addr = tests::to_addr(&sender_sk);
     let send_amt = 100;
     let send_fee = 180;
-    let num_txs = 1;
-    let sender_nonce = 0;
+    let num_txs = 2;
+    let mut sender_nonce = 0;

     let btc_miner_1_seed = vec![1, 1, 1, 1];
     let btc_miner_2_seed = vec![2, 2, 2, 2];
@@ -7100,6 +7100,7 @@ fn tenure_extend_after_failed_miner() {
         send_amt,
     );
     submit_tx(&http_origin, &transfer_tx);
+    sender_nonce += 1;

     // wait for the new block to be processed
     wait_for(30, || {
@@ -7278,8 +7279,8 @@ fn tenure_extend_after_bad_commit() {
     let sender_addr = tests::to_addr(&sender_sk);
     let send_amt = 100;
     let send_fee = 180;
-    let num_txs = 1;
-    let sender_nonce = 0;
+    let num_txs = 2;
+    let mut sender_nonce = 0;

     let btc_miner_1_seed = vec![1, 1, 1, 1];
     let btc_miner_2_seed = vec![2, 2, 2, 2];
@@ -7496,6 +7497,7 @@ fn tenure_extend_after_bad_commit() {
         send_amt,
     );
     submit_tx(&http_origin, &transfer_tx);
+    sender_nonce += 1;

     // wait for the new block to be processed
     wait_for(30, || {
@@ -7654,9 +7656,11 @@ fn tenure_extend_after_bad_commit() {
     })
     .expect("Timed out waiting for block to be mined and processed");

+    info!("------------------------- Miner 2 Mines the Next Tenure -------------------------");
+
     // Re-enable block commits for miner 2
     let rl2_commits_before =
rl2_commits.load(Ordering::SeqCst); - rl2_skip_commit_op.set(true); + rl2_skip_commit_op.set(false); // Wait for block commit from miner 2 wait_for(30, || { @@ -7664,8 +7668,6 @@ fn tenure_extend_after_bad_commit() { }) .expect("Timed out waiting for block commit from miner 2"); - info!("------------------------- Miner 2 Mines the Next Tenure -------------------------"); - let stacks_height_before = signer_test .stacks_client .get_peer_info() From a56a73c6835b03c8ad936b0cbe6642c309a91b3a Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 12:03:57 -0500 Subject: [PATCH 04/41] test: add `tenure_extend_after_bad_commit` to yaml file --- .github/workflows/bitcoin-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/bitcoin-tests.yml b/.github/workflows/bitcoin-tests.yml index 52f46fbc49..985143e9e2 100644 --- a/.github/workflows/bitcoin-tests.yml +++ b/.github/workflows/bitcoin-tests.yml @@ -126,6 +126,7 @@ jobs: - tests::signer::v0::block_commit_delay - tests::signer::v0::continue_after_fast_block_no_sortition - tests::signer::v0::block_validation_response_timeout + - tests::signer::v0::tenure_extend_after_bad_commit - tests::nakamoto_integrations::burn_ops_integration_test - tests::nakamoto_integrations::check_block_heights - tests::nakamoto_integrations::clarity_burn_state From 6438551aef193d793bef7ad6078b3209fe040c51 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 14:23:53 -0500 Subject: [PATCH 05/41] refactor: move the `StackerDBChannel` into the `EventDispatcher` The previous design using a global singleton causes trouble in testing, when we have multiple miners running in different threads of the same process. 
--- testnet/stacks-node/src/event_dispatcher.rs | 22 ++++++++--------- .../stacks-node/src/nakamoto_node/miner.rs | 1 + .../src/nakamoto_node/sign_coordinator.rs | 24 +++++++++++++++---- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/testnet/stacks-node/src/event_dispatcher.rs b/testnet/stacks-node/src/event_dispatcher.rs index 88bfc8dae7..8144cd8ec5 100644 --- a/testnet/stacks-node/src/event_dispatcher.rs +++ b/testnet/stacks-node/src/event_dispatcher.rs @@ -18,7 +18,7 @@ use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::path::PathBuf; use std::sync::mpsc::{channel, Receiver, Sender}; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; use std::thread::sleep; use std::time::Duration; @@ -107,17 +107,8 @@ pub const PATH_BLOCK_PROCESSED: &str = "new_block"; pub const PATH_ATTACHMENT_PROCESSED: &str = "attachments/new"; pub const PATH_PROPOSAL_RESPONSE: &str = "proposal_response"; -pub static STACKER_DB_CHANNEL: StackerDBChannel = StackerDBChannel::new(); - /// This struct receives StackerDB event callbacks without registering -/// over the JSON/RPC interface. To ensure that any event observer -/// uses the same channel, we use a lazy_static global for the channel (this -/// implements a singleton using STACKER_DB_CHANNEL). -/// -/// This is in place because a Nakamoto miner needs to receive -/// StackerDB events. It could either poll the database (seems like a -/// bad idea) or listen for events. Registering for RPC callbacks -/// seems bad. So instead, it uses a singleton sync channel. +/// over the JSON/RPC interface. 
pub struct StackerDBChannel {
    sender_info: Mutex<Option<InnerStackerDBChannel>>,
}
@@ -923,6 +914,8 @@ pub struct EventDispatcher {
     /// Index into `registered_observers` that will receive block proposal events (Nakamoto and
     /// later)
     block_proposal_observers_lookup: HashSet<u16>,
+    /// Channel for sending StackerDB events to the miner coordinator
+    pub stackerdb_channel: Arc<Mutex<StackerDBChannel>>,
 }

 /// This struct is used specifically for receiving proposal responses.
@@ -1115,6 +1108,7 @@ impl Default for EventDispatcher {
 impl EventDispatcher {
     pub fn new() -> EventDispatcher {
         EventDispatcher {
+            stackerdb_channel: Arc::new(Mutex::new(StackerDBChannel::new())),
             registered_observers: vec![],
             contract_events_observers_lookup: HashMap::new(),
             assets_observers_lookup: HashMap::new(),
@@ -1544,7 +1538,11 @@ impl EventDispatcher {

         let interested_observers = self.filter_observers(&self.stackerdb_observers_lookup, false);

-        let interested_receiver = STACKER_DB_CHANNEL.is_active(&contract_id);
+        let stackerdb_channel = self
+            .stackerdb_channel
+            .lock()
+            .expect("FATAL: failed to lock StackerDB channel mutex");
+        let interested_receiver = stackerdb_channel.is_active(&contract_id);
         if interested_observers.is_empty() && interested_receiver.is_none() {
             return;
         }
diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs
index 411e4f3be8..fb35d60fc3 100644
--- a/testnet/stacks-node/src/nakamoto_node/miner.rs
+++ b/testnet/stacks-node/src/nakamoto_node/miner.rs
@@ -557,6 +557,7 @@ impl BlockMinerThread {
             miner_privkey,
             &self.config,
             self.globals.should_keep_running.clone(),
+            self.event_dispatcher.stackerdb_channel.clone(),
         )
         .map_err(|e| {
             NakamotoNodeError::SigningCoordinatorFailure(format!(
diff --git a/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs
index 14eeef20b9..2b1efcbfc5 100644
--- a/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs
+++
b/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs
@@ -16,7 +16,7 @@
 use std::collections::BTreeMap;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::mpsc::Receiver;
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 use std::time::Duration;

 use hashbrown::{HashMap, HashSet};
@@ -43,7 +43,7 @@ use stacks_common::codec::StacksMessageCodec;
 use stacks_common::types::chainstate::{StacksPrivateKey, StacksPublicKey};

 use super::Error as NakamotoNodeError;
-use crate::event_dispatcher::STACKER_DB_CHANNEL;
+use crate::event_dispatcher::StackerDBChannel;
 use crate::neon::Counters;
 use crate::Config;

@@ -68,11 +68,16 @@ pub struct SignCoordinator {
     total_weight: u32,
     keep_running: Arc<AtomicBool>,
     pub next_signer_bitvec: BitVec<4000>,
+    stackerdb_channel: Arc<Mutex<StackerDBChannel>>,
 }

 impl Drop for SignCoordinator {
     fn drop(&mut self) {
-        STACKER_DB_CHANNEL.replace_receiver(self.receiver.take().expect(
+        let stackerdb_channel = self
+            .stackerdb_channel
+            .lock()
+            .expect("FATAL: failed to lock stackerdb channel");
+        stackerdb_channel.replace_receiver(self.receiver.take().expect(
             "FATAL: lost possession of the StackerDB channel before dropping SignCoordinator",
         ));
     }
@@ -87,6 +92,7 @@ impl SignCoordinator {
         message_key: StacksPrivateKey,
         config: &Config,
         keep_running: Arc<AtomicBool>,
+        stackerdb_channel: Arc<Mutex<StackerDBChannel>>,
     ) -> Result<Self, ChainstateError> {
         let is_mainnet = config.is_mainnet();
         let Some(ref reward_set_signers) = reward_set.signers else {
@@ -150,7 +156,10 @@ impl SignCoordinator {
             use crate::tests::nakamoto_integrations::TEST_SIGNING;
             if TEST_SIGNING.lock().unwrap().is_some() {
                 debug!("Short-circuiting spinning up coordinator from signer commitments. Using test signers channel.");
-                let (receiver, replaced_other) = STACKER_DB_CHANNEL.register_miner_coordinator();
+                let (receiver, replaced_other) = stackerdb_channel
+                    .lock()
+                    .expect("FATAL: failed to lock StackerDB channel")
+                    .register_miner_coordinator();
                 if replaced_other {
                     warn!("Replaced the miner/coordinator receiver of a prior thread.
Prior thread may have crashed."); } @@ -164,12 +173,16 @@ impl SignCoordinator { weight_threshold: threshold, total_weight, keep_running, + stackerdb_channel, }; return Ok(sign_coordinator); } } - let (receiver, replaced_other) = STACKER_DB_CHANNEL.register_miner_coordinator(); + let (receiver, replaced_other) = stackerdb_channel + .lock() + .expect("FATAL: failed to lock StackerDB channel") + .register_miner_coordinator(); if replaced_other { warn!("Replaced the miner/coordinator receiver of a prior thread. Prior thread may have crashed."); } @@ -184,6 +197,7 @@ impl SignCoordinator { weight_threshold: threshold, total_weight, keep_running, + stackerdb_channel, }) } From 4420c82c325d7464c5f64e60851c123195f3e0c3 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 14:24:51 -0500 Subject: [PATCH 06/41] feat: add an index for block state --- stacks-signer/src/signerdb.rs | 44 +++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/stacks-signer/src/signerdb.rs b/stacks-signer/src/signerdb.rs index b65880f24e..f4b40dbf9b 100644 --- a/stacks-signer/src/signerdb.rs +++ b/stacks-signer/src/signerdb.rs @@ -308,6 +308,10 @@ static CREATE_INDEXES_3: &str = r#" CREATE INDEX IF NOT EXISTS block_rejection_signer_addrs_on_block_signature_hash ON block_rejection_signer_addrs(signer_signature_hash); "#; +static CREATE_INDEXES_4: &str = r#" +CREATE INDEX IF NOT EXISTS blocks_state ON blocks ((json_extract(block_info, '$.state'))); +"#; + static CREATE_SIGNER_STATE_TABLE: &str = " CREATE TABLE IF NOT EXISTS signer_states ( reward_cycle INTEGER PRIMARY KEY, @@ -345,6 +349,12 @@ static DROP_SCHEMA_2: &str = " DROP TABLE IF EXISTS blocks; DROP TABLE IF EXISTS db_config;"; +static DROP_SCHEMA_3: &str = " + DROP TABLE IF EXISTS burn_blocks; + DROP TABLE IF EXISTS signer_states; + DROP TABLE IF EXISTS blocks; + DROP TABLE IF EXISTS db_config;"; + static CREATE_BLOCK_SIGNATURES_TABLE: &str = r#" CREATE TABLE IF NOT EXISTS 
block_signatures ( -- The block sighash commits to all of the stacks and burnchain state as of its parent, @@ -405,9 +415,24 @@ static SCHEMA_3: &[&str] = &[ "INSERT INTO db_config (version) VALUES (3);", ]; +static SCHEMA_4: &[&str] = &[ + DROP_SCHEMA_3, + CREATE_DB_CONFIG, + CREATE_BURN_STATE_TABLE, + CREATE_BLOCKS_TABLE_2, + CREATE_SIGNER_STATE_TABLE, + CREATE_BLOCK_SIGNATURES_TABLE, + CREATE_BLOCK_REJECTION_SIGNER_ADDRS_TABLE, + CREATE_INDEXES_1, + CREATE_INDEXES_2, + CREATE_INDEXES_3, + CREATE_INDEXES_4, + "INSERT INTO db_config (version) VALUES (4);", +]; + impl SignerDb { /// The current schema version used in this build of the signer binary. - pub const SCHEMA_VERSION: u32 = 3; + pub const SCHEMA_VERSION: u32 = 4; /// Create a new `SignerState` instance. /// This will create a new SQLite database at the given path @@ -479,6 +504,20 @@ impl SignerDb { Ok(()) } + /// Migrate from schema 3 to schema 4 + fn schema_4_migration(tx: &Transaction) -> Result<(), DBError> { + if Self::get_schema_version(tx)? >= 4 { + // no migration necessary + return Ok(()); + } + + for statement in SCHEMA_4.iter() { + tx.execute_batch(statement)?; + } + + Ok(()) + } + /// Either instantiate a new database, or migrate an existing one /// If the detected version of the existing database is 0 (i.e., a pre-migration /// logic DB, the DB will be dropped). @@ -490,7 +529,8 @@ impl SignerDb { 0 => Self::schema_1_migration(&sql_tx)?, 1 => Self::schema_2_migration(&sql_tx)?, 2 => Self::schema_3_migration(&sql_tx)?, - 3 => break, + 3 => Self::schema_4_migration(&sql_tx)?, + 4 => break, x => return Err(DBError::Other(format!( "Database schema is newer than supported by this binary. 
Expected version = {}, Database version = {x}", Self::SCHEMA_VERSION, From 3fa811620f3135c12880639809f84357212db3d3 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 14:28:12 -0500 Subject: [PATCH 07/41] docs: update changelogs --- CHANGELOG.md | 1 + stacks-signer/CHANGELOG.md | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0470bab77b..3810b805da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE - Remove the panic for reporting DB deadlocks (just error and continue waiting) - Add index to `metadata_table` in Clarity DB on `blockhash` - Add `block_commit_delay_ms` to the config file to control the time to wait after seeing a new burn block, before submitting a block commit, to allow time for the first Nakamoto block of the new tenure to be mined, allowing this miner to avoid the need to RBF the block commit. +- If the winning miner of a sortition is committed to the wrong parent tenure, the previous miner can immediately tenure extend and continue mining since the winning miner would never be able to propose a valid block. 
(#5361) ## [3.0.0.0.1] diff --git a/stacks-signer/CHANGELOG.md b/stacks-signer/CHANGELOG.md index 3183c0d5c3..2895f1161f 100644 --- a/stacks-signer/CHANGELOG.md +++ b/stacks-signer/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE ### Changed +- Allow a miner to extend their tenure immediately if the winner of the next tenure has committed to the wrong parent tenure (#5361) + ## [3.0.0.0.1.0] ### Changed From df8f240daa0fa4fe4dacc07d63df3d4d03c70ec9 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 15:31:12 -0500 Subject: [PATCH 08/41] chore: improve comment about checking the parent tenure --- stacks-signer/src/chainstate.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/stacks-signer/src/chainstate.rs b/stacks-signer/src/chainstate.rs index aea2d93ef0..f469f3a882 100644 --- a/stacks-signer/src/chainstate.rs +++ b/stacks-signer/src/chainstate.rs @@ -204,11 +204,13 @@ impl SortitionsView { ); self.cur_sortition.miner_status = SortitionMinerStatus::InvalidatedBeforeFirstBlock; } else if let Some(tip) = signer_db.get_canonical_tip()? { - // If this is a tenure change block, then the current sortition's parent tenure must be - // the canonical tip's tenure. If it's not, then the current tip may already be in this - // tenure. - if self.cur_sortition.parent_tenure_id != tip.block.header.consensus_hash - && self.cur_sortition.consensus_hash != tip.block.header.consensus_hash + // Check if the current sortition is aligned with the expected tenure: + // - If the tip is in the current tenure, we are in the process of mining this tenure. + // - If the tip is not in the current tenure, then we’re starting a new tenure, + // and the current sortition's parent tenure must match the tenure of the tip. + // - Else the miner of the current sortition has committed to an incorrect parent tenure. 
+ if self.cur_sortition.consensus_hash != tip.block.header.consensus_hash + && self.cur_sortition.parent_tenure_id != tip.block.header.consensus_hash { warn!( "Current sortition does not build off of canonical tip tenure, marking as invalid"; From 44769cf0dd15d7450a802598180d4333c6e39b1d Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 18:30:23 -0500 Subject: [PATCH 09/41] test: add unit test for `SignerDb::get_canonical_tip` --- stacks-signer/src/signerdb.rs | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/stacks-signer/src/signerdb.rs b/stacks-signer/src/signerdb.rs index f4b40dbf9b..28b771d145 100644 --- a/stacks-signer/src/signerdb.rs +++ b/stacks-signer/src/signerdb.rs @@ -1275,4 +1275,45 @@ mod tests { assert!(!block.check_state(BlockState::GloballyAccepted)); assert!(block.check_state(BlockState::GloballyRejected)); } + + #[test] + fn test_get_canonical_tip() { + let db_path = tmp_db_path(); + let mut db = SignerDb::new(db_path).expect("Failed to create signer db"); + + let (mut block_info_1, _block_proposal_1) = create_block_override(|b| { + b.block.header.miner_signature = MessageSignature([0x01; 65]); + b.block.header.chain_length = 1; + b.burn_height = 1; + }); + + let (mut block_info_2, _block_proposal_2) = create_block_override(|b| { + b.block.header.miner_signature = MessageSignature([0x02; 65]); + b.block.header.chain_length = 2; + b.burn_height = 2; + }); + + db.insert_block(&block_info_1) + .expect("Unable to insert block into db"); + db.insert_block(&block_info_2) + .expect("Unable to insert block into db"); + + assert!(db.get_canonical_tip().unwrap().is_none()); + + block_info_1 + .mark_globally_accepted() + .expect("Failed to mark block as globally accepted"); + db.insert_block(&block_info_1) + .expect("Unable to insert block into db"); + + assert_eq!(db.get_canonical_tip().unwrap().unwrap(), block_info_1); + + block_info_2 + .mark_globally_accepted() + .expect("Failed to mark block as 
globally accepted"); + db.insert_block(&block_info_2) + .expect("Unable to insert block into db"); + + assert_eq!(db.get_canonical_tip().unwrap().unwrap(), block_info_2); + } } From 4c7c5aab838a168683e66048f4485966dae4ec3e Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 18:34:01 -0500 Subject: [PATCH 10/41] chore: remove unnecessary log --- testnet/stacks-node/src/nakamoto_node/relayer.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 805703264e..0fe5efcdee 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -928,11 +928,6 @@ impl RelayerThread { // If we won the last good sortition, then we should extend off of it. let last_good_block_election_snapshot = { let ih = self.sortdb.index_handle(&burn_tip.sortition_id); - info!( - "Relayer: Getting last snapshot with sortition for {}", - burn_tip.block_height - ); - let sn = ih .get_last_snapshot_with_sortition(burn_tip.block_height) .map_err(|e| { From a9acfa048f846cf99c136c5ab01fc1b9c347ddda Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 18:39:32 -0500 Subject: [PATCH 11/41] feat: simplify signerdb migration --- stacks-signer/src/signerdb.rs | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/stacks-signer/src/signerdb.rs b/stacks-signer/src/signerdb.rs index 28b771d145..5bd201540c 100644 --- a/stacks-signer/src/signerdb.rs +++ b/stacks-signer/src/signerdb.rs @@ -416,18 +416,8 @@ static SCHEMA_3: &[&str] = &[ ]; static SCHEMA_4: &[&str] = &[ - DROP_SCHEMA_3, - CREATE_DB_CONFIG, - CREATE_BURN_STATE_TABLE, - CREATE_BLOCKS_TABLE_2, - CREATE_SIGNER_STATE_TABLE, - CREATE_BLOCK_SIGNATURES_TABLE, - CREATE_BLOCK_REJECTION_SIGNER_ADDRS_TABLE, - CREATE_INDEXES_1, - CREATE_INDEXES_2, - CREATE_INDEXES_3, CREATE_INDEXES_4, - "INSERT INTO db_config (version) VALUES (4);", + "INSERT OR 
REPLACE INTO db_config (version) VALUES (4);", ]; impl SignerDb { @@ -452,7 +442,7 @@ impl SignerDb { return Ok(0); } let result = conn - .query_row("SELECT version FROM db_config LIMIT 1", [], |row| { + .query_row("SELECT MAX(version) FROM db_config LIMIT 1", [], |row| { row.get(0) }) .optional(); From 58fda005622d95e119c0909a3ee3658b0247c79e Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 21:54:22 -0500 Subject: [PATCH 12/41] chore: cleanup unused --- stacks-signer/src/signerdb.rs | 6 ------ testnet/stacks-node/src/tests/epoch_25.rs | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/stacks-signer/src/signerdb.rs b/stacks-signer/src/signerdb.rs index 5bd201540c..ff264f3cef 100644 --- a/stacks-signer/src/signerdb.rs +++ b/stacks-signer/src/signerdb.rs @@ -349,12 +349,6 @@ static DROP_SCHEMA_2: &str = " DROP TABLE IF EXISTS blocks; DROP TABLE IF EXISTS db_config;"; -static DROP_SCHEMA_3: &str = " - DROP TABLE IF EXISTS burn_blocks; - DROP TABLE IF EXISTS signer_states; - DROP TABLE IF EXISTS blocks; - DROP TABLE IF EXISTS db_config;"; - static CREATE_BLOCK_SIGNATURES_TABLE: &str = r#" CREATE TABLE IF NOT EXISTS block_signatures ( -- The block sighash commits to all of the stacks and burnchain state as of its parent, diff --git a/testnet/stacks-node/src/tests/epoch_25.rs b/testnet/stacks-node/src/tests/epoch_25.rs index 34083fb22a..bc30e51528 100644 --- a/testnet/stacks-node/src/tests/epoch_25.rs +++ b/testnet/stacks-node/src/tests/epoch_25.rs @@ -23,7 +23,7 @@ use stacks_common::types::chainstate::StacksPrivateKey; use crate::config::InitialBalance; use crate::tests::bitcoin_regtest::BitcoinCoreController; -use crate::tests::nakamoto_integrations::{next_block_and, wait_for}; +use crate::tests::nakamoto_integrations::wait_for; use crate::tests::neon_integrations::{ get_account, get_chain_info, neon_integration_test_conf, next_block_and_wait, submit_tx, test_observer, wait_for_runloop, From 21788465fb012c57a692e523a6fb2a2dce3b5355 
Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 21:59:23 -0500 Subject: [PATCH 13/41] refactor: clean up `continue_tenure` --- .../stacks-node/src/nakamoto_node/relayer.rs | 232 +++++++++++------- 1 file changed, 138 insertions(+), 94 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 0fe5efcdee..d86dcc3fad 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -877,74 +877,113 @@ impl RelayerThread { Ok(()) } - fn continue_tenure(&mut self, new_burn_view: ConsensusHash) -> Result<(), NakamotoNodeError> { - if let Err(e) = self.stop_tenure() { - error!("Relayer: Failed to stop tenure: {e:?}"); - return Ok(()); - } - debug!("Relayer: successfully stopped tenure."); - // Check if we should undergo a tenure change to switch to the new burn view - let burn_tip = - SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), &new_burn_view) - .map_err(|e| { - error!("Relayer: failed to get block snapshot for new burn view: {e:?}"); - NakamotoNodeError::SnapshotNotFoundForChainTip - })? - .ok_or_else(|| { - error!("Relayer: failed to get block snapshot for new burn view"); - NakamotoNodeError::SnapshotNotFoundForChainTip - })?; + fn get_burn_tip_snapshot( + &self, + new_burn_view: &ConsensusHash, + ) -> Result { + SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), new_burn_view) + .map_err(|e| { + error!( + "Relayer: failed to get block snapshot for new burn view: {:?}", + e + ); + NakamotoNodeError::SnapshotNotFoundForChainTip + })? 
+ .ok_or_else(|| { + error!("Relayer: failed to get block snapshot for new burn view"); + NakamotoNodeError::SnapshotNotFoundForChainTip + }) + } - let (canonical_stacks_tip_ch, canonical_stacks_tip_bh) = + fn get_canonical_stacks_tip(&self) -> StacksBlockId { + let (ch, bh) = SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()).unwrap(); - let canonical_stacks_tip = - StacksBlockId::new(&canonical_stacks_tip_ch, &canonical_stacks_tip_bh); + StacksBlockId::new(&ch, &bh) + } + fn get_mining_key_pkh(&self) -> Option { let Some(ref mining_key) = self.config.miner.mining_key else { - return Ok(()); + return None; }; - let mining_pkh = Hash160::from_node_public_key(&StacksPublicKey::from_private(mining_key)); + Some(Hash160::from_node_public_key( + &StacksPublicKey::from_private(mining_key), + )) + } + fn get_highest_tenure_bhh( + &self, + tip_block_id: &StacksBlockId, + tip_ch: &ConsensusHash, + ) -> Result { let highest_tenure_start_block_header = NakamotoChainState::get_tenure_start_block_header( &mut self.chainstate.index_conn(), - &canonical_stacks_tip, - &canonical_stacks_tip_ch, + tip_block_id, + &tip_ch, ) .map_err(|e| { error!( - "Relayer: Failed to get tenure-start block header for stacks tip {canonical_stacks_tip}: {e:?}" + "Relayer: Failed to get tenure-start block header for stacks tip {tip_block_id}: {e:?}" ); NakamotoNodeError::ParentNotFound })? .ok_or_else(|| { error!( - "Relayer: Failed to find tenure-start block header for stacks tip {canonical_stacks_tip}" + "Relayer: Failed to find tenure-start block header for stacks tip {tip_block_id}" ); NakamotoNodeError::ParentNotFound })?; - let highest_tenure_bhh = - BlockHeaderHash(highest_tenure_start_block_header.index_block_hash().0); + Ok(BlockHeaderHash( + highest_tenure_start_block_header.index_block_hash().0, + )) + } - // If we won the last good sortition, then we should extend off of it. 
- let last_good_block_election_snapshot = { - let ih = self.sortdb.index_handle(&burn_tip.sortition_id); - let sn = ih - .get_last_snapshot_with_sortition(burn_tip.block_height) - .map_err(|e| { - error!("Relayer: failed to get last snapshot with sortition: {e:?}"); - NakamotoNodeError::SnapshotNotFoundForChainTip - })?; - if sn.winning_stacks_block_hash != highest_tenure_bhh { - info!( - "Relayer: Sortition winner is not committed to the canonical tip; allowing last miner to extend"; - "burn_block_height" => burn_tip.block_height, - "consensus_hash" => %burn_tip.consensus_hash, - ); + fn determine_tenure_type( + &self, + canonical_snapshot: BlockSnapshot, + last_snapshot: BlockSnapshot, + new_burn_view: ConsensusHash, + mining_pkh: Hash160, + ) -> (StacksBlockId, BlockSnapshot, MinerReason) { + if canonical_snapshot.miner_pk_hash != Some(mining_pkh) { + debug!("Relayer: Failed to issue a tenure change payload in our last tenure. Issue a new tenure change payload."); + ( + StacksBlockId(last_snapshot.winning_stacks_block_hash.0), + last_snapshot, + MinerReason::EmptyTenure, + ) + } else { + debug!("Relayer: Successfully issued a tenure change payload. 
Issue a continue extend from the chain tip."); + ( + self.get_canonical_stacks_tip(), + canonical_snapshot, + MinerReason::Extended { + burn_view_consensus_hash: new_burn_view, + }, + ) + } + } - SortitionDB::get_block_snapshot_consensus( - self.sortdb.conn(), - &canonical_stacks_tip_ch, - ) + fn get_last_good_block_snapshot( + &self, + burn_tip: &BlockSnapshot, + highest_tenure_bhh: &BlockHeaderHash, + canonical_stacks_tip_ch: &ConsensusHash, + ) -> Result { + let ih = self.sortdb.index_handle(&burn_tip.sortition_id); + let sn = ih + .get_last_snapshot_with_sortition(burn_tip.block_height) + .map_err(|e| { + error!("Relayer: failed to get last snapshot with sortition: {e:?}"); + NakamotoNodeError::SnapshotNotFoundForChainTip + })?; + if &sn.winning_stacks_block_hash != highest_tenure_bhh { + info!( + "Relayer: Sortition winner is not committed to the canonical tip; allowing last miner to extend"; + "burn_block_height" => burn_tip.block_height, + "consensus_hash" => %burn_tip.consensus_hash, + ); + + SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), canonical_stacks_tip_ch) .map_err(|e| { error!("Relayer: failed to get block snapshot for canonical tip: {e:?}"); NakamotoNodeError::SnapshotNotFoundForChainTip @@ -952,11 +991,47 @@ impl RelayerThread { .ok_or_else(|| { error!("Relayer: failed to get block snapshot for canonical tip"); NakamotoNodeError::SnapshotNotFoundForChainTip - })? - } else { - sn - } + }) + } else { + Ok(sn) + } + } + + fn get_block_snapshot(&self, ch: &ConsensusHash) -> Result { + SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), &ch) + .map_err(|e| { + error!("Relayer: failed to get block snapshot for canonical tip: {e:?}"); + NakamotoNodeError::SnapshotNotFoundForChainTip + })? 
+ .ok_or_else(|| { + error!("Relayer: failed to get block snapshot for canonical tip"); + NakamotoNodeError::SnapshotNotFoundForChainTip + }) + } + + fn continue_tenure(&mut self, new_burn_view: ConsensusHash) -> Result<(), NakamotoNodeError> { + if let Err(e) = self.stop_tenure() { + error!("Relayer: Failed to stop tenure: {e:?}"); + return Ok(()); + } + debug!("Relayer: successfully stopped tenure."); + + // Get the necessary snapshots and state + let burn_tip = self.get_burn_tip_snapshot(&new_burn_view)?; + let (canonical_stacks_tip_ch, canonical_stacks_tip_bh) = + SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()).unwrap(); + let canonical_stacks_tip = + StacksBlockId::new(&canonical_stacks_tip_ch, &canonical_stacks_tip_bh); + let Some(mining_pkh) = self.get_mining_key_pkh() else { + return Ok(()); }; + let highest_tenure_bhh = + self.get_highest_tenure_bhh(&canonical_stacks_tip, &canonical_stacks_tip_ch)?; + let last_good_block_election_snapshot = self.get_last_good_block_snapshot( + &burn_tip, + &highest_tenure_bhh, + &canonical_stacks_tip_ch, + )?; let won_last_sortition = last_good_block_election_snapshot.miner_pk_hash == Some(mining_pkh); @@ -976,54 +1051,23 @@ impl RelayerThread { return Ok(()); } - let canonical_block_snapshot = - SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), &canonical_stacks_tip_ch) - .map_err(|e| { - error!("Relayer: failed to get block snapshot for canonical tip: {e:?}"); - NakamotoNodeError::SnapshotNotFoundForChainTip - })? - .ok_or_else(|| { - error!("Relayer: failed to get block snapshot for canonical tip"); - NakamotoNodeError::SnapshotNotFoundForChainTip - })?; - - let won_canonical_block_snapshot = - canonical_block_snapshot.miner_pk_hash == Some(mining_pkh); - - let (parent_tenure_start, block_election_snapshot, reason) = - if !won_canonical_block_snapshot { - debug!("Relayer: Failed to issue a tenure change payload in our last tenure. 
Issue a new tenure change payload."); - ( - StacksBlockId( - last_good_block_election_snapshot - .winning_stacks_block_hash - .0, - ), - last_good_block_election_snapshot, - MinerReason::EmptyTenure, - ) - } else { - debug!("Relayer: Successfully issued a tenure change payload in its tenure. Issue a continue extend from the chain tip."); - ( - canonical_stacks_tip, //For tenure extend, we should be extending off the canonical tip - canonical_block_snapshot, - MinerReason::Extended { - burn_view_consensus_hash: new_burn_view, - }, - ) - }; - match self.start_new_tenure( + let canonical_snapshot = self.get_block_snapshot(&canonical_stacks_tip_ch)?; + let (parent_tenure_start, block_election_snapshot, reason) = self.determine_tenure_type( + canonical_snapshot, + last_good_block_election_snapshot, + new_burn_view, + mining_pkh, + ); + + if let Err(e) = self.start_new_tenure( parent_tenure_start, block_election_snapshot, burn_tip, reason, ) { - Ok(()) => { - debug!("Relayer: successfully started new tenure."); - } - Err(e) => { - error!("Relayer: Failed to start new tenure: {e:?}"); - } + error!("Relayer: Failed to start new tenure: {e:?}"); + } else { + debug!("Relayer: successfully started new tenure."); } Ok(()) } From d8140e063328fb8bb8e0f89e61831b6df69f8117 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 22:15:25 -0500 Subject: [PATCH 14/41] refactor: `last_block_contains_tenure_change_tx` --- testnet/stacks-node/src/tests/signer/v0.rs | 47 ++++++++-------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index fc26e2b482..eaf57dffd7 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -420,20 +420,25 @@ impl SignerTest { } } -fn verify_last_block_contains_tenure_change_tx(cause: TenureChangeCause) { +fn last_block_contains_tenure_change_tx(cause: TenureChangeCause) -> bool { let blocks 
= test_observer::get_blocks(); - let tenure_change_tx = &blocks.last().unwrap(); - let transactions = tenure_change_tx["transactions"].as_array().unwrap(); + let last_block = &blocks.last().unwrap(); + let transactions = last_block["transactions"].as_array().unwrap(); let tx = transactions.first().expect("No transactions in block"); let raw_tx = tx["raw_tx"].as_str().unwrap(); let tx_bytes = hex_bytes(&raw_tx[2..]).unwrap(); let parsed = StacksTransaction::consensus_deserialize(&mut &tx_bytes[..]).unwrap(); match &parsed.payload { - TransactionPayload::TenureChange(payload) => { - assert_eq!(payload.cause, cause); + TransactionPayload::TenureChange(payload) if payload.cause == cause => { + info!("Found tenure change transaction: {parsed:?}"); + true } - _ => panic!("Expected tenure change transaction, got {parsed:?}"), - }; + _ => false, + } +} + +fn verify_last_block_contains_tenure_change_tx(cause: TenureChangeCause) { + assert!(last_block_contains_tenure_change_tx(cause)); } #[test] @@ -2800,27 +2805,9 @@ fn empty_sortition_before_approval() { // Wait for a block with a tenure extend to be mined wait_for(60, || { - let blocks = test_observer::get_blocks(); - let last_block = blocks.last().unwrap(); - info!("Last block mined: {:?}", last_block); - for tx in last_block["transactions"].as_array().unwrap() { - let raw_tx = tx["raw_tx"].as_str().unwrap(); - if raw_tx == "0x00" { - continue; - } - let tx_bytes = hex_bytes(&raw_tx[2..]).unwrap(); - let parsed = StacksTransaction::consensus_deserialize(&mut &tx_bytes[..]).unwrap(); - if let TransactionPayload::TenureChange(payload) = &parsed.payload { - match payload.cause { - TenureChangeCause::Extended => { - info!("Found tenure extend block"); - return Ok(true); - } - TenureChangeCause::BlockFound => {} - } - }; - } - Ok(false) + Ok(last_block_contains_tenure_change_tx( + TenureChangeCause::Extended, + )) }) .expect("Timed out waiting for tenure extend"); @@ -5858,8 +5845,8 @@ fn 
continue_after_fast_block_no_sortition() { // Some helper functions for verifying the blocks contain their expected transactions let verify_last_block_contains_transfer_tx = || { let blocks = test_observer::get_blocks(); - let tenure_change_tx = &blocks.last().unwrap(); - let transactions = tenure_change_tx["transactions"].as_array().unwrap(); + let last_block = &blocks.last().unwrap(); + let transactions = last_block["transactions"].as_array().unwrap(); let tx = transactions.first().expect("No transactions in block"); let raw_tx = tx["raw_tx"].as_str().unwrap(); let tx_bytes = hex_bytes(&raw_tx[2..]).unwrap(); From 54c88c6a253f40e85755c36f57e1999ea47ca4f7 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 14 Nov 2024 22:24:33 -0500 Subject: [PATCH 15/41] test: additional checks requested in PR review --- testnet/stacks-node/src/tests/signer/v0.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index eaf57dffd7..5f5890183b 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -7702,7 +7702,7 @@ fn tenure_extend_after_bad_commit() { ) .unwrap(); - // assure we have a successful sortition that miner B won + // assure we have a successful sortition that miner 1 won let tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()).unwrap(); assert!(tip.sortition); assert_eq!(tip.miner_pk_hash.unwrap(), mining_pkh_1); @@ -7756,6 +7756,11 @@ fn tenure_extend_after_bad_commit() { ) .expect("Timed out waiting for burn block to be processed"); + // assure we have a successful sortition that miner 2 won + let tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()).unwrap(); + assert!(tip.sortition); + assert_eq!(tip.miner_pk_hash.unwrap(), mining_pkh_2); + info!("------------------------- Miner 1 Extends Tenure B -------------------------"); // wait for a tenure extend block from miner 1 to be 
processed @@ -7844,6 +7849,12 @@ fn tenure_extend_after_bad_commit() { ) .expect("Timed out waiting for final block to be mined and processed"); + // assure we have a successful sortition that miner 2 won and it had a block found tenure change + let tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()).unwrap(); + assert!(tip.sortition); + assert_eq!(tip.miner_pk_hash.unwrap(), mining_pkh_2); + verify_last_block_contains_tenure_change_tx(TenureChangeCause::BlockFound); + info!("------------------------- Shutdown -------------------------"); rl2_coord_channels .lock() From ba2faf7035b9e3c472d21685121c75f139f02dd2 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Fri, 15 Nov 2024 09:16:01 -0500 Subject: [PATCH 16/41] feat: add ability to disable tenure-extend for tests This is useful when checking the behavior during forking. --- testnet/stacks-node/src/nakamoto_node/miner.rs | 2 ++ .../stacks-node/src/nakamoto_node/relayer.rs | 8 +++++++- testnet/stacks-node/src/tests/signer/v0.rs | 18 +++++++++++++++++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index fb35d60fc3..877eab88a1 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -62,6 +62,8 @@ pub static TEST_BROADCAST_STALL: std::sync::Mutex> = std::sync::Mut pub static TEST_BLOCK_ANNOUNCE_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); #[cfg(test)] pub static TEST_SKIP_P2P_BROADCAST: std::sync::Mutex> = std::sync::Mutex::new(None); +#[cfg(test)] +pub static TEST_NO_TENURE_EXTEND: std::sync::Mutex> = std::sync::Mutex::new(None); /// If the miner was interrupted while mining a block, how long should the /// miner thread sleep before trying again? 
diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index d86dcc3fad..5aef6f2612 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -58,7 +58,7 @@ use super::{ BLOCK_PROCESSOR_STACK_SIZE, }; use crate::burnchains::BurnchainController; -use crate::nakamoto_node::miner::{BlockMinerThread, MinerDirective}; +use crate::nakamoto_node::miner::{BlockMinerThread, MinerDirective, TEST_NO_TENURE_EXTEND}; use crate::neon_node::{ fault_injection_skip_mining, open_chainstate_with_faults, LeaderKeyRegistrationState, }; @@ -1016,6 +1016,12 @@ impl RelayerThread { } debug!("Relayer: successfully stopped tenure."); + #[cfg(test)] + if *TEST_NO_TENURE_EXTEND.lock().unwrap() == Some(true) { + info!("Relayer: TEST_NO_TENURE_EXTEND is set; skipping tenure extension."); + return Ok(()); + } + // Get the necessary snapshots and state let burn_tip = self.get_burn_tip_snapshot(&new_burn_view)?; let (canonical_stacks_tip_ch, canonical_stacks_tip_bh) = diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 5f5890183b..72c8e29edb 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -67,7 +67,7 @@ use super::SignerTest; use crate::config::{EventKeyType, EventObserverConfig}; use crate::event_dispatcher::MinedNakamotoBlockEvent; use crate::nakamoto_node::miner::{ - TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, + TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_NO_TENURE_EXTEND, }; use crate::nakamoto_node::sign_coordinator::TEST_IGNORE_SIGNERS; use crate::neon::Counters; @@ -952,6 +952,10 @@ fn forked_tenure_testing( sleep_ms(1000); info!("------------------------- Reached Epoch 3.0 -------------------------"); + // Disable tenure extend so that miners will not tenure extend when the + // test is checking for fork 
behavior. + TEST_NO_TENURE_EXTEND.lock().unwrap().replace(true); + let naka_conf = signer_test.running_nodes.conf.clone(); let burnchain = naka_conf.get_burnchain(); let sortdb = burnchain.open_sortition_db(true).unwrap(); @@ -1283,6 +1287,10 @@ fn bitcoind_forking_test() { let pre_epoch_3_nonce = get_account(&http_origin, &miner_address).nonce; let pre_fork_tenures = 10; + // Disable tenure extend so that miners will not tenure extend when the + // test is checking for fork behavior. + TEST_NO_TENURE_EXTEND.lock().unwrap().replace(true); + for i in 0..pre_fork_tenures { info!("Mining pre-fork tenure {} of {pre_fork_tenures}", i + 1); signer_test.mine_nakamoto_block(Duration::from_secs(30)); @@ -1924,6 +1932,10 @@ fn miner_forking() { "RL1 did not win the sortition" ); + // Disable tenure extend so that miners will not tenure extend when the + // test is checking for fork behavior. + TEST_NO_TENURE_EXTEND.lock().unwrap().replace(true); + info!( "------------------------- RL2 Wins Sortition With Outdated View -------------------------" ); @@ -4261,6 +4273,10 @@ fn partial_tenure_fork() { info!("------------------------- Reached Epoch 3.0 -------------------------"); + // Disable tenure extend so that miners will not tenure extend when the + // test is checking for fork behavior. 
+ TEST_NO_TENURE_EXTEND.lock().unwrap().replace(true); + // due to the random nature of mining sortitions, the way this test is structured // is that we keep track of how many tenures each miner produced, and once enough sortitions // have been produced such that each miner has produced 3 tenures, we stop and check the From 965f58ba294f965741079c83d69c974310df2edd Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Fri, 15 Nov 2024 09:47:29 -0500 Subject: [PATCH 17/41] fix: fix import for test-only feature --- testnet/stacks-node/src/nakamoto_node/relayer.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 5aef6f2612..ef73c252c4 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -58,7 +58,9 @@ use super::{ BLOCK_PROCESSOR_STACK_SIZE, }; use crate::burnchains::BurnchainController; -use crate::nakamoto_node::miner::{BlockMinerThread, MinerDirective, TEST_NO_TENURE_EXTEND}; +#[cfg(test)] +use crate::nakamoto_node::miner::TEST_NO_TENURE_EXTEND; +use crate::nakamoto_node::miner::{BlockMinerThread, MinerDirective}; use crate::neon_node::{ fault_injection_skip_mining, open_chainstate_with_faults, LeaderKeyRegistrationState, }; From cd5e7cc7b084151ea796b34e10ba79cb743b72f1 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Fri, 15 Nov 2024 09:56:44 -0500 Subject: [PATCH 18/41] refactor: add comments and improve naming --- .../stacks-node/src/nakamoto_node/relayer.rs | 38 +++++++++++++++---- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index ef73c252c4..c8228e5375 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -879,11 +879,12 @@ impl RelayerThread { Ok(()) } - fn get_burn_tip_snapshot( + /// Get a 
snapshot for an existing burn chain block given its consensus hash. + fn get_block_snapshot_consensus( &self, - new_burn_view: &ConsensusHash, + ch: &ConsensusHash, ) -> Result { - SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), new_burn_view) + SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), ch) .map_err(|e| { error!( "Relayer: failed to get block snapshot for new burn view: {:?}", @@ -897,12 +898,14 @@ impl RelayerThread { }) } + /// Get the Stacks block ID for the canonical tip. fn get_canonical_stacks_tip(&self) -> StacksBlockId { let (ch, bh) = SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()).unwrap(); StacksBlockId::new(&ch, &bh) } + /// Get the public key hash for the mining key. fn get_mining_key_pkh(&self) -> Option { let Some(ref mining_key) = self.config.miner.mining_key else { return None; @@ -912,15 +915,22 @@ impl RelayerThread { )) } - fn get_highest_tenure_bhh( + /// Get the tenure-start block header hash of a given consensus hash. + /// For Nakamoto blocks, this is the first block in the tenure identified by the consensus + /// hash. + /// For epoch2 blocks, this is simply the block whose winning sortition happened in the + /// sortition identified by the consensus hash. + /// + /// `tip_block_id` is the chain tip from which to perform the query. + fn get_tenure_bhh( &self, tip_block_id: &StacksBlockId, - tip_ch: &ConsensusHash, + ch: &ConsensusHash, ) -> Result { let highest_tenure_start_block_header = NakamotoChainState::get_tenure_start_block_header( &mut self.chainstate.index_conn(), tip_block_id, - &tip_ch, + &ch, ) .map_err(|e| { error!( @@ -939,6 +949,8 @@ impl RelayerThread { )) } + /// Determine the type of tenure change to issue based on whether this + /// miner successfully issued a tenure change in the last tenure. 
fn determine_tenure_type( &self, canonical_snapshot: BlockSnapshot, @@ -965,6 +977,9 @@ impl RelayerThread { } } + /// Get the block snapshot of the most recent sortition that committed to + /// the canonical tip. If the latest sortition did not commit to the + /// canonical tip, then the tip's tenure is the last good sortition. fn get_last_good_block_snapshot( &self, burn_tip: &BlockSnapshot, @@ -1011,6 +1026,13 @@ impl RelayerThread { }) } + /// Attempt to continue a miner's tenure into the next burn block. + /// This is allowed if the miner won the last good sortition and one of the + /// following conditions is met: + /// - There was no sortition in the latest burn block + /// - The winner of the latest sortition did not commit to the canonical tip + /// - The winner of the latest sortition did not mine any blocks within the + /// timeout period (not yet implemented) fn continue_tenure(&mut self, new_burn_view: ConsensusHash) -> Result<(), NakamotoNodeError> { if let Err(e) = self.stop_tenure() { error!("Relayer: Failed to stop tenure: {e:?}"); @@ -1025,7 +1047,7 @@ impl RelayerThread { } // Get the necessary snapshots and state - let burn_tip = self.get_burn_tip_snapshot(&new_burn_view)?; + let burn_tip = self.get_block_snapshot_consensus(&new_burn_view)?; let (canonical_stacks_tip_ch, canonical_stacks_tip_bh) = SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()).unwrap(); let canonical_stacks_tip = @@ -1034,7 +1056,7 @@ impl RelayerThread { return Ok(()); }; let highest_tenure_bhh = - self.get_highest_tenure_bhh(&canonical_stacks_tip, &canonical_stacks_tip_ch)?; + self.get_tenure_bhh(&canonical_stacks_tip, &canonical_stacks_tip_ch)?; let last_good_block_election_snapshot = self.get_last_good_block_snapshot( &burn_tip, &highest_tenure_bhh, From f0228c9ff5fce2c00ba490b92c51c8466dc4ee64 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 21 Nov 2024 15:20:55 -0500 Subject: [PATCH 19/41] feat: move sign coordinator logic into its own thread 
`SignerDBListener` struct is for a new thread that is always processing StackerDB messages from the signers during a mining tenure. `SignerCoordinator` is the interface that the miner uses with the `SignerDBListener`, to propose a block and wait for signatures. --- testnet/stacks-node/src/nakamoto_node.rs | 3 +- .../stacks-node/src/nakamoto_node/miner.rs | 119 ++-- .../src/nakamoto_node/sign_coordinator.rs | 616 ------------------ .../src/nakamoto_node/signer_coordinator.rs | 371 +++++++++++ .../src/nakamoto_node/signerdb_listener.rs | 391 +++++++++++ testnet/stacks-node/src/neon_node.rs | 6 +- testnet/stacks-node/src/tests/signer/v0.rs | 2 +- 7 files changed, 819 insertions(+), 689 deletions(-) delete mode 100644 testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs create mode 100644 testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs create mode 100644 testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs diff --git a/testnet/stacks-node/src/nakamoto_node.rs b/testnet/stacks-node/src/nakamoto_node.rs index edaf12e98b..090170837a 100644 --- a/testnet/stacks-node/src/nakamoto_node.rs +++ b/testnet/stacks-node/src/nakamoto_node.rs @@ -42,7 +42,8 @@ use crate::run_loop::RegisteredKey; pub mod miner; pub mod peer; pub mod relayer; -pub mod sign_coordinator; +pub mod signer_coordinator; +pub mod signerdb_listener; use self::peer::PeerThread; use self::relayer::{RelayerDirective, RelayerThread}; diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 877eab88a1..d51cccb4ba 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -41,14 +41,13 @@ use stacks::net::p2p::NetworkHandle; use stacks::net::stackerdb::StackerDBs; use stacks::net::{NakamotoBlocksData, StacksMessageType}; use stacks::util::get_epoch_time_secs; -use stacks::util::secp256k1::MessageSignature; use stacks_common::types::chainstate::{StacksAddress, StacksBlockId}; use 
stacks_common::types::{PrivateKey, StacksEpochId}; use stacks_common::util::vrf::VRFProof; use super::relayer::RelayerThread; -use super::sign_coordinator::SignCoordinator; use super::{Config, Error as NakamotoNodeError, EventDispatcher, Keychain}; +use crate::nakamoto_node::signer_coordinator::SignerCoordinator; use crate::nakamoto_node::VRF_MOCK_MINER_KEY; use crate::neon_node; use crate::run_loop::nakamoto::Globals; @@ -291,6 +290,37 @@ impl BlockMinerThread { .map_err(|e| NakamotoNodeError::MiningFailure(ChainstateError::NetError(e)))?; let mut last_block_rejected = false; + let reward_set = self.load_signer_set()?; + let Some(miner_privkey) = self.config.miner.mining_key else { + return Err(NakamotoNodeError::MinerConfigurationFailed( + "No mining key configured, cannot mine", + )); + }; + let sortdb = SortitionDB::open( + &self.config.get_burn_db_file_path(), + true, + self.burnchain.pox_constants.clone(), + ) + .expect("FATAL: could not open sortition DB"); + let burn_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) + .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); + + // Start the signer coordinator + let mut coordinator = SignerCoordinator::new( + self.event_dispatcher.stackerdb_channel.clone(), + self.globals.should_keep_running.clone(), + &reward_set, + &burn_tip, + &self.burnchain, + miner_privkey, + &self.config, + ) + .map_err(|e| { + NakamotoNodeError::SigningCoordinatorFailure(format!( + "Failed to initialize the signing coordinator. Cannot mine! 
{e:?}" + )) + })?; + // now, actually run this tenure loop { #[cfg(test)] @@ -371,9 +401,22 @@ impl BlockMinerThread { if let Some(mut new_block) = new_block { Self::fault_injection_block_broadcast_stall(&new_block); - let (reward_set, signer_signature) = match self - .gather_signatures(&mut new_block, &mut stackerdbs) - { + let mut chain_state = neon_node::open_chainstate_with_faults(&self.config) + .map_err(|e| { + NakamotoNodeError::SigningCoordinatorFailure(format!( + "Failed to open chainstate DB. Cannot mine! {e:?}" + )) + })?; + let signer_signature = match coordinator.propose_block( + &mut new_block, + &self.burn_block, + &self.burnchain, + &sortdb, + &mut chain_state, + &mut stackerdbs, + &self.globals.counters, + &self.burn_election_block.consensus_hash, + ) { Ok(x) => x, Err(e) => match e { NakamotoNodeError::StacksTipChanged => { @@ -413,6 +456,8 @@ impl BlockMinerThread { }; last_block_rejected = false; + let reward_set = self.load_signer_set()?; + new_block.header.signer_signature = signer_signature; if let Err(e) = self.broadcast(new_block.clone(), reward_set, &stackerdbs) { warn!("Error accepting own block: {e:?}. Will try mining again."); @@ -526,68 +571,6 @@ impl BlockMinerThread { Ok(reward_set) } - /// Gather a list of signatures from the signers for the block - fn gather_signatures( - &mut self, - new_block: &mut NakamotoBlock, - stackerdbs: &mut StackerDBs, - ) -> Result<(RewardSet, Vec), NakamotoNodeError> { - let Some(miner_privkey) = self.config.miner.mining_key else { - return Err(NakamotoNodeError::MinerConfigurationFailed( - "No mining key configured, cannot mine", - )); - }; - let sort_db = SortitionDB::open( - &self.config.get_burn_db_file_path(), - true, - self.burnchain.pox_constants.clone(), - ) - .map_err(|e| { - NakamotoNodeError::SigningCoordinatorFailure(format!( - "Failed to open sortition DB. Cannot mine! 
{e:?}" - )) - })?; - - let reward_set = self.load_signer_set()?; - - if self.config.get_node_config(false).mock_mining { - return Ok((reward_set, Vec::new())); - } - - let mut coordinator = SignCoordinator::new( - &reward_set, - miner_privkey, - &self.config, - self.globals.should_keep_running.clone(), - self.event_dispatcher.stackerdb_channel.clone(), - ) - .map_err(|e| { - NakamotoNodeError::SigningCoordinatorFailure(format!( - "Failed to initialize the signing coordinator. Cannot mine! {e:?}" - )) - })?; - - let mut chain_state = - neon_node::open_chainstate_with_faults(&self.config).map_err(|e| { - NakamotoNodeError::SigningCoordinatorFailure(format!( - "Failed to open chainstate DB. Cannot mine! {e:?}" - )) - })?; - - let signature = coordinator.run_sign_v0( - new_block, - &self.burn_block, - &self.burnchain, - &sort_db, - &mut chain_state, - stackerdbs, - &self.globals.counters, - &self.burn_election_block.consensus_hash, - )?; - - Ok((reward_set, signature)) - } - /// Fault injection -- possibly fail to broadcast /// Return true to drop the block fn fault_injection_broadcast_fail(&self) -> bool { @@ -706,7 +689,7 @@ impl BlockMinerThread { let miners_contract_id = boot_code_id(MINERS_NAME, chain_state.mainnet); let mut miners_session = StackerDBSession::new(&rpc_socket.to_string(), miners_contract_id); - SignCoordinator::send_miners_message( + SignerCoordinator::send_miners_message( miner_privkey, &sort_db, &self.burn_block, diff --git a/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs deleted file mode 100644 index 0d52f9d14a..0000000000 --- a/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs +++ /dev/null @@ -1,616 +0,0 @@ -// Copyright (C) 2024 Stacks Open Internet Foundation -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the 
License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -use std::collections::BTreeMap; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::mpsc::Receiver; -use std::sync::{Arc, Mutex}; -use std::time::Duration; - -use hashbrown::{HashMap, HashSet}; -use libsigner::v0::messages::{ - BlockAccepted, BlockResponse, MinerSlotID, SignerMessage as SignerMessageV0, -}; -use libsigner::{BlockProposal, SignerEntries, SignerEvent, SignerSession, StackerDBSession}; -use stacks::burnchains::Burnchain; -use stacks::chainstate::burn::db::sortdb::SortitionDB; -use stacks::chainstate::burn::{BlockSnapshot, ConsensusHash}; -use stacks::chainstate::nakamoto::{NakamotoBlock, NakamotoBlockHeader, NakamotoChainState}; -use stacks::chainstate::stacks::boot::{NakamotoSignerEntry, RewardSet, MINERS_NAME, SIGNERS_NAME}; -use stacks::chainstate::stacks::db::StacksChainState; -use stacks::chainstate::stacks::events::StackerDBChunksEvent; -use stacks::chainstate::stacks::Error as ChainstateError; -use stacks::libstackerdb::StackerDBChunkData; -use stacks::net::stackerdb::StackerDBs; -use stacks::types::PublicKey; -use stacks::util::hash::MerkleHashFunc; -use stacks::util::secp256k1::MessageSignature; -use stacks::util_lib::boot::boot_code_id; -use stacks_common::bitvec::BitVec; -use stacks_common::codec::StacksMessageCodec; -use stacks_common::types::chainstate::{StacksPrivateKey, StacksPublicKey}; - -use super::Error as NakamotoNodeError; -use crate::event_dispatcher::StackerDBChannel; -use crate::neon::Counters; -use crate::Config; - -/// Fault injection flag to prevent the miner from seeing enough signer signatures. 
-/// Used to test that the signers will broadcast a block if it gets enough signatures -#[cfg(test)] -pub static TEST_IGNORE_SIGNERS: std::sync::Mutex> = std::sync::Mutex::new(None); - -/// How long should the coordinator poll on the event receiver before -/// waking up to check timeouts? -static EVENT_RECEIVER_POLL: Duration = Duration::from_millis(500); - -/// The `SignCoordinator` struct sole function is to serve as the coordinator for Nakamoto block signing. -/// This struct is used by Nakamoto miners to act as the coordinator for the blocks they produce. -pub struct SignCoordinator { - receiver: Option>, - message_key: StacksPrivateKey, - is_mainnet: bool, - miners_session: StackerDBSession, - signer_entries: HashMap, - weight_threshold: u32, - total_weight: u32, - keep_running: Arc, - pub next_signer_bitvec: BitVec<4000>, - stackerdb_channel: Arc>, -} - -impl Drop for SignCoordinator { - fn drop(&mut self) { - let stackerdb_channel = self - .stackerdb_channel - .lock() - .expect("FATAL: failed to lock stackerdb channel"); - stackerdb_channel.replace_receiver(self.receiver.take().expect( - "FATAL: lost possession of the StackerDB channel before dropping SignCoordinator", - )); - } -} - -impl SignCoordinator { - /// * `reward_set` - the active reward set data, used to construct the signer - /// set parameters. 
- /// * `aggregate_public_key` - the active aggregate key for this cycle - pub fn new( - reward_set: &RewardSet, - message_key: StacksPrivateKey, - config: &Config, - keep_running: Arc, - stackerdb_channel: Arc>, - ) -> Result { - let is_mainnet = config.is_mainnet(); - let Some(ref reward_set_signers) = reward_set.signers else { - error!("Could not initialize signing coordinator for reward set without signer"); - debug!("reward set: {reward_set:?}"); - return Err(ChainstateError::NoRegisteredSigners(0)); - }; - - let signer_entries = SignerEntries::parse(is_mainnet, reward_set_signers).map_err(|e| { - ChainstateError::InvalidStacksBlock(format!( - "Failed to parse NakamotoSignerEntries: {e:?}" - )) - })?; - let rpc_socket = config - .node - .get_rpc_loopback() - .ok_or_else(|| ChainstateError::MinerAborted)?; - let miners_contract_id = boot_code_id(MINERS_NAME, is_mainnet); - let miners_session = StackerDBSession::new(&rpc_socket.to_string(), miners_contract_id); - - let next_signer_bitvec: BitVec<4000> = BitVec::zeros( - reward_set_signers - .clone() - .len() - .try_into() - .expect("FATAL: signer set length greater than u16"), - ) - .expect("FATAL: unable to construct initial bitvec for signer set"); - - debug!( - "Initializing miner/coordinator"; - "num_signers" => signer_entries.signer_pks.len(), - "signer_public_keys" => ?signer_entries.signer_pks, - ); - - let total_weight = reward_set.total_signing_weight().map_err(|e| { - warn!("Failed to calculate total weight for the reward set: {e:?}"); - ChainstateError::NoRegisteredSigners(0) - })?; - - let threshold = NakamotoBlockHeader::compute_voting_weight_threshold(total_weight)?; - - let signer_public_keys = reward_set_signers - .iter() - .cloned() - .enumerate() - .map(|(idx, signer)| { - let Ok(slot_id) = u32::try_from(idx) else { - return Err(ChainstateError::InvalidStacksBlock( - "Signer index exceeds u32".into(), - )); - }; - Ok((slot_id, signer)) - }) - .collect::, ChainstateError>>()?; - #[cfg(test)] - { 
- // In test mode, short-circuit spinning up the SignCoordinator if the TEST_SIGNING - // channel has been created. This allows integration tests for the stacks-node - // independent of the stacks-signer. - use crate::tests::nakamoto_integrations::TEST_SIGNING; - if TEST_SIGNING.lock().unwrap().is_some() { - debug!("Short-circuiting spinning up coordinator from signer commitments. Using test signers channel."); - let (receiver, replaced_other) = stackerdb_channel - .lock() - .expect("FATAL: failed to lock StackerDB channel") - .register_miner_coordinator(); - if replaced_other { - warn!("Replaced the miner/coordinator receiver of a prior thread. Prior thread may have crashed."); - } - let sign_coordinator = Self { - message_key, - receiver: Some(receiver), - is_mainnet, - miners_session, - next_signer_bitvec, - signer_entries: signer_public_keys, - weight_threshold: threshold, - total_weight, - keep_running, - stackerdb_channel, - }; - return Ok(sign_coordinator); - } - } - - let (receiver, replaced_other) = stackerdb_channel - .lock() - .expect("FATAL: failed to lock StackerDB channel") - .register_miner_coordinator(); - if replaced_other { - warn!("Replaced the miner/coordinator receiver of a prior thread. 
Prior thread may have crashed."); - } - - Ok(Self { - receiver: Some(receiver), - message_key, - is_mainnet, - miners_session, - next_signer_bitvec, - signer_entries: signer_public_keys, - weight_threshold: threshold, - total_weight, - keep_running, - stackerdb_channel, - }) - } - - /// Send a message over the miners contract using a `StacksPrivateKey` - #[allow(clippy::too_many_arguments)] - pub fn send_miners_message( - miner_sk: &StacksPrivateKey, - sortdb: &SortitionDB, - tip: &BlockSnapshot, - stackerdbs: &StackerDBs, - message: M, - miner_slot_id: MinerSlotID, - is_mainnet: bool, - miners_session: &mut StackerDBSession, - election_sortition: &ConsensusHash, - ) -> Result<(), String> { - let Some(slot_range) = NakamotoChainState::get_miner_slot(sortdb, tip, election_sortition) - .map_err(|e| format!("Failed to read miner slot information: {e:?}"))? - else { - return Err("No slot for miner".into()); - }; - - let slot_id = slot_range - .start - .saturating_add(miner_slot_id.to_u8().into()); - if !slot_range.contains(&slot_id) { - return Err("Not enough slots for miner messages".into()); - } - // Get the LAST slot version number written to the DB. If not found, use 0. - // Add 1 to get the NEXT version number - // Note: we already check above for the slot's existence - let miners_contract_id = boot_code_id(MINERS_NAME, is_mainnet); - let slot_version = stackerdbs - .get_slot_version(&miners_contract_id, slot_id) - .map_err(|e| format!("Failed to read slot version: {e:?}"))? - .unwrap_or(0) - .saturating_add(1); - let mut chunk = StackerDBChunkData::new(slot_id, slot_version, message.serialize_to_vec()); - chunk - .sign(miner_sk) - .map_err(|_| "Failed to sign StackerDB chunk")?; - - match miners_session.put_chunk(&chunk) { - Ok(ack) => { - if ack.accepted { - debug!("Wrote message to stackerdb: {ack:?}"); - Ok(()) - } else { - Err(format!("{ack:?}")) - } - } - Err(e) => Err(format!("{e:?}")), - } - } - - /// Do we ignore signer signatures? 
- #[cfg(test)] - fn fault_injection_ignore_signatures() -> bool { - if *TEST_IGNORE_SIGNERS.lock().unwrap() == Some(true) { - return true; - } - false - } - - #[cfg(not(test))] - fn fault_injection_ignore_signatures() -> bool { - false - } - - /// Check if the tenure needs to change - fn check_burn_tip_changed(sortdb: &SortitionDB, burn_block: &BlockSnapshot) -> bool { - let cur_burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) - .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); - - if cur_burn_chain_tip.consensus_hash != burn_block.consensus_hash { - info!("SignCoordinator: Cancel signature aggregation; burnchain tip has changed"); - true - } else { - false - } - } - - /// Start gathering signatures for a Nakamoto block. - /// This function begins by sending a `BlockProposal` message - /// to the signers, and then waits for the signers to respond - /// with their signatures. It does so in two ways, concurrently: - /// * It waits for signer StackerDB messages with signatures. If enough signatures can be - /// found, then the block can be broadcast. - /// * It waits for the chainstate to contain the relayed block. If so, then its signatures are - /// loaded and returned. This can happen if the node receives the block via a signer who - /// fetched all signatures and assembled the signature vector, all before we could. - // Mutants skip here: this function is covered via integration tests, - // which the mutation testing does not see. 
- #[cfg_attr(test, mutants::skip)] - #[allow(clippy::too_many_arguments)] - pub fn run_sign_v0( - &mut self, - block: &NakamotoBlock, - burn_tip: &BlockSnapshot, - burnchain: &Burnchain, - sortdb: &SortitionDB, - chain_state: &mut StacksChainState, - stackerdbs: &StackerDBs, - counters: &Counters, - election_sortition: &ConsensusHash, - ) -> Result, NakamotoNodeError> { - let reward_cycle_id = burnchain - .block_height_to_reward_cycle(burn_tip.block_height) - .expect("FATAL: tried to initialize coordinator before first burn block height"); - - let block_proposal = BlockProposal { - block: block.clone(), - burn_height: burn_tip.block_height, - reward_cycle: reward_cycle_id, - }; - - let block_proposal_message = SignerMessageV0::BlockProposal(block_proposal); - debug!("Sending block proposal message to signers"; - "signer_signature_hash" => %block.header.signer_signature_hash(), - ); - Self::send_miners_message::( - &self.message_key, - sortdb, - burn_tip, - stackerdbs, - block_proposal_message, - MinerSlotID::BlockProposal, - self.is_mainnet, - &mut self.miners_session, - election_sortition, - ) - .map_err(NakamotoNodeError::SigningCoordinatorFailure)?; - counters.bump_naka_proposed_blocks(); - - #[cfg(test)] - { - info!( - "SignCoordinator: sent block proposal to .miners, waiting for test signing channel" - ); - // In test mode, short-circuit waiting for the signers if the TEST_SIGNING - // channel has been created. This allows integration tests for the stacks-node - // independent of the stacks-signer. 
- if let Some(signatures) = - crate::tests::nakamoto_integrations::TestSigningChannel::get_signature() - { - debug!("Short-circuiting waiting for signers, using test signature"); - return Ok(signatures); - } - } - - let Some(ref mut receiver) = self.receiver else { - return Err(NakamotoNodeError::SigningCoordinatorFailure( - "Failed to obtain the StackerDB event receiver".into(), - )); - }; - - let mut total_weight_signed: u32 = 0; - let mut total_reject_weight: u32 = 0; - let mut responded_signers = HashSet::new(); - let mut gathered_signatures = BTreeMap::new(); - - info!("SignCoordinator: beginning to watch for block signatures OR posted blocks."; - "threshold" => self.weight_threshold, - ); - - loop { - // look in the nakamoto staging db -- a block can only get stored there if it has - // enough signing weight to clear the threshold - if let Ok(Some((stored_block, _sz))) = chain_state - .nakamoto_blocks_db() - .get_nakamoto_block(&block.block_id()) - .map_err(|e| { - warn!( - "Failed to query chainstate for block {}: {e:?}", - &block.block_id() - ); - e - }) - { - debug!("SignCoordinator: Found signatures in relayed block"); - counters.bump_naka_signer_pushed_blocks(); - return Ok(stored_block.header.signer_signature); - } - - if Self::check_burn_tip_changed(sortdb, burn_tip) { - debug!("SignCoordinator: Exiting due to new burnchain tip"); - return Err(NakamotoNodeError::BurnchainTipChanged); - } - - // one of two things can happen: - // * we get enough signatures from stackerdb from the signers, OR - // * we see our block get processed in our chainstate (meaning, the signers broadcasted - // the block and our node got it and processed it) - let event = match receiver.recv_timeout(EVENT_RECEIVER_POLL) { - Ok(event) => event, - Err(std::sync::mpsc::RecvTimeoutError::Timeout) => { - continue; - } - Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => { - return Err(NakamotoNodeError::SigningCoordinatorFailure( - "StackerDB event receiver disconnected".into(), 
- )) - } - }; - - // was the node asked to stop? - if !self.keep_running.load(Ordering::SeqCst) { - info!("SignerCoordinator: received node exit request. Aborting"); - return Err(NakamotoNodeError::ChannelClosed); - } - - // check to see if this event we got is a signer event - let is_signer_event = - event.contract_id.name.starts_with(SIGNERS_NAME) && event.contract_id.is_boot(); - - if !is_signer_event { - debug!("Ignoring StackerDB event for non-signer contract"; "contract" => %event.contract_id); - continue; - } - - let modified_slots = &event.modified_slots.clone(); - - let Ok(signer_event) = SignerEvent::::try_from(event).map_err(|e| { - warn!("Failure parsing StackerDB event into signer event. Ignoring message."; "err" => ?e); - }) else { - continue; - }; - let SignerEvent::SignerMessages(signer_set, messages) = signer_event else { - debug!("Received signer event other than a signer message. Ignoring."); - continue; - }; - if signer_set != u32::try_from(reward_cycle_id % 2).unwrap() { - debug!("Received signer event for other reward cycle. Ignoring."); - continue; - }; - let slot_ids = modified_slots - .iter() - .map(|chunk| chunk.slot_id) - .collect::>(); - - debug!("SignCoordinator: Received messages from signers"; - "count" => messages.len(), - "slot_ids" => ?slot_ids, - "threshold" => self.weight_threshold - ); - - for (message, slot_id) in messages.into_iter().zip(slot_ids) { - let Some(signer_entry) = &self.signer_entries.get(&slot_id) else { - return Err(NakamotoNodeError::SignerSignatureError( - "Signer entry not found".into(), - )); - }; - let Ok(signer_pubkey) = StacksPublicKey::from_slice(&signer_entry.signing_key) - else { - return Err(NakamotoNodeError::SignerSignatureError( - "Failed to parse signer public key".into(), - )); - }; - - if responded_signers.contains(&signer_pubkey) { - debug!( - "Signer {slot_id} already responded for block {}. 
Ignoring {message:?}.", block.header.signer_signature_hash(); - "stacks_block_hash" => %block.header.block_hash(), - "stacks_block_id" => %block.header.block_id() - ); - continue; - } - - match message { - SignerMessageV0::BlockResponse(BlockResponse::Accepted(accepted)) => { - let BlockAccepted { - signer_signature_hash: response_hash, - signature, - metadata, - tenure_extend_timestamp: _, // TOOD: utilize this info - } = accepted; - let block_sighash = block.header.signer_signature_hash(); - if block_sighash != response_hash { - warn!( - "Processed signature for a different block. Will try to continue."; - "signature" => %signature, - "block_signer_signature_hash" => %block_sighash, - "response_hash" => %response_hash, - "slot_id" => slot_id, - "reward_cycle_id" => reward_cycle_id, - "response_hash" => %response_hash, - "server_version" => %metadata.server_version - ); - continue; - } - debug!("SignCoordinator: Received valid signature from signer"; "slot_id" => slot_id, "signature" => %signature); - let Ok(valid_sig) = signer_pubkey.verify(block_sighash.bits(), &signature) - else { - warn!("Got invalid signature from a signer. Ignoring."); - continue; - }; - if !valid_sig { - warn!( - "Processed signature but didn't validate over the expected block. 
Ignoring"; - "signature" => %signature, - "block_signer_signature_hash" => %block_sighash, - "slot_id" => slot_id, - ); - continue; - } - - if Self::fault_injection_ignore_signatures() { - warn!("SignCoordinator: fault injection: ignoring well-formed signature for block"; - "block_signer_sighash" => %block_sighash, - "signer_pubkey" => signer_pubkey.to_hex(), - "signer_slot_id" => slot_id, - "signature" => %signature, - "signer_weight" => signer_entry.weight, - "total_weight_signed" => total_weight_signed, - "stacks_block_hash" => %block.header.block_hash(), - "stacks_block_id" => %block.header.block_id() - ); - continue; - } - - if !gathered_signatures.contains_key(&slot_id) { - total_weight_signed = total_weight_signed - .checked_add(signer_entry.weight) - .expect("FATAL: total weight signed exceeds u32::MAX"); - } - - info!("SignCoordinator: Signature Added to block"; - "block_signer_sighash" => %block_sighash, - "signer_pubkey" => signer_pubkey.to_hex(), - "signer_slot_id" => slot_id, - "signature" => %signature, - "signer_weight" => signer_entry.weight, - "total_weight_signed" => total_weight_signed, - "stacks_block_hash" => %block.header.block_hash(), - "stacks_block_id" => %block.header.block_id(), - "server_version" => metadata.server_version, - ); - gathered_signatures.insert(slot_id, signature); - responded_signers.insert(signer_pubkey); - } - SignerMessageV0::BlockResponse(BlockResponse::Rejected(rejected_data)) => { - let block_sighash = block.header.signer_signature_hash(); - if block_sighash != rejected_data.signer_signature_hash { - warn!( - "Processed rejection for a different block. 
Will try to continue."; - "block_signer_signature_hash" => %block_sighash, - "rejected_data.signer_signature_hash" => %rejected_data.signer_signature_hash, - "slot_id" => slot_id, - "reward_cycle_id" => reward_cycle_id, - ); - continue; - } - let rejected_pubkey = match rejected_data.recover_public_key() { - Ok(rejected_pubkey) => { - if rejected_pubkey != signer_pubkey { - warn!("Recovered public key from rejected data does not match signer's public key. Ignoring."); - continue; - } - rejected_pubkey - } - Err(e) => { - warn!("Failed to recover public key from rejected data: {e:?}. Ignoring."); - continue; - } - }; - responded_signers.insert(rejected_pubkey); - debug!( - "Signer {slot_id} rejected our block {}/{}", - &block.header.consensus_hash, - &block.header.block_hash() - ); - total_reject_weight = total_reject_weight - .checked_add(signer_entry.weight) - .expect("FATAL: total weight rejected exceeds u32::MAX"); - - if total_reject_weight.saturating_add(self.weight_threshold) - > self.total_weight - { - debug!( - "{total_reject_weight}/{} signers vote to reject our block {}/{}", - self.total_weight, - &block.header.consensus_hash, - &block.header.block_hash() - ); - counters.bump_naka_rejected_blocks(); - return Err(NakamotoNodeError::SignersRejected); - } - continue; - } - SignerMessageV0::BlockProposal(_) => { - debug!("Received block proposal message. Ignoring."); - continue; - } - SignerMessageV0::BlockPushed(_) => { - debug!("Received block pushed message. Ignoring."); - continue; - } - SignerMessageV0::MockSignature(_) - | SignerMessageV0::MockProposal(_) - | SignerMessageV0::MockBlock(_) => { - debug!("Received mock message. Ignoring."); - continue; - } - }; - } - // After gathering all signatures, return them if we've hit the threshold - if total_weight_signed >= self.weight_threshold { - info!("SignCoordinator: Received enough signatures. 
Continuing."; - "stacks_block_hash" => %block.header.block_hash(), - "stacks_block_id" => %block.header.block_id() - ); - return Ok(gathered_signatures.values().cloned().collect()); - } - } - } -} diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs new file mode 100644 index 0000000000..f461ce93ec --- /dev/null +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -0,0 +1,371 @@ +// Copyright (C) 2024 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +use std::collections::BTreeMap; +use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Condvar, Mutex}; +use std::thread::JoinHandle; + +use hashbrown::{HashMap, HashSet}; +use libsigner::v0::messages::{MinerSlotID, SignerMessage as SignerMessageV0}; +use libsigner::{BlockProposal, SignerSession, StackerDBSession}; +use stacks::burnchains::Burnchain; +use stacks::chainstate::burn::db::sortdb::SortitionDB; +use stacks::chainstate::burn::{BlockSnapshot, ConsensusHash}; +use stacks::chainstate::nakamoto::{NakamotoBlock, NakamotoChainState}; +use stacks::chainstate::stacks::boot::{RewardSet, MINERS_NAME}; +use stacks::chainstate::stacks::db::StacksChainState; +use stacks::chainstate::stacks::Error as ChainstateError; +use stacks::codec::StacksMessageCodec; +use stacks::libstackerdb::StackerDBChunkData; +use stacks::net::stackerdb::StackerDBs; +use stacks::types::chainstate::{StacksPrivateKey, StacksPublicKey}; +use stacks::util::hash::Sha512Trunc256Sum; +use stacks::util::secp256k1::MessageSignature; +use stacks::util_lib::boot::boot_code_id; + +use super::signerdb_listener::{SignerDBListener, TimestampInfo}; +use super::Error as NakamotoNodeError; +use crate::event_dispatcher::StackerDBChannel; +use crate::nakamoto_node::signerdb_listener::BlockStatus; +use crate::neon::Counters; +use crate::Config; + +/// Helper function to determine if we should wait for more signatures +fn should_wait(status: Option<&BlockStatus>, weight_threshold: u32, total_weight: u32) -> bool { + match status { + Some(status) => { + status.total_weight_signed < weight_threshold + && status.total_reject_weight.saturating_add(weight_threshold) <= total_weight + } + None => true, + } +} + +/// The coordinator used by the miner thread to propose blocks and gather +/// signatures, backed by the state shared with the signer DB listener. +pub struct SignerCoordinator { + /// The private key used to sign messages from the miner + message_key: StacksPrivateKey, + /// Is this mainnet?
+ is_mainnet: bool, + /// The session for writing to the miners contract in the stackerdb + miners_session: StackerDBSession, + /// The total weight of all signers + total_weight: u32, + /// The weight threshold for block approval + weight_threshold: u32, + /// Tracks signatures for blocks + /// - key: Sha512Trunc256Sum (signer signature hash) + /// - value: BlockStatus + blocks: Arc<(Mutex>, Condvar)>, + /// Tracks the timestamps from signers to decide when they should be + /// willing to accept time-based tenure extensions + /// - key: StacksPublicKey + /// - value: TimestampInfo + signer_idle_timestamps: Arc>>, + /// Handle for the signer DB listener thread + listener_thread: Option>, +} + +impl SignerCoordinator { + /// Create a new `SignerCoordinator` instance. + /// This will spawn a new thread to listen for messages from the signer DB. + pub fn new( + stackerdb_channel: Arc>, + keep_running: Arc, + reward_set: &RewardSet, + burn_tip: &BlockSnapshot, + burnchain: &Burnchain, + message_key: StacksPrivateKey, + config: &Config, + ) -> Result { + // Create the signer DB listener + let mut listener = SignerDBListener::new( + stackerdb_channel, + keep_running.clone(), + reward_set, + burn_tip, + burnchain, + )?; + let is_mainnet = config.is_mainnet(); + let rpc_socket = config + .node + .get_rpc_loopback() + .ok_or_else(|| ChainstateError::MinerAborted)?; + let miners_contract_id = boot_code_id(MINERS_NAME, is_mainnet); + let miners_session = StackerDBSession::new(&rpc_socket.to_string(), miners_contract_id); + + let mut sc = Self { + message_key, + is_mainnet, + miners_session, + total_weight: listener.total_weight, + weight_threshold: listener.weight_threshold, + blocks: listener.blocks.clone(), + signer_idle_timestamps: listener.signer_idle_timestamps.clone(), + listener_thread: None, + }; + + // Spawn the signer DB listener thread + let listener_thread = std::thread::Builder::new() + .name("signerdb_listener".to_string()) + .spawn(move || { + if let Err(e) = 
listener.run() { + error!("SignerDBListener: failed to run: {e:?}"); + } + }) + .map_err(|e| { + error!("Failed to spawn signerdb_listener thread: {e:?}"); + ChainstateError::MinerAborted + })?; + + sc.listener_thread = Some(listener_thread); + + Ok(sc) + } + + /// Send a message over the miners contract using a `StacksPrivateKey` + #[allow(clippy::too_many_arguments)] + pub fn send_miners_message( + miner_sk: &StacksPrivateKey, + sortdb: &SortitionDB, + tip: &BlockSnapshot, + stackerdbs: &StackerDBs, + message: M, + miner_slot_id: MinerSlotID, + is_mainnet: bool, + miners_session: &mut StackerDBSession, + election_sortition: &ConsensusHash, + ) -> Result<(), String> { + let Some(slot_range) = NakamotoChainState::get_miner_slot(sortdb, tip, election_sortition) + .map_err(|e| format!("Failed to read miner slot information: {e:?}"))? + else { + return Err("No slot for miner".into()); + }; + + let slot_id = slot_range + .start + .saturating_add(miner_slot_id.to_u8().into()); + if !slot_range.contains(&slot_id) { + return Err("Not enough slots for miner messages".into()); + } + // Get the LAST slot version number written to the DB. If not found, use 0. + // Add 1 to get the NEXT version number + // Note: we already check above for the slot's existence + let miners_contract_id = boot_code_id(MINERS_NAME, is_mainnet); + let slot_version = stackerdbs + .get_slot_version(&miners_contract_id, slot_id) + .map_err(|e| format!("Failed to read slot version: {e:?}"))? + .unwrap_or(0) + .saturating_add(1); + let mut chunk = StackerDBChunkData::new(slot_id, slot_version, message.serialize_to_vec()); + chunk + .sign(miner_sk) + .map_err(|_| "Failed to sign StackerDB chunk")?; + + match miners_session.put_chunk(&chunk) { + Ok(ack) => { + if ack.accepted { + debug!("Wrote message to stackerdb: {ack:?}"); + Ok(()) + } else { + Err(format!("{ack:?}")) + } + } + Err(e) => Err(format!("{e:?}")), + } + } + + /// Propose a Nakamoto block and gather signatures for it. 
+ /// This function begins by sending a `BlockProposal` message to the + /// signers, and then it waits for the signers to respond with their + /// signatures. It does so in two ways, concurrently: + /// * It waits for the signer DB listener to collect enough signatures to + /// accept or reject the block + /// * It waits for the chainstate to contain the relayed block. If so, then its signatures are + /// loaded and returned. This can happen if the node receives the block via a signer who + /// fetched all signatures and assembled the signature vector, all before we could. + // Mutants skip here: this function is covered via integration tests, + // which the mutation testing does not see. + #[cfg_attr(test, mutants::skip)] + #[allow(clippy::too_many_arguments)] + pub fn propose_block( + &mut self, + block: &NakamotoBlock, + burn_tip: &BlockSnapshot, + burnchain: &Burnchain, + sortdb: &SortitionDB, + chain_state: &mut StacksChainState, + stackerdbs: &StackerDBs, + counters: &Counters, + election_sortition: &ConsensusHash, + ) -> Result, NakamotoNodeError> { + // Add this block to the block status map + let (lock, _cvar) = &*self.blocks; + let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); + let block_status = BlockStatus { + responded_signers: HashSet::new(), + gathered_signatures: BTreeMap::new(), + total_weight_signed: 0, + total_reject_weight: 0, + }; + blocks.insert(block.header.signer_signature_hash(), block_status); + + let reward_cycle_id = burnchain + .block_height_to_reward_cycle(burn_tip.block_height) + .expect("FATAL: tried to initialize coordinator before first burn block height"); + + let block_proposal = BlockProposal { + block: block.clone(), + burn_height: burn_tip.block_height, + reward_cycle: reward_cycle_id, + }; + + let block_proposal_message = SignerMessageV0::BlockProposal(block_proposal); + debug!("Sending block proposal message to signers"; + "signer_signature_hash" => %block.header.signer_signature_hash(), + ); + 
Self::send_miners_message::( + &self.message_key, + sortdb, + burn_tip, + stackerdbs, + block_proposal_message, + MinerSlotID::BlockProposal, + self.is_mainnet, + &mut self.miners_session, + election_sortition, + ) + .map_err(NakamotoNodeError::SigningCoordinatorFailure)?; + counters.bump_naka_proposed_blocks(); + + #[cfg(test)] + { + info!( + "SignerCoordinator: sent block proposal to .miners, waiting for test signing channel" + ); + // In test mode, short-circuit waiting for the signers if the TEST_SIGNING + // channel has been created. This allows integration tests for the stacks-node + // independent of the stacks-signer. + if let Some(signatures) = + crate::tests::nakamoto_integrations::TestSigningChannel::get_signature() + { + debug!("Short-circuiting waiting for signers, using test signature"); + return Ok(signatures); + } + } + + self.get_block_status(&block.header.signer_signature_hash(), chain_state, counters) + } + + /// Get the block status for a given block hash. + /// If we have not yet received enough signatures for this block, this + /// method will block until we do. + fn get_block_status( + &self, + block_hash: &Sha512Trunc256Sum, + chain_state: &mut StacksChainState, + counters: &Counters, + ) -> Result, NakamotoNodeError> { + let (lock, cvar) = &*self.blocks; + let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); + + // TODO: integrate this check into the waiting for the condvar + // Look in the nakamoto staging db -- a block can only get stored there + // if it has enough signing weight to clear the threshold. 
+ // if let Ok(Some((stored_block, _sz))) = chain_state + // .nakamoto_blocks_db() + // .get_nakamoto_block(&block.block_id()) + // .map_err(|e| { + // warn!( + // "Failed to query chainstate for block {}: {e:?}", + // &block.block_id() + // ); + // e + // }) + // { + // debug!("SignCoordinator: Found signatures in relayed block"); + // counters.bump_naka_signer_pushed_blocks(); + // return Ok(stored_block.header.signer_signature); + // } + + // if Self::check_burn_tip_changed(sortdb, burn_tip) { + // debug!("SignCoordinator: Exiting due to new burnchain tip"); + // return Err(NakamotoNodeError::BurnchainTipChanged); + // } + + blocks = cvar + .wait_while(blocks, |map| { + should_wait( + map.get(block_hash), + self.weight_threshold, + self.total_weight, + ) + }) + .expect("FATAL: failed to wait on block status"); + let block_status = blocks.get(block_hash).cloned().ok_or_else(|| { + NakamotoNodeError::SigningCoordinatorFailure( + "Block unexpectedly missing from map".into(), + ) + })?; + if block_status + .total_reject_weight + .saturating_add(self.weight_threshold) + > self.total_weight + { + info!( + "{}/{} signers vote to reject block", + block_status.total_reject_weight, self.total_weight; + "stacks_block_hash" => %block_hash, + ); + counters.bump_naka_rejected_blocks(); + Err(NakamotoNodeError::SignersRejected) + } else if block_status.total_weight_signed >= self.weight_threshold { + info!("Received enough signatures, block accepted"; + "stacks_block_hash" => %block_hash, + ); + Ok(block_status.gathered_signatures.values().cloned().collect()) + } else { + info!("Unblocked without reaching the threshold, likely due to an interruption"; + "stacks_block_hash" => %block_hash, + ); + Err(NakamotoNodeError::ChannelClosed) + } + } + + /// Get the timestamp at which at least 70% of the signing power should be + /// willing to accept a time-based tenure extension. 
+ pub fn get_tenure_extend_timestamp(&self) -> u64 { + let signer_idle_timestamps = self + .signer_idle_timestamps + .lock() + .expect("FATAL: failed to lock signer idle timestamps"); + let mut idle_timestamps = signer_idle_timestamps.values().collect::>(); + idle_timestamps.sort_by_key(|info| info.timestamp); + let mut weight_sum = 0; + for info in idle_timestamps { + weight_sum += info.weight; + if weight_sum >= self.weight_threshold { + return info.timestamp; + } + } + + // We don't have enough information to reach a 70% threshold at any + // time, so return u64::MAX to indicate that we should not extend the + // tenure. + u64::MAX + } +} diff --git a/testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs new file mode 100644 index 0000000000..89769b3e3e --- /dev/null +++ b/testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs @@ -0,0 +1,391 @@ +// Copyright (C) 2024 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +use std::collections::BTreeMap; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::mpsc::Receiver; +use std::sync::{Arc, Condvar, Mutex}; +use std::time::Duration; + +use hashbrown::{HashMap, HashSet}; +use libsigner::v0::messages::{BlockAccepted, BlockResponse, SignerMessage as SignerMessageV0}; +use libsigner::SignerEvent; +use stacks::burnchains::Burnchain; +use stacks::chainstate::burn::BlockSnapshot; +use stacks::chainstate::nakamoto::NakamotoBlockHeader; +use stacks::chainstate::stacks::boot::{NakamotoSignerEntry, RewardSet, SIGNERS_NAME}; +use stacks::chainstate::stacks::events::StackerDBChunksEvent; +use stacks::chainstate::stacks::Error as ChainstateError; +use stacks::types::chainstate::StacksPublicKey; +use stacks::types::PublicKey; +use stacks::util::hash::{MerkleHashFunc, Sha512Trunc256Sum}; +use stacks::util::secp256k1::MessageSignature; + +use super::Error as NakamotoNodeError; +use crate::event_dispatcher::StackerDBChannel; + +/// Fault injection flag to prevent the miner from seeing enough signer signatures. +/// Used to test that the signers will broadcast a block if it gets enough signatures +#[cfg(test)] +pub static TEST_IGNORE_SIGNERS: std::sync::Mutex> = std::sync::Mutex::new(None); + +/// How long should the coordinator poll on the event receiver before +/// waking up to check timeouts? +pub static EVENT_RECEIVER_POLL: Duration = Duration::from_millis(500); + +#[derive(Debug, Clone)] +pub(crate) struct BlockStatus { + pub responded_signers: HashSet, + pub gathered_signatures: BTreeMap, + pub total_weight_signed: u32, + pub total_reject_weight: u32, +} + +#[derive(Debug, Clone)] +pub(crate) struct TimestampInfo { + pub timestamp: u64, + pub weight: u32, +} + +/// The listener for the signer database, which listens for messages from the +/// signers and tracks the state of block signatures and idle timestamps. 
+#[derive(Debug)] +pub struct SignerDBListener { + /// Channel to receive StackerDB events + receiver: Receiver, + /// Flag to shut the listener down + keep_running: Arc, + /// The signer set for this tenure (0 or 1) + signer_set: u32, + /// The total weight of all signers + pub(crate) total_weight: u32, + /// The weight threshold for block approval + pub(crate) weight_threshold: u32, + /// The signer entries for this tenure (keyed by slot_id) + signer_entries: HashMap, + /// Tracks signatures for blocks + /// - key: Sha512Trunc256Sum (signer signature hash) + /// - value: BlockStatus + pub(crate) blocks: Arc<(Mutex>, Condvar)>, + /// Tracks the timestamps from signers to decide when they should be + /// willing to accept time-based tenure extensions + /// - key: StacksPublicKey + /// - value: TimestampInfo + pub(crate) signer_idle_timestamps: Arc>>, +} + +impl SignerDBListener { + pub fn new( + stackerdb_channel: Arc>, + keep_running: Arc, + reward_set: &RewardSet, + burn_tip: &BlockSnapshot, + burnchain: &Burnchain, + ) -> Result { + let (receiver, replaced_other) = stackerdb_channel + .lock() + .expect("FATAL: failed to lock StackerDB channel") + .register_miner_coordinator(); + if replaced_other { + warn!("Replaced the miner/coordinator receiver of a prior thread. 
Prior thread may have crashed."); + } + + let total_weight = reward_set.total_signing_weight().map_err(|e| { + warn!("Failed to calculate total weight for the reward set: {e:?}"); + ChainstateError::NoRegisteredSigners(0) + })?; + + let weight_threshold = NakamotoBlockHeader::compute_voting_weight_threshold(total_weight)?; + + let reward_cycle_id = burnchain + .block_height_to_reward_cycle(burn_tip.block_height) + .expect("FATAL: tried to initialize coordinator before first burn block height"); + let signer_set = + u32::try_from(reward_cycle_id % 2).expect("FATAL: reward cycle id % 2 exceeds u32"); + + let Some(ref reward_set_signers) = reward_set.signers else { + error!("Could not initialize signing coordinator for reward set without signer"); + debug!("reward set: {reward_set:?}"); + return Err(ChainstateError::NoRegisteredSigners(0)); + }; + + let signer_entries = reward_set_signers + .iter() + .cloned() + .enumerate() + .map(|(idx, signer)| { + let Ok(slot_id) = u32::try_from(idx) else { + return Err(ChainstateError::InvalidStacksBlock( + "Signer index exceeds u32".into(), + )); + }; + Ok((slot_id, signer)) + }) + .collect::, ChainstateError>>()?; + + Ok(Self { + receiver, + keep_running, + signer_set, + total_weight, + weight_threshold, + signer_entries, + blocks: Arc::new((Mutex::new(HashMap::new()), Condvar::new())), + signer_idle_timestamps: Arc::new(Mutex::new(HashMap::new())), + }) + } + + /// Run the signer database listener. 
+ pub fn run(&mut self) -> Result<(), NakamotoNodeError> { + info!("SignerDBListener: Starting up"); + loop { + let event = match self.receiver.recv_timeout(EVENT_RECEIVER_POLL) { + Ok(event) => event, + Err(std::sync::mpsc::RecvTimeoutError::Timeout) => { + continue; + } + Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => { + warn!("SignerDBListener: StackerDB event receiver disconnected"); + return Err(NakamotoNodeError::SigningCoordinatorFailure( + "StackerDB event receiver disconnected".into(), + )); + } + }; + + // was the miner asked to stop? + if !self.keep_running.load(Ordering::SeqCst) { + info!("SignerDBListener: received miner exit request. Aborting"); + return Err(NakamotoNodeError::ChannelClosed); + } + + // check to see if this event we got is a signer event + let is_signer_event = + event.contract_id.name.starts_with(SIGNERS_NAME) && event.contract_id.is_boot(); + + if !is_signer_event { + debug!("SignerDBListener: Ignoring StackerDB event for non-signer contract"; "contract" => %event.contract_id); + continue; + } + + let modified_slots = &event.modified_slots.clone(); + + let Ok(signer_event) = SignerEvent::::try_from(event).map_err(|e| { + warn!("SignerDBListener: Failure parsing StackerDB event into signer event. Ignoring message."; "err" => ?e); + }) else { + continue; + }; + let SignerEvent::SignerMessages(signer_set, messages) = signer_event else { + debug!("SignerDBListener: Received signer event other than a signer message. Ignoring."); + continue; + }; + if signer_set != self.signer_set { + debug!("SignerDBListener: Received signer event for other reward cycle. 
Ignoring."); + continue; + }; + let slot_ids = modified_slots + .iter() + .map(|chunk| chunk.slot_id) + .collect::>(); + + debug!("SignerDBListener: Received messages from signers"; + "count" => messages.len(), + "slot_ids" => ?slot_ids, + ); + + for (message, slot_id) in messages.into_iter().zip(slot_ids) { + let Some(signer_entry) = &self.signer_entries.get(&slot_id) else { + return Err(NakamotoNodeError::SignerSignatureError( + "Signer entry not found".into(), + )); + }; + let Ok(signer_pubkey) = StacksPublicKey::from_slice(&signer_entry.signing_key) + else { + return Err(NakamotoNodeError::SignerSignatureError( + "Failed to parse signer public key".into(), + )); + }; + + match message { + SignerMessageV0::BlockResponse(BlockResponse::Accepted(accepted)) => { + let BlockAccepted { + signer_signature_hash: block_sighash, + signature, + metadata, + tenure_extend_timestamp, // TOOD: utilize this info + } = accepted; + let (lock, cvar) = &*self.blocks; + let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); + + let block = match blocks.get_mut(&block_sighash) { + Some(block) => block, + None => { + info!( + "SignerDBListener: Received signature for block that we did not request. Ignoring."; + "signature" => %signature, + "block_signer_sighash" => %block_sighash, + "slot_id" => slot_id, + "signer_set" => self.signer_set, + ); + continue; + } + }; + + let Ok(valid_sig) = signer_pubkey.verify(block_sighash.bits(), &signature) + else { + warn!( + "SignerDBListener: Got invalid signature from a signer. Ignoring." + ); + continue; + }; + if !valid_sig { + warn!( + "SignerDBListener: Processed signature but didn't validate over the expected block. 
Ignoring"; + "signature" => %signature, + "block_signer_signature_hash" => %block_sighash, + "slot_id" => slot_id, + ); + continue; + } + + if Self::fault_injection_ignore_signatures() { + warn!("SignerDBListener: fault injection: ignoring well-formed signature for block"; + "block_signer_sighash" => %block_sighash, + "signer_pubkey" => signer_pubkey.to_hex(), + "signer_slot_id" => slot_id, + "signature" => %signature, + "signer_weight" => signer_entry.weight, + "total_weight_signed" => block.total_weight_signed, + ); + continue; + } + + if !block.gathered_signatures.contains_key(&slot_id) { + block.total_weight_signed = block + .total_weight_signed + .checked_add(signer_entry.weight) + .expect("FATAL: total weight signed exceeds u32::MAX"); + } + + info!("SignerDBListener: Signature Added to block"; + "block_signer_sighash" => %block_sighash, + "signer_pubkey" => signer_pubkey.to_hex(), + "signer_slot_id" => slot_id, + "signature" => %signature, + "signer_weight" => signer_entry.weight, + "total_weight_signed" => block.total_weight_signed, + "tenure_extend_timestamp" => tenure_extend_timestamp, + "server_version" => metadata.server_version, + ); + block.gathered_signatures.insert(slot_id, signature); + block.responded_signers.insert(signer_pubkey); + + if block.total_weight_signed >= self.weight_threshold { + // Signal to anyone waiting on this block that we have enough signatures + cvar.notify_all(); + } + } + SignerMessageV0::BlockResponse(BlockResponse::Rejected(rejected_data)) => { + let (lock, cvar) = &*self.blocks; + let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); + + let block = match blocks.get_mut(&rejected_data.signer_signature_hash) { + Some(block) => block, + None => { + info!( + "SignerDBListener: Received rejection for block that we did not request. 
Ignoring."; + "block_signer_sighash" => %rejected_data.signer_signature_hash, + "slot_id" => slot_id, + "signer_set" => self.signer_set, + ); + continue; + } + }; + + let rejected_pubkey = match rejected_data.recover_public_key() { + Ok(rejected_pubkey) => { + if rejected_pubkey != signer_pubkey { + warn!("SignerDBListener: Recovered public key from rejected data does not match signer's public key. Ignoring."); + continue; + } + rejected_pubkey + } + Err(e) => { + warn!("SignerDBListener: Failed to recover public key from rejected data: {e:?}. Ignoring."); + continue; + } + }; + block.responded_signers.insert(rejected_pubkey); + block.total_reject_weight = block + .total_reject_weight + .checked_add(signer_entry.weight) + .expect("FATAL: total weight rejected exceeds u32::MAX"); + + info!("SignerDBListener: Signer rejected block"; + "block_signer_sighash" => %rejected_data.signer_signature_hash, + "signer_pubkey" => rejected_pubkey.to_hex(), + "signer_slot_id" => slot_id, + "signature" => %rejected_data.signature, + "signer_weight" => signer_entry.weight, + "total_weight_signed" => block.total_weight_signed, + "reason" => rejected_data.reason, + "reason_code" => %rejected_data.reason_code, + "tenure_extend_timestamp" => rejected_data.tenure_extend_timestamp, + "server_version" => rejected_data.metadata.server_version, + ); + + if block + .total_reject_weight + .saturating_add(self.weight_threshold) + > self.total_weight + { + // Signal to anyone waiting on this block that we have enough rejections + cvar.notify_all(); + } + + continue; + } + SignerMessageV0::BlockProposal(_) => { + debug!("Received block proposal message. Ignoring."); + continue; + } + SignerMessageV0::BlockPushed(_) => { + debug!("Received block pushed message. Ignoring."); + continue; + } + SignerMessageV0::MockSignature(_) + | SignerMessageV0::MockProposal(_) + | SignerMessageV0::MockBlock(_) => { + debug!("Received mock message. 
Ignoring."); + continue; + } + }; + } + } + } + + /// Do we ignore signer signatures? + #[cfg(test)] + fn fault_injection_ignore_signatures() -> bool { + if *TEST_IGNORE_SIGNERS.lock().unwrap() == Some(true) { + return true; + } + false + } + + #[cfg(not(test))] + fn fault_injection_ignore_signatures() -> bool { + false + } +} diff --git a/testnet/stacks-node/src/neon_node.rs b/testnet/stacks-node/src/neon_node.rs index b688db100d..1639f93c43 100644 --- a/testnet/stacks-node/src/neon_node.rs +++ b/testnet/stacks-node/src/neon_node.rs @@ -223,7 +223,7 @@ use crate::burnchains::{make_bitcoin_indexer, Error as BurnchainControllerError} use crate::chain_data::MinerStats; use crate::config::NodeConfig; use crate::globals::{NeonGlobals as Globals, RelayerDirective}; -use crate::nakamoto_node::sign_coordinator::SignCoordinator; +use crate::nakamoto_node::signer_coordinator::SignerCoordinator; use crate::run_loop::neon::RunLoop; use crate::run_loop::RegisteredKey; use crate::ChainTip; @@ -2364,7 +2364,7 @@ impl BlockMinerThread { let mut miners_stackerdb = StackerDBSession::new(&self.config.node.rpc_bind, miner_contract_id); - SignCoordinator::send_miners_message( + SignerCoordinator::send_miners_message( &mining_key, &burn_db, &self.burn_block, @@ -2392,7 +2392,7 @@ impl BlockMinerThread { }; info!("Sending mock block to stackerdb: {mock_block:?}"); - SignCoordinator::send_miners_message( + SignerCoordinator::send_miners_message( &mining_key, &burn_db, &self.burn_block, diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index fd8d6c1096..fc7e31bea6 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -68,7 +68,7 @@ use crate::event_dispatcher::MinedNakamotoBlockEvent; use crate::nakamoto_node::miner::{ TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_NO_TENURE_EXTEND, }; -use crate::nakamoto_node::sign_coordinator::TEST_IGNORE_SIGNERS; +use 
crate::nakamoto_node::signerdb_listener::TEST_IGNORE_SIGNERS; use crate::neon::Counters; use crate::run_loop::boot_nakamoto; use crate::tests::nakamoto_integrations::{ From 04270d7522a86977148e9edb522952cf5dbe57cf Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Fri, 22 Nov 2024 15:43:28 -0500 Subject: [PATCH 20/41] feat: add timeout and additional checks in `get_block_status` --- .../src/nakamoto_node/signer_coordinator.rs | 174 +++++++++++------- .../src/nakamoto_node/signerdb_listener.rs | 26 ++- 2 files changed, 129 insertions(+), 71 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index f461ce93ec..5588bfaf3e 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -31,24 +31,28 @@ use stacks::chainstate::stacks::Error as ChainstateError; use stacks::codec::StacksMessageCodec; use stacks::libstackerdb::StackerDBChunkData; use stacks::net::stackerdb::StackerDBs; -use stacks::types::chainstate::{StacksPrivateKey, StacksPublicKey}; +use stacks::types::chainstate::{StacksBlockId, StacksPrivateKey, StacksPublicKey}; use stacks::util::hash::Sha512Trunc256Sum; use stacks::util::secp256k1::MessageSignature; use stacks::util_lib::boot::boot_code_id; -use super::signerdb_listener::{SignerDBListener, TimestampInfo}; +use super::signerdb_listener::{SignerDBListener, TimestampInfo, EVENT_RECEIVER_POLL}; use super::Error as NakamotoNodeError; use crate::event_dispatcher::StackerDBChannel; use crate::nakamoto_node::signerdb_listener::BlockStatus; use crate::neon::Counters; use crate::Config; -/// Helper function to determine if we should wait for more signatures -fn should_wait(status: Option<&BlockStatus>, weight_threshold: u32, total_weight: u32) -> bool { +/// Helper function to determine if signer threshold has been reached for a block +fn is_threshold_reached( + status: 
Option<&BlockStatus>, + weight_threshold: u32, + total_weight: u32, +) -> bool { match status { Some(status) => { - status.total_weight_signed < weight_threshold - && status.total_reject_weight.saturating_add(weight_threshold) <= total_weight + status.total_weight_signed >= weight_threshold + || status.total_reject_weight.saturating_add(weight_threshold) > total_weight } None => true, } @@ -268,81 +272,104 @@ impl SignerCoordinator { } } - self.get_block_status(&block.header.signer_signature_hash(), chain_state, counters) + self.get_block_status( + &block.header.signer_signature_hash(), + &block.block_id(), + chain_state, + sortdb, + burn_tip, + counters, + ) } /// Get the block status for a given block hash. /// If we have not yet received enough signatures for this block, this - /// method will block until we do. + /// method will block until we do. If this block shows up in the staging DB + /// before we have enough signatures, we will return the signatures from + /// there. If a new burnchain tip is detected, we will return an error. fn get_block_status( &self, - block_hash: &Sha512Trunc256Sum, + block_signer_sighash: &Sha512Trunc256Sum, + block_id: &StacksBlockId, chain_state: &mut StacksChainState, + sortdb: &SortitionDB, + burn_tip: &BlockSnapshot, counters: &Counters, ) -> Result, NakamotoNodeError> { let (lock, cvar) = &*self.blocks; let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); - // TODO: integrate this check into the waiting for the condvar - // Look in the nakamoto staging db -- a block can only get stored there - // if it has enough signing weight to clear the threshold. 
- // if let Ok(Some((stored_block, _sz))) = chain_state - // .nakamoto_blocks_db() - // .get_nakamoto_block(&block.block_id()) - // .map_err(|e| { - // warn!( - // "Failed to query chainstate for block {}: {e:?}", - // &block.block_id() - // ); - // e - // }) - // { - // debug!("SignCoordinator: Found signatures in relayed block"); - // counters.bump_naka_signer_pushed_blocks(); - // return Ok(stored_block.header.signer_signature); - // } + loop { + let (guard, timeout_result) = cvar + .wait_timeout_while(blocks, EVENT_RECEIVER_POLL, |map| { + !is_threshold_reached( + map.get(block_signer_sighash), + self.weight_threshold, + self.total_weight, + ) + }) + .expect("FATAL: failed to wait on block status cond var"); + blocks = guard; - // if Self::check_burn_tip_changed(sortdb, burn_tip) { - // debug!("SignCoordinator: Exiting due to new burnchain tip"); - // return Err(NakamotoNodeError::BurnchainTipChanged); - // } + // If we just received a timeout, we should check if the burnchain + // tip has changed or if we received this signed block already in + // the staging db. + if timeout_result.timed_out() { + // Look in the nakamoto staging db -- a block can only get stored there + // if it has enough signing weight to clear the threshold. 
+ if let Ok(Some((stored_block, _sz))) = chain_state + .nakamoto_blocks_db() + .get_nakamoto_block(block_id) + .map_err(|e| { + warn!( + "Failed to query chainstate for block: {e:?}"; + "block_id" => %block_id, + "block_signer_sighash" => %block_signer_sighash, + ); + e + }) + { + debug!("SignCoordinator: Found signatures in relayed block"); + counters.bump_naka_signer_pushed_blocks(); + return Ok(stored_block.header.signer_signature); + } - blocks = cvar - .wait_while(blocks, |map| { - should_wait( - map.get(block_hash), - self.weight_threshold, - self.total_weight, - ) - }) - .expect("FATAL: failed to wait on block status"); - let block_status = blocks.get(block_hash).cloned().ok_or_else(|| { - NakamotoNodeError::SigningCoordinatorFailure( - "Block unexpectedly missing from map".into(), - ) - })?; - if block_status - .total_reject_weight - .saturating_add(self.weight_threshold) - > self.total_weight - { - info!( - "{}/{} signers vote to reject block", - block_status.total_reject_weight, self.total_weight; - "stacks_block_hash" => %block_hash, - ); - counters.bump_naka_rejected_blocks(); - Err(NakamotoNodeError::SignersRejected) - } else if block_status.total_weight_signed >= self.weight_threshold { - info!("Received enough signatures, block accepted"; - "stacks_block_hash" => %block_hash, - ); - Ok(block_status.gathered_signatures.values().cloned().collect()) - } else { - info!("Unblocked without reaching the threshold, likely due to an interruption"; - "stacks_block_hash" => %block_hash, - ); - Err(NakamotoNodeError::ChannelClosed) + if Self::check_burn_tip_changed(sortdb, burn_tip) { + debug!("SignCoordinator: Exiting due to new burnchain tip"); + return Err(NakamotoNodeError::BurnchainTipChanged); + } + } + // Else, we have received enough signatures to proceed + else { + let block_status = blocks.get(block_signer_sighash).ok_or_else(|| { + NakamotoNodeError::SigningCoordinatorFailure( + "Block unexpectedly missing from map".into(), + ) + })?; + + if 
block_status + .total_reject_weight + .saturating_add(self.weight_threshold) + > self.total_weight + { + info!( + "{}/{} signers vote to reject block", + block_status.total_reject_weight, self.total_weight; + "block_signer_sighash" => %block_signer_sighash, + ); + counters.bump_naka_rejected_blocks(); + return Err(NakamotoNodeError::SignersRejected); + } else if block_status.total_weight_signed >= self.weight_threshold { + info!("Received enough signatures, block accepted"; + "block_signer_sighash" => %block_signer_sighash, + ); + return Ok(block_status.gathered_signatures.values().cloned().collect()); + } else { + return Err(NakamotoNodeError::SigningCoordinatorFailure( + "Unblocked without reaching the threshold".into(), + )); + } + } } } @@ -368,4 +395,17 @@ impl SignerCoordinator { // tenure. u64::MAX } + + /// Check if the tenure needs to change + fn check_burn_tip_changed(sortdb: &SortitionDB, burn_block: &BlockSnapshot) -> bool { + let cur_burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) + .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); + + if cur_burn_chain_tip.consensus_hash != burn_block.consensus_hash { + info!("SignCoordinator: Cancel signature aggregation; burnchain tip has changed"); + true + } else { + false + } + } } diff --git a/testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs index 89769b3e3e..1b70beee07 100644 --- a/testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs @@ -295,6 +295,13 @@ impl SignerDBListener { // Signal to anyone waiting on this block that we have enough signatures cvar.notify_all(); } + + // Update the idle timestamp for this signer + self.update_idle_timestamp( + signer_pubkey, + tenure_extend_timestamp, + signer_entry.weight, + ); } SignerMessageV0::BlockResponse(BlockResponse::Rejected(rejected_data)) => { let (lock, cvar) = 
&*self.blocks; @@ -354,27 +361,38 @@ impl SignerDBListener { cvar.notify_all(); } - continue; + // Update the idle timestamp for this signer + self.update_idle_timestamp( + signer_pubkey, + rejected_data.tenure_extend_timestamp, + signer_entry.weight, + ); } SignerMessageV0::BlockProposal(_) => { debug!("Received block proposal message. Ignoring."); - continue; } SignerMessageV0::BlockPushed(_) => { debug!("Received block pushed message. Ignoring."); - continue; } SignerMessageV0::MockSignature(_) | SignerMessageV0::MockProposal(_) | SignerMessageV0::MockBlock(_) => { debug!("Received mock message. Ignoring."); - continue; } }; } } } + fn update_idle_timestamp(&self, signer_pubkey: StacksPublicKey, timestamp: u64, weight: u32) { + let mut idle_timestamps = self + .signer_idle_timestamps + .lock() + .expect("FATAL: failed to lock idle timestamps"); + let timestamp_info = TimestampInfo { timestamp, weight }; + idle_timestamps.insert(signer_pubkey, timestamp_info); + } + /// Do we ignore signer signatures? 
#[cfg(test)] fn fault_injection_ignore_signatures() -> bool { From f92c8198967ca806be498e14d670db9c1cdff7a4 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Fri, 22 Nov 2024 15:51:23 -0500 Subject: [PATCH 21/41] chore: `SignerDBListener` -> `StackerDBListener` --- testnet/stacks-node/src/nakamoto_node.rs | 2 +- ...erdb_listener.rs => stackerdb_listener.rs} | 44 ++++++++++--------- 2 files changed, 24 insertions(+), 22 deletions(-) rename testnet/stacks-node/src/nakamoto_node/{signerdb_listener.rs => stackerdb_listener.rs} (89%) diff --git a/testnet/stacks-node/src/nakamoto_node.rs b/testnet/stacks-node/src/nakamoto_node.rs index 090170837a..9944bc16b1 100644 --- a/testnet/stacks-node/src/nakamoto_node.rs +++ b/testnet/stacks-node/src/nakamoto_node.rs @@ -43,7 +43,7 @@ pub mod miner; pub mod peer; pub mod relayer; pub mod signer_coordinator; -pub mod signerdb_listener; +pub mod stackerdb_listener; use self::peer::PeerThread; use self::relayer::{RelayerDirective, RelayerThread}; diff --git a/testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs similarity index 89% rename from testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs rename to testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index 1b70beee07..91f9539de1 100644 --- a/testnet/stacks-node/src/nakamoto_node/signerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -59,10 +59,10 @@ pub(crate) struct TimestampInfo { pub weight: u32, } -/// The listener for the signer database, which listens for messages from the +/// The listener for the StackerDB, which listens for messages from the /// signers and tracks the state of block signatures and idle timestamps. 
#[derive(Debug)] -pub struct SignerDBListener { +pub struct StackerDBListener { /// Channel to receive StackerDB events receiver: Receiver, /// Flag to shut the listener down @@ -86,7 +86,7 @@ pub struct SignerDBListener { pub(crate) signer_idle_timestamps: Arc>>, } -impl SignerDBListener { +impl StackerDBListener { pub fn new( stackerdb_channel: Arc>, keep_running: Arc, @@ -147,9 +147,9 @@ impl SignerDBListener { }) } - /// Run the signer database listener. + /// Run the StackerDB listener. pub fn run(&mut self) -> Result<(), NakamotoNodeError> { - info!("SignerDBListener: Starting up"); + info!("StackerDBListener: Starting up"); loop { let event = match self.receiver.recv_timeout(EVENT_RECEIVER_POLL) { Ok(event) => event, @@ -157,7 +157,7 @@ impl SignerDBListener { continue; } Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => { - warn!("SignerDBListener: StackerDB event receiver disconnected"); + warn!("StackerDBListener: StackerDB event receiver disconnected"); return Err(NakamotoNodeError::SigningCoordinatorFailure( "StackerDB event receiver disconnected".into(), )); @@ -166,7 +166,7 @@ impl SignerDBListener { // was the miner asked to stop? if !self.keep_running.load(Ordering::SeqCst) { - info!("SignerDBListener: received miner exit request. Aborting"); + info!("StackerDBListener: received miner exit request. Aborting"); return Err(NakamotoNodeError::ChannelClosed); } @@ -175,23 +175,25 @@ impl SignerDBListener { event.contract_id.name.starts_with(SIGNERS_NAME) && event.contract_id.is_boot(); if !is_signer_event { - debug!("SignerDBListener: Ignoring StackerDB event for non-signer contract"; "contract" => %event.contract_id); + debug!("StackerDBListener: Ignoring StackerDB event for non-signer contract"; "contract" => %event.contract_id); continue; } let modified_slots = &event.modified_slots.clone(); let Ok(signer_event) = SignerEvent::::try_from(event).map_err(|e| { - warn!("SignerDBListener: Failure parsing StackerDB event into signer event. 
Ignoring message."; "err" => ?e); + warn!("StackerDBListener: Failure parsing StackerDB event into signer event. Ignoring message."; "err" => ?e); }) else { continue; }; let SignerEvent::SignerMessages(signer_set, messages) = signer_event else { - debug!("SignerDBListener: Received signer event other than a signer message. Ignoring."); + debug!("StackerDBListener: Received signer event other than a signer message. Ignoring."); continue; }; if signer_set != self.signer_set { - debug!("SignerDBListener: Received signer event for other reward cycle. Ignoring."); + debug!( + "StackerDBListener: Received signer event for other reward cycle. Ignoring." + ); continue; }; let slot_ids = modified_slots @@ -199,7 +201,7 @@ impl SignerDBListener { .map(|chunk| chunk.slot_id) .collect::>(); - debug!("SignerDBListener: Received messages from signers"; + debug!("StackerDBListener: Received messages from signers"; "count" => messages.len(), "slot_ids" => ?slot_ids, ); @@ -232,7 +234,7 @@ impl SignerDBListener { Some(block) => block, None => { info!( - "SignerDBListener: Received signature for block that we did not request. Ignoring."; + "StackerDBListener: Received signature for block that we did not request. Ignoring."; "signature" => %signature, "block_signer_sighash" => %block_sighash, "slot_id" => slot_id, @@ -245,13 +247,13 @@ impl SignerDBListener { let Ok(valid_sig) = signer_pubkey.verify(block_sighash.bits(), &signature) else { warn!( - "SignerDBListener: Got invalid signature from a signer. Ignoring." + "StackerDBListener: Got invalid signature from a signer. Ignoring." ); continue; }; if !valid_sig { warn!( - "SignerDBListener: Processed signature but didn't validate over the expected block. Ignoring"; + "StackerDBListener: Processed signature but didn't validate over the expected block. 
Ignoring"; "signature" => %signature, "block_signer_signature_hash" => %block_sighash, "slot_id" => slot_id, @@ -260,7 +262,7 @@ impl SignerDBListener { } if Self::fault_injection_ignore_signatures() { - warn!("SignerDBListener: fault injection: ignoring well-formed signature for block"; + warn!("StackerDBListener: fault injection: ignoring well-formed signature for block"; "block_signer_sighash" => %block_sighash, "signer_pubkey" => signer_pubkey.to_hex(), "signer_slot_id" => slot_id, @@ -278,7 +280,7 @@ impl SignerDBListener { .expect("FATAL: total weight signed exceeds u32::MAX"); } - info!("SignerDBListener: Signature Added to block"; + info!("StackerDBListener: Signature Added to block"; "block_signer_sighash" => %block_sighash, "signer_pubkey" => signer_pubkey.to_hex(), "signer_slot_id" => slot_id, @@ -311,7 +313,7 @@ impl SignerDBListener { Some(block) => block, None => { info!( - "SignerDBListener: Received rejection for block that we did not request. Ignoring."; + "StackerDBListener: Received rejection for block that we did not request. Ignoring."; "block_signer_sighash" => %rejected_data.signer_signature_hash, "slot_id" => slot_id, "signer_set" => self.signer_set, @@ -323,13 +325,13 @@ impl SignerDBListener { let rejected_pubkey = match rejected_data.recover_public_key() { Ok(rejected_pubkey) => { if rejected_pubkey != signer_pubkey { - warn!("SignerDBListener: Recovered public key from rejected data does not match signer's public key. Ignoring."); + warn!("StackerDBListener: Recovered public key from rejected data does not match signer's public key. Ignoring."); continue; } rejected_pubkey } Err(e) => { - warn!("SignerDBListener: Failed to recover public key from rejected data: {e:?}. Ignoring."); + warn!("StackerDBListener: Failed to recover public key from rejected data: {e:?}. 
Ignoring."); continue; } }; @@ -339,7 +341,7 @@ impl SignerDBListener { .checked_add(signer_entry.weight) .expect("FATAL: total weight rejected exceeds u32::MAX"); - info!("SignerDBListener: Signer rejected block"; + info!("StackerDBListener: Signer rejected block"; "block_signer_sighash" => %rejected_data.signer_signature_hash, "signer_pubkey" => rejected_pubkey.to_hex(), "signer_slot_id" => slot_id, From cf540a5752df7a6ded37981160ec23be741d1523 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Fri, 22 Nov 2024 15:53:25 -0500 Subject: [PATCH 22/41] ifix: resolve merge errors --- .../stacks-node/src/nakamoto_node/stackerdb_listener.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index 91f9539de1..6b02dd73d6 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -225,8 +225,10 @@ impl StackerDBListener { signer_signature_hash: block_sighash, signature, metadata, - tenure_extend_timestamp, // TOOD: utilize this info + response_data, } = accepted; + let tenure_extend_timestamp = response_data.tenure_extend_timestamp; + let (lock, cvar) = &*self.blocks; let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); @@ -350,7 +352,7 @@ impl StackerDBListener { "total_weight_signed" => block.total_weight_signed, "reason" => rejected_data.reason, "reason_code" => %rejected_data.reason_code, - "tenure_extend_timestamp" => rejected_data.tenure_extend_timestamp, + "tenure_extend_timestamp" => rejected_data.response_data.tenure_extend_timestamp, "server_version" => rejected_data.metadata.server_version, ); @@ -366,7 +368,7 @@ impl StackerDBListener { // Update the idle timestamp for this signer self.update_idle_timestamp( signer_pubkey, - rejected_data.tenure_extend_timestamp, + rejected_data.response_data.tenure_extend_timestamp, 
signer_entry.weight, ); } From aea205baf062d25db7df387831ecab7d22598766 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Fri, 22 Nov 2024 15:55:54 -0500 Subject: [PATCH 23/41] chore: finish rename --- .../src/nakamoto_node/signer_coordinator.rs | 15 ++++++++------- testnet/stacks-node/src/tests/signer/v0.rs | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 5588bfaf3e..018edc2740 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -36,10 +36,11 @@ use stacks::util::hash::Sha512Trunc256Sum; use stacks::util::secp256k1::MessageSignature; use stacks::util_lib::boot::boot_code_id; -use super::signerdb_listener::{SignerDBListener, TimestampInfo, EVENT_RECEIVER_POLL}; use super::Error as NakamotoNodeError; use crate::event_dispatcher::StackerDBChannel; -use crate::nakamoto_node::signerdb_listener::BlockStatus; +use crate::nakamoto_node::stackerdb_listener::{ + BlockStatus, StackerDBListener, TimestampInfo, EVENT_RECEIVER_POLL, +}; use crate::neon::Counters; use crate::Config; @@ -96,8 +97,8 @@ impl SignerCoordinator { message_key: StacksPrivateKey, config: &Config, ) -> Result { - // Create the signer DB listener - let mut listener = SignerDBListener::new( + // Create the stacker DB listener + let mut listener = StackerDBListener::new( stackerdb_channel, keep_running.clone(), reward_set, @@ -125,14 +126,14 @@ impl SignerCoordinator { // Spawn the signer DB listener thread let listener_thread = std::thread::Builder::new() - .name("signerdb_listener".to_string()) + .name("stackerdb_listener".to_string()) .spawn(move || { if let Err(e) = listener.run() { - error!("SignerDBListener: failed to run: {e:?}"); + error!("StackerDBListener: failed to run: {e:?}"); } }) .map_err(|e| { - error!("Failed to spawn signerdb_listener thread: {e:?}"); + 
error!("Failed to spawn stackerdb_listener thread: {e:?}"); ChainstateError::MinerAborted })?; diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index fc7e31bea6..3a1c7f0179 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -68,7 +68,7 @@ use crate::event_dispatcher::MinedNakamotoBlockEvent; use crate::nakamoto_node::miner::{ TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_NO_TENURE_EXTEND, }; -use crate::nakamoto_node::signerdb_listener::TEST_IGNORE_SIGNERS; +use crate::nakamoto_node::stackerdb_listener::TEST_IGNORE_SIGNERS; use crate::neon::Counters; use crate::run_loop::boot_nakamoto; use crate::tests::nakamoto_integrations::{ From d173d98507c68cc2467292daa8ae1a38644117cf Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Fri, 22 Nov 2024 17:19:04 -0500 Subject: [PATCH 24/41] fix: replace the channel receiver when the listener stops --- .../src/nakamoto_node/stackerdb_listener.rs | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index 6b02dd73d6..cfadbc6fee 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -61,10 +61,11 @@ pub(crate) struct TimestampInfo { /// The listener for the StackerDB, which listens for messages from the /// signers and tracks the state of block signatures and idle timestamps. 
-#[derive(Debug)] pub struct StackerDBListener { - /// Channel to receive StackerDB events - receiver: Receiver, + /// Channel to communicate with StackerDB + stackerdb_channel: Arc>, + /// Receiver end of the StackerDB events channel + receiver: Option>, /// Flag to shut the listener down keep_running: Arc, /// The signer set for this tenure (0 or 1) @@ -136,7 +137,8 @@ impl StackerDBListener { .collect::, ChainstateError>>()?; Ok(Self { - receiver, + stackerdb_channel, + receiver: Some(receiver), keep_running, signer_set, total_weight, @@ -150,8 +152,15 @@ impl StackerDBListener { /// Run the StackerDB listener. pub fn run(&mut self) -> Result<(), NakamotoNodeError> { info!("StackerDBListener: Starting up"); + + let Some(receiver) = &self.receiver else { + return Err(NakamotoNodeError::SigningCoordinatorFailure( + "StackerDBListener: Failed to obtain the StackerDB event receiver".into(), + )); + }; + loop { - let event = match self.receiver.recv_timeout(EVENT_RECEIVER_POLL) { + let event = match receiver.recv_timeout(EVENT_RECEIVER_POLL) { Ok(event) => event, Err(std::sync::mpsc::RecvTimeoutError::Timeout) => { continue; @@ -411,3 +420,15 @@ impl StackerDBListener { false } } + +impl Drop for StackerDBListener { + fn drop(&mut self) { + let stackerdb_channel = self + .stackerdb_channel + .lock() + .expect("FATAL: failed to lock stackerdb channel"); + stackerdb_channel.replace_receiver(self.receiver.take().expect( + "FATAL: lost possession of the StackerDB channel before dropping SignCoordinator", + )); + } +} From a21fa4d8d8cbd07a3e6eccc5707139b188004901 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Mon, 25 Nov 2024 15:10:54 -0500 Subject: [PATCH 25/41] feat: shutdown stacker db listener with miner --- .../stacks-node/src/nakamoto_node/miner.rs | 342 +++++++++--------- .../src/nakamoto_node/signer_coordinator.rs | 20 +- .../src/nakamoto_node/stackerdb_listener.rs | 22 +- 3 files changed, 215 insertions(+), 169 deletions(-) diff --git 
a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index d51cccb4ba..ea54f508b6 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -323,187 +323,205 @@ impl BlockMinerThread { // now, actually run this tenure loop { - #[cfg(test)] - if *TEST_MINE_STALL.lock().unwrap() == Some(true) { - // Do an extra check just so we don't log EVERY time. - warn!("Mining is stalled due to testing directive"); - while *TEST_MINE_STALL.lock().unwrap() == Some(true) { - std::thread::sleep(std::time::Duration::from_millis(10)); - } - warn!("Mining is no longer stalled due to testing directive. Continuing..."); + if let Err(e) = self.miner_main_loop( + &mut coordinator, + &sortdb, + &mut stackerdbs, + &mut last_block_rejected, + ) { + // Before stopping this miner, shutdown the coordinator thread. + coordinator.shutdown(); + return Err(e); } - let new_block = loop { - // If we're mock mining, we may not have processed the block that the - // actual tenure winner committed to yet. So, before attempting to - // mock mine, check if the parent is processed. - if self.config.get_node_config(false).mock_mining { - let burn_db_path = self.config.get_burn_db_file_path(); - let mut burn_db = SortitionDB::open( - &burn_db_path, - true, - self.burnchain.pox_constants.clone(), - ) - .expect("FATAL: could not open sortition DB"); - let burn_tip_changed = self.check_burn_tip_changed(&burn_db); - let mut chain_state = neon_node::open_chainstate_with_faults(&self.config) - .expect("FATAL: could not open chainstate DB"); - match burn_tip_changed - .and_then(|_| self.load_block_parent_info(&mut burn_db, &mut chain_state)) - { - Ok(..) 
=> {} - Err(NakamotoNodeError::ParentNotFound) => { - info!("Mock miner has not processed parent block yet, sleeping and trying again"); - thread::sleep(Duration::from_millis(ABORT_TRY_AGAIN_MS)); - continue; - } - Err(e) => { - warn!("Mock miner failed to load parent info: {e:?}"); - return Err(e); - } - } - } + } + } - match self.mine_block() { - Ok(x) => { - if !self.validate_timestamp(&x)? { - info!("Block mined too quickly. Will try again."; - "block_timestamp" => x.header.timestamp, - ); - continue; - } - break Some(x); - } - Err(NakamotoNodeError::MiningFailure(ChainstateError::MinerAborted)) => { - info!("Miner interrupted while mining, will try again"); - // sleep, and try again. if the miner was interrupted because the burnchain - // view changed, the next `mine_block()` invocation will error + /// The main loop for the miner thread. This is where the miner will mine + /// blocks and then attempt to sign and broadcast them. + fn miner_main_loop( + &mut self, + coordinator: &mut SignerCoordinator, + sortdb: &SortitionDB, + stackerdbs: &mut StackerDBs, + last_block_rejected: &mut bool, + ) -> Result<(), NakamotoNodeError> { + #[cfg(test)] + if *TEST_MINE_STALL.lock().unwrap() == Some(true) { + // Do an extra check just so we don't log EVERY time. + warn!("Mining is stalled due to testing directive"); + while *TEST_MINE_STALL.lock().unwrap() == Some(true) { + std::thread::sleep(std::time::Duration::from_millis(10)); + } + warn!("Mining is no longer stalled due to testing directive. Continuing..."); + } + let new_block = loop { + // If we're mock mining, we may not have processed the block that the + // actual tenure winner committed to yet. So, before attempting to + // mock mine, check if the parent is processed. 
+ if self.config.get_node_config(false).mock_mining { + let burn_db_path = self.config.get_burn_db_file_path(); + let mut burn_db = + SortitionDB::open(&burn_db_path, true, self.burnchain.pox_constants.clone()) + .expect("FATAL: could not open sortition DB"); + let burn_tip_changed = self.check_burn_tip_changed(&burn_db); + let mut chain_state = neon_node::open_chainstate_with_faults(&self.config) + .expect("FATAL: could not open chainstate DB"); + match burn_tip_changed + .and_then(|_| self.load_block_parent_info(&mut burn_db, &mut chain_state)) + { + Ok(..) => {} + Err(NakamotoNodeError::ParentNotFound) => { + info!("Mock miner has not processed parent block yet, sleeping and trying again"); thread::sleep(Duration::from_millis(ABORT_TRY_AGAIN_MS)); continue; } - Err(NakamotoNodeError::MiningFailure( - ChainstateError::NoTransactionsToMine, - )) => { - debug!("Miner did not find any transactions to mine"); - break None; - } Err(e) => { - warn!("Failed to mine block: {e:?}"); - - // try again, in case a new sortition is pending - self.globals - .raise_initiative(format!("MiningFailure: {e:?}")); - return Err(NakamotoNodeError::MiningFailure( - ChainstateError::MinerAborted, - )); + warn!("Mock miner failed to load parent info: {e:?}"); + return Err(e); } } - }; - - if let Some(mut new_block) = new_block { - Self::fault_injection_block_broadcast_stall(&new_block); - let mut chain_state = neon_node::open_chainstate_with_faults(&self.config) - .map_err(|e| { - NakamotoNodeError::SigningCoordinatorFailure(format!( - "Failed to open chainstate DB. Cannot mine! 
{e:?}" - )) - })?; - let signer_signature = match coordinator.propose_block( - &mut new_block, - &self.burn_block, - &self.burnchain, - &sortdb, - &mut chain_state, - &mut stackerdbs, - &self.globals.counters, - &self.burn_election_block.consensus_hash, - ) { - Ok(x) => x, - Err(e) => match e { - NakamotoNodeError::StacksTipChanged => { - info!("Stacks tip changed while waiting for signatures"; - "signer_sighash" => %new_block.header.signer_signature_hash(), - "block_height" => new_block.header.chain_length, - "consensus_hash" => %new_block.header.consensus_hash, - ); - return Err(e); - } - NakamotoNodeError::BurnchainTipChanged => { - info!("Burnchain tip changed while waiting for signatures"; - "signer_sighash" => %new_block.header.signer_signature_hash(), - "block_height" => new_block.header.chain_length, - "consensus_hash" => %new_block.header.consensus_hash, - ); - return Err(e); - } - _ => { - // Sleep for a bit to allow signers to catch up - let pause_ms = if last_block_rejected { - self.config.miner.subsequent_rejection_pause_ms - } else { - self.config.miner.first_rejection_pause_ms - }; - - error!("Error while gathering signatures: {e:?}. Will try mining again in {pause_ms}."; - "signer_sighash" => %new_block.header.signer_signature_hash(), - "block_height" => new_block.header.chain_length, - "consensus_hash" => %new_block.header.consensus_hash, - ); - thread::sleep(Duration::from_millis(pause_ms)); - last_block_rejected = true; - continue; - } - }, - }; - last_block_rejected = false; - - let reward_set = self.load_signer_set()?; + } - new_block.header.signer_signature = signer_signature; - if let Err(e) = self.broadcast(new_block.clone(), reward_set, &stackerdbs) { - warn!("Error accepting own block: {e:?}. Will try mining again."); + match self.mine_block() { + Ok(x) => { + if !self.validate_timestamp(&x)? { + info!("Block mined too quickly. 
Will try again."; + "block_timestamp" => x.header.timestamp, + ); + continue; + } + break Some(x); + } + Err(NakamotoNodeError::MiningFailure(ChainstateError::MinerAborted)) => { + info!("Miner interrupted while mining, will try again"); + // sleep, and try again. if the miner was interrupted because the burnchain + // view changed, the next `mine_block()` invocation will error + thread::sleep(Duration::from_millis(ABORT_TRY_AGAIN_MS)); continue; - } else { - info!( - "Miner: Block signed by signer set and broadcasted"; - "signer_sighash" => %new_block.header.signer_signature_hash(), - "stacks_block_hash" => %new_block.header.block_hash(), - "stacks_block_id" => %new_block.header.block_id(), - "block_height" => new_block.header.chain_length, - "consensus_hash" => %new_block.header.consensus_hash, - ); } - - // update mined-block counters and mined-tenure counters - self.globals.counters.bump_naka_mined_blocks(); - if self.last_block_mined.is_some() { - // this is the first block of the tenure, bump tenure counter - self.globals.counters.bump_naka_mined_tenures(); + Err(NakamotoNodeError::MiningFailure(ChainstateError::NoTransactionsToMine)) => { + debug!("Miner did not find any transactions to mine"); + break None; + } + Err(e) => { + warn!("Failed to mine block: {e:?}"); + + // try again, in case a new sortition is pending + self.globals + .raise_initiative(format!("MiningFailure: {e:?}")); + return Err(NakamotoNodeError::MiningFailure( + ChainstateError::MinerAborted, + )); } + } + }; + + if let Some(mut new_block) = new_block { + Self::fault_injection_block_broadcast_stall(&new_block); + let mut chain_state = + neon_node::open_chainstate_with_faults(&self.config).map_err(|e| { + NakamotoNodeError::SigningCoordinatorFailure(format!( + "Failed to open chainstate DB. Cannot mine! 
{e:?}" + )) + })?; + let signer_signature = match coordinator.propose_block( + &mut new_block, + &self.burn_block, + &self.burnchain, + &sortdb, + &mut chain_state, + stackerdbs, + &self.globals.counters, + &self.burn_election_block.consensus_hash, + ) { + Ok(x) => x, + Err(e) => match e { + NakamotoNodeError::StacksTipChanged => { + info!("Stacks tip changed while waiting for signatures"; + "signer_sighash" => %new_block.header.signer_signature_hash(), + "block_height" => new_block.header.chain_length, + "consensus_hash" => %new_block.header.consensus_hash, + ); + return Err(e); + } + NakamotoNodeError::BurnchainTipChanged => { + info!("Burnchain tip changed while waiting for signatures"; + "signer_sighash" => %new_block.header.signer_signature_hash(), + "block_height" => new_block.header.chain_length, + "consensus_hash" => %new_block.header.consensus_hash, + ); + return Err(e); + } + _ => { + // Sleep for a bit to allow signers to catch up + let pause_ms = if *last_block_rejected { + self.config.miner.subsequent_rejection_pause_ms + } else { + self.config.miner.first_rejection_pause_ms + }; + + error!("Error while gathering signatures: {e:?}. Will try mining again in {pause_ms}."; + "signer_sighash" => %new_block.header.signer_signature_hash(), + "block_height" => new_block.header.chain_length, + "consensus_hash" => %new_block.header.consensus_hash, + ); + thread::sleep(Duration::from_millis(pause_ms)); + *last_block_rejected = true; + return Ok(()); + } + }, + }; + *last_block_rejected = false; + + let reward_set = self.load_signer_set()?; - // wake up chains coordinator - Self::fault_injection_block_announce_stall(&new_block); - self.globals.coord().announce_new_stacks_block(); + new_block.header.signer_signature = signer_signature; + if let Err(e) = self.broadcast(new_block.clone(), reward_set, &stackerdbs) { + warn!("Error accepting own block: {e:?}. 
Will try mining again."); + return Ok(()); + } else { + info!( + "Miner: Block signed by signer set and broadcasted"; + "signer_sighash" => %new_block.header.signer_signature_hash(), + "stacks_block_hash" => %new_block.header.block_hash(), + "stacks_block_id" => %new_block.header.block_id(), + "block_height" => new_block.header.chain_length, + "consensus_hash" => %new_block.header.consensus_hash, + ); + } - self.last_block_mined = Some(new_block); + // update mined-block counters and mined-tenure counters + self.globals.counters.bump_naka_mined_blocks(); + if self.last_block_mined.is_some() { + // this is the first block of the tenure, bump tenure counter + self.globals.counters.bump_naka_mined_tenures(); } - let Ok(sort_db) = SortitionDB::open( - &self.config.get_burn_db_file_path(), - true, - self.burnchain.pox_constants.clone(), - ) else { - error!("Failed to open sortition DB. Will try mining again."); - continue; - }; + // wake up chains coordinator + Self::fault_injection_block_announce_stall(&new_block); + self.globals.coord().announce_new_stacks_block(); - let wait_start = Instant::now(); - while wait_start.elapsed() < self.config.miner.wait_on_interim_blocks { - thread::sleep(Duration::from_millis(ABORT_TRY_AGAIN_MS)); - if self.check_burn_tip_changed(&sort_db).is_err() { - return Err(NakamotoNodeError::BurnchainTipChanged); - } + self.last_block_mined = Some(new_block); + } + + let Ok(sort_db) = SortitionDB::open( + &self.config.get_burn_db_file_path(), + true, + self.burnchain.pox_constants.clone(), + ) else { + error!("Failed to open sortition DB. Will try mining again."); + return Ok(()); + }; + + let wait_start = Instant::now(); + while wait_start.elapsed() < self.config.miner.wait_on_interim_blocks { + thread::sleep(Duration::from_millis(ABORT_TRY_AGAIN_MS)); + if self.check_burn_tip_changed(&sort_db).is_err() { + return Err(NakamotoNodeError::BurnchainTipChanged); } } + + Ok(()) } /// Load the signer set active for this miner's blocks. 
This is the diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 018edc2740..575f43adf7 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -81,6 +81,8 @@ pub struct SignerCoordinator { /// - key: StacksPublicKey /// - value: TimestampInfo signer_idle_timestamps: Arc>>, + /// Keep running flag for the signer DB listener thread + keep_running: Arc, /// Handle for the signer DB listener thread listener_thread: Option>, } @@ -90,16 +92,19 @@ impl SignerCoordinator { /// This will spawn a new thread to listen for messages from the signer DB. pub fn new( stackerdb_channel: Arc>, - keep_running: Arc, + node_keep_running: Arc, reward_set: &RewardSet, burn_tip: &BlockSnapshot, burnchain: &Burnchain, message_key: StacksPrivateKey, config: &Config, ) -> Result { + let keep_running = Arc::new(AtomicBool::new(true)); + // Create the stacker DB listener let mut listener = StackerDBListener::new( stackerdb_channel, + node_keep_running.clone(), keep_running.clone(), reward_set, burn_tip, @@ -121,6 +126,7 @@ impl SignerCoordinator { weight_threshold: listener.weight_threshold, blocks: listener.blocks.clone(), signer_idle_timestamps: listener.signer_idle_timestamps.clone(), + keep_running, listener_thread: None, }; @@ -409,4 +415,16 @@ impl SignerCoordinator { false } } + + pub fn shutdown(&mut self) { + if let Some(listener_thread) = self.listener_thread.take() { + info!("SignerCoordinator: shutting down stacker db listener thread"); + self.keep_running + .store(false, std::sync::atomic::Ordering::Relaxed); + if let Err(e) = listener_thread.join() { + error!("Failed to join signer listener thread: {e:?}"); + } + debug!("SignerCoordinator: stacker db listener thread has shut down"); + } + } } diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs 
b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index cfadbc6fee..eddad6e6d8 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -66,6 +66,8 @@ pub struct StackerDBListener { stackerdb_channel: Arc>, /// Receiver end of the StackerDB events channel receiver: Option>, + /// Flag to shut the node down + node_keep_running: Arc, /// Flag to shut the listener down keep_running: Arc, /// The signer set for this tenure (0 or 1) @@ -90,6 +92,7 @@ pub struct StackerDBListener { impl StackerDBListener { pub fn new( stackerdb_channel: Arc>, + node_keep_running: Arc, keep_running: Arc, reward_set: &RewardSet, burn_tip: &BlockSnapshot, @@ -139,6 +142,7 @@ impl StackerDBListener { Ok(Self { stackerdb_channel, receiver: Some(receiver), + node_keep_running, keep_running, signer_set, total_weight, @@ -160,6 +164,18 @@ impl StackerDBListener { }; loop { + // was the node asked to stop? + if !self.node_keep_running.load(Ordering::SeqCst) { + info!("StackerDBListener: received node exit request. Aborting"); + return Err(NakamotoNodeError::ChannelClosed); + } + + // was the listener asked to stop? + if !self.keep_running.load(Ordering::SeqCst) { + info!("StackerDBListener: received listener exit request. Aborting"); + return Err(NakamotoNodeError::ChannelClosed); + } + let event = match receiver.recv_timeout(EVENT_RECEIVER_POLL) { Ok(event) => event, Err(std::sync::mpsc::RecvTimeoutError::Timeout) => { @@ -173,12 +189,6 @@ impl StackerDBListener { } }; - // was the miner asked to stop? - if !self.keep_running.load(Ordering::SeqCst) { - info!("StackerDBListener: received miner exit request. 
Aborting"); - return Err(NakamotoNodeError::ChannelClosed); - } - // check to see if this event we got is a signer event let is_signer_event = event.contract_id.name.starts_with(SIGNERS_NAME) && event.contract_id.is_boot(); From ab0807e5bcc999025f7847cae6e7d0fcd4e3b9d3 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Mon, 25 Nov 2024 16:59:21 -0500 Subject: [PATCH 26/41] chore: return okay when exit is requested --- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 3 ++- testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 575f43adf7..2fcb7b1ce3 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -99,6 +99,7 @@ impl SignerCoordinator { message_key: StacksPrivateKey, config: &Config, ) -> Result { + info!("SignerCoordinator: starting up"); let keep_running = Arc::new(AtomicBool::new(true)); // Create the stacker DB listener @@ -135,7 +136,7 @@ impl SignerCoordinator { .name("stackerdb_listener".to_string()) .spawn(move || { if let Err(e) = listener.run() { - error!("StackerDBListener: failed to run: {e:?}"); + error!("StackerDBListener: exited with error: {e:?}"); } }) .map_err(|e| { diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index eddad6e6d8..4f213abf6c 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -167,13 +167,13 @@ impl StackerDBListener { // was the node asked to stop? if !self.node_keep_running.load(Ordering::SeqCst) { info!("StackerDBListener: received node exit request. 
Aborting"); - return Err(NakamotoNodeError::ChannelClosed); + return Ok(()); } // was the listener asked to stop? if !self.keep_running.load(Ordering::SeqCst) { info!("StackerDBListener: received listener exit request. Aborting"); - return Err(NakamotoNodeError::ChannelClosed); + return Ok(()); } let event = match receiver.recv_timeout(EVENT_RECEIVER_POLL) { From 5d516a801b4c848ab9e76a74ee90717851b56299 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Mon, 25 Nov 2024 21:28:36 -0500 Subject: [PATCH 27/41] fix: drop lock in `propose_block` --- .../src/nakamoto_node/signer_coordinator.rs | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 2fcb7b1ce3..b58f27aab9 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -225,16 +225,19 @@ impl SignerCoordinator { counters: &Counters, election_sortition: &ConsensusHash, ) -> Result, NakamotoNodeError> { - // Add this block to the block status map - let (lock, _cvar) = &*self.blocks; - let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); - let block_status = BlockStatus { - responded_signers: HashSet::new(), - gathered_signatures: BTreeMap::new(), - total_weight_signed: 0, - total_reject_weight: 0, - }; - blocks.insert(block.header.signer_signature_hash(), block_status); + // Add this block to the block status map. + // Create a scope to drop the lock on the block status map. 
+ { + let (lock, _cvar) = &*self.blocks; + let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); + let block_status = BlockStatus { + responded_signers: HashSet::new(), + gathered_signatures: BTreeMap::new(), + total_weight_signed: 0, + total_reject_weight: 0, + }; + blocks.insert(block.header.signer_signature_hash(), block_status); + } let reward_cycle_id = burnchain .block_height_to_reward_cycle(burn_tip.block_height) @@ -410,7 +413,7 @@ impl SignerCoordinator { .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); if cur_burn_chain_tip.consensus_hash != burn_block.consensus_hash { - info!("SignCoordinator: Cancel signature aggregation; burnchain tip has changed"); + info!("SignerCoordinator: Cancel signature aggregation; burnchain tip has changed"); true } else { false From 1a67a1c8c1a0ca3022405cd4c4d79ce939134165 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Mon, 25 Nov 2024 22:11:22 -0500 Subject: [PATCH 28/41] feat: extend tenure based on time See #5476 --- .../stacks-node/src/nakamoto_node/miner.rs | 66 +++++++++++++++---- .../src/nakamoto_node/signer_coordinator.rs | 3 + 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index ea54f508b6..68dd3f4fd3 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -140,6 +140,8 @@ pub struct BlockMinerThread { burnchain: Burnchain, /// Last block mined last_block_mined: Option, + /// Number of blocks mined in this tenure + mined_blocks: u64, /// Copy of the node's registered VRF key registered_key: RegisteredKey, /// Burnchain block snapshot which elected this miner @@ -173,6 +175,7 @@ impl BlockMinerThread { keychain: rt.keychain.clone(), burnchain: rt.burnchain.clone(), last_block_mined: None, + mined_blocks: 0, registered_key, burn_election_block, burn_block, @@ -345,6 +348,7 @@ impl BlockMinerThread { 
stackerdbs: &mut StackerDBs, last_block_rejected: &mut bool, ) -> Result<(), NakamotoNodeError> { + info!("Miner: Starting main loop"); #[cfg(test)] if *TEST_MINE_STALL.lock().unwrap() == Some(true) { // Do an extra check just so we don't log EVERY time. @@ -382,7 +386,8 @@ impl BlockMinerThread { } } - match self.mine_block() { + info!("Miner: Mining a new block"); + match self.mine_block(coordinator) { Ok(x) => { if !self.validate_timestamp(&x)? { info!("Block mined too quickly. Will try again."; @@ -502,6 +507,7 @@ impl BlockMinerThread { self.globals.coord().announce_new_stacks_block(); self.last_block_mined = Some(new_block); + self.mined_blocks += 1; } let Ok(sort_db) = SortitionDB::open( @@ -999,8 +1005,12 @@ impl BlockMinerThread { #[cfg_attr(test, mutants::skip)] /// Try to mine a Stacks block by assembling one from mempool transactions and sending a /// burnchain block-commit transaction. If we succeed, then return the assembled block. - fn mine_block(&mut self) -> Result { + fn mine_block( + &mut self, + coordinator: &mut SignerCoordinator, + ) -> Result { debug!("block miner thread ID is {:?}", thread::current().id()); + info!("Miner: Mining block"); let burn_db_path = self.config.get_burn_db_file_path(); let reward_set = self.load_signer_set()?; @@ -1043,8 +1053,12 @@ impl BlockMinerThread { &parent_block_info, vrf_proof, target_epoch_id, + coordinator, )?; + // TODO: If we are doing a time-based tenure extend, we need to reset + // the budget and the block_count here + parent_block_info.stacks_parent_header.microblock_tail = None; let signer_bitvec_len = reward_set.rewarded_addresses.len().try_into().ok(); @@ -1128,24 +1142,52 @@ impl BlockMinerThread { #[cfg_attr(test, mutants::skip)] /// Create the tenure start info for the block we're going to build fn make_tenure_start_info( - &self, + &mut self, chainstate: &StacksChainState, parent_block_info: &ParentStacksBlockInfo, vrf_proof: VRFProof, target_epoch_id: StacksEpochId, + coordinator: &mut 
SignerCoordinator, ) -> Result { + info!("Miner: Creating tenure start info"); let current_miner_nonce = parent_block_info.coinbase_nonce; - let Some(parent_tenure_info) = &parent_block_info.parent_tenure else { - return Ok(NakamotoTenureInfo { - coinbase_tx: None, - tenure_change_tx: None, - }); + let parent_tenure_info = match &parent_block_info.parent_tenure { + Some(info) => info.clone(), + None => { + // We may be able to extend the current tenure + if self.last_block_mined.is_none() { + info!("Miner: No parent tenure and no last block mined"); + return Ok(NakamotoTenureInfo { + coinbase_tx: None, + tenure_change_tx: None, + }); + } + ParentTenureInfo { + parent_tenure_blocks: self.mined_blocks, + parent_tenure_consensus_hash: self.burn_election_block.consensus_hash, + } + } }; if self.last_block_mined.is_some() { - return Ok(NakamotoTenureInfo { - coinbase_tx: None, - tenure_change_tx: None, - }); + info!("make_tenure_start_info: last block mined is some"); + // Check if we can extend the current tenure + let tenure_extend_timestamp = coordinator.get_tenure_extend_timestamp(); + info!( + "make_tenure_start_info: tenure_extend_timestamp: {}, now: {}", + tenure_extend_timestamp, + get_epoch_time_secs() + ); + if get_epoch_time_secs() < tenure_extend_timestamp { + info!("Miner: Not extending tenure"); + return Ok(NakamotoTenureInfo { + coinbase_tx: None, + tenure_change_tx: None, + }); + } + info!("Miner: Extending tenure"); + self.reason = MinerReason::Extended { + burn_view_consensus_hash: self.burn_election_block.consensus_hash, + }; } let parent_block_id = parent_block_info.stacks_parent_header.index_block_hash(); diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index b58f27aab9..c12d2f9bcb 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -387,16 +387,19 @@ impl SignerCoordinator { 
/// Get the timestamp at which at least 70% of the signing power should be /// willing to accept a time-based tenure extension. pub fn get_tenure_extend_timestamp(&self) -> u64 { + info!("SignerCoordinator: getting tenure extension timestamp"); let signer_idle_timestamps = self .signer_idle_timestamps .lock() .expect("FATAL: failed to lock signer idle timestamps"); + info!("SignerCoordinator: signer_idle_timestamps: {signer_idle_timestamps:?}"); let mut idle_timestamps = signer_idle_timestamps.values().collect::>(); idle_timestamps.sort_by_key(|info| info.timestamp); let mut weight_sum = 0; for info in idle_timestamps { weight_sum += info.weight; if weight_sum >= self.weight_threshold { + info!("SignerCoordinator: 70% threshold reached"); return info.timestamp; } } From 48c8a1017c0a2101711488a5ef82a71b02314024 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 26 Nov 2024 07:13:00 -0500 Subject: [PATCH 29/41] chore: cleanup --- .../stacks-node/src/nakamoto_node/miner.rs | 31 ++++++++----------- .../src/nakamoto_node/signer_coordinator.rs | 3 +- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 68dd3f4fd3..aa8316ad70 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -348,7 +348,6 @@ impl BlockMinerThread { stackerdbs: &mut StackerDBs, last_block_rejected: &mut bool, ) -> Result<(), NakamotoNodeError> { - info!("Miner: Starting main loop"); #[cfg(test)] if *TEST_MINE_STALL.lock().unwrap() == Some(true) { // Do an extra check just so we don't log EVERY time. @@ -386,7 +385,6 @@ impl BlockMinerThread { } } - info!("Miner: Mining a new block"); match self.mine_block(coordinator) { Ok(x) => { if !self.validate_timestamp(&x)? 
{ @@ -1056,9 +1054,6 @@ impl BlockMinerThread { coordinator, )?; - // TODO: If we are doing a time-based tenure extend, we need to reset - // the budget and the block_count here - parent_block_info.stacks_parent_header.microblock_tail = None; let signer_bitvec_len = reward_set.rewarded_addresses.len().try_into().ok(); @@ -1149,14 +1144,13 @@ impl BlockMinerThread { target_epoch_id: StacksEpochId, coordinator: &mut SignerCoordinator, ) -> Result { - info!("Miner: Creating tenure start info"); let current_miner_nonce = parent_block_info.coinbase_nonce; let parent_tenure_info = match &parent_block_info.parent_tenure { Some(info) => info.clone(), None => { // We may be able to extend the current tenure if self.last_block_mined.is_none() { - info!("Miner: No parent tenure and no last block mined"); + debug!("Miner: No parent tenure and no last block mined"); return Ok(NakamotoTenureInfo { coinbase_tx: None, tenure_change_tx: None, @@ -1169,25 +1163,19 @@ impl BlockMinerThread { } }; if self.last_block_mined.is_some() { - info!("make_tenure_start_info: last block mined is some"); // Check if we can extend the current tenure let tenure_extend_timestamp = coordinator.get_tenure_extend_timestamp(); - info!( - "make_tenure_start_info: tenure_extend_timestamp: {}, now: {}", - tenure_extend_timestamp, - get_epoch_time_secs() - ); if get_epoch_time_secs() < tenure_extend_timestamp { - info!("Miner: Not extending tenure"); return Ok(NakamotoTenureInfo { coinbase_tx: None, tenure_change_tx: None, }); } - info!("Miner: Extending tenure"); - self.reason = MinerReason::Extended { - burn_view_consensus_hash: self.burn_election_block.consensus_hash, - }; + debug!("Miner: Time-based tenure extend"; + "current_timestamp" => get_epoch_time_secs(), + "tenure_extend_timestamp" => tenure_extend_timestamp, + ); + self.tenure_extend_reset(); } let parent_block_id = parent_block_info.stacks_parent_header.index_block_hash(); @@ -1254,6 +1242,13 @@ impl BlockMinerThread { Ok(()) } } + + fn 
tenure_extend_reset(&mut self) { + self.reason = MinerReason::Extended { + burn_view_consensus_hash: self.burn_block.consensus_hash, + }; + self.mined_blocks = 0; + } } impl ParentStacksBlockInfo { diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index c12d2f9bcb..1e0e7694fd 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -387,12 +387,11 @@ impl SignerCoordinator { /// Get the timestamp at which at least 70% of the signing power should be /// willing to accept a time-based tenure extension. pub fn get_tenure_extend_timestamp(&self) -> u64 { - info!("SignerCoordinator: getting tenure extension timestamp"); let signer_idle_timestamps = self .signer_idle_timestamps .lock() .expect("FATAL: failed to lock signer idle timestamps"); - info!("SignerCoordinator: signer_idle_timestamps: {signer_idle_timestamps:?}"); + debug!("SignerCoordinator: signer_idle_timestamps: {signer_idle_timestamps:?}"); let mut idle_timestamps = signer_idle_timestamps.values().collect::>(); idle_timestamps.sort_by_key(|info| info.timestamp); let mut weight_sum = 0; From 8929537fc4c528c999155a46919032b1ede7ca77 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 26 Nov 2024 11:24:44 -0500 Subject: [PATCH 30/41] fix: resolve errors after merge --- .../stacks-node/src/nakamoto_node/miner.rs | 65 +------------------ .../stacks-node/src/nakamoto_node/relayer.rs | 2 - 2 files changed, 1 insertion(+), 66 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index e42e45d056..6b8b3ff0b9 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -61,8 +61,6 @@ pub static TEST_BROADCAST_STALL: std::sync::Mutex> = std::sync::Mut pub static TEST_BLOCK_ANNOUNCE_STALL: std::sync::Mutex> = 
std::sync::Mutex::new(None); #[cfg(test)] pub static TEST_SKIP_P2P_BROADCAST: std::sync::Mutex> = std::sync::Mutex::new(None); -#[cfg(test)] -pub static TEST_NO_TENURE_EXTEND: std::sync::Mutex> = std::sync::Mutex::new(None); /// If the miner was interrupted while mining a block, how long should the /// miner thread sleep before trying again? @@ -388,7 +386,7 @@ impl BlockMinerThread { Ok(x) => { if !self.validate_timestamp(&x)? { info!("Block mined too quickly. Will try again."; - "block_timestamp" => x.header.timestamp, + "block_timestamp" => x.header.timestamp, ); continue; } @@ -590,67 +588,6 @@ impl BlockMinerThread { Ok(reward_set) } - /// Gather a list of signatures from the signers for the block - fn gather_signatures( - &mut self, - new_block: &mut NakamotoBlock, - stackerdbs: &mut StackerDBs, - ) -> Result<(RewardSet, Vec), NakamotoNodeError> { - let Some(miner_privkey) = self.config.miner.mining_key else { - return Err(NakamotoNodeError::MinerConfigurationFailed( - "No mining key configured, cannot mine", - )); - }; - let sort_db = SortitionDB::open( - &self.config.get_burn_db_file_path(), - true, - self.burnchain.pox_constants.clone(), - ) - .map_err(|e| { - NakamotoNodeError::SigningCoordinatorFailure(format!( - "Failed to open sortition DB. Cannot mine! {e:?}" - )) - })?; - - let reward_set = self.load_signer_set()?; - - if self.config.get_node_config(false).mock_mining { - return Ok((reward_set, Vec::new())); - } - - let mut coordinator = SignCoordinator::new( - &reward_set, - miner_privkey, - &self.config, - self.globals.should_keep_running.clone(), - ) - .map_err(|e| { - NakamotoNodeError::SigningCoordinatorFailure(format!( - "Failed to initialize the signing coordinator. Cannot mine! {e:?}" - )) - })?; - - let mut chain_state = - neon_node::open_chainstate_with_faults(&self.config).map_err(|e| { - NakamotoNodeError::SigningCoordinatorFailure(format!( - "Failed to open chainstate DB. Cannot mine! 
{e:?}" - )) - })?; - - let signature = coordinator.run_sign_v0( - new_block, - &self.burn_block, - &self.burnchain, - &sort_db, - &mut chain_state, - stackerdbs, - &self.globals.counters, - &self.burn_election_block.consensus_hash, - )?; - - Ok((reward_set, signature)) - } - /// Fault injection -- possibly fail to broadcast /// Return true to drop the block fn fault_injection_broadcast_fail(&self) -> bool { diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index fd86f476f3..b346cdc346 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -58,8 +58,6 @@ use super::{ BLOCK_PROCESSOR_STACK_SIZE, }; use crate::burnchains::BurnchainController; -#[cfg(test)] -use crate::nakamoto_node::miner::TEST_NO_TENURE_EXTEND; use crate::nakamoto_node::miner::{BlockMinerThread, MinerDirective}; use crate::neon_node::{ fault_injection_skip_mining, open_chainstate_with_faults, LeaderKeyRegistrationState, From 65c5b70c16cbdd70344565291e3b15846c16b21b Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 26 Nov 2024 11:29:25 -0500 Subject: [PATCH 31/41] chore: remove duplicates in CHANGELOGs due to merge --- CHANGELOG.md | 1 - stacks-signer/CHANGELOG.md | 1 - 2 files changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 14401642f5..c89e9df484 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,6 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE - Remove the panic for reporting DB deadlocks (just error and continue waiting) - Add index to `metadata_table` in Clarity DB on `blockhash` - Add `block_commit_delay_ms` to the config file to control the time to wait after seeing a new burn block, before submitting a block commit, to allow time for the first Nakamoto block of the new tenure to be mined, allowing this miner to avoid the need to RBF the block commit. 
-- If the winning miner of a sortition is committed to the wrong parent tenure, the previous miner can immediately tenure extend and continue mining since the winning miner would never be able to propose a valid block. (#5361) - Add `tenure_cost_limit_per_block_percentage` to the miner config file to control the percentage remaining tenure cost limit to consume per nakamoto block. - Add `/v3/blocks/height/:block_height` rpc endpoint - If the winning miner of a sortition is committed to the wrong parent tenure, the previous miner can immediately tenure extend and continue mining since the winning miner would never be able to propose a valid block. (#5361) diff --git a/stacks-signer/CHANGELOG.md b/stacks-signer/CHANGELOG.md index 7f64a3298b..8514bc3b67 100644 --- a/stacks-signer/CHANGELOG.md +++ b/stacks-signer/CHANGELOG.md @@ -11,7 +11,6 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE ### Changed -- Allow a miner to extend their tenure immediately if the winner of the next tenure has committed to the wrong parent tenure (#5361) - Add tenure extend timestamp to signer block responses - Added tenure_idle_timeout_secs configuration option for determining when a tenure extend will be accepted - Allow a miner to extend their tenure immediately if the winner of the next tenure has committed to the wrong parent tenure (#5361) From 280d536046d8181b3e1fcfae029d22e707e90b56 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 26 Nov 2024 11:50:50 -0500 Subject: [PATCH 32/41] fix: merge artifact --- stacks-signer/src/signerdb.rs | 41 ----------------------------------- 1 file changed, 41 deletions(-) diff --git a/stacks-signer/src/signerdb.rs b/stacks-signer/src/signerdb.rs index 6c99aa5b64..ac76679f89 100644 --- a/stacks-signer/src/signerdb.rs +++ b/stacks-signer/src/signerdb.rs @@ -1512,45 +1512,4 @@ mod tests { .unwrap() .is_empty()); } - - #[test] - fn test_get_canonical_tip() { - let db_path = tmp_db_path(); - let mut db = 
SignerDb::new(db_path).expect("Failed to create signer db"); - - let (mut block_info_1, _block_proposal_1) = create_block_override(|b| { - b.block.header.miner_signature = MessageSignature([0x01; 65]); - b.block.header.chain_length = 1; - b.burn_height = 1; - }); - - let (mut block_info_2, _block_proposal_2) = create_block_override(|b| { - b.block.header.miner_signature = MessageSignature([0x02; 65]); - b.block.header.chain_length = 2; - b.burn_height = 2; - }); - - db.insert_block(&block_info_1) - .expect("Unable to insert block into db"); - db.insert_block(&block_info_2) - .expect("Unable to insert block into db"); - - assert!(db.get_canonical_tip().unwrap().is_none()); - - block_info_1 - .mark_globally_accepted() - .expect("Failed to mark block as globally accepted"); - db.insert_block(&block_info_1) - .expect("Unable to insert block into db"); - - assert_eq!(db.get_canonical_tip().unwrap().unwrap(), block_info_1); - - block_info_2 - .mark_globally_accepted() - .expect("Failed to mark block as globally accepted"); - db.insert_block(&block_info_2) - .expect("Unable to insert block into db"); - - assert_eq!(db.get_canonical_tip().unwrap().unwrap(), block_info_2); - } } From a4d378f623eec12b0b8a4b50912c2179407e99de Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 26 Nov 2024 11:53:03 -0500 Subject: [PATCH 33/41] fix: merge artifacts --- testnet/stacks-node/src/tests/signer/v0.rs | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 4574563efa..a0af4d58ac 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -67,7 +67,7 @@ use super::SignerTest; use crate::config::{EventKeyType, EventObserverConfig}; use crate::event_dispatcher::MinedNakamotoBlockEvent; use crate::nakamoto_node::miner::{ - TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_NO_TENURE_EXTEND, + 
TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, }; use crate::nakamoto_node::stackerdb_listener::TEST_IGNORE_SIGNERS; use crate::neon::Counters; @@ -960,10 +960,6 @@ fn forked_tenure_testing( sleep_ms(1000); info!("------------------------- Reached Epoch 3.0 -------------------------"); - // Disable tenure extend so that miners will not tenure extend when the - // test is checking for fork behavior. - TEST_NO_TENURE_EXTEND.lock().unwrap().replace(true); - let naka_conf = signer_test.running_nodes.conf.clone(); let burnchain = naka_conf.get_burnchain(); let sortdb = burnchain.open_sortition_db(true).unwrap(); @@ -1318,10 +1314,6 @@ fn bitcoind_forking_test() { let pre_epoch_3_nonce = get_account(&http_origin, &miner_address).nonce; let pre_fork_tenures = 10; - // Disable tenure extend so that miners will not tenure extend when the - // test is checking for fork behavior. - TEST_NO_TENURE_EXTEND.lock().unwrap().replace(true); - for i in 0..pre_fork_tenures { info!("Mining pre-fork tenure {} of {pre_fork_tenures}", i + 1); signer_test.mine_nakamoto_block(Duration::from_secs(30)); @@ -1969,10 +1961,6 @@ fn miner_forking() { "RL1 did not win the sortition" ); - // Disable tenure extend so that miners will not tenure extend when the - // test is checking for fork behavior. - TEST_NO_TENURE_EXTEND.lock().unwrap().replace(true); - info!( "------------------------- RL2 Wins Sortition With Outdated View -------------------------" ); @@ -4331,10 +4319,6 @@ fn partial_tenure_fork() { info!("------------------------- Reached Epoch 3.0 -------------------------"); - // Disable tenure extend so that miners will not tenure extend when the - // test is checking for fork behavior. 
- TEST_NO_TENURE_EXTEND.lock().unwrap().replace(true); - // due to the random nature of mining sortitions, the way this test is structured // is that we keep track of how many tenures each miner produced, and once enough sortitions // have been produced such that each miner has produced 3 tenures, we stop and check the From 379ce666e473b6ad90e6cd2030e4cb65193f5ae0 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 26 Nov 2024 14:26:02 -0500 Subject: [PATCH 34/41] chore: upgrade debug log to info --- testnet/stacks-node/src/nakamoto_node/miner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 6b8b3ff0b9..e048c9456a 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -1162,7 +1162,7 @@ impl BlockMinerThread { tenure_change_tx: None, }); } - debug!("Miner: Time-based tenure extend"; + info!("Miner: Time-based tenure extend"; "current_timestamp" => get_epoch_time_secs(), "tenure_extend_timestamp" => tenure_extend_timestamp, ); From cef0dd4d443b702dd19a354cdd7afe91b4f6a7eb Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Wed, 27 Nov 2024 08:41:51 -0500 Subject: [PATCH 35/41] refactor: move synchronization details into `StackerDBListenerComms` --- .../src/nakamoto_node/signer_coordinator.rs | 206 +++++++----------- .../src/nakamoto_node/stackerdb_listener.rs | 103 ++++++++- 2 files changed, 175 insertions(+), 134 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 1e0e7694fd..70c9aab190 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -13,12 +13,10 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . 
-use std::collections::BTreeMap; use std::sync::atomic::AtomicBool; -use std::sync::{Arc, Condvar, Mutex}; +use std::sync::{Arc, Mutex}; use std::thread::JoinHandle; -use hashbrown::{HashMap, HashSet}; use libsigner::v0::messages::{MinerSlotID, SignerMessage as SignerMessageV0}; use libsigner::{BlockProposal, SignerSession, StackerDBSession}; use stacks::burnchains::Burnchain; @@ -31,34 +29,18 @@ use stacks::chainstate::stacks::Error as ChainstateError; use stacks::codec::StacksMessageCodec; use stacks::libstackerdb::StackerDBChunkData; use stacks::net::stackerdb::StackerDBs; -use stacks::types::chainstate::{StacksBlockId, StacksPrivateKey, StacksPublicKey}; +use stacks::types::chainstate::{StacksBlockId, StacksPrivateKey}; use stacks::util::hash::Sha512Trunc256Sum; use stacks::util::secp256k1::MessageSignature; use stacks::util_lib::boot::boot_code_id; +use super::stackerdb_listener::StackerDBListenerComms; use super::Error as NakamotoNodeError; use crate::event_dispatcher::StackerDBChannel; -use crate::nakamoto_node::stackerdb_listener::{ - BlockStatus, StackerDBListener, TimestampInfo, EVENT_RECEIVER_POLL, -}; +use crate::nakamoto_node::stackerdb_listener::{StackerDBListener, EVENT_RECEIVER_POLL}; use crate::neon::Counters; use crate::Config; -/// Helper function to determine if signer threshold has been reached for a block -fn is_threshold_reached( - status: Option<&BlockStatus>, - weight_threshold: u32, - total_weight: u32, -) -> bool { - match status { - Some(status) => { - status.total_weight_signed >= weight_threshold - || status.total_reject_weight.saturating_add(weight_threshold) > total_weight - } - None => true, - } -} - /// The state of the signer database listener, used by the miner thread to /// interact with the signer listener. 
pub struct SignerCoordinator { @@ -72,15 +54,8 @@ pub struct SignerCoordinator { total_weight: u32, /// The weight threshold for block approval weight_threshold: u32, - /// Tracks signatures for blocks - /// - key: Sha512Trunc256Sum (signer signature hash) - /// - value: BlockStatus - blocks: Arc<(Mutex>, Condvar)>, - /// Tracks the timestamps from signers to decide when they should be - /// willing to accept time-based tenure extensions - /// - key: StacksPublicKey - /// - value: TimestampInfo - signer_idle_timestamps: Arc>>, + /// Interface to the StackerDB listener thread's data + stackerdb_comms: StackerDBListenerComms, /// Keep running flag for the signer DB listener thread keep_running: Arc, /// Handle for the signer DB listener thread @@ -125,8 +100,7 @@ impl SignerCoordinator { miners_session, total_weight: listener.total_weight, weight_threshold: listener.weight_threshold, - blocks: listener.blocks.clone(), - signer_idle_timestamps: listener.signer_idle_timestamps.clone(), + stackerdb_comms: listener.get_comms(), keep_running, listener_thread: None, }; @@ -226,18 +200,7 @@ impl SignerCoordinator { election_sortition: &ConsensusHash, ) -> Result, NakamotoNodeError> { // Add this block to the block status map. - // Create a scope to drop the lock on the block status map. 
- { - let (lock, _cvar) = &*self.blocks; - let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); - let block_status = BlockStatus { - responded_signers: HashSet::new(), - gathered_signatures: BTreeMap::new(), - total_weight_signed: 0, - total_reject_weight: 0, - }; - blocks.insert(block.header.signer_signature_hash(), block_status); - } + self.stackerdb_comms.insert_block(&block.header); let reward_cycle_id = burnchain .block_height_to_reward_cycle(burn_tip.block_height) @@ -307,79 +270,74 @@ impl SignerCoordinator { burn_tip: &BlockSnapshot, counters: &Counters, ) -> Result, NakamotoNodeError> { - let (lock, cvar) = &*self.blocks; - let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); - loop { - let (guard, timeout_result) = cvar - .wait_timeout_while(blocks, EVENT_RECEIVER_POLL, |map| { - !is_threshold_reached( - map.get(block_signer_sighash), - self.weight_threshold, - self.total_weight, - ) - }) - .expect("FATAL: failed to wait on block status cond var"); - blocks = guard; + let block_status = match self.stackerdb_comms.wait_for_block_status( + block_signer_sighash, + EVENT_RECEIVER_POLL, + |status| { + status.total_weight_signed < self.weight_threshold + && status + .total_reject_weight + .saturating_add(self.weight_threshold) + <= self.total_weight + }, + )? { + Some(status) => status, + None => { + // If we just received a timeout, we should check if the burnchain + // tip has changed or if we received this signed block already in + // the staging db. + debug!("SignerCoordinator: Timeout waiting for block signatures"); - // If we just received a timeout, we should check if the burnchain - // tip has changed or if we received this signed block already in - // the staging db. - if timeout_result.timed_out() { - // Look in the nakamoto staging db -- a block can only get stored there - // if it has enough signing weight to clear the threshold. 
- if let Ok(Some((stored_block, _sz))) = chain_state - .nakamoto_blocks_db() - .get_nakamoto_block(block_id) - .map_err(|e| { - warn!( - "Failed to query chainstate for block: {e:?}"; - "block_id" => %block_id, - "block_signer_sighash" => %block_signer_sighash, - ); - e - }) - { - debug!("SignCoordinator: Found signatures in relayed block"); - counters.bump_naka_signer_pushed_blocks(); - return Ok(stored_block.header.signer_signature); - } + // Look in the nakamoto staging db -- a block can only get stored there + // if it has enough signing weight to clear the threshold. + if let Ok(Some((stored_block, _sz))) = chain_state + .nakamoto_blocks_db() + .get_nakamoto_block(block_id) + .map_err(|e| { + warn!( + "Failed to query chainstate for block: {e:?}"; + "block_id" => %block_id, + "block_signer_sighash" => %block_signer_sighash, + ); + e + }) + { + debug!("SignCoordinator: Found signatures in relayed block"); + counters.bump_naka_signer_pushed_blocks(); + return Ok(stored_block.header.signer_signature); + } - if Self::check_burn_tip_changed(sortdb, burn_tip) { - debug!("SignCoordinator: Exiting due to new burnchain tip"); - return Err(NakamotoNodeError::BurnchainTipChanged); - } - } - // Else, we have received enough signatures to proceed - else { - let block_status = blocks.get(block_signer_sighash).ok_or_else(|| { - NakamotoNodeError::SigningCoordinatorFailure( - "Block unexpectedly missing from map".into(), - ) - })?; + if Self::check_burn_tip_changed(sortdb, burn_tip) { + debug!("SignCoordinator: Exiting due to new burnchain tip"); + return Err(NakamotoNodeError::BurnchainTipChanged); + } - if block_status - .total_reject_weight - .saturating_add(self.weight_threshold) - > self.total_weight - { - info!( - "{}/{} signers vote to reject block", - block_status.total_reject_weight, self.total_weight; - "block_signer_sighash" => %block_signer_sighash, - ); - counters.bump_naka_rejected_blocks(); - return Err(NakamotoNodeError::SignersRejected); - } else if 
block_status.total_weight_signed >= self.weight_threshold { - info!("Received enough signatures, block accepted"; - "block_signer_sighash" => %block_signer_sighash, - ); - return Ok(block_status.gathered_signatures.values().cloned().collect()); - } else { - return Err(NakamotoNodeError::SigningCoordinatorFailure( - "Unblocked without reaching the threshold".into(), - )); + continue; } + }; + + if block_status + .total_reject_weight + .saturating_add(self.weight_threshold) + > self.total_weight + { + info!( + "{}/{} signers vote to reject block", + block_status.total_reject_weight, self.total_weight; + "block_signer_sighash" => %block_signer_sighash, + ); + counters.bump_naka_rejected_blocks(); + return Err(NakamotoNodeError::SignersRejected); + } else if block_status.total_weight_signed >= self.weight_threshold { + info!("Received enough signatures, block accepted"; + "block_signer_sighash" => %block_signer_sighash, + ); + return Ok(block_status.gathered_signatures.values().cloned().collect()); + } else { + return Err(NakamotoNodeError::SigningCoordinatorFailure( + "Unblocked without reaching the threshold".into(), + )); } } } @@ -387,26 +345,8 @@ impl SignerCoordinator { /// Get the timestamp at which at least 70% of the signing power should be /// willing to accept a time-based tenure extension. 
pub fn get_tenure_extend_timestamp(&self) -> u64 { - let signer_idle_timestamps = self - .signer_idle_timestamps - .lock() - .expect("FATAL: failed to lock signer idle timestamps"); - debug!("SignerCoordinator: signer_idle_timestamps: {signer_idle_timestamps:?}"); - let mut idle_timestamps = signer_idle_timestamps.values().collect::>(); - idle_timestamps.sort_by_key(|info| info.timestamp); - let mut weight_sum = 0; - for info in idle_timestamps { - weight_sum += info.weight; - if weight_sum >= self.weight_threshold { - info!("SignerCoordinator: 70% threshold reached"); - return info.timestamp; - } - } - - // We don't have enough information to reach a 70% threshold at any - // time, so return u64::MAX to indicate that we should not extend the - // tenure. - u64::MAX + self.stackerdb_comms + .get_tenure_extend_timestamp(self.weight_threshold) } /// Check if the tenure needs to change diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index 4f213abf6c..a5036efa6e 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -46,7 +46,7 @@ pub static TEST_IGNORE_SIGNERS: std::sync::Mutex> = std::sync::Mute pub static EVENT_RECEIVER_POLL: Duration = Duration::from_millis(500); #[derive(Debug, Clone)] -pub(crate) struct BlockStatus { +pub struct BlockStatus { pub responded_signers: HashSet, pub gathered_signatures: BTreeMap, pub total_weight_signed: u32, @@ -89,6 +89,19 @@ pub struct StackerDBListener { pub(crate) signer_idle_timestamps: Arc>>, } +/// Interface for other threads to retrieve info from the StackerDBListener +pub struct StackerDBListenerComms { + /// Tracks signatures for blocks + /// - key: Sha512Trunc256Sum (signer signature hash) + /// - value: BlockStatus + blocks: Arc<(Mutex>, Condvar)>, + /// Tracks the timestamps from signers to decide when they should be + /// willing to accept 
time-based tenure extensions + /// - key: StacksPublicKey + /// - value: TimestampInfo + signer_idle_timestamps: Arc>>, +} + impl StackerDBListener { pub fn new( stackerdb_channel: Arc>, @@ -153,6 +166,13 @@ impl StackerDBListener { }) } + pub fn get_comms(&self) -> StackerDBListenerComms { + StackerDBListenerComms { + blocks: self.blocks.clone(), + signer_idle_timestamps: self.signer_idle_timestamps.clone(), + } + } + /// Run the StackerDB listener. pub fn run(&mut self) -> Result<(), NakamotoNodeError> { info!("StackerDBListener: Starting up"); @@ -442,3 +462,84 @@ impl Drop for StackerDBListener { )); } } + +impl StackerDBListenerComms { + /// Insert a block into the block status map with initial values. + pub fn insert_block(&self, block: &NakamotoBlockHeader) { + let (lock, _cvar) = &*self.blocks; + let mut blocks = lock.lock().expect("FATAL: failed to lock block status"); + let block_status = BlockStatus { + responded_signers: HashSet::new(), + gathered_signatures: BTreeMap::new(), + total_weight_signed: 0, + total_reject_weight: 0, + }; + blocks.insert(block.signer_signature_hash(), block_status); + } + + /// Get the status for `block` from the Stacker DB listener. + /// If the block is not found in the map, return an error. + /// If the block is found, call `condition` to check if the block status + /// satisfies the condition. + /// If the condition is satisfied, return the block status as + /// `Ok(Some(status))`. + /// If the condition is not satisfied, wait for it to be satisfied. + /// If the timeout is reached, return `Ok(None)`. 
+ pub fn wait_for_block_status( + &self, + block_signer_sighash: &Sha512Trunc256Sum, + timeout: Duration, + condition: F, + ) -> Result, NakamotoNodeError> + where + F: Fn(&BlockStatus) -> bool, + { + let (lock, cvar) = &*self.blocks; + let blocks = lock.lock().expect("FATAL: failed to lock block status"); + + let (guard, timeout_result) = cvar + .wait_timeout_while(blocks, timeout, |map| { + let Some(status) = map.get(block_signer_sighash) else { + return true; + }; + condition(status) + }) + .expect("FATAL: failed to wait on block status cond var"); + + // If we timed out, return None + if timeout_result.timed_out() { + return Ok(None); + } + match guard.get(block_signer_sighash) { + Some(status) => Ok(Some(status.clone())), + None => Err(NakamotoNodeError::SigningCoordinatorFailure( + "Block not found in status map".into(), + )), + } + } + + /// Get the timestamp at which at least 70% of the signing power should be + /// willing to accept a time-based tenure extension. + pub fn get_tenure_extend_timestamp(&self, weight_threshold: u32) -> u64 { + let signer_idle_timestamps = self + .signer_idle_timestamps + .lock() + .expect("FATAL: failed to lock signer idle timestamps"); + debug!("SignerCoordinator: signer_idle_timestamps: {signer_idle_timestamps:?}"); + let mut idle_timestamps = signer_idle_timestamps.values().collect::>(); + idle_timestamps.sort_by_key(|info| info.timestamp); + let mut weight_sum = 0; + for info in idle_timestamps { + weight_sum += info.weight; + if weight_sum >= weight_threshold { + info!("SignerCoordinator: 70% threshold reached"); + return info.timestamp; + } + } + + // We don't have enough information to reach a 70% threshold at any + // time, so return u64::MAX to indicate that we should not extend the + // tenure. 
+ u64::MAX + } +} From ae9383a47c5ea113cb6cfc14725f10719e2f5461 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Mon, 2 Dec 2024 10:56:11 -0500 Subject: [PATCH 36/41] chore: downgrade log to debug --- testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index a5036efa6e..853abf99c1 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -532,7 +532,9 @@ impl StackerDBListenerComms { for info in idle_timestamps { weight_sum += info.weight; if weight_sum >= weight_threshold { - info!("SignerCoordinator: 70% threshold reached"); + debug!("SignerCoordinator: 70% threshold reached for tenure extension timestamp"; + "timestamp" => info.timestamp, + ); return info.timestamp; } } From 61911eff8522514ad015b9decbe97bd264c8ad00 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 3 Dec 2024 13:41:55 -0500 Subject: [PATCH 37/41] chore: add debug log for stackerdb listener timeout --- testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index 853abf99c1..2da3d3da60 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -199,6 +199,7 @@ impl StackerDBListener { let event = match receiver.recv_timeout(EVENT_RECEIVER_POLL) { Ok(event) => event, Err(std::sync::mpsc::RecvTimeoutError::Timeout) => { + debug!("StackerDBListener: No StackerDB event received. 
Checking flags and polling again.");
 continue;
 }
 Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => {
From 6e4bcc81626304a3e0a7de5e3e04f0c22d0fe389 Mon Sep 17 00:00:00 2001
From: Brice Dobry
Date: Tue, 3 Dec 2024 15:19:35 -0500
Subject: [PATCH 38/41] refactor: pass `reward_set` instead of re-loading it
---
 stackslib/src/chainstate/nakamoto/mod.rs | 4 ++--
 testnet/stacks-node/src/nakamoto_node/miner.rs | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/stackslib/src/chainstate/nakamoto/mod.rs b/stackslib/src/chainstate/nakamoto/mod.rs
index ca37e30121..1e22076b4d 100644
--- a/stackslib/src/chainstate/nakamoto/mod.rs
+++ b/stackslib/src/chainstate/nakamoto/mod.rs
@@ -2452,7 +2452,7 @@ impl NakamotoChainState {
 db_handle: &mut SortitionHandleConn,
 staging_db_tx: &NakamotoStagingBlocksTx,
 headers_conn: &Connection,
- reward_set: RewardSet,
+ reward_set: &RewardSet,
 obtain_method: NakamotoBlockObtainMethod,
 ) -> Result<bool, ChainstateError> {
 test_debug!("Consider Nakamoto block {}", &block.block_id());
@@ -2522,7 +2522,7 @@ impl NakamotoChainState {
 return Ok(false);
 };
 
- let signing_weight = match block.header.verify_signer_signatures(&reward_set) {
+ let signing_weight = match block.header.verify_signer_signatures(reward_set) {
 Ok(x) => x,
 Err(e) => {
 warn!("Received block, but the signer signatures are invalid";
diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs
index e048c9456a..d0dbbabb47 100644
--- a/testnet/stacks-node/src/nakamoto_node/miner.rs
+++ b/testnet/stacks-node/src/nakamoto_node/miner.rs
@@ -328,6 +328,7 @@ impl BlockMinerThread {
 &sortdb,
 &mut stackerdbs,
 &mut last_block_rejected,
+ &reward_set,
 ) {
 // Before stopping this miner, shutdown the coordinator thread.
coordinator.shutdown();
@@ -344,6 +345,7 @@ impl BlockMinerThread {
 sortdb: &SortitionDB,
 stackerdbs: &mut StackerDBs,
 last_block_rejected: &mut bool,
+ reward_set: &RewardSet,
 ) -> Result<(), NakamotoNodeError> {
 #[cfg(test)]
 if *TEST_MINE_STALL.lock().unwrap() == Some(true) {
@@ -471,8 +473,6 @@ impl BlockMinerThread {
 };
 *last_block_rejected = false;
 
- let reward_set = self.load_signer_set()?;
-
 new_block.header.signer_signature = signer_signature;
 if let Err(e) = self.broadcast(new_block.clone(), reward_set, &stackerdbs) {
 warn!("Error accepting own block: {e:?}. Will try mining again.");
@@ -612,7 +612,7 @@ impl BlockMinerThread {
 sort_db: &SortitionDB,
 chain_state: &mut StacksChainState,
 block: &NakamotoBlock,
- reward_set: RewardSet,
+ reward_set: &RewardSet,
 ) -> Result<(), ChainstateError> {
 if Self::fault_injection_skip_block_broadcast() {
 warn!(
@@ -669,7 +669,7 @@ impl BlockMinerThread {
 fn broadcast(
 &mut self,
 block: NakamotoBlock,
- reward_set: RewardSet,
+ reward_set: &RewardSet,
 stackerdbs: &StackerDBs,
 ) -> Result<(), NakamotoNodeError> {
 let mut chain_state = neon_node::open_chainstate_with_faults(&self.config)
From f2ce342269f0584bc4540f2a0d8ac13cce939038 Mon Sep 17 00:00:00 2001
From: Brice Dobry
Date: Tue, 3 Dec 2024 15:23:08 -0500
Subject: [PATCH 39/41] chore: improve comment on `mined_blocks`
---
 testnet/stacks-node/src/nakamoto_node/miner.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs
index d0dbbabb47..f649046096 100644
--- a/testnet/stacks-node/src/nakamoto_node/miner.rs
+++ b/testnet/stacks-node/src/nakamoto_node/miner.rs
@@ -138,7 +138,7 @@ pub struct BlockMinerThread {
 burnchain: Burnchain,
 /// Last block mined
 last_block_mined: Option<NakamotoBlock>,
- /// Number of blocks mined in this tenure
+ /// Number of blocks mined since a tenure change/extend
 mined_blocks: u64,
 /// Copy of the node's registered VRF key
 registered_key:
RegisteredKey, From fdb05d1c7e0ba2ea11ec98b147e7e1e5c1d3b825 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 3 Dec 2024 16:48:14 -0500 Subject: [PATCH 40/41] fix: missing change for previous refactor --- stackslib/src/net/relay.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stackslib/src/net/relay.rs b/stackslib/src/net/relay.rs index cb7d310321..b93171916c 100644 --- a/stackslib/src/net/relay.rs +++ b/stackslib/src/net/relay.rs @@ -1077,7 +1077,7 @@ impl Relayer { sort_handle, &staging_db_tx, headers_conn, - reward_set, + &reward_set, obtained_method, )?; staging_db_tx.commit()?; From de1881b5f48810a4db0b608c87036306185c18b4 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Tue, 3 Dec 2024 16:57:03 -0500 Subject: [PATCH 41/41] refactor: use `TestFlag` --- .../src/nakamoto_node/stackerdb_listener.rs | 17 ++++++++++------- testnet/stacks-node/src/tests/signer/v0.rs | 10 +++++----- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs index 2da3d3da60..f9ada97e57 100644 --- a/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs +++ b/testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs @@ -20,6 +20,8 @@ use std::sync::{Arc, Condvar, Mutex}; use std::time::Duration; use hashbrown::{HashMap, HashSet}; +#[cfg(test)] +use lazy_static::lazy_static; use libsigner::v0::messages::{BlockAccepted, BlockResponse, SignerMessage as SignerMessageV0}; use libsigner::SignerEvent; use stacks::burnchains::Burnchain; @@ -35,11 +37,15 @@ use stacks::util::secp256k1::MessageSignature; use super::Error as NakamotoNodeError; use crate::event_dispatcher::StackerDBChannel; +#[cfg(test)] +use crate::neon::TestFlag; -/// Fault injection flag to prevent the miner from seeing enough signer signatures. 
-/// Used to test that the signers will broadcast a block if it gets enough signatures
 #[cfg(test)]
-pub static TEST_IGNORE_SIGNERS: std::sync::Mutex<Option<bool>> = std::sync::Mutex::new(None);
+lazy_static! {
+ /// Fault injection flag to prevent the miner from seeing enough signer signatures.
+ /// Used to test that the signers will broadcast a block if it gets enough signatures
+ pub static ref TEST_IGNORE_SIGNERS: TestFlag = TestFlag::default();
+}
 
 /// How long should the coordinator poll on the event receiver before
 /// waking up to check timeouts?
@@ -440,10 +446,7 @@ impl StackerDBListener {
 /// Do we ignore signer signatures?
 #[cfg(test)]
 fn fault_injection_ignore_signatures() -> bool {
- if *TEST_IGNORE_SIGNERS.lock().unwrap() == Some(true) {
- return true;
- }
- false
+ TEST_IGNORE_SIGNERS.get()
 }
 
 #[cfg(not(test))]
diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs
index a0af4d58ac..0051902852 100644
--- a/testnet/stacks-node/src/tests/signer/v0.rs
+++ b/testnet/stacks-node/src/tests/signer/v0.rs
@@ -2513,7 +2513,7 @@ fn signers_broadcast_signed_blocks() {
 })
 .expect("Timed out waiting for first nakamoto block to be mined");
 
- TEST_IGNORE_SIGNERS.lock().unwrap().replace(true);
+ TEST_IGNORE_SIGNERS.set(true);
 let blocks_before = signer_test
 .running_nodes
 .nakamoto_blocks_mined
 .clone();
@@ -2798,7 +2798,7 @@ fn empty_sortition_before_approval() {
 let stacks_height_before = info.stacks_tip_height;
 
 info!("Forcing miner to ignore signatures for next block");
- TEST_IGNORE_SIGNERS.lock().unwrap().replace(true);
+ TEST_IGNORE_SIGNERS.set(true);
 
 info!("Pausing block commits to trigger an empty sortition.");
 signer_test
@@ -2851,7 +2851,7 @@ fn empty_sortition_before_approval() {
 .replace(false);
 
 info!("Stop ignoring signers and wait for the tip to advance");
- TEST_IGNORE_SIGNERS.lock().unwrap().replace(false);
+ TEST_IGNORE_SIGNERS.set(false);
 
 wait_for(60, || {
 let info = get_chain_info(&signer_test.running_nodes.conf);
@@ -5608,7
+5608,7 @@ fn miner_recovers_when_broadcast_block_delay_across_tenures_occurs() { // broadcasted to the miner so it can end its tenure before block confirmation obtained // Clear the stackerdb chunks info!("Forcing miner to ignore block responses for block N+1"); - TEST_IGNORE_SIGNERS.lock().unwrap().replace(true); + TEST_IGNORE_SIGNERS.set(true); info!("Delaying signer block N+1 broadcasting to the miner"); TEST_PAUSE_BLOCK_BROADCAST.lock().unwrap().replace(true); test_observer::clear(); @@ -5735,7 +5735,7 @@ fn miner_recovers_when_broadcast_block_delay_across_tenures_occurs() { .expect("Timed out waiting for block proposal of N+1' block proposal"); info!("Allowing miner to accept block responses again. "); - TEST_IGNORE_SIGNERS.lock().unwrap().replace(false); + TEST_IGNORE_SIGNERS.set(false); info!("Allowing signers to broadcast block N+1 to the miner"); TEST_PAUSE_BLOCK_BROADCAST.lock().unwrap().replace(false);