-
Notifications
You must be signed in to change notification settings - Fork 97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(p2p): ensure time synchronization in the network #2255
Changes from all commits
4343c0e
3b69689
5acfe8d
82ae6d2
5771790
f42e706
b693d8c
92af285
86eb068
c1a6eaf
e5c284d
130af67
422626c
3859afc
1c7229c
0691aba
3692673
8487ab1
faeee43
1f2950e
9ad57a7
ff1a52e
3b18529
7418211
fd64a72
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ use futures::{channel::oneshot, | |
use futures_rustls::rustls; | ||
use futures_ticker::Ticker; | ||
use instant::Duration; | ||
use lazy_static::lazy_static; | ||
use libp2p::core::transport::Boxed as BoxedTransport; | ||
use libp2p::core::{ConnectedPoint, Endpoint}; | ||
use libp2p::floodsub::{Floodsub, FloodsubEvent, Topic as FloodsubTopic}; | ||
|
@@ -23,16 +24,20 @@ use std::collections::HashMap; | |
use std::hash::{Hash, Hasher}; | ||
use std::iter; | ||
use std::net::IpAddr; | ||
use std::sync::{Mutex, MutexGuard}; | ||
use std::task::{Context, Poll}; | ||
use timed_map::{MapKind, StdClock, TimedMap}; | ||
|
||
use super::peers_exchange::{PeerAddresses, PeersExchange, PeersExchangeRequest, PeersExchangeResponse}; | ||
use super::ping::AdexPing; | ||
use super::request_response::{build_request_response_behaviour, PeerRequest, PeerResponse, RequestResponseBehaviour, | ||
RequestResponseSender}; | ||
use crate::application::request_response::network_info::NetworkInfoRequest; | ||
use crate::application::request_response::P2PRequest; | ||
use crate::network::{get_all_network_seednodes, DEFAULT_NETID}; | ||
use crate::relay_address::{RelayAddress, RelayAddressError}; | ||
use crate::swarm_runtime::SwarmRuntime; | ||
use crate::{NetworkInfo, NetworkPorts, RequestResponseBehaviourEvent}; | ||
use crate::{decode_message, encode_message, NetworkInfo, NetworkPorts, RequestResponseBehaviourEvent}; | ||
|
||
pub use libp2p::gossipsub::{Behaviour as Gossipsub, IdentTopic, MessageAuthenticity, MessageId, Topic, TopicHash}; | ||
pub use libp2p::gossipsub::{ConfigBuilder as GossipsubConfigBuilder, Event as GossipsubEvent, | ||
|
@@ -50,6 +55,21 @@ const ANNOUNCE_INTERVAL: Duration = Duration::from_secs(600); | |
const ANNOUNCE_INITIAL_DELAY: Duration = Duration::from_secs(60); | ||
const CHANNEL_BUF_SIZE: usize = 1024 * 8; | ||
|
||
/// Used in time validation logic for each peer which runs immediately after the | ||
/// `ConnectionEstablished` event. | ||
/// | ||
/// Be careful when updating this value, we have some defaults (like for swaps) | ||
/// depending on this. | ||
pub const MAX_TIME_GAP_FOR_CONNECTED_PEER: u64 = 20; | ||
|
||
/// Used for storing peers in [`RECENTLY_DIALED_PEERS`]. | ||
const DIAL_RETRY_DELAY: Duration = Duration::from_secs(60 * 5); | ||
|
||
lazy_static! { | ||
/// Tracks recently dialed peers to avoid repeated connection attempts. | ||
static ref RECENTLY_DIALED_PEERS: Mutex<TimedMap<StdClock, Multiaddr, ()>> = Mutex::new(TimedMap::new_with_map_kind(MapKind::FxHashMap)); | ||
} | ||
|
||
pub const DEPRECATED_NETID_LIST: &[u16] = &[ | ||
7777, // TODO: keep it inaccessible until Q2 of 2024. | ||
]; | ||
|
@@ -162,6 +182,24 @@ pub enum AdexBehaviourCmd { | |
}, | ||
} | ||
|
||
/// Determines if a dial attempt to the remote should be made. | ||
/// | ||
/// Returns `false` if a dial attempt to the given address has already been made, | ||
/// in which case the caller must skip the dial attempt. | ||
fn check_and_mark_dialed( | ||
recently_dialed_peers: &mut MutexGuard<TimedMap<StdClock, Multiaddr, ()>>, | ||
addr: &Multiaddr, | ||
) -> bool { | ||
if recently_dialed_peers.get(addr).is_some() { | ||
info!("Connection attempt was already made recently to '{addr}'."); | ||
return false; | ||
} | ||
|
||
recently_dialed_peers.insert_expirable_unchecked(addr.clone(), (), DIAL_RETRY_DELAY); | ||
|
||
true | ||
} | ||
|
||
/// Returns info about directly connected peers. | ||
pub async fn get_directly_connected_peers(mut cmd_tx: AdexCmdTx) -> HashMap<String, Vec<String>> { | ||
let (result_tx, rx) = oneshot::channel(); | ||
|
@@ -199,6 +237,46 @@ pub async fn get_relay_mesh(mut cmd_tx: AdexCmdTx) -> Vec<String> { | |
rx.await.expect("Tx should be present") | ||
} | ||
|
||
async fn validate_peer_time(peer: PeerId, mut response_tx: Sender<Option<PeerId>>, rp_sender: RequestResponseSender) { | ||
let request = P2PRequest::NetworkInfo(NetworkInfoRequest::GetPeerUtcTimestamp); | ||
let encoded_request = encode_message(&request) | ||
.expect("Static type `PeerInfoRequest::GetPeerUtcTimestamp` should never fail in serialization."); | ||
|
||
match request_one_peer(peer, encoded_request, rp_sender).await { | ||
PeerResponse::Ok { res } => { | ||
if let Ok(timestamp) = decode_message::<u64>(&res) { | ||
mariocynicys marked this conversation as resolved.
Show resolved
Hide resolved
|
||
let now = common::get_utc_timestamp(); | ||
let now: u64 = now | ||
.try_into() | ||
.unwrap_or_else(|_| panic!("`common::get_utc_timestamp` returned invalid data: {}", now)); | ||
|
||
let diff = now.abs_diff(timestamp); | ||
|
||
// If time diff is in the acceptable gap, end the validation here. | ||
if diff <= MAX_TIME_GAP_FOR_CONNECTED_PEER { | ||
debug!( | ||
"Peer '{peer}' is within the acceptable time gap ({MAX_TIME_GAP_FOR_CONNECTED_PEER} seconds); time difference is {diff} seconds." | ||
); | ||
response_tx.send(None).await.unwrap(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this send in redundant. what about not sending anything in here, as the other end of the end of the channel doesn't really use the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you don't send anything, it will block the request/process until timeout. The channel plays a request-response style communication here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please take a second look at this. the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, thought was awaiting on result of this channel |
||
return; | ||
} | ||
}; | ||
}, | ||
other => { | ||
error!("Unexpected response `{other:?}` from peer `{peer}`"); | ||
// TODO: Ideally, we should send `Some(peer)` to end the connection, | ||
// but we don't want to cause a breaking change yet. | ||
response_tx.send(None).await.unwrap(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same for this None |
||
return; | ||
}, | ||
} | ||
|
||
// If the function reaches this point, this means validation has failed. | ||
// Send the peer ID to disconnect from it. | ||
error!("Failed to validate the time for peer `{peer}`; disconnecting."); | ||
response_tx.send(Some(peer)).await.unwrap(); | ||
} | ||
|
||
async fn request_one_peer(peer: PeerId, req: Vec<u8>, mut request_response_tx: RequestResponseSender) -> PeerResponse { | ||
// Use the internal receiver to receive a response to this request. | ||
let (internal_response_tx, internal_response_rx) = oneshot::channel(); | ||
|
@@ -711,19 +789,26 @@ fn start_gossipsub( | |
_ => (), | ||
} | ||
|
||
let mut recently_dialed_peers = RECENTLY_DIALED_PEERS.lock().unwrap(); | ||
for relay in bootstrap.choose_multiple(&mut rng, mesh_n) { | ||
if !check_and_mark_dialed(&mut recently_dialed_peers, relay) { | ||
continue; | ||
} | ||
|
||
match libp2p::Swarm::dial(&mut swarm, relay.clone()) { | ||
Ok(_) => info!("Dialed {}", relay), | ||
Err(e) => error!("Dial {:?} failed: {:?}", relay, e), | ||
} | ||
} | ||
drop(recently_dialed_peers); | ||
|
||
let mut check_connected_relays_interval = | ||
Ticker::new_with_next(CONNECTED_RELAYS_CHECK_INTERVAL, CONNECTED_RELAYS_CHECK_INTERVAL); | ||
|
||
let mut announce_interval = Ticker::new_with_next(ANNOUNCE_INTERVAL, ANNOUNCE_INITIAL_DELAY); | ||
let mut listening = false; | ||
|
||
let (timestamp_tx, mut timestamp_rx) = futures::channel::mpsc::channel(mesh_n_high); | ||
let polling_fut = poll_fn(move |cx: &mut Context| { | ||
loop { | ||
match swarm.behaviour_mut().cmd_rx.poll_next_unpin(cx) { | ||
|
@@ -733,11 +818,27 @@ fn start_gossipsub( | |
} | ||
} | ||
|
||
while let Poll::Ready(Some(Some(peer_id))) = timestamp_rx.poll_next_unpin(cx) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and we can make this channel transmit |
||
if swarm.disconnect_peer_id(peer_id).is_err() { | ||
error!("Disconnection from `{peer_id}` failed unexpectedly, which should never happen."); | ||
} | ||
} | ||
|
||
loop { | ||
match swarm.poll_next_unpin(cx) { | ||
Poll::Ready(Some(event)) => { | ||
debug!("Swarm event {:?}", event); | ||
|
||
if let SwarmEvent::ConnectionEstablished { peer_id, .. } = &event { | ||
info!("Validating time data for peer `{peer_id}`."); | ||
let future = validate_peer_time( | ||
*peer_id, | ||
timestamp_tx.clone(), | ||
swarm.behaviour().core.request_response.sender(), | ||
); | ||
swarm.behaviour().spawn(future); | ||
} | ||
|
||
if let SwarmEvent::Behaviour(event) = event { | ||
if swarm.behaviour_mut().netid != DEFAULT_NETID { | ||
if let AdexBehaviourEvent::Floodsub(FloodsubEvent::Message(message)) = &event { | ||
|
@@ -798,19 +899,29 @@ fn maintain_connection_to_relays(swarm: &mut AtomicDexSwarm, bootstrap_addresses | |
|
||
let mut rng = rand::thread_rng(); | ||
if connected_relays.len() < mesh_n_low { | ||
let mut recently_dialed_peers = RECENTLY_DIALED_PEERS.lock().unwrap(); | ||
let to_connect_num = mesh_n - connected_relays.len(); | ||
let to_connect = swarm | ||
.behaviour_mut() | ||
.core | ||
.peers_exchange | ||
.get_random_peers(to_connect_num, |peer| !connected_relays.contains(peer)); | ||
let to_connect = | ||
swarm | ||
.behaviour_mut() | ||
.core | ||
.peers_exchange | ||
.get_random_peers(to_connect_num, |peer, addresses| { | ||
!connected_relays.contains(peer) | ||
&& addresses | ||
.iter() | ||
.any(|addr| check_and_mark_dialed(&mut recently_dialed_peers, addr)) | ||
}); | ||
|
||
// choose some random bootstrap addresses to connect if peers exchange returned not enough peers | ||
if to_connect.len() < to_connect_num { | ||
let connect_bootstrap_num = to_connect_num - to_connect.len(); | ||
for addr in bootstrap_addresses | ||
.iter() | ||
.filter(|addr| !swarm.behaviour().core.gossipsub.is_connected_to_addr(addr)) | ||
.filter(|addr| { | ||
!swarm.behaviour().core.gossipsub.is_connected_to_addr(addr) | ||
&& check_and_mark_dialed(&mut recently_dialed_peers, addr) | ||
}) | ||
.collect::<Vec<_>>() | ||
.choose_multiple(&mut rng, connect_bootstrap_num) | ||
{ | ||
|
@@ -824,11 +935,13 @@ fn maintain_connection_to_relays(swarm: &mut AtomicDexSwarm, bootstrap_addresses | |
if swarm.behaviour().core.gossipsub.is_connected_to_addr(&addr) { | ||
continue; | ||
} | ||
mariocynicys marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if let Err(e) = libp2p::Swarm::dial(swarm, addr.clone()) { | ||
error!("Peer {} address {} dial error {}", peer, addr, e); | ||
} | ||
} | ||
} | ||
drop(recently_dialed_peers); | ||
} | ||
|
||
if connected_relays.len() > max_n { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: I would write this
5 * 60
:)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
explanation: it's just semantically clearer to put the conversion factors at the end (and ordered, so for 2 days converted to seconds u would use
2 * 24 * 60 * 60
)