diff --git a/Cargo.lock b/Cargo.lock index 5b8a6cb3db..78c87ae95b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -984,6 +984,20 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b7e4c2464d97fe331d41de9d5db0def0a96f4d823b8b32a2efd503578988973" +[[package]] +name = "backoff" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" +dependencies = [ + "futures-core", + "getrandom 0.2.15", + "instant", + "pin-project-lite", + "rand 0.8.5", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.74" @@ -6517,6 +6531,7 @@ dependencies = [ "alloy-sol-types 0.7.7", "anyhow", "async-trait", + "backoff", "bincode", "cfg-if", "dirs", diff --git a/crates/sdk/Cargo.toml b/crates/sdk/Cargo.toml index a9a997d472..ffedbe26da 100644 --- a/crates/sdk/Cargo.toml +++ b/crates/sdk/Cargo.toml @@ -51,6 +51,7 @@ tonic = { version = "0.12", features = ["tls", "tls-roots"], optional = true } alloy-signer = { version = "0.3.6", optional = true } alloy-signer-local = { version = "0.3.6", optional = true } alloy-primitives = { version = "0.8.7", optional = true } +backoff = { version = "0.4", features = ["tokio"], optional = true } [features] default = ["network"] @@ -79,6 +80,7 @@ network-v2 = [ "dep:twirp", "dep:reqwest-middleware", "dep:tonic", + "dep:backoff", ] cuda = ["sp1-cuda"] diff --git a/crates/sdk/src/network-v2/prover.rs b/crates/sdk/src/network-v2/prover.rs index 5a568bfe57..6f5c4eb700 100644 --- a/crates/sdk/src/network-v2/prover.rs +++ b/crates/sdk/src/network-v2/prover.rs @@ -9,6 +9,7 @@ use crate::{ Prover, SP1Context, SP1ProofKind, SP1ProofWithPublicValues, SP1ProvingKey, SP1VerifyingKey, }; use anyhow::Result; +use backoff::{future::retry, ExponentialBackoff}; use serde::de::DeserializeOwned; use sp1_core_machine::io::SP1Stdin; use sp1_prover::{components::DefaultProverComponents, SP1Prover, SP1_CIRCUIT_VERSION}; @@ -113,6 +114,15 @@ impl NetworkProver { ) -> Result

{ let mut is_assigned = false; let start_time = Instant::now(); + + // Configure retries with exponential backoff. + let backoff = ExponentialBackoff { + initial_interval: Duration::from_secs(1), + max_interval: Duration::from_secs(30), + max_elapsed_time: timeout, + ..Default::default() + }; + loop { if let Some(timeout) = timeout { if start_time.elapsed() > timeout { @@ -120,21 +130,45 @@ impl NetworkProver { } } - let (status, maybe_proof) = - self.client.get_proof_request_status::

(request_id).await?; - - match status.proof_status() { - ProofStatus::Fulfilled => { - return Ok(maybe_proof.unwrap()); + let status_result = retry(backoff.clone(), || async { + match self.client.get_proof_request_status::

(request_id).await { + Ok(result) => Ok(result), + Err(e) => { + if let Some(status) = e.downcast_ref::() { + if status.code() == tonic::Code::Unavailable { + // Retry on unavailable. + Err(backoff::Error::transient(e)) + } else { + // Don't retry on other errors. + Err(backoff::Error::permanent(e)) + } + } else { + // Don't retry on non-tonic errors. + Err(backoff::Error::permanent(e)) + } + } } - ProofStatus::Assigned => { - if !is_assigned { - log::info!("Proof request assigned, proving..."); - is_assigned = true; + }) + .await; + + match status_result { + Ok((status, maybe_proof)) => match status.proof_status() { + ProofStatus::Fulfilled => { + return Ok(maybe_proof.unwrap()); } + ProofStatus::Assigned => { + if !is_assigned { + log::info!("Proof request assigned, proving..."); + is_assigned = true; + } + } + _ => {} + }, + Err(e) => { + log::warn!("Retrying get proof status due to error: {}", e); } - _ => {} } + sleep(Duration::from_secs(2)).await; } }