Skip to content

Commit

Permalink
add retrying, test against more AZs
Browse files (browse the repository at this point in the history)
  • Loading branch information
mattstam committed Oct 27, 2024
1 parent d06f503 commit e0da534
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 11 deletions.
15 changes: 15 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions crates/sdk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ tonic = { version = "0.12", features = ["tls", "tls-roots"], optional = true }
alloy-signer = { version = "0.3.6", optional = true }
alloy-signer-local = { version = "0.3.6", optional = true }
alloy-primitives = { version = "0.8.7", optional = true }
backoff = { version = "0.4", features = ["tokio"], optional = true }

[features]
default = ["network"]
Expand Down Expand Up @@ -79,6 +80,7 @@ network-v2 = [
"dep:twirp",
"dep:reqwest-middleware",
"dep:tonic",
"dep:backoff",
]
cuda = ["sp1-cuda"]

Expand Down
56 changes: 45 additions & 11 deletions crates/sdk/src/network-v2/prover.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::{
Prover, SP1Context, SP1ProofKind, SP1ProofWithPublicValues, SP1ProvingKey, SP1VerifyingKey,
};
use anyhow::Result;
use backoff::{future::retry, ExponentialBackoff};
use serde::de::DeserializeOwned;
use sp1_core_machine::io::SP1Stdin;
use sp1_prover::{components::DefaultProverComponents, SP1Prover, SP1_CIRCUIT_VERSION};
Expand Down Expand Up @@ -113,28 +114,61 @@ impl NetworkProver {
) -> Result<P> {
let mut is_assigned = false;
let start_time = Instant::now();

// Configure retries with exponential backoff.
let backoff = ExponentialBackoff {
initial_interval: Duration::from_secs(1),
max_interval: Duration::from_secs(30),
max_elapsed_time: timeout,
..Default::default()
};

loop {
if let Some(timeout) = timeout {
if start_time.elapsed() > timeout {
return Err(anyhow::anyhow!("Proof request timed out."));
}
}

let (status, maybe_proof) =
self.client.get_proof_request_status::<P>(request_id).await?;

match status.proof_status() {
ProofStatus::Fulfilled => {
return Ok(maybe_proof.unwrap());
let status_result = retry(backoff.clone(), || async {
match self.client.get_proof_request_status::<P>(request_id).await {
Ok(result) => Ok(result),
Err(e) => {
if let Some(status) = e.downcast_ref::<tonic::Status>() {
if status.code() == tonic::Code::Unavailable {
// Retry on unavailable.
Err(backoff::Error::transient(e))
} else {
// Don't retry on other errors.
Err(backoff::Error::permanent(e))
}
} else {
// Don't retry on non-tonic errors.
Err(backoff::Error::permanent(e))
}
}
}
ProofStatus::Assigned => {
if !is_assigned {
log::info!("Proof request assigned, proving...");
is_assigned = true;
})
.await;

match status_result {
Ok((status, maybe_proof)) => match status.proof_status() {
ProofStatus::Fulfilled => {
return Ok(maybe_proof.unwrap());
}
ProofStatus::Assigned => {
if !is_assigned {
log::info!("Proof request assigned, proving...");
is_assigned = true;
}
}
_ => {}
},
Err(e) => {
log::warn!("Retrying get proof status due to error: {}", e);
}
_ => {}
}

sleep(Duration::from_secs(2)).await;
}
}
Expand Down

0 comments on commit e0da534

Please sign in to comment.