Skip to content

Commit

Permalink
feat(proposer): more configurability for witness gen (#303)
Browse files Browse the repository at this point in the history
* feat(proposer): op, support max concurrent witness gen config

* feat(proposer): op, support timeout witness gen config
  • Loading branch information
emilianobonassi authored Dec 23, 2024
1 parent d05ac29 commit 01c7040
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 11 deletions.
2 changes: 2 additions & 0 deletions book/advanced/proposer.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ The following environment variables are optional.
| Parameter | Description |
|-----------|-------------|
| `MAX_CONCURRENT_PROOF_REQUESTS` | Default: `10`. The maximum number of concurrent proof requests to send to the `op-succinct-server`. |
| `MAX_CONCURRENT_WITNESS_GEN` | Default: `5`. The maximum number of concurrent witness generation processes to run on the `op-succinct-server`. |
| `WITNESS_GEN_TIMEOUT` | Default: `1200`. The maximum time in seconds to spend generating a witness for `op-succinct-server`. |
| `MAX_BLOCK_RANGE_PER_SPAN_PROOF` | Default: `300`. The maximum number of blocks to include in each span proof. For chains with high throughput, you need to decrease this value. |
| `OP_SUCCINCT_MOCK` | Default: `false`. Set to `true` to run in mock proof mode. The `OPSuccinctL2OutputOracle` contract must be configured to use an `SP1MockVerifier`. |
| `OP_SUCCINCT_SERVER_URL` | Default: `http://op-succinct-server:3000`. The URL of the `op-succinct-server` service which the `op-succinct-proposer` will send proof requests to. |
Expand Down
2 changes: 2 additions & 0 deletions proposer/op/op_proposer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
--l1-eth-rpc=${L1_RPC} \
--beacon-rpc=${L1_BEACON_RPC} \
--max-concurrent-proof-requests=${MAX_CONCURRENT_PROOF_REQUESTS:-10} \
--max-concurrent-witness-gen=${MAX_CONCURRENT_WITNESS_GEN:-5} \
--witness-gen-timeout=${WITNESS_GEN_TIMEOUT:-1200} \
--db-path=${DB_PATH:-/usr/local/bin/dbdata} \
--op-succinct-server-url=${OP_SUCCINCT_SERVER_URL:-http://op-succinct-server:3000} \
--max-block-range-per-span-proof=${MAX_BLOCK_RANGE_PER_SPAN_PROOF:-300} \
Expand Down
6 changes: 6 additions & 0 deletions proposer/op/proposer/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ type CLIConfig struct {
TxCacheOutDir string
// The max size (in blocks) of a proof we will attempt to generate. If span batches are larger, we break them up.
MaxBlockRangePerSpanProof uint64
// The max number of concurrent witness generation processes.
MaxConcurrentWitnessGen uint64
// The max time we will wait for a witness to be generated before giving up.
WitnessGenTimeout uint64
// The Chain ID of the L2 chain.
L2ChainID uint64
// The maximum amount of time we will spend waiting for a proof before giving up and trying again.
Expand Down Expand Up @@ -155,6 +159,8 @@ func NewConfig(ctx *cli.Context) *CLIConfig {
UseCachedDb: ctx.Bool(flags.UseCachedDbFlag.Name),
SlackToken: ctx.String(flags.SlackTokenFlag.Name),
MaxBlockRangePerSpanProof: ctx.Uint64(flags.MaxBlockRangePerSpanProofFlag.Name),
MaxConcurrentWitnessGen: ctx.Uint64(flags.MaxConcurrentWitnessGenFlag.Name),
WitnessGenTimeout: ctx.Uint64(flags.WitnessGenTimeoutFlag.Name),
ProofTimeout: ctx.Uint64(flags.ProofTimeoutFlag.Name),
TxCacheOutDir: ctx.String(flags.TxCacheOutDirFlag.Name),
OPSuccinctServerUrl: ctx.String(flags.OPSuccinctServerUrlFlag.Name),
Expand Down
19 changes: 17 additions & 2 deletions proposer/op/proposer/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,23 @@ var (
Value: 50,
EnvVars: prefixEnvVars("MAX_BLOCK_RANGE_PER_SPAN_PROOF"),
}
// MaxConcurrentWitnessGenFlag sets the maximum number of concurrent witness generation processes (default 5).
//
// This limit is set to prevent overloading the witness generation server. Until Kona improves their native I/O API (https://github.com/anton-rs/kona/issues/553)
// the maximum number of concurrent witness generation requests is roughly num_cpu / 2. The default is 5 for now to be safe.
MaxConcurrentWitnessGenFlag = &cli.Uint64Flag{
Name: "max-concurrent-witness-gen",
Usage: "Maximum number of concurrent witness generation processes",
Value: 5,
EnvVars: prefixEnvVars("MAX_CONCURRENT_WITNESS_GEN"),
}
// WitnessGenTimeoutFlag sets the maximum time, in seconds, to spend generating a witness before giving up.
// The default is 20 minutes (1200 seconds).
//
// NOTE(review): the optionalFlags change accompanying this flag appears to register only
// MaxConcurrentWitnessGenFlag. Confirm WitnessGenTimeoutFlag is also added to a registered flag list;
// otherwise the --witness-gen-timeout argument passed by op_proposer.sh may be rejected as an undefined flag.
WitnessGenTimeoutFlag = &cli.Uint64Flag{
Name: "witness-gen-timeout",
Usage: "Maximum time in seconds to spend generating a witness before giving up",
Value: 20 * 60,
EnvVars: prefixEnvVars("WITNESS_GEN_TIMEOUT"),
}
ProofTimeoutFlag = &cli.Uint64Flag{
Name: "proof-timeout",
Usage: "Maximum time in seconds to spend generating a proof before giving up",
Name: "proof-timeout",
Usage: "Maximum time in seconds to spend generating a proof before giving up",
// If a proof takes more than 4 hours, assume the cluster failed to set it to failed state.
Value: 14400,
EnvVars: prefixEnvVars("MAX_PROOF_TIME"),
Expand Down Expand Up @@ -154,6 +168,7 @@ var optionalFlags = []cli.Flag{
UseCachedDbFlag,
SlackTokenFlag,
MaxBlockRangePerSpanProofFlag,
MaxConcurrentWitnessGenFlag,
TxCacheOutDirFlag,
OPSuccinctServerUrlFlag,
ProofTimeoutFlag,
Expand Down
14 changes: 5 additions & 9 deletions proposer/op/proposer/prove.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ import (
)

const PROOF_STATUS_TIMEOUT = 30 * time.Second
const WITNESSGEN_TIMEOUT = 20 * time.Minute

// This limit is set to prevent overloading the witness generation server. Until Kona improves their native I/O API (https://github.com/anton-rs/kona/issues/553)
// the maximum number of concurrent witness generation requests is roughly num_cpu / 2. Set it to 5 for now to be safe.
const MAX_CONCURRENT_WITNESS_GEN = 5

// Process all of requests in PROVING state.
func (l *L2OutputSubmitter) ProcessProvingRequests() error {
Expand Down Expand Up @@ -75,7 +70,7 @@ func (l *L2OutputSubmitter) ProcessWitnessgenRequests() error {
for _, req := range reqs {
// If the request has been in the WITNESSGEN state for longer than the timeout, set status to FAILED.
// This is a catch-all in case the witness generation state update failed.
if req.LastUpdatedTime+uint64(WITNESSGEN_TIMEOUT.Seconds()) < uint64(time.Now().Unix()) {
if req.LastUpdatedTime+uint64(l.Cfg.WitnessGenTimeout) < uint64(time.Now().Unix()) {
// Retry the request if it timed out.
l.RetryRequest(req, ProofStatusResponse{})
}
Expand Down Expand Up @@ -162,7 +157,7 @@ func (l *L2OutputSubmitter) RequestQueuedProofs(ctx context.Context) error {

// The number of witness generation requests is capped at l.Cfg.MaxConcurrentWitnessGen (set via --max-concurrent-witness-gen). This prevents overloading the machine with processes spawned by the witness generation server.
// Once https://github.com/anton-rs/kona/issues/553 is fixed, we may be able to remove this check.
if witnessGenProofs >= MAX_CONCURRENT_WITNESS_GEN {
if witnessGenProofs >= int(l.Cfg.MaxConcurrentWitnessGen) {
l.Log.Info("max witness generation reached, waiting for next cycle")
return nil
}
Expand Down Expand Up @@ -331,13 +326,14 @@ func (l *L2OutputSubmitter) makeProofRequest(proofType proofrequest.Type, jsonBo
}
req.Header.Set("Content-Type", "application/json")

client := &http.Client{Timeout: WITNESSGEN_TIMEOUT}
timeout := time.Duration(l.Cfg.WitnessGenTimeout) * time.Second
client := &http.Client{Timeout: timeout}
resp, err := client.Do(req)
if err != nil {
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
l.Log.Error("Witness generation request timed out", "err", err)
l.Metr.RecordWitnessGenFailure("Timeout")
return nil, fmt.Errorf("request timed out after %s: %w", WITNESSGEN_TIMEOUT, err)
return nil, fmt.Errorf("request timed out after %s: %w", timeout, err)
}
return nil, fmt.Errorf("failed to send request: %w", err)
}
Expand Down
4 changes: 4 additions & 0 deletions proposer/op/proposer/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ type ProposerConfig struct {
RollupRpc string
TxCacheOutDir string
MaxBlockRangePerSpanProof uint64
MaxConcurrentWitnessGen uint64
WitnessGenTimeout uint64
L2ChainID uint64
ProofTimeout uint64
OPSuccinctServerUrl string
Expand Down Expand Up @@ -122,6 +124,8 @@ func (ps *ProposerService) initFromCLIConfig(ctx context.Context, version string
ps.RollupRpc = cfg.RollupRpc
ps.TxCacheOutDir = cfg.TxCacheOutDir
ps.MaxBlockRangePerSpanProof = cfg.MaxBlockRangePerSpanProof
ps.MaxConcurrentWitnessGen = cfg.MaxConcurrentWitnessGen
ps.WitnessGenTimeout = cfg.WitnessGenTimeout
ps.OPSuccinctServerUrl = cfg.OPSuccinctServerUrl
ps.ProofTimeout = cfg.ProofTimeout
ps.L2ChainID = cfg.L2ChainID
Expand Down

0 comments on commit 01c7040

Please sign in to comment.