feat(proposer): more configurability for witness gen (#303)

* feat(proposer): op, support max concurrent witness gen config
* feat(proposer): op, support timeout witness gen config
succinctlabs · Dec 23, 2024 · 01c7040 · 01c7040
1 parent d05ac29
commit 01c7040
Show file tree

Hide file tree

Showing 6 changed files with 36 additions and 11 deletions.
diff --git a/book/advanced/proposer.md b/book/advanced/proposer.md
@@ -55,6 +55,8 @@ The following environment variables are optional.
 | Parameter | Description |
 |-----------|-------------|
 | `MAX_CONCURRENT_PROOF_REQUESTS` | Default: `10`. The maximum number of concurrent proof requests to send to the `op-succinct-server`. |
+| `MAX_CONCURRENT_WITNESS_GEN` | Default: `5`. The maximum number of concurrent witness generation processes to run on the `op-succinct-server`. |
+| `WITNESS_GEN_TIMEOUT` | Default: `1200`. The maximum time in seconds to wait for the `op-succinct-server` to generate a witness before the request times out and is retried. |
 | `MAX_BLOCK_RANGE_PER_SPAN_PROOF` | Default: `300`. The maximum number of blocks to include in each span proof. For chains with high throughput, you need to decrease this value. |
 | `OP_SUCCINCT_MOCK` | Default: `false`. Set to `true` to run in mock proof mode. The `OPSuccinctL2OutputOracle` contract must be configured to use an `SP1MockVerifier`. |
 | `OP_SUCCINCT_SERVER_URL` | Default: `http://op-succinct-server:3000`. The URL of the `op-succinct-server` service which the `op-succinct-proposer` will send proof requests to. |

diff --git a/proposer/op/op_proposer.sh b/proposer/op/op_proposer.sh
@@ -13,6 +13,8 @@
     --l1-eth-rpc=${L1_RPC} \
     --beacon-rpc=${L1_BEACON_RPC} \
     --max-concurrent-proof-requests=${MAX_CONCURRENT_PROOF_REQUESTS:-10} \
+    --max-concurrent-witness-gen=${MAX_CONCURRENT_WITNESS_GEN:-5} \
+    --witness-gen-timeout=${WITNESS_GEN_TIMEOUT:-1200} \
     --db-path=${DB_PATH:-/usr/local/bin/dbdata} \
     --op-succinct-server-url=${OP_SUCCINCT_SERVER_URL:-http://op-succinct-server:3000} \
     --max-block-range-per-span-proof=${MAX_BLOCK_RANGE_PER_SPAN_PROOF:-300} \

diff --git a/proposer/op/proposer/config.go b/proposer/op/proposer/config.go
@@ -80,6 +80,10 @@ type CLIConfig struct {
 	TxCacheOutDir string
 	// The max size (in blocks) of a proof we will attempt to generate. If span batches are larger, we break them up.
 	MaxBlockRangePerSpanProof uint64
+	// The max number of concurrent witness generation processes.
+	MaxConcurrentWitnessGen uint64
+	// The max time (in seconds) we will wait for a witness to be generated before giving up.
+	WitnessGenTimeout uint64
 	// The Chain ID of the L2 chain.
 	L2ChainID uint64
 	// The maximum amount of time we will spend waiting for a proof before giving up and trying again.
@@ -155,6 +159,8 @@ func NewConfig(ctx *cli.Context) *CLIConfig {
 		UseCachedDb:                  ctx.Bool(flags.UseCachedDbFlag.Name),
 		SlackToken:                   ctx.String(flags.SlackTokenFlag.Name),
 		MaxBlockRangePerSpanProof:    ctx.Uint64(flags.MaxBlockRangePerSpanProofFlag.Name),
+		MaxConcurrentWitnessGen:      ctx.Uint64(flags.MaxConcurrentWitnessGenFlag.Name),
+		WitnessGenTimeout:            ctx.Uint64(flags.WitnessGenTimeoutFlag.Name),
 		ProofTimeout:                 ctx.Uint64(flags.ProofTimeoutFlag.Name),
 		TxCacheOutDir:                ctx.String(flags.TxCacheOutDirFlag.Name),
 		OPSuccinctServerUrl:          ctx.String(flags.OPSuccinctServerUrlFlag.Name),

diff --git a/proposer/op/proposer/flags/flags.go b/proposer/op/proposer/flags/flags.go
@@ -91,9 +91,23 @@ var (
 		Value:   50,
 		EnvVars: prefixEnvVars("MAX_BLOCK_RANGE_PER_SPAN_PROOF"),
 	}
+	// This limit is set to prevent overloading the witness generation server. Until Kona improves their native I/O API (https://github.com/anton-rs/kona/issues/553)
+	// the maximum number of concurrent witness generation requests is roughly num_cpu / 2. Set it to 5 for now to be safe.
+	MaxConcurrentWitnessGenFlag = &cli.Uint64Flag{
+		Name:    "max-concurrent-witness-gen",
+		Usage:   "Maximum number of concurrent witness generation processes",
+		Value:   5,
+		EnvVars: prefixEnvVars("MAX_CONCURRENT_WITNESS_GEN"),
+	}
+	WitnessGenTimeoutFlag = &cli.Uint64Flag{
+		Name:    "witness-gen-timeout",
+		Usage:   "Maximum time in seconds to spend generating a witness before giving up",
+		Value:   20 * 60,
+		EnvVars: prefixEnvVars("WITNESS_GEN_TIMEOUT"),
+	}
 	ProofTimeoutFlag = &cli.Uint64Flag{
-		Name:    "proof-timeout",
-		Usage:   "Maximum time in seconds to spend generating a proof before giving up",
+		Name:  "proof-timeout",
+		Usage: "Maximum time in seconds to spend generating a proof before giving up",
 		// If a proof takes more than 4 hours, assume the cluster failed to set it to failed state.
 		Value:   14400,
 		EnvVars: prefixEnvVars("MAX_PROOF_TIME"),
@@ -154,6 +168,7 @@ var optionalFlags = []cli.Flag{
 	UseCachedDbFlag,
 	SlackTokenFlag,
 	MaxBlockRangePerSpanProofFlag,
+	MaxConcurrentWitnessGenFlag,
 	TxCacheOutDirFlag,
 	OPSuccinctServerUrlFlag,
 	ProofTimeoutFlag,

diff --git a/proposer/op/proposer/prove.go b/proposer/op/proposer/prove.go
@@ -17,11 +17,6 @@ import (
 )
 
 const PROOF_STATUS_TIMEOUT = 30 * time.Second
-const WITNESSGEN_TIMEOUT = 20 * time.Minute
-
-// This limit is set to prevent overloading the witness generation server. Until Kona improves their native I/O API (https://github.com/anton-rs/kona/issues/553)
-// the maximum number of concurrent witness generation requests is roughly num_cpu / 2. Set it to 5 for now to be safe.
-const MAX_CONCURRENT_WITNESS_GEN = 5
 
 // Process all of requests in PROVING state.
 func (l *L2OutputSubmitter) ProcessProvingRequests() error {
@@ -75,7 +70,7 @@ func (l *L2OutputSubmitter) ProcessWitnessgenRequests() error {
 	for _, req := range reqs {
 		// If the request has been in the WITNESSGEN state for longer than the timeout, set status to FAILED.
 		// This is a catch-all in case the witness generation state update failed.
-		if req.LastUpdatedTime+uint64(WITNESSGEN_TIMEOUT.Seconds()) < uint64(time.Now().Unix()) {
+		if req.LastUpdatedTime+uint64(l.Cfg.WitnessGenTimeout) < uint64(time.Now().Unix()) {
 			// Retry the request if it timed out.
 			l.RetryRequest(req, ProofStatusResponse{})
 		}
@@ -162,7 +157,7 @@ func (l *L2OutputSubmitter) RequestQueuedProofs(ctx context.Context) error {
 
 		// The number of witness generation requests is capped at the configured l.Cfg.MaxConcurrentWitnessGen. This prevents overloading the machine with processes spawned by the witness generation server.
 		// Once https://github.com/anton-rs/kona/issues/553 is fixed, we may be able to remove this check.
-		if witnessGenProofs >= MAX_CONCURRENT_WITNESS_GEN {
+		if witnessGenProofs >= int(l.Cfg.MaxConcurrentWitnessGen) {
 			l.Log.Info("max witness generation reached, waiting for next cycle")
 			return nil
 		}
@@ -331,13 +326,14 @@ func (l *L2OutputSubmitter) makeProofRequest(proofType proofrequest.Type, jsonBo
 	}
 	req.Header.Set("Content-Type", "application/json")
 
-	client := &http.Client{Timeout: WITNESSGEN_TIMEOUT}
+	timeout := time.Duration(l.Cfg.WitnessGenTimeout) * time.Second
+	client := &http.Client{Timeout: timeout}
 	resp, err := client.Do(req)
 	if err != nil {
 		if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
 			l.Log.Error("Witness generation request timed out", "err", err)
 			l.Metr.RecordWitnessGenFailure("Timeout")
-			return nil, fmt.Errorf("request timed out after %s: %w", WITNESSGEN_TIMEOUT, err)
+			return nil, fmt.Errorf("request timed out after %s: %w", timeout, err)
 		}
 		return nil, fmt.Errorf("failed to send request: %w", err)
 	}

diff --git a/proposer/op/proposer/service.go b/proposer/op/proposer/service.go
@@ -59,6 +59,8 @@ type ProposerConfig struct {
 	RollupRpc                  string
 	TxCacheOutDir              string
 	MaxBlockRangePerSpanProof  uint64
+	MaxConcurrentWitnessGen    uint64
+	WitnessGenTimeout          uint64
 	L2ChainID                  uint64
 	ProofTimeout               uint64
 	OPSuccinctServerUrl        string
@@ -122,6 +124,8 @@ func (ps *ProposerService) initFromCLIConfig(ctx context.Context, version string
 	ps.RollupRpc = cfg.RollupRpc
 	ps.TxCacheOutDir = cfg.TxCacheOutDir
 	ps.MaxBlockRangePerSpanProof = cfg.MaxBlockRangePerSpanProof
+	ps.MaxConcurrentWitnessGen = cfg.MaxConcurrentWitnessGen
+	ps.WitnessGenTimeout = cfg.WitnessGenTimeout
 	ps.OPSuccinctServerUrl = cfg.OPSuccinctServerUrl
 	ps.ProofTimeout = cfg.ProofTimeout
 	ps.L2ChainID = cfg.L2ChainID