Skip to content

Commit

Permalink
[Observability] Foundation for load testing telemetry (#832)
Browse files Browse the repository at this point in the history
## Summary

Refactor the foundation for E2E tokenomics observability, adding many new
data points.

Key changes include:
- `x/tokenomics` telemetry
- Begin/End blockers execution time management
- Custom `poktroll` telemetry config in `app.toml`

## Issue

- #762

---------

Co-authored-by: Daniel Olshansky <[email protected]>
  • Loading branch information
okdas and Olshansk authored Oct 31, 2024
1 parent ff76430 commit bae452a
Show file tree
Hide file tree
Showing 35 changed files with 3,744 additions and 168 deletions.
29 changes: 25 additions & 4 deletions Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ load("ext://deployment", "deployment_create")
load("ext://execute_in_pod", "execute_in_pod")

# A list of directories where changes trigger a hot-reload of the validator
hot_reload_dirs = ["app", "cmd", "tools", "x", "pkg"]
hot_reload_dirs = ["app", "cmd", "tools", "x", "pkg", "telemetry"]


def merge_dicts(base, updates):
Expand Down Expand Up @@ -38,14 +38,26 @@ localnet_config_defaults = {
"enabled": True,
"grafana": {"defaultDashboardsEnabled": False},
},
"relayminers": {"count": 1, "delve": {"enabled": False}},
"relayminers": {
"count": 1,
"delve": {"enabled": False},
"logs": {
"level": "debug",
},
},
"gateways": {
"count": 1,
"delve": {"enabled": False},
"logs": {
"level": "debug",
},
},
"appgateservers": {
"count": 1,
"delve": {"enabled": False},
"logs": {
"level": "debug",
},
},
"ollama": {
"enabled": False,
Expand Down Expand Up @@ -100,8 +112,10 @@ if localnet_config["observability"]["enabled"]:
helm_repo("prometheus-community", "https://prometheus-community.github.io/helm-charts")
helm_repo("grafana-helm-repo", "https://grafana.github.io/helm-charts")

# Increase timeout for building the image
update_settings(k8s_upsert_timeout_secs=60)
# Timeout is increased to 120 seconds (default is 30) because a slow internet connection
# could time out while pulling container images.
update_settings(k8s_upsert_timeout_secs=120)

helm_resource(
"observability",
Expand Down Expand Up @@ -226,6 +240,7 @@ helm_resource(
"--set=logs.format=" + str(localnet_config["validator"]["logs"]["format"]),
"--set=serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled=" + str(localnet_config["validator"]["delve"]["enabled"]),
"--set=image.repository=poktrolld",
],
image_deps=["poktrolld"],
image_keys=[("image.repository", "image.tag")],
Expand All @@ -244,6 +259,8 @@ for x in range(localnet_config["relayminers"]["count"]):
"--values=./localnet/kubernetes/values-relayminer-" + str(actor_number) + ".yaml",
"--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled=" + str(localnet_config["relayminers"]["delve"]["enabled"]),
"--set=logLevel=" + str(localnet_config["relayminers"]["logs"]["level"]),
"--set=image.repository=poktrolld",
],
image_deps=["poktrolld"],
image_keys=[("image.repository", "image.tag")],
Expand Down Expand Up @@ -284,6 +301,8 @@ for x in range(localnet_config["appgateservers"]["count"]):
"--set=config.signing_key=app" + str(actor_number),
"--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled=" + str(localnet_config["appgateservers"]["delve"]["enabled"]),
"--set=logLevel=" + str(localnet_config["appgateservers"]["logs"]["level"]),
"--set=image.repository=poktrolld",
],
image_deps=["poktrolld"],
image_keys=[("image.repository", "image.tag")],
Expand Down Expand Up @@ -325,6 +344,8 @@ for x in range(localnet_config["gateways"]["count"]):
"--set=config.signing_key=gateway" + str(actor_number),
"--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled=" + str(localnet_config["gateways"]["delve"]["enabled"]),
"--set=logLevel=" + str(localnet_config["gateways"]["logs"]["level"]),
"--set=image.repository=poktrolld",
],
image_deps=["poktrolld"],
image_keys=[("image.repository", "image.tag")],
Expand Down
14 changes: 6 additions & 8 deletions api/poktroll/application/types.pulsar.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion api/poktroll/tokenomics/event.pulsar.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,11 @@ func New(
return nil, err
}

// Set up poktroll telemetry using `app.toml` configuration options (in addition to cosmos-sdk telemetry config).
if err := telemetry.New(appOpts); err != nil {
return nil, err
}

return app, nil
}

Expand Down
67 changes: 53 additions & 14 deletions cmd/poktrolld/cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,27 @@ import (
sdk "github.com/cosmos/cosmos-sdk/types"

"github.com/pokt-network/poktroll/app"
"github.com/pokt-network/poktroll/telemetry"
)

var once sync.Once

// PoktrollAppConfig represents the poktroll-specific part of the `app.toml` file.
// It is embedded in `CustomAppConfig` under the `poktroll` mapstructure key.
// Check out `customPoktrollAppConfigTemplate()` for additional information about each setting.
type PoktrollAppConfig struct {
// Telemetry holds the settings rendered under the `[poktroll.telemetry]` section.
Telemetry telemetry.PoktrollTelemetryConfig `mapstructure:"telemetry"`
}

// poktrollAppConfigDefaults returns the default poktroll-specific values that are
// rendered into `app.toml`.
// Check out `customPoktrollAppConfigTemplate()` for a description of each setting.
func poktrollAppConfigDefaults() PoktrollAppConfig {
	// Start from the zero value and set only the fields that have a non-zero default.
	var defaults PoktrollAppConfig
	defaults.Telemetry.CardinalityLevel = "medium"
	return defaults
}

func InitSDKConfig() {
once.Do(func() {
checkOrInitSDKConfig()
Expand Down Expand Up @@ -90,6 +107,7 @@ func initAppConfig() (string, interface{}) {
// The following code snippet is just for reference.
type CustomAppConfig struct {
serverconfig.Config `mapstructure:",squash"`
Poktroll PoktrollAppConfig `mapstructure:"poktroll"`
}

// Optionally allow the chain developer to overwrite the SDK's default
Expand All @@ -113,27 +131,48 @@ func initAppConfig() (string, interface{}) {
srvCfg.MinGasPrices = "0.000000001upokt" // Also adjust ignite's `config.yml`.
srvCfg.Mempool.MaxTxs = 10000
srvCfg.Telemetry.Enabled = true
srvCfg.Telemetry.PrometheusRetentionTime = 60 // in seconds. This turns on Prometheus support.
// Positive non-zero value turns on Prometheus support.
// Prometheus metrics are removed from the exporter when retention time is reached.
srvCfg.Telemetry.PrometheusRetentionTime = 60 * 60 * 24 // in seconds.
srvCfg.Telemetry.MetricsSink = "mem"
srvCfg.Pruning = "nothing" // archiving node by default
srvCfg.API.Enable = true
srvCfg.GRPC.Enable = true
srvCfg.GRPCWeb.Enable = true

customAppConfig := CustomAppConfig{
Config: *srvCfg,
Config: *srvCfg,
Poktroll: poktrollAppConfigDefaults(),
}

customAppTemplate := serverconfig.DefaultConfigTemplate
// Edit the default template file
//
// customAppTemplate := serverconfig.DefaultConfigTemplate + `
// [wasm]
// # This is the maximum sdk gas (wasm and storage) that we allow for any x/wasm "smart" queries
// query_gas_limit = 300000
// # This is the number of wasm vm instances we keep cached in memory for speed-up
// # Warning: this is currently unstable and may lead to crashes, best to keep for 0 unless testing locally
// lru_size = 0`

return customAppTemplate, customAppConfig
return customPoktrollAppConfigTemplate(), customAppConfig
}

// customPoktrollAppConfigTemplate returns the `app.toml` template: the SDK's default
// template followed by a poktroll-specific section.
// The section is intended for validators and full-nodes, and its placeholders are
// rendered using the default values from `poktrollAppConfigDefaults()`.
func customPoktrollAppConfigTemplate() string {
	// poktrollSection is appended verbatim after the default template.
	const poktrollSection = `
###############################################################################
### Poktroll ###
###############################################################################
# Poktroll-specific app configuration for Full Nodes and Validators.
[poktroll]
# Telemetry configuration in addition to the [telemetry] settings.
[poktroll.telemetry]
# Cardinality level for telemetry metrics collection
# This controls the level of detail (number of unique labels) in metrics.
# Options:
# - "low": Collects basic metrics with low cardinality.
# Suitable for production environments with tight performance constraints.
# - "medium": Collects a moderate number of labels, balancing detail and performance.
# Suitable for moderate workloads or staging environments.
# - "high": WARNING: WILL CAUSE STRESS TO YOUR MONITORING ENVIRONMENT! Collects detailed metrics with high
# cardinality, including labels with many unique values (e.g., application_id, session_id).
# Recommended for debugging or testing environments.
cardinality-level = "{{ .Poktroll.Telemetry.CardinalityLevel }}"
`

	return serverconfig.DefaultConfigTemplate + poktrollSection
}
26 changes: 14 additions & 12 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,11 @@ validators:
# minimum-gas-prices: 0.000000001upokt
telemetry:
enabled: true
prometheus-retention-time: "600" # seconds
poktroll:
telemetry:
# "high" produces a lot of timeseries.
# ONLY suitable for small networks such as LocalNet.
cardinality-level: high
config:
moniker: "validator1"
consensus:
Expand Down Expand Up @@ -139,28 +143,28 @@ genesis:
denom: upokt
bank:
supply:
- amount: "1003000204"
- amount: "1102000204"
denom: upokt
balances:
# Application module
- address: pokt1rl3gjgzexmplmds3tq3r3yk84zlwdl6djzgsvm
coins:
- amount: "1000068" # Equals to the total of all app stakes below
- amount: "100000068" # MUST BE equal to the total of all app stakes below
denom: upokt
# Supplier module
- address: pokt1j40dzzmn6cn9kxku7a5tjnud6hv37vesr5ccaa
coins:
- amount: "1000068" # Equals to the total of all supplier stakes below
- amount: "1000068" # MUST BE equal to the total of all supplier stakes below
denom: upokt
# Gateway module
- address: pokt1f6j7u6875p2cvyrgjr0d2uecyzah0kget9vlpl
coins:
- amount: "1000068" # Equals to the total of all gateway stakes below
- amount: "1000068" # MUST BE equal to the total of all gateway stakes below
denom: upokt
# Service module
- address: pokt1nhmtqf4gcmpxu0p6e53hpgtwj0llmsqpxtumcf
coins:
- amount: "1000000000" # Equals to one add_service_fee below
- amount: "1000000000" # MUST BE equal to one add_service_fee below
denom: upokt
application:
params:
Expand All @@ -171,9 +175,8 @@ genesis:
denom: upokt
applicationList:
- address: pokt1mrqt5f7qh8uxs27cjm9t7v9e74a9vvdnq5jva4
delegatee_gateway_addresses: [
pokt15vzxjqklzjtlz7lahe8z2dfe9nm5vxwwmscne4
]
delegatee_gateway_addresses:
[pokt15vzxjqklzjtlz7lahe8z2dfe9nm5vxwwmscne4]
service_configs:
- service_id: anvil
stake:
Expand All @@ -182,9 +185,8 @@ genesis:
amount: "100000068" # ~100 POKT
denom: upokt
- address: pokt184zvylazwu4queyzpl0gyz9yf5yxm2kdhh9hpm
delegatee_gateway_addresses: [
pokt15vzxjqklzjtlz7lahe8z2dfe9nm5vxwwmscne4
]
delegatee_gateway_addresses:
[pokt15vzxjqklzjtlz7lahe8z2dfe9nm5vxwwmscne4]
service_configs:
- service_id: rest
stake:
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ require (
require (
cosmossdk.io/x/tx v0.13.4
github.com/jhump/protoreflect v1.16.0
github.com/mitchellh/mapstructure v1.5.0
)

require (
Expand Down Expand Up @@ -224,7 +225,6 @@ require (
github.com/minio/highwayhash v1.0.2 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/go-testing-interface v1.14.1 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/moby/term v0.5.0 // indirect
github.com/morikuni/aec v1.0.0 // indirect
Expand Down
Loading

0 comments on commit bae452a

Please sign in to comment.