Skip to content

Commit

Permalink
[Tests] Improve & Skip Flaky Tests (#723)
Browse files Browse the repository at this point in the history
- Add the `INCLUDE_FLAKY_TESTS` flag
- Skip a handful of flaky unit tests using the flag above
- Increase `num_blocks_per_session` from `4 -> 10` to avoid E2E tests sending claims mid-session
- Consolidate the request validation in E2E tests to one step
- Verify the response data (not just if it exists) in E2E curl commands
- Add a `RunCurlWithRetr` to account for some ephemeral `no host` issues on the kind cluster
- Calling `recover` in some unit tests that have a `panic: leveldb: closed` errors
- Removed some unused fields
- Improved some failure messages

Co-authored-by: Redouane Lakrache <[email protected]>
  • Loading branch information
Olshansk and red-0ne authored Aug 7, 2024
1 parent 91fc5e7 commit 04329ba
Show file tree
Hide file tree
Showing 31 changed files with 236 additions and 163 deletions.
15 changes: 7 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,6 @@

SHELL = /bin/sh

# TODO_IMPROVE: Look into how we can we source `.env.dev` and have everything
# here work.

# ifneq (,$(wildcard .env))
# include .env
# export $(shell sed 's/=.*//' .env)
# endif

POKTROLLD_HOME ?= ./localnet/poktrolld
POCKET_NODE ?= tcp://127.0.0.1:26657 # The pocket node (validator in the localnet context)
TESTNET_RPC ?= https://testnet-validated-validator-rpc.poktroll.com/ # TestNet RPC endpoint for validator maintained by Grove. Needs to be update if there's another "primary" testnet.
Expand Down Expand Up @@ -433,6 +425,13 @@ test_all: warn_flaky_tests check_go_version ## Run all go tests showing detailed
test_all_with_integration: check_go_version ## Run all go tests, including those with the integration
go test -count=1 -v -race -tags test,integration ./...

# We are explicitly using an env variable rather than a build tag to keep flaky
# tests in line with non flaky tests and use it as a way to easily turn them
# on and off without maintaining extra files.
.PHONY: test_all_with_integration_and_flaky
test_all_with_integration_and_flaky: check_go_version ## Run all go tests, including those with the integration and flaky tests
INCLUDE_FLAKY_TESTS=true go test -count=1 -v -race -tags test,integration ./...

.PHONY: test_integration
test_integration: check_go_version ## Run only the in-memory integration "unit" tests
go test -count=1 -v -race -tags test,integration ./tests/integration/...
Expand Down
86 changes: 26 additions & 60 deletions Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,7 @@ merge_dicts(localnet_config, localnet_config_defaults)
# Then merge file contents over defaults
merge_dicts(localnet_config, localnet_config_file)
# Check if there are differences or if the file doesn't exist
if (localnet_config_file != localnet_config) or (
not os.path.exists(localnet_config_path)
):
if (localnet_config_file != localnet_config) or (not os.path.exists(localnet_config_path)):
print("Updating " + localnet_config_path + " with defaults")
local("cat - > " + localnet_config_path, stdin=encode_yaml(localnet_config))

Expand All @@ -82,9 +80,7 @@ if localnet_config["helm_chart_local_repo"]["enabled"]:
# Observability
print("Observability enabled: " + str(localnet_config["observability"]["enabled"]))
if localnet_config["observability"]["enabled"]:
helm_repo(
"prometheus-community", "https://prometheus-community.github.io/helm-charts"
)
helm_repo("prometheus-community", "https://prometheus-community.github.io/helm-charts")
helm_repo("grafana-helm-repo", "https://grafana.github.io/helm-charts")

# Increase timeout for building the image
Expand All @@ -96,9 +92,7 @@ if localnet_config["observability"]["enabled"]:
flags=[
"--values=./localnet/kubernetes/observability-prometheus-stack.yaml",
"--set=grafana.defaultDashboardsEnabled="
+ str(
localnet_config["observability"]["grafana"]["defaultDashboardsEnabled"]
),
+ str(localnet_config["observability"]["grafana"]["defaultDashboardsEnabled"]),
],
resource_deps=["prometheus-community"],
)
Expand Down Expand Up @@ -130,9 +124,7 @@ if localnet_config["observability"]["enabled"]:
)

# Import our custom grafana dashboards into Kubernetes ConfigMap
configmap_create(
"protocol-dashboards", from_file=listdir("localnet/grafana-dashboards/")
)
configmap_create("protocol-dashboards", from_file=listdir("localnet/grafana-dashboards/"))

# Grafana discovers dashboards to "import" via a label
local_resource(
Expand All @@ -142,14 +134,10 @@ if localnet_config["observability"]["enabled"]:
)

# Import keyring/keybase files into Kubernetes ConfigMap
configmap_create(
"poktrolld-keys", from_file=listdir("localnet/poktrolld/keyring-test/")
)
configmap_create("poktrolld-keys", from_file=listdir("localnet/poktrolld/keyring-test/"))

# Import keyring/keybase files into Kubernetes Secret
secret_create_generic(
"poktrolld-keys", from_file=listdir("localnet/poktrolld/keyring-test/")
)
secret_create_generic("poktrolld-keys", from_file=listdir("localnet/poktrolld/keyring-test/"))

# Import validator keys for the poktrolld helm chart to consume
secret_create_generic(
Expand All @@ -161,9 +149,7 @@ secret_create_generic(
)

# Import configuration files into Kubernetes ConfigMap
configmap_create(
"poktrolld-configs", from_file=listdir("localnet/poktrolld/config/"), watch=True
)
configmap_create("poktrolld-configs", from_file=listdir("localnet/poktrolld/config/"), watch=True)

# Hot reload protobuf changes
local_resource(
Expand Down Expand Up @@ -207,9 +193,7 @@ WORKDIR /
)

# Run data nodes & validators
k8s_yaml(
["localnet/kubernetes/anvil.yaml", "localnet/kubernetes/validator-volume.yaml"]
)
k8s_yaml(["localnet/kubernetes/anvil.yaml", "localnet/kubernetes/validator-volume.yaml"])

# Provision validator
helm_resource(
Expand All @@ -218,14 +202,11 @@ helm_resource(
flags=[
"--values=./localnet/kubernetes/values-common.yaml",
"--values=./localnet/kubernetes/values-validator.yaml",
"--set=persistence.cleanupBeforeEachStart="
+ str(localnet_config["validator"]["cleanupBeforeEachStart"]),
"--set=persistence.cleanupBeforeEachStart=" + str(localnet_config["validator"]["cleanupBeforeEachStart"]),
"--set=logs.level=" + str(localnet_config["validator"]["logs"]["level"]),
"--set=logs.format=" + str(localnet_config["validator"]["logs"]["format"]),
"--set=serviceMonitor.enabled="
+ str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled="
+ str(localnet_config["validator"]["delve"]["enabled"]),
"--set=serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled=" + str(localnet_config["validator"]["delve"]["enabled"]),
],
image_deps=["poktrolld"],
image_keys=[("image.repository", "image.tag")],
Expand All @@ -241,31 +222,25 @@ for x in range(localnet_config["relayminers"]["count"]):
flags=[
"--values=./localnet/kubernetes/values-common.yaml",
"--values=./localnet/kubernetes/values-relayminer-common.yaml",
"--values=./localnet/kubernetes/values-relayminer-"
+ str(actor_number)
+ ".yaml",
"--set=metrics.serviceMonitor.enabled="
+ str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled="
+ str(localnet_config["relayminers"]["delve"]["enabled"]),
"--values=./localnet/kubernetes/values-relayminer-" + str(actor_number) + ".yaml",
"--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled=" + str(localnet_config["relayminers"]["delve"]["enabled"]),
],
image_deps=["poktrolld"],
image_keys=[("image.repository", "image.tag")],
)
k8s_resource(
"relayminer" + str(actor_number),
labels=["supplier_nodes"],
labels=["suppliers"],
resource_deps=["validator"],
links=[
link(
"http://localhost:3003/d/relayminer/relayminer?orgId=1&var-relayminer=relayminer"
+ str(actor_number),
"http://localhost:3003/d/relayminer/relayminer?orgId=1&var-relayminer=relayminer" + str(actor_number),
"Grafana dashboard",
),
],
port_forwards=[
str(8084 + actor_number)
+ ":8545", # relayminer1 - exposes 8545, relayminer2 exposes 8546, etc.
str(8084 + actor_number) + ":8545", # relayminer1 - exposes 8545, relayminer2 exposes 8546, etc.
str(40044 + actor_number)
+ ":40004", # DLV port. relayminer1 - exposes 40045, relayminer2 exposes 40046, etc.
# Run `curl localhost:PORT` to see the current snapshot of relayminer metrics.
Expand All @@ -288,17 +263,15 @@ for x in range(localnet_config["appgateservers"]["count"]):
"--values=./localnet/kubernetes/values-common.yaml",
"--values=./localnet/kubernetes/values-appgateserver.yaml",
"--set=config.signing_key=app" + str(actor_number),
"--set=metrics.serviceMonitor.enabled="
+ str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled="
+ str(localnet_config["appgateservers"]["delve"]["enabled"]),
"--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled=" + str(localnet_config["appgateservers"]["delve"]["enabled"]),
],
image_deps=["poktrolld"],
image_keys=[("image.repository", "image.tag")],
)
k8s_resource(
"appgateserver" + str(actor_number),
labels=["applications"],
labels=["gateways"],
resource_deps=["validator"],
links=[
link(
Expand All @@ -308,8 +281,7 @@ for x in range(localnet_config["appgateservers"]["count"]):
),
],
port_forwards=[
str(42068 + actor_number)
+ ":42069", # appgateserver1 - exposes 42069, appgateserver2 exposes 42070, etc.
str(42068 + actor_number) + ":42069", # appgateserver1 - exposes 42069, appgateserver2 exposes 42070, etc.
str(40054 + actor_number)
+ ":40004", # DLV port. appgateserver1 - exposes 40055, appgateserver2 exposes 40056, etc.
# Run `curl localhost:PORT` to see the current snapshot of appgateserver metrics.
Expand All @@ -332,10 +304,8 @@ for x in range(localnet_config["gateways"]["count"]):
"--values=./localnet/kubernetes/values-common.yaml",
"--values=./localnet/kubernetes/values-gateway.yaml",
"--set=config.signing_key=gateway" + str(actor_number),
"--set=metrics.serviceMonitor.enabled="
+ str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled="
+ str(localnet_config["gateways"]["delve"]["enabled"]),
"--set=metrics.serviceMonitor.enabled=" + str(localnet_config["observability"]["enabled"]),
"--set=development.delve.enabled=" + str(localnet_config["gateways"]["delve"]["enabled"]),
],
image_deps=["poktrolld"],
image_keys=[("image.repository", "image.tag")],
Expand All @@ -352,10 +322,8 @@ for x in range(localnet_config["gateways"]["count"]):
),
],
port_forwards=[
str(42078 + actor_number)
+ ":42069", # gateway1 - exposes 42079, gateway2 exposes 42080, etc.
str(40064 + actor_number)
+ ":40004", # DLV port. gateway1 - exposes 40065, gateway2 exposes 40066, etc.
str(42078 + actor_number) + ":42069", # gateway1 - exposes 42079, gateway2 exposes 42080, etc.
str(40064 + actor_number) + ":40004", # DLV port. gateway1 - exposes 40065, gateway2 exposes 40066, etc.
# Run `curl localhost:PORT` to see the current snapshot of gateway metrics.
str(9059 + actor_number)
+ ":9090", # gateway metrics port. gateway1 - exposes 9060, gateway2 exposes 9061, etc.
Expand Down Expand Up @@ -399,8 +367,6 @@ if localnet_config["ollama"]["enabled"]:

local_resource(
name="ollama-pull-model",
cmd=execute_in_pod(
"ollama", "ollama pull " + localnet_config["ollama"]["model"]
),
cmd=execute_in_pod("ollama", "ollama pull " + localnet_config["ollama"]["model"]),
resource_deps=["ollama"],
)
2 changes: 1 addition & 1 deletion config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ genesis:
denom: upokt
shared:
params:
num_blocks_per_session: 4
num_blocks_per_session: 10
grace_period_end_offset_blocks: 1
claim_window_open_offset_blocks: 1
claim_window_close_offset_blocks: 4
Expand Down
5 changes: 5 additions & 0 deletions e2e/tests/0_settlement.feature
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,21 @@
Feature: Tokenomics Namespace

Scenario: Emissions equals burn when a claim is created and a valid proof is submitted and required
# Baseline
Given the user has the pocketd binary installed
# Network preparation
And an account exists for "supplier1"
And the "supplier" account for "supplier1" is staked
And an account exists for "app1"
And the "application" account for "app1" is staked
And the service "anvil" registered for application "app1" has a compute units per relay of "1"
# Start servicing
When the supplier "supplier1" has serviced a session with "10" relays for service "anvil" for application "app1"
# Wait for the Claim & Proof lifecycle
And the user should wait for the "proof" module "CreateClaim" Message to be submitted
And the user should wait for the "proof" module "SubmitProof" Message to be submitted
And the user should wait for the "tokenomics" module "ClaimSettled" end block event to be broadcast
# Validate the results
Then the account balance of "supplier1" should be "420" uPOKT "more" than before
And the "application" stake of "app1" should be "420" uPOKT "less" than before

Expand Down
81 changes: 53 additions & 28 deletions e2e/tests/init_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package e2e

import (
"bytes"
"context"
"encoding/json"
"flag"
Expand Down Expand Up @@ -100,10 +101,9 @@ type suite struct {
// moduleParamsMap is a map of module names to a map of parameter names to parameter values & types.
expectedModuleParams moduleParamsMap

deps depinject.Config
newBlockEventsReplayClient client.EventsReplayClient[*block.CometNewBlockEvent]
txResultReplayClient client.EventsReplayClient[*abci.TxResult]
finalizeBlockEventsReplayClient client.EventsReplayClient[*abci.Event]
deps depinject.Config
newBlockEventsReplayClient client.EventsReplayClient[*block.CometNewBlockEvent]
txResultReplayClient client.EventsReplayClient[*abci.TxResult]
}

func (s *suite) Before() {
Expand Down Expand Up @@ -297,6 +297,7 @@ func (s *suite) TheUserStakesAWithUpoktForServiceFromTheAccount(actorType string
"-y",
}
res, err := s.pocketd.RunCommandOnHost("", args...)
require.NoError(s, err, "error staking %s for service %s", actorType, serviceId)

// Remove the temporary config file
err = os.Remove(configFile.Name())
Expand Down Expand Up @@ -412,30 +413,36 @@ func (s *suite) TheSessionForApplicationAndServiceContainsTheSupplier(appName st
s.Fatalf("ERROR: session for app %s and service %s does not contain supplier %s", appName, serviceId, supplierName)
}

func (s *suite) TheApplicationSendsTheSupplierARequestForServiceWithPathAndData(appName, supplierName, serviceId, path, requestData string) {
func (s *suite) TheApplicationSendsTheSupplierASuccessfulRequestForServiceWithPathAndData(appName, supplierName, serviceId, path, requestData string) {
// TODO_HACK: We need to support a non self_signing LocalNet AppGateServer
// that allows any application to send a relay in LocalNet and our E2E Tests.
require.Equal(s, "app1", appName, "TODO_HACK: The LocalNet AppGateServer is self_signing and only supports app1.")

res, err := s.pocketd.RunCurl(appGateServerUrl, serviceId, path, requestData)
res, err := s.pocketd.RunCurlWithRetry(appGateServerUrl, serviceId, path, requestData, 5)
require.NoError(s, err, "error sending relay request from app %q to supplier %q for service %q", appName, supplierName, serviceId)

relayKey := relayReferenceKey(appName, supplierName)
s.scenarioState[relayKey] = res.Stdout
}

func (s *suite) TheApplicationReceivesASuccessfulRelayResponseSignedBy(appName string, supplierName string) {
// TODO_HACK: We need to support a non self_signing LocalNet AppGateServer
// that allows any application to send a relay in LocalNet and our E2E Tests.
require.Equal(s, "app1", appName, "TODO_HACK: The LocalNet AppGateServer is self_signing and only supports app1.")

relayKey := relayReferenceKey(appName, supplierName)
stdout, ok := s.scenarioState[relayKey]
require.Truef(s, ok, "no relay response found for %s", relayKey)

var jsonContent json.RawMessage
err := json.Unmarshal([]byte(stdout.(string)), &jsonContent)
require.NoError(s, err, `Expected valid JSON, got: %s`, stdout)
err = json.Unmarshal([]byte(res.Stdout), &jsonContent)
require.NoError(s, err, `Expected valid JSON, got: %s`, res.Stdout)

jsonMap, err := jsonToMap(jsonContent)
require.NoError(s, err, "error converting JSON to map")

prettyJson, err := jsonPrettyPrint(jsonContent)
require.NoError(s, err, "error pretty printing JSON")
s.Log(prettyJson)

// TODO_IMPROVE: This is a minimalistic first approach to request validation in E2E tests.
// Consider leveraging the shannon-sdk or path here.
switch path {
case "":
// Validate JSON-RPC request where the path is empty
require.Nil(s, jsonMap["error"], "error in relay response")
require.NotNil(s, jsonMap["result"], "no result in relay response")
default:
// Validate REST request where the path is non-empty
require.Nil(s, jsonMap["error"], "error in relay response")
}
}

func (s *suite) AModuleEndBlockEventIsBroadcast(module, eventType string) {
Expand All @@ -457,7 +464,7 @@ func (s *suite) TheUserWaitsForUnbondingPeriodToFinish(accName string) {
require.True(s, ok, "supplier %s not found", accName)

unbondingHeight := s.getSupplierUnbondingHeight(accName)
s.waitForBlockHeight(unbondingHeight)
s.waitForBlockHeight(unbondingHeight + 1) // Add 1 to ensure the unbonding block has been committed
}

func (s *suite) getStakedAmount(actorType, accName string) (int, bool) {
Expand Down Expand Up @@ -675,12 +682,6 @@ func (s *suite) getSupplierUnbondingHeight(accName string) int64 {
return unbondingHeight
}

// TODO_IMPROVE: use `sessionId` and `supplierName` since those are the two values
// used to create the primary composite key on-chain to uniquely distinguish relays.
func relayReferenceKey(appName, supplierName string) string {
return fmt.Sprintf("%s/%s", appName, supplierName)
}

// accBalanceKey is a helper function to create a key to store the balance
// for accName in the context of a scenario state.
func accBalanceKey(accName string) string {
Expand All @@ -692,3 +693,27 @@ func accBalanceKey(accName string) string {
func accStakeKey(actorType, accName string) string {
return fmt.Sprintf("stake/%s/%s", actorType, accName)
}

// printPrettyJSON returns the given raw JSON message in a pretty format that
// can be printed to the console.
func jsonPrettyPrint(raw json.RawMessage) (string, error) {
var buf bytes.Buffer
err := json.Indent(&buf, raw, "", " ")
if err != nil {
return "", err
}
return buf.String(), nil
}

// jsonToMap converts a raw JSON message into a map of string keys and interface values.
func jsonToMap(raw json.RawMessage) (map[string]interface{}, error) {
var dataMap map[string]interface{}

// Unmarshal the raw message into the map
err := json.Unmarshal(raw, &dataMap)
if err != nil {
return nil, err
}

return dataMap, nil
}
Loading

0 comments on commit 04329ba

Please sign in to comment.