From cb0a0ab2039aef70f964e6a136a229772922b352 Mon Sep 17 00:00:00 2001 From: Dima Kniazev Date: Mon, 22 May 2023 16:16:08 -0700 Subject: [PATCH] [Helm] Add ServiceMonitor to the helm chart (#767) ## Description Adds `ServiceMonitor` to the helm chart, so our software can be monitored by different monitoring stacks that support this CRD, which is just about an industry standard now. We are going to use it in our infrastructure as well. This also includes changes to run the e2e tests within a single `main` GitHub Actions workflow, because the separate `workflow_run`-triggered workflow was not working on non-default branches. ## Type of change Please mark the relevant option(s): - [x] New feature, functionality or library ## List of changes - Add a new 'ServiceMonitor' object to the helm chart, which will be disabled by default because not everyone has this CRD installed on their cluster. - Adjust the e2e test workflow so it kicks off on pull requests. ## Testing - [ ] `make develop_test`; if any code changes were made - [ ] `make test_e2e` on [k8s LocalNet](https://github.com/pokt-network/pocket/blob/main/build/localnet/README.md); if any code changes were made - [x] `e2e-devnet-test` passes tests on [DevNet](https://pocketnetwork.notion.site/How-to-DevNet-ff1598f27efe44c09f34e2aa0051f0dd); if any code was changed - [ ] [Docker Compose LocalNet](https://github.com/pokt-network/pocket/blob/main/docs/development/README.md); if any major functionality was changed or introduced - [x] [k8s LocalNet](https://github.com/pokt-network/pocket/blob/main/build/localnet/README.md); if any infrastructure or configuration changes were made ## Required Checklist - [x] I have performed a self-review of my own code - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have added, or updated, [`godoc` format comments](https://go.dev/blog/godoc) on touched members (see: [tip.golang.org/doc/comment](https://tip.golang.org/doc/comment)) - [ ] I have tested my changes using the available tooling - [ ] I have 
updated the corresponding CHANGELOG ### If Applicable Checklist - [ ] I have updated the corresponding README(s); local and/or global - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added, or updated, [mermaid.js](https://mermaid-js.github.io) diagrams in the corresponding README(s) - [ ] I have added, or updated, documentation and [mermaid.js](https://mermaid-js.github.io) diagrams in `shared/docs/*` if I updated `shared/*`README(s) --------- Co-authored-by: github-actions Co-authored-by: Daniel Olshansky --- .github/workflows/e2e-test.yml | 62 ------------------ .github/workflows/main.yml | 69 ++++++++++++++++++-- charts/CHANGELOG.md | 4 ++ charts/pocket/README.md | 1 + charts/pocket/templates/service-monitor.yaml | 12 ++++ charts/pocket/templates/statefulset.yaml | 6 +- charts/pocket/values.yaml | 6 ++ 7 files changed, 90 insertions(+), 70 deletions(-) delete mode 100644 .github/workflows/e2e-test.yml create mode 100644 charts/pocket/templates/service-monitor.yaml diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml deleted file mode 100644 index ae7630ba9..000000000 --- a/.github/workflows/e2e-test.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: E2E test on DevNet - -# Only trigger, when the build workflow succeeded, and allow manual triggering. 
-on: - workflow_dispatch: - workflow_run: - workflows: ["Test, build and push artifacts"] - types: - - completed - -jobs: - e2e-tests: - runs-on: ubuntu-latest - if: contains(github.event.pull_request.labels.*.name, 'e2e-devnet-test') - env: - ARGO_SERVER: "workflows.dev-us-east4-1.poktnodes.network:8443" - ARGO_HTTP1: true - ARGO_SECURE: true - permissions: - contents: "read" - id-token: "write" - - steps: - - id: "auth" - uses: "google-github-actions/auth@v1" - with: - credentials_json: "${{ secrets.ARGO_WORKFLOW_EXTERNAL }}" - - - id: "get-credentials" - uses: "google-github-actions/get-gke-credentials@v1" - with: - cluster_name: "nodes-gcp-dev-us-east4-1" - location: "us-east4" - - - id: "install-argo" - run: | - curl -sLO https://github.com/argoproj/argo-workflows/releases/download/v3.4.7/argo-linux-amd64.gz - gunzip argo-linux-amd64.gz - chmod +x argo-linux-amd64 - mv ./argo-linux-amd64 /usr/local/bin/argo - argo version - - - id: "wait-for-infra" - shell: bash - run: | - start_time=$(date +%s) # store current time - timeout=900 # 15 minute timeout in seconds - - until argo template get dev-e2e-tests --namespace=devnet-issue-${{ github.event.pull_request.number }}; do - current_time=$(date +%s) - elapsed_time=$(( current_time - start_time )) - if (( elapsed_time > timeout )); then - echo "Timeout of $timeout seconds reached. Exiting..." - exit 1 - fi - echo "Waiting for devnet-issue-${{ github.event.pull_request.number }} to be provisioned..." 
- sleep 5 - done - - - id: "run-e2e-tests" - run: | - argo submit --wait --log --namespace devnet-issue-${{ github.event.pull_request.number }} --from 'wftmpl/dev-e2e-tests' --parameter gitsha="${{ github.event.pull_request.head.sha }}" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 36cca6815..b3bc7911e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,13 +6,15 @@ on: workflow_dispatch: push: branches: [main] - paths-ignore: - - "docs/**" - - "**.md" + # OPTIMIZE: We generate new images even on non src code changes, but this cost is okay for now + # paths-ignore: + # - "docs/**" + # - "**.md" pull_request: - paths-ignore: - - "docs/**" - - "**.md" + # paths-ignore: + # - "docs/**" + # - "**.md" + env: # Even though we can test against multiple versions, this one is considered a target version. @@ -151,3 +153,58 @@ jobs: cache-to: type=gha,mode=max build-args: | TARGET_GOLANG_VERSION=${{ env.TARGET_GOLANG_VERSION }} + + + # Run e2e tests on devnet if the PR has a label "e2e-devnet-test" + e2e-tests: + runs-on: ubuntu-latest + needs: build-images + if: contains(github.event.pull_request.labels.*.name, 'e2e-devnet-test') + env: + ARGO_HTTP1: true + ARGO_SECURE: true + ARGO_SERVER: ${{ vars.ARGO_SERVER }} + permissions: + contents: "read" + id-token: "write" + + steps: + - id: "auth" + uses: "google-github-actions/auth@v1" + with: + credentials_json: "${{ secrets.ARGO_WORKFLOW_EXTERNAL }}" + + - id: "get-credentials" + uses: "google-github-actions/get-gke-credentials@v1" + with: + cluster_name: "nodes-gcp-dev-us-east4-1" + location: "us-east4" + + - id: "install-argo" + run: | + curl -sLO https://github.com/argoproj/argo-workflows/releases/download/v3.4.7/argo-linux-amd64.gz + gunzip argo-linux-amd64.gz + chmod +x argo-linux-amd64 + mv ./argo-linux-amd64 /usr/local/bin/argo + argo version + + - id: "wait-for-infra" + shell: bash + run: | + start_time=$(date +%s) # store current time + timeout=900 # 15 minute 
timeout in seconds + + until argo template get dev-e2e-tests --namespace=devnet-issue-${{ github.event.pull_request.number }}; do + current_time=$(date +%s) + elapsed_time=$(( current_time - start_time )) + if (( elapsed_time > timeout )); then + echo "Timeout of $timeout seconds reached. Exiting..." + exit 1 + fi + echo "Waiting for devnet-issue-${{ github.event.pull_request.number }} to be provisioned..." + sleep 5 + done + + - id: "run-e2e-tests" + run: | + argo submit --wait --log --namespace devnet-issue-${{ github.event.pull_request.number }} --from 'wftmpl/dev-e2e-tests' --parameter gitsha="${{ github.event.pull_request.head.sha }}" diff --git a/charts/CHANGELOG.md b/charts/CHANGELOG.md index 20c3baa83..6f79a9ae5 100644 --- a/charts/CHANGELOG.md +++ b/charts/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.0.0.5] - 2023-05-20 + +- Added `ServiceMonitor` to the helm chart. + ## [0.0.0.4] - 2023-05-12 - Added `nodeType` parameter to the helm chart, which is now actor-agnostic. diff --git a/charts/pocket/README.md b/charts/pocket/README.md index 363717b90..137cf7340 100644 --- a/charts/pocket/README.md +++ b/charts/pocket/README.md @@ -127,4 +127,5 @@ privateKeySecretKeyRef: | serviceAccount.annotations | object | `{}` | Annotations to add to the service account | | serviceAccount.create | bool | `true` | Specifies whether a service account should be created | | serviceAccount.name | string | `""` | The name of the service account to use. 
If not set and create is true, a name is generated using the fullname template | +| serviceMonitor.enabled | bool | `false` | enable service monitor | | tolerations | list | `[]` | | diff --git a/charts/pocket/templates/service-monitor.yaml b/charts/pocket/templates/service-monitor.yaml new file mode 100644 index 000000000..d5aaf5dbe --- /dev/null +++ b/charts/pocket/templates/service-monitor.yaml @@ -0,0 +1,12 @@ +{{ if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "pocket.fullname" . }} +spec: + endpoints: + - port: metrics + selector: + matchLabels: + {{- include "pocket.selectorLabels" . | nindent 6 }} +{{ end }} diff --git a/charts/pocket/templates/statefulset.yaml b/charts/pocket/templates/statefulset.yaml index dca38a99e..4be68b50b 100644 --- a/charts/pocket/templates/statefulset.yaml +++ b/charts/pocket/templates/statefulset.yaml @@ -58,10 +58,12 @@ spec: - -config=/pocket/configs/config.json - -genesis=/pocket/configs/genesis.json ports: - - containerPort: 42069 + - containerPort: {{ .Values.service.ports.consensus }} name: consensus - - containerPort: 50832 + - containerPort: {{ .Values.service.ports.rpc }} name: rpc + - containerPort: {{ .Values.service.ports.metrics }} + name: metrics env: {{ if or .Values.privateKeySecretKeyRef.name .Values.privateKeySecretKeyRef.key }} - name: POCKET_PRIVATE_KEY diff --git a/charts/pocket/values.yaml b/charts/pocket/values.yaml index f897de4e2..6258056e9 100644 --- a/charts/pocket/values.yaml +++ b/charts/pocket/values.yaml @@ -170,6 +170,8 @@ service: annotations: {} # -- service type type: ClusterIP + + # If adjusting values here, please make sure to also change the configuration of the pocket node (`config` section above) ports: # -- consensus port of the node consensus: 42069 @@ -178,6 +180,10 @@ service: # -- OpenTelemetry metrics port of the node metrics: 9000 +serviceMonitor: + # -- enable service monitor + enabled: false + ingress: # 
-- enable ingress for RPC port enabled: false