Skip to content

Implement scaling latency metrics through revisions #3081

Implement scaling latency metrics through revisions

Implement scaling latency metrics through revisions #3081

Workflow file for this run

name: e2e-test
on:
pull_request:
push:
branches:
- main
workflow_dispatch:
inputs:
kernel-image:
type: string
description: 'The kernel image to use for the VMs. If not specified, a kernel will be built from source'
required: false
cluster:
type: choice
description: 'The cluster to run the tests on'
options:
- k3d
- kind
default: k3d
workflow_call:
inputs:
tag:
type: string
description: 'Tag to use for images, skipping building'
required: false
push-yamls:
type: boolean
description: 'If true, pushes a tarball containing the rendered yaml manifests as an artifact'
required: false
env:
IMG_E2E_TEST: vm-postgres:15-bullseye
defaults:
run:
shell: bash -euo pipefail {0}
jobs:
get-tag:
outputs:
tag: ${{ inputs.tag || steps.get-tag.outputs.tag }}
runs-on: ubuntu-latest
steps:
- name: get tag
if: ${{ inputs.tag == '' }}
id: get-tag
env:
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
test -n "$SHA"
sha="${SHA::7}"
echo "tag=$sha.$GITHUB_RUN_ID" | tee -a $GITHUB_OUTPUT
build-images:
needs: get-tag
uses: ./.github/workflows/build-images.yaml
with:
skip: ${{ inputs.tag != '' }}
tag: ${{ inputs.tag || needs.get-tag.outputs.tag }}
kernel-image: ${{ inputs.kernel-image }}
# note: setting to preserve runner pods will mean that if !skip, they'll be built with those
# settings and used properly in the tests. But if skip (because inputs.tag != ''), then this
# setting will have no effect and the release images will be normal.
controller-preserve-runner-pods: true
secrets: inherit
build-test-vm:
needs: get-tag
uses: ./.github/workflows/build-test-vm.yaml
with:
skip: ${{ inputs.tag != '' }}
tag: ${{ inputs.tag || needs.get-tag.outputs.tag }}
secrets: inherit
e2e-tests:
needs: [ build-images, build-test-vm ]
strategy:
fail-fast: false
matrix:
cluster:
- ${{ inputs.cluster || 'k3d' }}
runs-on: [ self-hosted, gen3, large ]
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0 # fetch all, so that we also include tags
- uses: actions/setup-go@v4
with:
go-version-file: 'go.mod'
# Disable cache on self-hosted runners to avoid /usr/bin/tar errors, see https://github.com/actions/setup-go/issues/403
cache: false
# Sometimes setup-go gets stuck. Without this, it'll keep going until the job gets killed
timeout-minutes: 10
- name: Install dependencies
run: |
sudo apt install -y python3-venv
make e2e-tools
echo $(pwd)/bin >> $GITHUB_PATH
- name: Check dependencies
run: |
kubectl version --client --output=yaml
k3d version
kind version
kuttl version
docker version
- run: make render-release
env:
IMG_CONTROLLER: ${{ needs.build-images.outputs.controller }}
IMG_VXLAN_CONTROLLER: ${{ needs.build-images.outputs.vxlan-controller }}
IMG_RUNNER: ${{ needs.build-images.outputs.runner }}
IMG_SCHEDULER: ${{ needs.build-images.outputs.scheduler }}
IMG_AUTOSCALER_AGENT: ${{ needs.build-images.outputs.autoscaler-agent }}
- name: upload manifests
# nb: use format(..) to catch both inputs.push-yamls = true AND inputs.push-yamls = 'true'.
if: ${{ format('{0}', inputs.push-yamls) == 'true' }}
uses: actions/upload-artifact@v4
with:
name: rendered_manifests
# nb: prefix before wildcard is removed from the uploaded files, so the artifact should
# contain e.g.
# - autoscale-scheduler.yaml
# - autoscaler-agent.yaml
# ...
# ref https://github.com/actions/upload-artifact#upload-using-multiple-paths-and-exclusions
path: rendered_manifests/*
if-no-files-found: error
retention-days: 2 # minimum is 1 day; 0 is default. These are only used temporarily.
# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
# The default value is ~/.docker
- name: set custom docker config directory
run: |
mkdir -p .docker-custom
echo DOCKER_CONFIG=$(pwd)/.docker-custom >> $GITHUB_ENV
- uses: docker/login-action@v2
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
# https://docs.k3s.io/installation/private-registry#registries-configuration-file
# https://github.com/neondatabase/autoscaling/issues/975
- name: set k3d registries.yaml
# TODO: Implement an equivalent for kind?
# Relevant docs seem to be here: https://kind.sigs.k8s.io/docs/user/private-registries
if: ${{ matrix.cluster == 'k3d' }}
env:
DOCKERHUB_USERNAME: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
DOCKERHUB_PASSWORD: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
run: |
{
echo "configs:"
echo " registry-1.docker.io:"
echo " auth:"
echo " username: $DOCKERHUB_USERNAME"
echo " password: $DOCKERHUB_PASSWORD"
} >> $(pwd)/k3d/registries.yaml
- run: make ${{ matrix.cluster }}-setup
env:
USE_REGISTRIES_FILE: true
- name: deploy components
timeout-minutes: 3
run: |
rendered () { echo "rendered_manifests/$1"; }
kubectl apply -f $(rendered multus.yaml)
kubectl -n kube-system rollout status daemonset kube-multus-ds
kubectl apply -f $(rendered whereabouts.yaml)
kubectl -n kube-system rollout status daemonset whereabouts
kubectl apply -f $(rendered neonvm-runner-image-loader.yaml)
kubectl -n neonvm-system rollout status daemonset neonvm-runner-image-loader
kubectl apply -f $(rendered neonvm.yaml)
kubectl -n neonvm-system rollout status daemonset neonvm-device-plugin
kubectl -n neonvm-system rollout status daemonset neonvm-vxlan-controller
kubectl -n neonvm-system rollout status deployment neonvm-controller
kubectl apply -f $(rendered autoscale-scheduler.yaml)
kubectl -n kube-system rollout status deployment autoscale-scheduler
kubectl apply -f $(rendered autoscaler-agent.yaml)
kubectl -n kube-system rollout status daemonset autoscaler-agent
- name: load e2e test vm image
env:
TEST_IMAGE: ${{ needs.build-test-vm.outputs.vm-postgres-16-bullseye }}
timeout-minutes: 2
run: |
# Pull the docker image so we can re-tag it, because using a consistent tag inside the
# cluster means we can avoid dynamically editing the image used in the kuttl files.
docker pull "$TEST_IMAGE"
docker image tag "$TEST_IMAGE" "$IMG_E2E_TEST"
make load-example-vms
- run: make e2e
timeout-minutes: 15
- name: Get k8s logs and events
if: always()
run: |
if ! kubectl config current-context; then
echo "skipping cluster logs because no cluster found in kubectl context"
exit 0
fi
namespaces=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}')
for namespace in $namespaces; do
if [[ "$namespace" == "neonvm-system" ]] || [[ "$namespace" == kuttl-test-* ]]; then
tee_if_needed=$GITHUB_STEP_SUMMARY
else
tee_if_needed=/dev/null
fi
{
echo "<details>"
echo "<summary>Namespace=$namespace</summary>"
} | tee -a $tee_if_needed
pods=$(kubectl get pods -n $namespace -o jsonpath='{.items[*].metadata.name}')
for pod in $pods; do
{
echo "<details>"
echo "<summary>- Namespace=$namespace Pod=$pod Logs</summary>"
echo "<pre>"
} | tee -a $tee_if_needed
restarts=$(
kubectl get pod -n $namespace $pod -o jsonpath='{.status.containerStatuses[0].restartCount}' || echo '0'
)
{
if [ "$restarts" -ne 0 ]; then
echo "CONTAINER RESTARTED $restarts TIME(S)"
echo "Previous logs:"
kubectl logs -n $namespace -p $pod || echo 'Error getting logs'
echo "Current logs:"
kubectl logs -n $namespace $pod || echo 'Error getting logs'
else
echo "Logs:"
kubectl logs -n $namespace $pod || echo 'Error getting logs'
fi
} | tee -a $tee_if_needed
{
echo "</pre>"
echo "</details>"
} | tee -a $tee_if_needed
{
echo "<details>"
echo "<summary>- Namespace=$namespace Pod=$pod Events</summary>"
echo "<pre>"
} | tee -a $tee_if_needed
(kubectl get events --namespace $namespace --field-selector involvedObject.name=$pod || echo 'Error getting events') | tee -a $tee_if_needed
{
echo "</pre>"
echo "</pre>"
echo "</details>"
} | tee -a $tee_if_needed
done
echo "</details>" | tee -a $tee_if_needed
done
- name: Cleanup
if: always()
run: make ${{ matrix.cluster }}-destroy