From 187cd071ea78cdd701f7a6a667a7db75481a499c Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Mon, 18 Nov 2024 07:33:43 -0800 Subject: [PATCH 1/4] api: Remove VERSIONING.md (#1144) It hasn't been updated for a while, and doesn't seem to be useful. --- pkg/api/VERSIONING.md | 135 ------------------------------------------ pkg/api/types.go | 4 -- pkg/plugin/run.go | 2 - 3 files changed, 141 deletions(-) delete mode 100644 pkg/api/VERSIONING.md diff --git a/pkg/api/VERSIONING.md b/pkg/api/VERSIONING.md deleted file mode 100644 index e32008142..000000000 --- a/pkg/api/VERSIONING.md +++ /dev/null @@ -1,135 +0,0 @@ -# API version compatibility - -This file exists to make it easy to answer the following questions: - -1. Which protocol versions does each component support? -2. Which releases of a component support a given protocol version? - -The table below should provide the necessary information. For each release, it gives the range of -supported protocol versions by each component. The topmost line - "Current" - refers to the latest -commit in this repository, possibly unreleased. - -## agent<->monitor protocol - -Note: For v0.17.0 and below, the autoscaler-agent additionally had support for the vm-informant by -first checking if the /register endpoint returned a 404. - -| Release | autoscaler-agent | VM monitor | -|---------|------------------|------------| -| _Current_ | v1.0 only | v1.0 only | -| v0.28.0 | v1.0 only | v1.0 only | -| v0.27.0 | v1.0 only | v1.0 only | -| v0.26.0 | v1.0 only | v1.0 only | -| v0.25.0 | v1.0 only | v1.0 only | -| v0.24.0 | v1.0 only | v1.0 only | -| v0.23.0 | v1.0 only | v1.0 only | -| v0.22.0 | v1.0 only | v1.0 only | -| v0.21.0 | v1.0 only | v1.0 only | -| v0.20.0 | v1.0 only | v1.0 only | -| v0.19.0 | v1.0 only | v1.0 only | -| v0.18.0 | v1.0 only | v1.0 only | -| v0.17.0 | v1.0 only | v1.0 only | -| v0.16.0 | v1.0 only | v1.0 only | -| v0.15.0 | **v1.0** only | **v1.0** only | - -## agent<->scheduler plugin protocol - -Note: Components v0.1.7 and below did not have a versioned protocol between the agent and scheduler -plugin. We've marked those as protocol version v0.0. Scheduler plugin v0.1.7 implicitly supports -v1.0 because the only change from v0.0 to v1.0 is having the scheduler plugin check the version -number. - -| Release | autoscaler-agent | Scheduler plugin | -|---------|------------------|------------------| -| _Current_ | v5.0 only | v3.0-v5.0 | -| v0.28.0 | **v5.0** only | **v3.0-v5.0** | -| v0.27.0 | v4.0 only | v3.0-v4.0 | -| v0.26.0 | v4.0 only | **v3.0-v4.0** | -| v0.25.0 | v4.0 only | v1.0-v4.0 | -| v0.24.0 | v4.0 only | v1.0-v4.0 | -| v0.23.0 | **v4.0 only** | **v1.0-v4.0** | -| v0.22.0 | **v3.0 only** | **v1.0-v3.0** | -| v0.21.0 | v2.1 only | v1.0-v2.1 | -| v0.20.0 | **v2.1 only** | **v1.0-v2.1** | -| v0.19.0 | v2.0 only | v1.0-v2.0 | -| v0.18.0 | v2.0 only | v1.0-v2.0 | -| v0.17.0 | v2.0 only | v1.0-v2.0 | -| v0.16.0 | v2.0 only | v1.0-v2.0 | -| v0.15.0 | v2.0 only | v1.0-v2.0 | -| v0.14.2 | v2.0 only | v1.0-v2.0 | -| v0.14.1 | v2.0 only | v1.0-v2.0 | -| v0.14.0 | v2.0 only | v1.0-v2.0 | -| v0.13.3 | v2.0 only | v1.0-v2.0 | -| v0.13.2 | v2.0 only | v1.0-v2.0 | -| v0.13.1 | v2.0 only | v1.0-v2.0 | -| v0.13.0 | v2.0 only | v1.0-v2.0 | -| v0.12.2 | v2.0 only | v1.0-v2.0 | -| v0.12.1 | v2.0 only | v1.0-v2.0 | -| v0.12.0 | v2.0 only | v1.0-v2.0 | -| v0.11.0 | v2.0 only | v1.0-v2.0 | -| v0.10.0 | v2.0 only | v1.0-v2.0 | -| v0.9.0 | v2.0 only | v1.0-v2.0 | -| v0.8.0 | v2.0 only | v1.0-v2.0 | -| v0.7.2 | v2.0 only | v1.0-v2.0 | -| v0.7.1 | v2.0 only | v1.0-v2.0 | -| v0.7.0 | **v2.0** only | **v1.0-v2.0** | -| v0.6.0 | v1.1 only | v1.0-v1.1 | -| v0.5.2 | v1.1 only | v1.0-v1.1 | -| v0.5.1 | v1.1 only | v1.0-v1.1 | -| v0.5.0 | v1.1 only | v1.0-v1.1 | -| v0.1.17 | v1.1 only | v1.0-v1.1 | -| v0.1.16 | v1.1 only | v1.0-v1.1 | -| v0.1.15 | v1.1 only | v1.0-v1.1 | -| v0.1.14 | v1.1 only | v1.0-v1.1 | -| v0.1.13 | v1.1 only | v1.0-v1.1 | -| v0.1.12 | v1.1 only | v1.0-v1.1 | -| v0.1.11 | v1.1 only | v1.0-v1.1 | -| v0.1.10 | v1.1 only | v1.0-v1.1 | -| v0.1.9 | **v1.1** only | **v1.0-v1.1** | -| v0.1.8 | **v1.0** only | **v1.0** only | -| v0.1.7 | v0.0 only | **v0.0-v1.0** | -| v0.1.6 | v0.0 only | v0.0 only | -| v0.1.5 | v0.0 only | v0.0 only | -| v0.1.4 | v0.0 only | v0.0 only | -| v0.1.3 | v0.0 only | v0.0 only | -| 0.1.2 | v0.0 only | v0.0 only | -| 0.1.1 | v0.0 only | v0.0 only | -| 0.1.0 | **v0.0** only | **v0.0** only | - -## controller<->runner protocol - -Note: Components v0.6.0 and below did not have a versioned protocol between the controller and the runner. -| Release | controller | runner | -|---------|------------|--------| -| _Current_ | 1 | 1 | -| v0.28.0 | **1** | 1 | -| v0.27.0 | 0 - 1 | 1 | -| v0.26.0 | 0 - 1 | 1 | -| v0.25.0 | 0 - 1 | 1 | -| v0.24.0 | 0 - 1 | 1 | -| v0.23.0 | 0 - 1 | 1 | -| v0.22.0 | 0 - 1 | 1 | -| v0.21.0 | 0 - 1 | 1 | -| v0.20.0 | 0 - 1 | 1 | -| v0.19.0 | 0 - 1 | 1 | -| v0.18.0 | 0 - 1 | 1 | -| v0.17.0 | 0 - 1 | 1 | -| v0.16.0 | 0 - 1 | 1 | -| v0.15.0 | 0 - 1 | 1 | -| v0.14.2 | 0 - 1 | 1 | -| v0.14.1 | 0 - 1 | 1 | -| v0.14.0 | 0 - 1 | 1 | -| v0.13.3 | 0 - 1 | 1 | -| v0.13.2 | 0 - 1 | 1 | -| v0.13.1 | 0 - 1 | 1 | -| v0.13.0 | 0 - 1 | 1 | -| v0.12.2 | 0 - 1 | 1 | -| v0.12.1 | 0 - 1 | 1 | -| v0.12.0 | 0 - 1 | 1 | -| v0.11.0 | 0 - 1 | 1 | -| v0.10.0 | 0 - 1 | 1 | -| v0.9.0 | 0 - 1 | 1 | -| v0.8.0 | 0 - 1 | 1 | -| v0.7.2 | 0 - 1 | 1 | -| v0.7.1 | 0 - 1 | 1 | -| v0.7.0 | 0 - 1 | 1 | diff --git a/pkg/api/types.go b/pkg/api/types.go index d71570def..8261ad88b 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -24,8 +24,6 @@ import ( // repository containing this code. Names follow semver, although this does not necessarily // guarantee support - for example, the plugin may only support a single version, even though others // may appear to be semver-compatible. -// -// Version compatibility is documented in the neighboring file VERSIONING.md. type PluginProtoVersion uint32 const ( @@ -595,8 +593,6 @@ func SerializeMonitorMessage(content any, id uint64) ([]byte, error) { // Each version of the agent<->monitor protocol is named independently from releases of the // repository containing this code. Names follow semver, although this does not necessarily // guarantee support - for example, the monitor may only support versions above v1.1. -// -// Version compatibility is documented in the neighboring file VERSIONING.md. type MonitorProtoVersion uint32 const ( diff --git a/pkg/plugin/run.go b/pkg/plugin/run.go index cf1277fd0..4a319a867 100644 --- a/pkg/plugin/run.go +++ b/pkg/plugin/run.go @@ -24,8 +24,6 @@ const ( ) // The scheduler plugin currently supports v3.0 to v5.0 of the agent<->scheduler plugin protocol. -// -// If you update either of these values, make sure to also update VERSIONING.md. const ( MinPluginProtocolVersion api.PluginProtoVersion = api.PluginProtoV3_0 MaxPluginProtocolVersion api.PluginProtoVersion = api.PluginProtoV5_0 From ff1126e1c02f78c93248b5e3ed19a1cd5e2537a1 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Mon, 18 Nov 2024 08:29:47 -0800 Subject: [PATCH 2/4] api: Touch-up comments on AgentRequest (#1143) I think the comments were just never updated when the autoscaler-agent was moved from a per-pod sidecar to a per-node daemonset. Found this while working on some background work for #995. --- pkg/api/types.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/api/types.go b/pkg/api/types.go index 8261ad88b..296673fe4 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -172,7 +172,8 @@ func (v PluginProtoVersion) IncludesExtendedMetrics() bool { return v < PluginProtoV5_0 } -// AgentRequest is the type of message sent from an autoscaler-agent to the scheduler plugin +// AgentRequest is the type of message sent from an autoscaler-agent to the scheduler plugin on +// behalf of a Pod on the agent's node. // // All AgentRequests expect a PluginResponse. type AgentRequest struct { @@ -180,7 +181,8 @@ type AgentRequest struct { // // If the scheduler does not support this version, then it will respond with a 400 status. ProtoVersion PluginProtoVersion `json:"protoVersion"` - // Pod is the namespaced name of the pod making the request + // Pod is the namespaced name of the Pod that the autoscaler-agent is making the request on + // behalf of. Pod util.NamespacedName `json:"pod"` // ComputeUnit gives the value of the agent's configured compute unit to use for the VM. // From 9ac98e728dbad8fc7f85da6835673418d3dfb0a8 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Mon, 18 Nov 2024 16:11:52 -0800 Subject: [PATCH 3/4] scheduler: Shorten tolerations for node failure (#1146) Similar to what was done in #1055, we need to explicitly add tolerations to the scheduler to get it to be recreated more quickly on node failure. This is particularly necessary because we don't have #995. We could wait for that, but it's a lot of work, and this is a small thing we can do in the meantime. Fixes neondatabase/cloud#17298, part of neondatabase/cloud#14114. --- autoscale-scheduler/deployment.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/autoscale-scheduler/deployment.yaml b/autoscale-scheduler/deployment.yaml index 409002c59..d4f7dc9e2 100644 --- a/autoscale-scheduler/deployment.yaml +++ b/autoscale-scheduler/deployment.yaml @@ -63,3 +63,13 @@ spec: - name: plugin-config-volume configMap: name: scheduler-plugin-config + + tolerations: + # Add explicit (short) tolerations for node failure, because otherwise the default of 5m + # will be used, which is unacceptably long for us. + - key: node.kubernetes.io/not-ready + tolerationSeconds: 30 + effect: NoExecute + - key: node.kubernetes.io/unreachable + tolerationSeconds: 30 + effect: NoExecute From 3fab71f46ad36c7e0b98258d6ea4cca1791fb75a Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Mon, 18 Nov 2024 22:08:17 -0800 Subject: [PATCH 4/4] Makefile: Remove trailing whitespace (#1150) Saw this in a couple different PRs that haven't yet been merged, presumably from auto-formatting. Figured I'd open a PR for it so it stops showing up in diffs. Co-authored-by: Michael Francis Co-authored-by: Misha Sakhnov --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1b7a96e92..ac83eaf4c 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ PG16_DISK_TEST_IMG ?= pg16-disk-test:dev GOARCH ?= $(shell go env GOARCH) GOOS ?= $(shell go env GOOS) -# The target architecture for linux kernel. Possible values: amd64 or arm64. +# The target architecture for linux kernel. Possible values: amd64 or arm64. # Any other supported by linux kernel architecture could be added by introducing new build step into neonvm/hack/kernel/Dockerfile.kernel-builder KERNEL_TARGET_ARCH ?= amd64 @@ -141,7 +141,7 @@ build: vet bin/vm-builder ## Build all neonvm binaries. .PHONY: bin/vm-builder bin/vm-builder: ## Build vm-builder binary. - GOOS=linux CGO_ENABLED=0 go build -o bin/vm-builder -ldflags "-X main.Version=${GIT_INFO} -X main.NeonvmDaemonImage=${IMG_DAEMON}" vm-builder/main.go + GOOS=linux CGO_ENABLED=0 go build -o bin/vm-builder -ldflags "-X main.Version=${GIT_INFO} -X main.NeonvmDaemonImage=${IMG_DAEMON}" vm-builder/main.go .PHONY: run run: vet ## Run a controller from your host. go run ./neonvm/main.go