diff --git a/.github/ISSUE_TEMPLATE/bug-template.md b/.github/ISSUE_TEMPLATE/bug-template.md new file mode 100644 index 000000000..cd6c7d48c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-template.md @@ -0,0 +1,46 @@ +--- +name: Bug template +about: Template that will help you to submit a visible and actionable bug report. +title: 'Bug: ' +labels: t/bug +assignees: '' + +--- + +## Environment + + + +## Steps to reproduce + + + +## Expected result + + + +## Actual result + + + +## Other logs, links + +- ... diff --git a/.github/ISSUE_TEMPLATE/epic-template.md b/.github/ISSUE_TEMPLATE/epic-template.md new file mode 100644 index 000000000..5f666b0a7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/epic-template.md @@ -0,0 +1,51 @@ +--- +name: Epic Template +about: A set of related tasks contributing towards specific outcome, comprising of + more than 1 week of work. +title: 'Epic: ' +labels: t/Epic +assignees: '' + +--- + +## Motivation + + + +## DoD + + + + +## Implementation ideas + + + +TODO + + +## Tasks + +```[tasklist] +- [ ] ... +- [ ] List tasks as they're created for this Epic +``` + + +## Other related tasks, Epics, and links + +- diff --git a/.github/ISSUE_TEMPLATE/feature-request-template.md b/.github/ISSUE_TEMPLATE/feature-request-template.md new file mode 100644 index 000000000..5b4142de7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request-template.md @@ -0,0 +1,37 @@ +--- +name: Feature request template +about: Template that will help you to submit a visible and actionable feature request. +title: 'Feature: ' +labels: t/feature +assignees: '' + +--- + +## Problem description / Motivation + + + +## Feature idea(s) / DoD + + + +## Implementation ideas + + diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml index 71130df80..52417781c 100644 --- a/.github/workflows/e2e-test.yaml +++ b/.github/workflows/e2e-test.yaml @@ -8,6 +8,7 @@ on: jobs: e2e-tests: strategy: + fail-fast: false matrix: cluster: - k3d @@ -64,8 +65,11 @@ jobs: - run: make ${{ matrix.cluster }}-setup - run: make deploy + timeout-minutes: 10 - run: make example-vms + timeout-minutes: 10 - run: make e2e + timeout-minutes: 15 - name: Get k8s logs and events if: always() diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index b9e07ab15..b6f4fd6f8 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -7,7 +7,8 @@ on: env: AGENT_IMAGE: "neondatabase/autoscaler-agent" SCHED_IMAGE: "neondatabase/autoscale-scheduler" - INFORMANT_IMAGE: "neondatabase/vm-informant" + MONITOR_IMAGE: "neondatabase/vm-monitor" + VM_MONITOR_BRANCH: "main" KUSTOMIZE_VERSION: "4.5.7" CONTROLLER_TOOLS_VERSION: "0.10.0" @@ -50,7 +51,10 @@ jobs: run: go build ./... - name: build binaries - run: make VM_INFORMANT_IMG=${{ env.INFORMANT_IMAGE }}:${{ steps.get_vcs_info.outputs.version }} build + run: | + make \ + VM_MONITOR_IMG=${{ env.MONITOR_IMAGE }}:${{ steps.get_vcs_info.outputs.version }} \ + build - name: docker - install qemu uses: docker/setup-qemu-action@v2 @@ -119,16 +123,17 @@ jobs: build-args: | GIT_INFO=${{ steps.get_vcs_info.outputs.git_info }} - - name: build and push vm-informant image + - name: build and push vm-monitor image uses: docker/build-push-action@v3 with: context: . 
platforms: linux/amd64 push: true - file: build/vm-informant/Dockerfile - tags: ${{ env.INFORMANT_IMAGE }}:${{ steps.get_vcs_info.outputs.version }} + file: build/vm-monitor/Dockerfile + tags: ${{ env.MONITOR_IMAGE }}:${{ steps.get_vcs_info.outputs.version }} build-args: | GIT_INFO=${{ steps.get_vcs_info.outputs.git_info }} + BRANCH=${{ env.VM_MONITOR_BRANCH }} - name: render kubernetes resources uses: stefanprodan/kube-tools@v1 @@ -149,12 +154,12 @@ jobs: kustomize build deploy/scheduler > rendered_manifests/autoscale-scheduler.yaml kustomize build deploy/agent > rendered_manifests/autoscaler-agent.yaml - # Because we want a docker image for the VM informant, the easiest way for us to also provide + # Because we want a docker image for the VM monitor, the easiest way for us to also provide # a binary is by just extracting it from the container image itself. - - name: extract vm-informant binary + - name: extract vm-monitor binary run: | - ID=$(docker create ${{ env.INFORMANT_IMAGE }}:${{ steps.get_vcs_info.outputs.version }}) - docker cp $ID:/usr/bin/vm-informant bin/vm-informant + ID=$(docker create ${{ env.MONITOR_IMAGE }}:${{ steps.get_vcs_info.outputs.version }}) + docker cp $ID:/usr/bin/vm-monitor bin/vm-monitor docker rm -f $ID - name: build and push cluster-autoscaler image @@ -173,10 +178,11 @@ jobs: files: | bin/vm-builder bin/vm-builder-generic - bin/vm-informant + bin/vm-monitor rendered_manifests/autoscale-scheduler.yaml rendered_manifests/autoscaler-agent.yaml rendered_manifests/neonvm.yaml rendered_manifests/multus.yaml rendered_manifests/multus-eks.yaml rendered_manifests/whereabouts.yaml + deploy/vmscrape.yaml diff --git a/.golangci.yml b/.golangci.yml index 40fa640ae..523d877c4 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -9,7 +9,7 @@ run: issues: exclude: # ChanMutex contains only a channel, which *is* safe to copy - - 'copylocks: return copies lock value: github\.com/neondatabase/autoscaling/pkg/util\.ChanMutex' + - 'copylocks: .* copies lock value.*: github\.com/neondatabase/autoscaling/pkg/util\.ChanMutex' output: format: colored-line-number @@ -54,8 +54,8 @@ linters-settings: exclude: - '^net/http\.(Client|Server)' - '^net\.TCPAddr$' - # metav1.{CreateOptions,GetOptions,ListOptions,WatchOptions,PatchOptions} - - '^k8s\.io/apimachinery/pkg/apis/meta/v1\.(Create|Get|List|Watch|Patch)Options$' + # metav1.{CreateOptions,GetOptions,ListOptions,WatchOptions,PatchOptions,DeleteOptions} + - '^k8s\.io/apimachinery/pkg/apis/meta/v1\.(Create|Get|List|Watch|Patch|Delete)Options$' - '^k8s\.io/apimachinery/pkg/apis/meta/v1\.ObjectMeta$' - '^k8s\.io/apimachinery/pkg/api/resource\.Quantity$' - '^github.com/prometheus/client_golang/prometheus(/.*)?\.\w+Opts$' diff --git a/ARCHITECTURE-network-diagram.org b/ARCHITECTURE-network-diagram.org index 5f836c820..9c565a8a8 100644 --- a/ARCHITECTURE-network-diagram.org +++ b/ARCHITECTURE-network-diagram.org @@ -32,29 +32,29 @@ awk '/#\+BEGIN_SRC/{flag=1;next}/#\+END_SRC/{flag=0}flag' ARCHITECTURE-network-d | +---------| autoscaler agent | | | | | | (one per K8s node) | - | +-----------------*--+ - | | | ^ random port - | | | | (per VM) - | | | | -+=================|==================================|===========|===|======+ -: K8s pod | | | | : -: QMP | | | | : -: 20183 V | | | : -: +---------------*----------------------------------|-----------|---|---+ : -: | | | | | : -: | QEMU process | | | | : -: | | | | | : -: | | | | | : -: | compute_ctl postgres metrics | informant | | | : -: | mgmt API postgres prometheus | informant | | | : -: | 
3080 5432 9100 V 10301 V | | : + | +--------------------+ + | | | + | | | + | | | ++=================|==================================|===========|==========+ +: K8s pod | | | : +: QMP | | | : +: 20183 V | | : +: +---------------*----------------------------------|-----------|-------+ : +: | | | | : +: | QEMU process | | | : +: | | | | : +: | | | | : +: | compute_ctl postgres metrics | monitor | | : +: | mgmt API postgres prometheus | websocket | | : +: | 3080 5432 9100 V 10301 V | : : +------------------------*-----------*-------------*-----------*-------+ : : | VM | : : | | : : | Inside the VM runs: | : : | - compute_ctl (listens on port 3080) | : +: | - VM monitor (port 10301 via websocket) | : : | - Postgres (port 5432) | : -: | - VM informant (port 10301) | : : | - vector (metrics on port 9100) | : : | | : : +----------------------------------------------------------------------+ : diff --git a/ARCHITECTURE-network-diagram.png b/ARCHITECTURE-network-diagram.png index 81c367cc9..e58c91c98 100644 Binary files a/ARCHITECTURE-network-diagram.png and b/ARCHITECTURE-network-diagram.png differ diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 44ab79110..337c5a2e5 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -18,7 +18,7 @@ This document should be up-to-date. If it isn't, that's a mistake (open an issue * [Agent-Scheduler protocol steps](#agent-scheduler-protocol-steps) * [Node pressure and watermarks](#node-pressure-and-watermarks) * [High-level consequences of the Agent-Scheduler protocol](#high-level-consequences-of-the-agent-scheduler-protocol) -* [Agent-Informant protocol details](#agent-informant-protocol-details) +* [Agent-Monitor protocol details](#agent-monitor-protocol-details) * [Footguns](#footguns) ## See also @@ -27,32 +27,38 @@ This isn't the only architecture document. You may also want to look at: * [`pkg/plugin/ARCHITECTURE.md`](pkg/plugin/ARCHITECTURE.md) — detail on the implementation of the scheduler plugin +* [`neondatabase/vm-monitor`](https://github.com/neondatabase/vm-monitor) - +where the (VM) monitor, an autoscaling component that manages a Postgres, lives. ## High-level overview -At a high level, this repository provides three components: +At a high level, this repository provides two components: 1. A modified Kubernetes scheduler (using the [plugin interface]) — known as "the (scheduler) - plugin", `AutoscaleEnforcer`, `autscale-scheduler` + plugin", `AutoscaleEnforcer`, `autoscale-scheduler` 2. A daemonset responsible for making VM scaling decisions & checking with interested parties — known as `autoscaler-agent` or simply `agent` -3. A binary running inside of the VM to (a) provide metrics to the `autoscaler-agent`, (b) validate - that downscaling is ok, and (c) request immediate upscaling due to sharp changes in demand — - known as "the (VM) informant" + +A third component, a binary running inside of the VM to (a) handle being upscaled +(b) validate that downscaling is ok, and (c) request immediate upscaling due to sharp changes in demand +— known as "the (VM) monitor", lives in +[`neondatabase/vm-monitor`](https://github.com/neondatabase/vm-monitor) [plugin interface]: https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/ The scheduler plugin is responsible for handling resource requests from the `autoscaler-agent`, capping increases so that node resources aren't overcommitted. 
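The capping rule can be illustrated with a short sketch. This is not the plugin's real code or types (those live in `pkg/plugin`); it only shows the invariant being described: a permit never exceeds the node's remaining headroom.

```go
// Illustrative only — hypothetical types, not the scheduler plugin's API.
package main

import "fmt"

// resources is a simplified pair of the quantities the plugin reasons about.
type resources struct {
	VCPU int // vCPUs
	Mem  int // memory slots
}

// capGrant grants at most the requested increase, reduced so that the node's
// reserved total never exceeds its capacity.
func capGrant(requestedIncrease, reserved, capacity resources) resources {
	grant := requestedIncrease
	if avail := capacity.VCPU - reserved.VCPU; grant.VCPU > avail {
		grant.VCPU = avail
	}
	if avail := capacity.Mem - reserved.Mem; grant.Mem > avail {
		grant.Mem = avail
	}
	return grant
}

func main() {
	fmt.Println(capGrant(
		resources{VCPU: 4, Mem: 16},  // agent asks to grow by this much
		resources{VCPU: 14, Mem: 56}, // already reserved on the node
		resources{VCPU: 16, Mem: 64}, // node capacity
	)) // prints {2 8}: the increase is capped to the remaining headroom
}
```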
-The `autoscaler-agent` periodically reads from a metrics source in the VM (defined by the -_informant_) and makes scaling decisions about the _desired_ resource allocation. It then -requests these resources from the scheduler plugin, and submits a patch request for its NeonVM to -update the resources. +The `autoscaler-agent` periodically reads from a metrics source in the VM (currently vector's +`node_exporter`-like functionality) and makes scaling decisions about the _desired_ resource +allocation. It then requests these resources from the scheduler plugin, and submits a patch request +for its NeonVM to update the resources. -The VM informant provides is responsible for handling all of the functionality inside the VM that -the `autoscaler-agent` cannot. It provides metrics (or: informs the agent where it can find those) -and approves attempts to downscale resource usage (or: rejects them, if they're still in use). +The VM monitor is responsible for handling all of the resource management functionality inside +the VM that the `autoscaler-agent` cannot. This constitutes handling upscales (eg. increasing Postgres +file cache size), approving attempts to downscale resource usage (or: rejecting them, if those +resources are still in use), and requesting upscale when memory usage increases too rapidly for +metrics to catch. NeonVM is able to live-scale the resources given to a VM (i.e. CPU and memory _slots_) by handling patches to the Kubernetes VM object, which requires connecting to QEMU running on the outer @@ -66,15 +72,15 @@ discussed more in the [high-level consequences] section below. ## Network connections between components -![Diagram of network connections between the components listed above, in addition to the kubernetes API and Neon compute node. Directed arrows indicate which component initiates each TCP connection](ARCHITECTURE-network-diagram.png) +![Diagram of network connections between the components listed above, in addition to the Kubernetes API and Neon compute node. Directed arrows indicate which component initiates each TCP connection](ARCHITECTURE-network-diagram.png) [Diagram source](ARCHITECTURE-network-diagram.org) ## Repository structure * `build/` — scripts for building the scheduler (`autoscale-scheduler`) and `autoscaler-agent` * `cluster-autoscaler/` — patch and Dockerfile for building a NeonVM-compatible [cluster-autoscaler] -* `cmd/` — entrypoints for the `autoscaler-agent`, VM informant, and scheduler plugin. Very little - functionality implemented here. (See: `pkg/agent`, `pkg/informant`, and `pkg/plugin`) +* `cmd/` — entrypoints for the `autoscaler-agent` and scheduler plugin. Very little + functionality implemented here. (See: `pkg/agent` and `pkg/plugin`) * `deploy/` — YAML files used during cluster init. Of these, only the following two are manually written: * `deploy/autoscaler-agent.yaml` @@ -90,7 +96,6 @@ discussed more in the [high-level consequences] section below. independently used by multiple components. * `pkg/billing/` — consumption metrics API, primarily used in [`pkg/agent/billing.go`](pkg/agent/billing.go) - * `pkg/informant/` — implementation of the VM informant * `pkg/plugin/` — implementation of the scheduler plugin * `pkg/util/` — miscellaneous utilities that are too general to be included in `agent` or `plugin`. @@ -98,7 +103,7 @@ discussed more in the [high-level consequences] section below. 
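As a rough illustration of that goal-seeking (the real logic lives in `pkg/agent`, and the names below are made up), the desired allocation can be thought of as the smallest number of compute units that keeps the load average and memory usage under the configured fraction targets (`loadAverageFractionTarget` and `memoryUsageFractionTarget` in the agent config):

```go
// Rough sketch only — not the agent's actual algorithm or types.
package main

import (
	"fmt"
	"math"
)

type computeUnit struct {
	VCPU float64 // e.g. 0.25 vCPU per unit
	MemB float64 // bytes per unit, e.g. 1 GiB
}

// desiredUnits picks the smallest whole number of compute units that keeps
// both the 1-minute load average and memory usage under their targets.
func desiredUnits(load1, memUsedB float64, cu computeUnit, loadTarget, memTarget float64) int {
	byCPU := math.Ceil(load1 / (loadTarget * cu.VCPU))
	byMem := math.Ceil(memUsedB / (memTarget * cu.MemB))
	return int(math.Max(1, math.Max(byCPU, byMem)))
}

func main() {
	cu := computeUnit{VCPU: 0.25, MemB: 1 << 30}
	// load average 1.2 and 2.5 GiB in use, with the example config's targets
	fmt.Println(desiredUnits(1.2, 2.5*float64(1<<30), cu, 0.9, 0.75)) // 6
}
```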
* `scripts/patch-*.json` — patches for testing live-updating of a VM or config * `scripts/replace-scheduler.sh` — replaces the currently running scheduler, for quick redeploy * `scripts/repeat-delete-scheduler.sh` — repeatedly deletes the scheduler (which will be - recreated by the depoyment). For debugging. + recreated by the deployment). For debugging. * `scripts/run-bench.sh` — starts a CPU-intensive pgbench connected to a VM. Useful to watch the TPS and get confirmation that autoscaled CPUs are being used. * `scripts/scheduler-logs.sh` — convenience script to tail the scheduler's logs @@ -111,7 +116,7 @@ discussed more in the [high-level consequences] section below. * `vm-examples/` — collection of VMs: * `pg14-disk-test/` — VM with Postgres 14 and and ssh access * Refer to [`vm-examples/pg14-disk-test/README.md`](./vm-examples/pg14-disk-test) for more information. - * `postgres-minimal/` — Minimal postgres 15 VM used in e2e tests + * `postgres-minimal/` — Minimal Postgres 15 VM used in e2e tests [cluster-autoscaler]: https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler @@ -132,7 +137,7 @@ on each node, the scheduler can prevent ### Agent-Scheduler protocol steps -1. On startup (for a particular VM), the `autoscaler-agent` [connects to the VM informant] and +1. On startup (for a particular VM), the `autoscaler-agent` [connects to the VM monitor] and fetches some initial metrics. 2. After successfully receiving a response, the autoscaler-agent sends an `AgentRequest` with the metrics and current resource allocation (i.e. it does not request any scaling). @@ -159,7 +164,7 @@ on each node, the scheduler can prevent that scales those resources up. * This has the same connection flow as the earlier patch request. -[connects to the VM informant]: #agent-informant-protocol-details +[connects to the VM monitor]: #agent-monitor-protocol-details ### Node pressure and watermarks @@ -192,72 +197,48 @@ than the amount of pressure already accounted for. cause the scheduler to return `Permit`s that aren't a clean multiple of a compute unit. (e.g., nodes have mismatched memory vs CPU, or external pods / system reserved are mismatched) -## Agent-Informant protocol details - -A brief note before we get started: There are a lot of specific difficulties around making sure that -the informant is always talking to _some_ agent — ideally the most recent one. While _in theory_ -there should only ever be one, we might have `n=0` or `n>1` during rollouts of new versions. Our -process for handling this is not discussed here — this section only covers the communciations -between a single agent and informant. - -The relevant types for the agent-informant protocol are all in [`pkg/api/types.go`]. If using this -as a reference, it may be helpful to have that file open at the same time. - -[`pkg/api/types.go`]: ./pkg/api/types.go - -It may also be worth noting that this protocol is versioned. For an overview of version -compatibility and how it relates to releases of this repository, refer to -[`pkg/api/VERSIONING.md`](./pkg/api/VERSIONING.md). - -The protocol is as follows: - -1. On startup, the VM informant starts an HTTP server listening on `0.0.0.0:10301`. -2. On startup for this VM, the `autoscaler-agent` starts an HTTP server listening _some_ port -3. The agent sends an `AgentDesc` to the informant as a POST request on the `/register` endpoint. - Before responding: - 1. 
If the informant has already registered an agent with the same `AgentDesc.AgentID`, it - immediately responds with HTTP code 409. - 2. If the informant's protocol version doesn't match the `AgentDesc`'s min/max protocol - versions, it immediately responds with HTTP code 400. - 3. Using the provided `ServerAddr` from the agent's `AgentDesc`, the informant makes a GET - request on the agent's `/id` endpoint - 4. The agent responds immediately to the `/id` request with an `AgentMessage[AgentIdentification]`. - 5. If the agent's `AgentIdentification.AgentID` doesn't match the original `AgentDesc.AgentID`, - the informant responds with HTTP code 400. - 6. Otherwise, the informant responds with HTTP code 200, returning an `InformantDesc` describing - its capabilities and which protocol version to use. -4. Begin "normal operation". During this, there are a few types of requests made between the agent - and informant. Each party can make **only one request at a time**. The agent starts in the - "suspended" state. - 1. The informant's `/health-check` endpoint (via PUT), with `AgentIdentification`. This allows - the autoscaler-agent to check that the informant is up and running, and that it still - recognizes the agent. - 2. The informant's `/downscale` endpoint (via PUT), with `AgentResourceMessage`. This serves as the - agent _politely asking_ the informant to decrease resource usage to the specified amount. - The informant returns a `DownscaleResult` indicating whether it was able to downscale (it may - not, if e.g. memory usage is too high). - 3. The informant's `/upscale` endpoint (via PUT), with `AgentResourceMessage`. This serves as the agent - _notifying_ the informant that its resources have increased to the provided amount. - 4. The agent's `/suspend` endpoint (via POST), with `SuspendAgent`. This allows the informant to - inform the agent that it is no longer in use for the VM. While suspended, the agent **must - not** make any `downscale` or `upscale` requests. The informant **must not** double-suspend - an agent. - 5. The agent's `/resume` endpoint (via POST), with `ResumeAgent`. This allows the informant to - pick up communication with an agent that was previously suspended. The informant **must not** - double-resume an agent. - 6. The agent's `/id` endpoint (via GET) is also available during normal operation, and is used - as a health check by the informant. - 7. The agent's `/try-upscale` endpoint (via POST), with `MoreResources`. This allows the - informant to request more of a particular resource (e.g. memory). The agent MUST respond - immediately with an `AgentIdentification`. It MAY later send an `/upscale` request to the - informant once the requested increase in resources has been achieved. -5. If explicitly cut off, communication ends with the agent sending the original `AgentDesc` as a - DELETE request on the `/unregister` endpoint. The informant returns an `UnregisterAgent`. - -Broadly, agent<->informant connections are not expected to survive restarts of the informant (due -to failure, or otherwise). So, it is expected that *sometimes*, the informant will receive a request -for an agent that it has no connection to. When that happens, the informant MUST respond with HTTP -code 404, and the agent SHOULD try reconnecting. +## Agent-Monitor protocol details + +Agent-Monitor communication is carried out through a relatively simple _versioned_ protocol +over websocket. One party sends a message, the other responds. 
There are various +message types that each party sends, and all messages are annotated with an ID. +The allows a sender to recognize responses to its previous messages. If the +out-message has ID X, then the return message will also have ID X. + +Like the other protocols, relevant types are located in [`pkg/api/types.go`]. + +1. On startup, the VM monitor listens for websocket connections on `127.0.0.1:10369` +2. On startup, the agent connects to the monitor via websocket on `127.0.0.1:10369/monitor` +3. The agent then sends a `VersionRange[MonitorProtocolVersion]` with the range of + protocols it supports. +4. The monitor responds with the highest common version between the two. If there is no + compatible protocol, it returns an error. +5. From this point on, either party may initiate a transaction by sending a Message. +6. The other party responds with the appropriate message, with the same ID attached + so that the receiver knows it has received a response. + +Currently, the following interactions are supported: +``` +Monitor sends UpscaleRequest +Agent returns NotifyUpscale + +Agent sends TryDownscale +Monitor returns DownscaleResult + +Agent sends NotifyUpscale +Monitor returns UpscaleConfirmation + +Agent sends HealthCheck +Monitor returns HealthCheck +``` + +*Healthchecks*: the agent initiates a health check every 5 seconds. The monitor +simply returns with an ack. + +There are two additional messages types that either party may send: +- `InvalidMessage`: sent when either party fails to deserialize a message it received +- `InternalError`: used to indicate that an error occured while processing a request, + for example, if the monitor errors while trying to downscale ## Footguns diff --git a/LOGGING.md b/LOGGING.md index d56ffa8a4..ecfdfabb2 100644 --- a/LOGGING.md +++ b/LOGGING.md @@ -10,9 +10,8 @@ The following components have been updated to follow this document: - [x] autoscaler-agent - [x] autoscale-scheduler (scheduler plugin) -- [x] vm-informant - [ ] neonvm-controlller -- [ ] neonvm-runner +- [x] neonvm-runner ## Common keys @@ -48,9 +47,8 @@ the VM's resources, so they share a logger name (`agent.runner.main`). ## Logger naming conventions -- `component.*` — each component (e.g. "autoscaler-agent", "vm-informant", etc) has logger names +- `component.*` — each component (e.g. "autoscaler-agent") has logger names prefixed with the name of the component - - This is necessary so that compute logs can filter out the vm-informant. - `*.main` — if the bulk of the logic for something is in one straightforward loop (like `autoscaler-agent.runner.main`) - `*.klog` — for klog output that's been redirected diff --git a/Makefile b/Makefile index 8f352aeab..9e51582dc 100644 --- a/Makefile +++ b/Makefile @@ -6,10 +6,13 @@ IMG_VXLAN ?= vxlan-controller:dev # Autoscaler related images AUTOSCALER_SCHEDULER_IMG ?= autoscale-scheduler:dev AUTOSCALER_AGENT_IMG ?= autoscaler-agent:dev -VM_INFORMANT_IMG ?= vm-informant:dev +VM_MONITOR_IMG ?= vm-monitor:dev E2E_TESTS_VM_IMG ?= vm-postgres:15-bullseye PG14_DISK_TEST_IMG ?= pg14-disk-test:dev +# Which branch of neondatabase/neon to pull the vm-monitor from +VM_MONITOR_BRANCH ?= main + # kernel for guests VM_KERNEL_VERSION ?= "5.15.80" @@ -120,7 +123,7 @@ build: fmt vet bin/vm-builder bin/vm-builder-generic ## Build all neonvm binarie .PHONY: bin/vm-builder bin/vm-builder: ## Build vm-builder binary. 
- CGO_ENABLED=0 go build -o bin/vm-builder -ldflags "-X main.Version=${GIT_INFO} -X main.VMInformant=${VM_INFORMANT_IMG}" neonvm/tools/vm-builder/main.go + CGO_ENABLED=0 go build -o bin/vm-builder -ldflags "-X main.Version=${GIT_INFO} -X main.VMMonitor=${VM_MONITOR_IMG}" neonvm/tools/vm-builder/main.go .PHONY: bin/vm-builder-generic bin/vm-builder-generic: ## Build vm-builder-generic binary. @@ -130,20 +133,20 @@ bin/vm-builder-generic: ## Build vm-builder-generic binary. run: fmt vet ## Run a controller from your host. go run ./neonvm/main.go -.PHONY: vm-informant -vm-informant: ## Build vm-informant image +.PHONY: vm-monitor +vm-monitor: ## Build vm-monitor image docker buildx build \ - --tag $(VM_INFORMANT_IMG) \ + --tag $(VM_MONITOR_IMG) \ --load \ - --build-arg GIT_INFO=$(GIT_INFO) \ - --file build/vm-informant/Dockerfile \ + --build-arg BRANCH=$(VM_MONITOR_BRANCH) \ + --file build/vm-monitor/Dockerfile \ . # If you wish built the controller image targeting other platforms you can use the --platform flag. # (i.e. docker build --platform linux/arm64 ). However, you must enable docker buildKit for it. # More info: https://docs.docker.com/develop/develop-images/build_enhancements/ .PHONY: docker-build -docker-build: docker-build-controller docker-build-runner docker-build-vxlan-controller docker-build-autoscaler-agent docker-build-scheduler vm-informant ## Build docker images for NeonVM controllers, NeonVM runner, autoscaler-agent, and scheduler +docker-build: docker-build-controller docker-build-runner docker-build-vxlan-controller docker-build-autoscaler-agent docker-build-scheduler vm-monitor ## Build docker images for NeonVM controllers, NeonVM runner, autoscaler-agent, scheduler, vm-monitor .PHONY: docker-push docker-push: docker-build ## Push docker images to docker registry @@ -152,7 +155,7 @@ docker-push: docker-build ## Push docker images to docker registry docker push -q $(IMG_VXLAN) docker push -q $(AUTOSCALER_SCHEDULER_IMG) docker push -q $(AUTOSCALER_AGENT_IMG) - docker push -q $(VM_INFORMANT_IMG) + docker push -q $(VM_MONITOR_IMG) .PHONY: docker-build-controller docker-build-controller: ## Build docker image for NeonVM controller @@ -185,11 +188,11 @@ docker-build-scheduler: ## Build docker image for (autoscaling) scheduler . .PHONY: docker-build-examples -docker-build-examples: vm-informant bin/vm-builder ## Build docker images for testing VMs - ./bin/vm-builder -src postgres:15-bullseye -dst $(E2E_TESTS_VM_IMG) +docker-build-examples: bin/vm-builder ## Build docker images for testing VMs + ./bin/vm-builder -src postgres:15-bullseye -dst $(E2E_TESTS_VM_IMG) -enable-monitor .PHONY: docker-build-pg14-disk-test -docker-build-pg14-disk-test: vm-informant bin/vm-builder-generic ## Build a VM image for testing +docker-build-pg14-disk-test: vm-monitor bin/vm-builder-generic ## Build a VM image for testing if [ -a 'vm-examples/pg14-disk-test/ssh_id_rsa' ]; then \ echo "Skipping keygen because 'ssh_id_rsa' already exists"; \ else \ diff --git a/README.md b/README.md index e74c364f3..4f5e004e2 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,9 @@ Images are available as: |----------------|------------| | scheduler (and plugin) | `neondatabase/autoscale-scheduler` | | autoscaler-agent | `neondatabase/autoscaler-agent` | -| VM informant | `neondatabase/vm-informant` | +| VM monitor | `neondatabase/vm-monitor` | -The deployment files and a VM informant binary are attached to each release. +The deployment files and a VM monitor binary are attached to each release. 
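The Makefile above injects the monitor image name into `vm-builder` via `-ldflags "-X ..."`. As a reminder of how that mechanism works (a sketch only, not `vm-builder`'s actual `main.go`), the linker overwrites package-level string variables at build time:

```go
// Sketch of the -ldflags "-X ..." plumbing the Makefile relies on.
package main

import "fmt"

// Overridden at link time, e.g.:
//   go build -ldflags "-X main.Version=<git info> -X main.VMMonitor=<image>" ...
var (
	Version   = "unknown"
	VMMonitor = "vm-monitor:dev"
)

func main() {
	fmt.Printf("vm-builder %s (default monitor image: %s)\n", Version, VMMonitor)
}
```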
For information on inter-version compatibility, see [`pkg/api/VERSIONING.md`](./pkg/api/VERSIONING.md). @@ -41,7 +41,7 @@ settled on the following: demand is above a pre-configured threshold * Each K8s node has an `autoscaler-agent` pod that triggers scaling decisions and makes resource requests to the K8s scheduler on the VMs' behalf to reserve additional resources for them -* Each VM runs the _VM informant_ binary, which communicates to the autoscaler-agent so that it can +* Each compute node runs the _VM monitor binary, which communicates to the autoscaler-agent so that it can immediately respond to memory pressure by allocating more (among other things). Networking is preserved across migrations by giving each VM an additional IP address on a bridge diff --git a/build/vm-informant/Dockerfile b/build/vm-informant/Dockerfile deleted file mode 100644 index bfa31e893..000000000 --- a/build/vm-informant/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -FROM golang:1.20-alpine AS builder -WORKDIR /workspace - -RUN apk add gcc musl-dev # gcc (and therefore musl-dev) is required for cgo extensions - -COPY go.mod go.mod -COPY go.sum go.sum -RUN go mod download - -COPY neonvm/apis neonvm/apis -COPY pkg/api pkg/api -COPY pkg/informant pkg/informant -COPY pkg/util pkg/util -COPY cmd/vm-informant cmd/vm-informant - -ARG GIT_INFO - -RUN --mount=type=cache,target=/root/.cache/go-build \ - go build -a \ - # future compat: don't modify go.mod if we have a vendor directory \ - -mod readonly \ - # -ldflags "-X ..." allows us to overwrite the value of a variable in a package \ - -ldflags "-X 'github.com/neondatabase/autoscaling/pkg/util.BuildGitInfo=$GIT_INFO'" \ - cmd/vm-informant/main.go - -FROM alpine -COPY --from=builder /workspace/main /usr/bin/vm-informant -ENTRYPOINT ["/usr/bin/vm-informant"] diff --git a/build/vm-monitor/Dockerfile b/build/vm-monitor/Dockerfile new file mode 100644 index 000000000..6e4b95052 --- /dev/null +++ b/build/vm-monitor/Dockerfile @@ -0,0 +1,21 @@ +FROM rust:1.70-alpine as builder +WORKDIR /workspace + +RUN apk add musl-dev git + +# Which branch to pull from +ARG BRANCH + +# Ensures we reclone upon new commits +# https://stackoverflow.com/questions/35134713 +ADD "https://api.github.com/repos/neondatabase/neon/commits/$BRANCH" latest_commit + +RUN git clone --depth 1 --branch $BRANCH https://github.com/neondatabase/neon.git +RUN cargo build --release --manifest-path neon/libs/vm_monitor/Cargo.toml +# Move binary so we can cargo clean +RUN mkdir -p /workspace/bin && cp /workspace/neon/target/release/vm-monitor /workspace/bin +# Cargo clean dramatically reduces the size of the image +RUN cargo clean --release --manifest-path neon/libs/vm_monitor/Cargo.toml + +FROM builder +COPY --from=builder /workspace/bin/vm-monitor /usr/bin/vm-monitor diff --git a/cmd/autoscale-scheduler/main.go b/cmd/autoscale-scheduler/main.go index c8bb5a5b1..0024a049c 100644 --- a/cmd/autoscale-scheduler/main.go +++ b/cmd/autoscale-scheduler/main.go @@ -23,7 +23,9 @@ import ( // all of the juicy bits are defined in pkg/plugin/ func main() { - logger := zap.Must(zap.NewProduction()).Named("autoscale-scheduler") + logConfig := zap.NewProductionConfig() + logConfig.Sampling = nil // Disable sampling, which the production config enables by default. 
+ logger := zap.Must(logConfig.Build()).Named("autoscale-scheduler") logger.Info("", zap.Any("buildInfo", util.GetBuildInfo())) if err := runProgram(logger); err != nil { diff --git a/cmd/autoscaler-agent/main.go b/cmd/autoscaler-agent/main.go index 42997c537..c6292877a 100644 --- a/cmd/autoscaler-agent/main.go +++ b/cmd/autoscaler-agent/main.go @@ -21,7 +21,9 @@ import ( ) func main() { - logger := zap.Must(zap.NewProduction()).Named("autoscaler-agent") + logConfig := zap.NewProductionConfig() + logConfig.Sampling = nil // Disable sampling, which the production config enables by default. + logger := zap.Must(logConfig.Build()).Named("autoscaler-agent") defer logger.Sync() //nolint:errcheck // what are we gonna do, log something about it? logger.Info("", zap.Any("buildInfo", util.GetBuildInfo())) diff --git a/cmd/vm-informant/main.go b/cmd/vm-informant/main.go deleted file mode 100644 index 93b773e15..000000000 --- a/cmd/vm-informant/main.go +++ /dev/null @@ -1,242 +0,0 @@ -package main - -import ( - "context" - "flag" - "fmt" - "net/http" - "os" - "os/exec" - "os/signal" - "syscall" - "time" - - "github.com/containerd/cgroups/v3/cgroup2" - "github.com/tychoish/fun/srv" - "go.uber.org/zap" - - "github.com/neondatabase/autoscaling/pkg/informant" - "github.com/neondatabase/autoscaling/pkg/util" -) - -const minSubProcessRestartInterval = 5 * time.Second - -func main() { - logger := zap.Must(zap.NewProduction()).Named("vm-informant") - defer logger.Sync() //nolint:errcheck // what are we gonna do, log something about it? - - logger.Info("", zap.Any("buildInfo", util.GetBuildInfo())) - - ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM) - defer cancel() - ctx = srv.SetShutdownSignal(ctx) // allows workers to cause a shutdown - ctx = srv.WithOrchestrator(ctx) // creates and starts an orchestrator - ctx = srv.SetBaseContext(ctx) // sets a context for starting async work in request scopes - - orca := srv.GetOrchestrator(ctx) - - defer func() { - if err := orca.Service().Wait(); err != nil { - logger.Panic("Failed to shut down service", zap.Error(err)) - } - }() - - // Below, we want to be able to distinguish between absence of flags and presence of empty - // flags. The only way we can reliably do this is by setting defaults to a sentinel value that - // isn't possible to create otherwise. In this case, it's a string containing a null byte, which - // cannot be provided (due to C's null-terminated strings). 
- invalidArgValue := "\x00" - - var cgroupName string - var autoRestart bool - var pgConnStr string - flag.StringVar(&cgroupName, "cgroup", invalidArgValue, "Sets the cgroup to monitor (optional)") - flag.BoolVar(&autoRestart, "auto-restart", false, "Automatically cleanup and restart on failure or exit") - flag.StringVar(&pgConnStr, "pgconnstr", invalidArgValue, "Sets the postgres connection string to enable file cache (optional)") - - flag.Parse() - - // If we were asked to restart on failure, handle that separately: - if autoRestart { - logger = logger.Named("parent") - - var args []string - var cleanupHooks []func() - - if pgConnStr != invalidArgValue { - args = append(args, "-pgconnstr", pgConnStr) - } - if cgroupName != invalidArgValue { - args = append(args, "-cgroup", cgroupName) - cleanupHooks = append(cleanupHooks, func() { - logger.Info("cleanup hook: making sure cgroup is thawed", zap.String("cgroup", cgroupName)) - manager, err := cgroup2.Load(fmt.Sprint("/", cgroupName)) - if err != nil { - logger.Error("Error making cgroup handler", zap.Error(err)) - return - } - if err := manager.Thaw(); err != nil { - logger.Error("Error thawing cgroup", zap.Error(err)) - } - }) - } - - runRestartOnFailure(ctx, logger, args, cleanupHooks) - closer := srv.GetShutdownSignal(ctx) - // this cancels the process' underlying context - closer() - // this drops to the defer that waits for all services to shutdown - // will run now. - return - } - - var stateOpts []informant.NewStateOpts - - if cgroupName != invalidArgValue { - logger := logger.With(zap.String("cgroup", cgroupName)) - - cgroupConfig := informant.DefaultCgroupConfig - logger.Info("Selected cgroup, starting handler", zap.Any("config", cgroupConfig)) - cgroup, err := informant.NewCgroupManager(logger.Named("cgroup").Named("manager"), cgroupName) - if err != nil { - logger.Fatal("Error starting cgroup handler", zap.Error(err)) - } - - stateOpts = append(stateOpts, informant.WithCgroup(cgroup, cgroupConfig)) - } else { - logger.Info("No cgroup selected") - } - - if pgConnStr != invalidArgValue { - logger := logger.With(zap.String("fileCacheConnstr", pgConnStr)) - - fileCacheConfig := informant.DefaultFileCacheConfig - logger.Info("Selected postgres file cache", zap.Any("config", fileCacheConfig)) - stateOpts = append(stateOpts, informant.WithPostgresFileCache(pgConnStr, fileCacheConfig)) - } else { - logger.Info("No postgres file cache selected") - } - - agents := informant.NewAgentSet(logger) - state, err := informant.NewState(logger, agents, informant.DefaultStateConfig, stateOpts...) - if err != nil { - logger.Fatal("Error starting informant.NewState", zap.Error(err)) - } - - mux := http.NewServeMux() - hl := logger.Named("handle") - util.AddHandler(hl, mux, "/register", http.MethodPost, "AgentDesc", state.RegisterAgent) - util.AddHandler(hl, mux, "/health-check", http.MethodPut, "AgentIdentification", state.HealthCheck) - util.AddHandler(hl, mux, "/downscale", http.MethodPut, "AgentResourceMessage", state.TryDownscale) - util.AddHandler(hl, mux, "/upscale", http.MethodPut, "AgentResourceMessage", state.NotifyUpscale) - util.AddHandler(hl, mux, "/unregister", http.MethodDelete, "AgentDesc", state.UnregisterAgent) - - addr := "0.0.0.0:10301" - hl.Info("Starting server", zap.String("addr", addr)) - - // we create an http service and add it to the orchestrator, - // which will start it and manage its lifecycle. 
- if err := orca.Add(srv.HTTP("vm-informant-api", 5*time.Second, &http.Server{Addr: addr, Handler: mux})); err != nil { - logger.Fatal("Failed to add API server", zap.Error(err)) - } - - // we drop to the defers now, which will block until the signal - // handler is called. -} - -// runRestartOnFailure repeatedly calls this binary with the same flags, but with 'auto-restart' -// removed. -// -// We execute ourselves as a subprocess so that it's possible to appropriately cleanup after -// termination by various signals (or an unhandled panic!). This is worthwhile because we *really* -// don't want to leave the cgroup frozen while waiting to restart. -func runRestartOnFailure(ctx context.Context, logger *zap.Logger, args []string, cleanupHooks []func()) { - selfPath := os.Args[0] - timer := time.NewTimer(0) - defer timer.Stop() - - for { - startTime := time.Now() - sig := make(chan struct{}) - - func() { - pctx, pcancel := context.WithCancel(context.Background()) - defer pcancel() - - cmd := exec.Command(selfPath, args...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - logger.Info("Starting child vm-informant", zap.Any("args", args)) - err := cmd.Start() - if err == nil { - go func() { - defer close(sig) - - select { - case <-pctx.Done(): - return - case <-ctx.Done(): - if pctx.Err() != nil { - // the process has already returned - // and we don't need to signal it - return - } - if err := cmd.Process.Signal(syscall.SIGTERM); err != nil { - logger.Warn("Could not signal child vm-informant process", zap.Error(err)) - } - } - }() - - // this is blocking, but we should - // have killed the process in the - // wait goroutine, or the process would - // return normally. - err = cmd.Wait() - // stop the goroutine above, as the - // process has already returned. - pcancel() - } - - if err != nil { - logger.Error("Child vm-informrant exited with error", zap.Error(err)) - } else { - logger.Warn("Child vm-informant exited without error. 
This should not happen") - } - - for _, h := range cleanupHooks { - h() - } - }() - - select { - case <-ctx.Done(): - logger.Info("Received shutdown signal") - return - case <-sig: - dur := time.Since(startTime) - if dur < minSubProcessRestartInterval { - // drain the timer before resetting it, required by Timer.Reset:: - if !timer.Stop() { - <-timer.C - } - timer.Reset(minSubProcessRestartInterval - dur) - - logger.Info( - "Child vm-informant failed, respecting minimum delay before restart", - zap.Duration("delay", minSubProcessRestartInterval), - ) - select { - case <-ctx.Done(): - logger.Info("Received shutdown signal while delaying before restart", zap.Duration("delay", minSubProcessRestartInterval)) - return - case <-timer.C: - continue - } - } - - logger.Info("Restarting child vm-informant immediately") - continue - } - } -} diff --git a/deploy/agent/config_map.yaml b/deploy/agent/config_map.yaml index 280f8d2b5..5f06d0ff8 100644 --- a/deploy/agent/config_map.yaml +++ b/deploy/agent/config_map.yaml @@ -9,23 +9,23 @@ data: "scaling": { "requestTimeoutSeconds": 10, "defaultConfig": { - "loadAverageFractionTarget": 0.9 + "loadAverageFractionTarget": 0.9, + "memoryUsageFractionTarget": 0.75 } }, - "informant": { - "serverPort": 10301, - "retryServerMinWaitSeconds": 5, - "retryServerNormalWaitSeconds": 5, - "retryDeniedDownscaleSeconds": 5, - "retryFailedRequestSeconds": 3, - "registerRetrySeconds": 5, - "requestTimeoutSeconds": 1, - "registerTimeoutSeconds": 2, - "downscaleTimeoutSeconds": 2, - "unhealthyAfterSilenceDurationSeconds": 20, - "unhealthyStartupGracePeriodSeconds": 20 + "monitor": { + "serverPort": 10301, + "responseTimeoutSeconds": 5, + "connectionTimeoutSeconds": 4, + "connectionRetryMinWaitSeconds": 5, + "unhealthyAfterSilenceDurationSeconds": 20, + "unhealthyStartupGracePeriodSeconds": 20, + "maxHealthCheckSequentialFailuresSeconds": 30, + "retryDeniedDownscaleSeconds": 5, + "retryFailedRequestSeconds": 3 }, "metrics": { + "port": 9100, "loadMetricPrefix": "host_", "requestTimeoutSeconds": 2, "secondsBetweenRequests": 5 diff --git a/deploy/scheduler/config_map.yaml b/deploy/scheduler/config_map.yaml index 075823f92..95c9378b5 100644 --- a/deploy/scheduler/config_map.yaml +++ b/deploy/scheduler/config_map.yaml @@ -27,9 +27,12 @@ data: { "memBlockSize": "1Gi", "nodeDefaults": { - "cpu": { "watermark": 0.7, "system": "500m" }, - "memory": { "watermark": 0.7, "system": "0.5Gi" }, - "computeUnit": { "vCPUs": 0.25, "mem": 1 } + "cpu": { "watermark": 0.9 }, + "memory": { "watermark": 0.9 }, + "computeUnit": { "vCPUs": 0.25, "mem": 1 }, + "minUsageScore": 0.5, + "maxUsageScore": 0, + "scorePeak": 0.8 }, "nodeOverrides": [], "schedulerName": "autoscale-scheduler", @@ -37,5 +40,7 @@ data: "port": 10298, "timeoutSeconds": 5 }, - "doMigration": false + "migrationDeletionRetrySeconds": 5, + "doMigration": false, + "randomizeScores": true } diff --git a/doc/vm-builder/README.md b/doc/vm-builder/README.md new file mode 100644 index 000000000..bcef96602 --- /dev/null +++ b/doc/vm-builder/README.md @@ -0,0 +1,189 @@ +This README covers non-trivial implementation details of vm-builder / vm-builder-generic. + +What `vm-builder` Does +====================== + +vm-builder consumes a Docker image and turns it into a new docker image that runs the Docker container in a qemu VM. +The OS in the VM is a minimal Alpine Linux / busybox environment. +We use busybox `init` as the init system, configured through `/etc/inittab`. +Likewise, the `poweroff` command is provided by busybox. 
+ +We use a virtual CDROM to deliver the container launch command / entrypoint+arguments into the VM. +The script is called `vmstarter.sh`. +It is launched by the `vmstart` script which in turn is configured as a `respawn` service in the `inittab`. +After `vmstarter.sh` exits, `vmstart` exits, and then gets restarted by `respawn`. +This is a bit like docker in `--restart=always` mode. + +**Graceful shutdown** of the container-turned-VM is done through a virtual ACPI power button event. +`acpid` handles the ACPI events and we configure it to call the busybox `poweroff` command. + +Busybox Init & Shutdown +======================= + +The busybox `poweroff` command is integrated with the busybox `init` command as follows: + +0. Invoking busybox `poweroff` signals SIGUSR2 to the busybox `init` process. + The `init` process then does the following: +1. Stop waiting for child processes to exit, and stop restarting child + processes that are marked `respawn` in the inittab. +2. Run the `shutdown` directives in the inittab, in the order + in which they are specified. +3. Send SIGTERM to all processes. +4. Sleep 1 second. +5. (minor details omitted) +6. Call into kernel to poweroff. + +What follows are links to the busybox source code to "prove" the above. + +The `poweroff` command invoked by acpid is the busybox poweroff. +At runtime, we take the following branch: +https://github.com/brgl/busybox//blob/97e9a72c71d0238c9f241612ce4af923c16954c7/init/halt.c#L172-L173 +The `signals[which]` variable is `SIGUSR2` for the `poweroff` "applet". + +The code in `init` that handles `SIGUSR2` is the `check_delayed_signals` function that is called form inside `init`'s main loop. +Code taken at runtime when `poweroff` signals `SIGUSR2`: + +* main loop calls `check_delayed_signals`: https://github.com/brgl/busybox//blob/f35ad3bd1287627fc6ca7cc9c1f48b186257dd87/init/init.c#L1219 +* check_delayed_signals detects `SIGUSR2` was signalled and calls `halt_reboot_pwoff`, this call will never return: https://github.com/brgl/busybox//blob/f35ad3bd1287627fc6ca7cc9c1f48b186257dd87/init/init.c#L996-L1005 +* it calls `run_shutdown_and_kill_processes` https://github.com/brgl/busybox//blob/f35ad3bd1287627fc6ca7cc9c1f48b186257dd87/init/init.c#L821 +* Runs `shutdown` actions in the inittab: https://github.com/brgl/busybox//blob/f35ad3bd1287627fc6ca7cc9c1f48b186257dd87/init/init.c#L751-L754 +* SIGTERM, pause, SIGKILL (not relevant for as because we take down postgres & compute_ctl through the shutdown action added in this PR: https://github.com/brgl/busybox//blob/f35ad3bd1287627fc6ca7cc9c1f48b186257dd87/init/init.c#L758-L766 +* Log shutdown and call into kernel: https://github.com/brgl/busybox//blob/f35ad3bd1287627fc6ca7cc9c1f48b186257dd87/init/init.c#L832-L833 + + +The Role Of `vm-builder` in Neon Autoscaling +============================================ + +In Neon's autoscaling, we use `vm-builder` to turn the `neon.git` compute Docker image into a VM. +This means the `vmstarter.sh` will launch the `compute_ctl`, which in turn: +1. waits for a spec +2. gets basebackup from compute +3. launches Postgres +4. waits for Postgres to exit +5. does a sync safekeepers +6. exits itself. + +Neon Control Plane's `suspend_compute` relies on ACPI shutdown +signalling for graceful shutdown of the NeonVM. +If the NeonVM doesn't shut down timely, the pod that contains +the qemu process gets SIGKILLed. 
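The timing contract here is the usual "graceful, then forceful" pattern. The sketch below is not neonvm-runner's code; it only illustrates waiting out a grace period after the polite shutdown request before a hard kill:

```go
// Generic illustration of SIGTERM/ACPI-then-SIGKILL with a deadline — not the
// actual runner implementation.
package main

import (
	"os/exec"
	"syscall"
	"time"
)

func stopWithDeadline(cmd *exec.Cmd, grace time.Duration) error {
	_ = cmd.Process.Signal(syscall.SIGTERM) // the runner would send an ACPI power button event here

	done := make(chan error, 1)
	go func() { done <- cmd.Wait() }()

	select {
	case err := <-done:
		return err // guest powered off within the grace period
	case <-time.After(grace):
		_ = cmd.Process.Kill() // deadline exceeded: hard kill, like Kubernetes' SIGKILL
		return <-done
	}
}

func main() {
	cmd := exec.Command("sleep", "60")
	if err := cmd.Start(); err != nil {
		panic(err)
	}
	_ = stopWithDeadline(cmd, time.Second)
}
```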
+ +What Happens On ACPI Shutdown +============================= + +Here is a mermaid diagram of what happens during shutdown: + + +```mermaid +sequenceDiagram + + participant k8s + participant vmrunner + participant Qemu + participant GuestKernel + participant acpid + participant poweroff + participant init + participant vmstart + participant vmshutdown + participant vmstart.allowed + participant flock as flock vmstart.lock + participant vmstarter.sh + + + + GuestKernel->>init: start + init->>vmstart.allowed: create + activate vmstart.allowed + + init->>vmstart: start + vmstart->>+flock: flock + flock->>vmstart.allowed: check existence + vmstart.allowed->>flock: . + + flock->>+vmstarter.sh: start and wait + Note over vmstarter.sh: workload is running + Note over vmstarter.sh: exits for whatever reason + vmstarter.sh->>-flock: exits + flock->>-vmstart: exits + vmstart->>init: exits + + Note over init: has not yet received shutdown request + Note over init: so, respawn + + init->>vmstart: start + vmstart->>+flock: . + flock->>vmstart.allowed: check existence + vmstart.allowed->>flock: . + + flock->>+vmstarter.sh: start and wait + Note over vmstarter.sh: workload is running + + k8s-)vmrunner: SIGTERM + Note over k8s: will SIGKILL the container,
including QEMU after timeout + vmrunner->>Qemu: send ACPI power button event + Qemu-)GuestKernel: ACPI power button event + GuestKernel->>acpid: . + acpid->>poweroff: . + poweroff->>init: SIGUSR2 + Note over init: will no longer respawn
but also not stop anything either + Note over init: run shutdown actions + init->>vmshutdown: start + vmshutdown->>vmstart.allowed: unlink + deactivate vmstart.allowed + Note over vmstart.allowed: vmstart's existence check
will fail from here on + loop Until we win the flock + vmshutdown-xflock: nonblock try acquire fails + vmshutdown-)vmstarter.sh: signal to shut down + end + vmstarter.sh->>-flock: eventually exits in
response to signal + flock->>-vmstart: exits + vmshutdown->>+flock: nonblock try acquire succeeds + flock->>-vmshutdown: exit immediately + + Note over vmshutdown: we acquired the flock once after removing vmstart.allowed.
This ensures vmstarter.sh is not running. + + vmshutdown->>init: exit + + Note over init: SIGTERM + Note over init: sleep 1 second + Note over init: kill everything + init->>GuestKernel: power off system call + Note over GuestKernel: powers off the machine +``` + +## How It Looks Inside The VM + +In a neon.git-compute-image-turned-vm image, running in staging, it looks like this + +``` +ps -eHo pid,command | cat +... +/neonvm/bin/sh /neonvm/bin/vmstart + 149 flock /neonvm/vmstart.lock -c test -e /neonvm/vmstart.allowed && /neonvm/bin/su-exec postgres /neonvm/bin/sh /neonvm/bin/vmstarter.sh + 150 /bin/sh -c test -e /neonvm/vmstart.allowed && /neonvm/bin/su-exec postgres /neonvm/bin/sh /neonvm/bin/vmstarter.sh + 151 /neonvm/bin/sh /neonvm/bin/vmstarter.sh + 152 /usr/local/bin/compute_ctl -D /var/db/postgres/compute/pgdata -b /usr/local/bin/postgres -C postgresql://cloud_admin@127.0.0.1/postgres?options=-c%20default_transaction_read_only=false --remote-ext-config {"bucket":"neon-dev-extensions-us-east-2","region":"us-east-2"} --compute-id compute-long-flower-94034268 --control-plane-uri http://neon-compute-api.aws.neon.build:9096 + 178 /usr/local/bin/postgres -D /var/db/postgres/compute/pgdata + 182 postgres: checkpointer + 183 postgres: background writer + 185 postgres: walwriter + 186 postgres: autovacuum launcher + 187 postgres: pg_cron launcher + 188 postgres: TimescaleDB Background Worker Launcher + 189 postgres: WAL proposer streaming 0/1FD62B0 + 190 postgres: Local free space monitor + 191 postgres: logical replication launcher + 201 postgres: cloud_admin postgres 127.0.0.1(33860) idle + 204 postgres: cloud_admin postgres ::1(53686) idle +... +``` + +## TLA+ Model Of Shutdown + +The `./shutdown/shutdown.tla` model is a PlusCal specification of the shutdown procedure. + +TLC model checker configuration: + +* Check for deadlocks, there shouldn't be any. +* Check temporal properties `TEMPORAL PROPERTIES` at the bottom of the spec. 
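The real `vmstart`/`vmshutdown` are shell scripts; the Go sketch below only models the handshake shown in the diagram and checked by the TLA+ spec: remove `vmstart.allowed`, then spin on a non-blocking `flock` (asking the workload to stop each time) until the lock is won, which proves no `vmstarter.sh` is running and none can start.

```go
// Model of the vmshutdown handshake (Linux flock) — illustrative, not the
// shell scripts shipped in the VM image.
package main

import (
	"os"
	"syscall"
	"time"
)

func vmshutdown(lockPath, allowedPath string, askWorkloadToStop func()) error {
	// 1. Inhibit new starts: vmstart checks this file while holding the lock.
	if err := os.Remove(allowedPath); err != nil && !os.IsNotExist(err) {
		return err
	}

	f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0o644)
	if err != nil {
		return err
	}
	defer f.Close()

	// 2. Until we win the lock, the current vmstarter.sh is still running;
	//    keep asking it to stop.
	for {
		err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
		if err == nil {
			// Holding the lock after removing vmstart.allowed guarantees that
			// no vmstarter.sh is running and none can start.
			return nil
		}
		if err != syscall.EWOULDBLOCK {
			return err
		}
		askWorkloadToStop() // e.g. pg_ctl stop / signal the workload
		time.Sleep(100 * time.Millisecond)
	}
}

func main() {
	_ = vmshutdown("/tmp/vmstart.lock", "/tmp/vmstart.allowed", func() {})
}
```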
diff --git a/doc/vm-builder/shutdown/shutdown.tla b/doc/vm-builder/shutdown/shutdown.tla new file mode 100644 index 000000000..9d8666481 --- /dev/null +++ b/doc/vm-builder/shutdown/shutdown.tla @@ -0,0 +1,515 @@ +----------------------------- MODULE vmshutdown ----------------------------- + +EXTENDS Sequences, Integers, TLC + +CONSTANT NULL + +(*--algorithm vmshutdown + +variables + start_allowed = TRUE, \* vmstart.allowed + start_allowed_locked = FALSE, \* vmstart.lock + + \* ACPI & unix signal delivery, modeled through variables that are polled/await'ed + shutdown_signal_received = FALSE, + postgres_running = NULL, + postgres_spawn_pending = NULL, + postgres_shutdown_request_pending = NULL, + postgres_next_pids = <<1,2>>, \* bound number of crashes + postgres_exited_pids = {}, + + machine_running = TRUE, + vmshutdown_exited = FALSE, + + \* for temporal invariants + vmstarter_sh_running = FALSE + + +fair process init = "init" +begin + init: + while ~shutdown_signal_received do + either + \* disable respawn loop & run vmshutdown + shutdown_signal_received := TRUE; + or + skip; + end either; + end while; + wait_for_vmshutdown: + await vmshutdown_exited; + poweroff_to_kernel: + machine_running := FALSE; +end process; + +fair process respawn_vmstart = "respawn_vmstart" +variables + respawn_current_postgres_pid = NULL +begin + init: + while ~shutdown_signal_received do + + respawn_flock_enter: + await start_allowed_locked = FALSE; + start_allowed_locked := TRUE; + respawn_check_start_allowed: + if start_allowed then + respawn_launch_vmstarter_sh: + vmstarter_sh_running := TRUE; + respawn_vmstarter_launch_postgres: + postgres_spawn_pending := Head(postgres_next_pids); + respawn_current_postgres_pid := postgres_spawn_pending; + postgres_next_pids := Tail(postgres_next_pids); + respawn_vmstarter_wait_postgres: + await respawn_current_postgres_pid \in postgres_exited_pids; + respawn_vmstarter_sh_exits: + vmstarter_sh_running := FALSE; + else + respawn_not_allowed: + skip; + end if; + respawn_flock_exit: + start_allowed_locked := FALSE; + end while; + +end process; + +fair process postgres = "postgres" +begin + init: + while machine_running do + postgres_wait_to_be_launched: + await ~machine_running \/ postgres_spawn_pending /= NULL; + if ~machine_running then + goto halt; + else + postgres_running := postgres_spawn_pending; + postgres_spawn_pending := NULL; + end if; + + postgres_await_shutdown_or_crash: + + \* bound number of crashes to pids left, otherwise we have infinite state space "until" shutdown signal gets delivered + if Len(postgres_next_pids) > 0 then + either + await postgres_shutdown_request_pending = postgres_running; + or + \* crash / exit on its own + skip; + end either; + else + await postgres_shutdown_request_pending = postgres_running; + end if; + postgres_exited_pids := postgres_exited_pids \union {postgres_running}; + postgres_running := NULL; + end while; + halt: + skip; +end process; + +fair process vmshutdown = "vmshutdown" +begin + init: + await shutdown_signal_received; + + vmshutdown_inhibit_new_starts: + start_allowed := FALSE; \* rm the vmstart.allowed file on disk + vmshutdown_kill_running_command: + \* if there was a command running from before vmshutdown_inhibit_new_starts, + \* it is holding the lock. 
+ if start_allowed_locked = TRUE then \* use trylock to implement this + vmshutdown_pg_ctl_stop: + \* the `if` models signal loss + if postgres_running /= NULL then + postgres_shutdown_request_pending := postgres_running; + end if; + goto vmshutdown_kill_running_command; + end if; + vmshutdown_done: + vmshutdown_exited := TRUE; + skip; +end process; + + +end algorithm; *) +\* BEGIN TRANSLATION (chksum(pcal) = "d013f716" /\ chksum(tla) = "e8963d9a") +\* Label init of process init at line 31 col 5 changed to init_ +\* Label init of process respawn_vmstart at line 50 col 5 changed to init_r +\* Label init of process postgres at line 80 col 5 changed to init_p +\* Label init of process vmshutdown at line 113 col 9 changed to init_v +VARIABLES start_allowed, start_allowed_locked, shutdown_signal_received, + postgres_running, postgres_spawn_pending, + postgres_shutdown_request_pending, postgres_next_pids, + postgres_exited_pids, machine_running, vmshutdown_exited, + vmstarter_sh_running, pc, respawn_current_postgres_pid + +vars == << start_allowed, start_allowed_locked, shutdown_signal_received, + postgres_running, postgres_spawn_pending, + postgres_shutdown_request_pending, postgres_next_pids, + postgres_exited_pids, machine_running, vmshutdown_exited, + vmstarter_sh_running, pc, respawn_current_postgres_pid >> + +ProcSet == {"init"} \cup {"respawn_vmstart"} \cup {"postgres"} \cup {"vmshutdown"} + +Init == (* Global variables *) + /\ start_allowed = TRUE + /\ start_allowed_locked = FALSE + /\ shutdown_signal_received = FALSE + /\ postgres_running = NULL + /\ postgres_spawn_pending = NULL + /\ postgres_shutdown_request_pending = NULL + /\ postgres_next_pids = <<1,2>> + /\ postgres_exited_pids = {} + /\ machine_running = TRUE + /\ vmshutdown_exited = FALSE + /\ vmstarter_sh_running = FALSE + (* Process respawn_vmstart *) + /\ respawn_current_postgres_pid = NULL + /\ pc = [self \in ProcSet |-> CASE self = "init" -> "init_" + [] self = "respawn_vmstart" -> "init_r" + [] self = "postgres" -> "init_p" + [] self = "vmshutdown" -> "init_v"] + +init_ == /\ pc["init"] = "init_" + /\ IF ~shutdown_signal_received + THEN /\ \/ /\ shutdown_signal_received' = TRUE + \/ /\ TRUE + /\ UNCHANGED shutdown_signal_received + /\ pc' = [pc EXCEPT !["init"] = "init_"] + ELSE /\ pc' = [pc EXCEPT !["init"] = "wait_for_vmshutdown"] + /\ UNCHANGED shutdown_signal_received + /\ UNCHANGED << start_allowed, start_allowed_locked, postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, postgres_next_pids, + postgres_exited_pids, machine_running, + vmshutdown_exited, vmstarter_sh_running, + respawn_current_postgres_pid >> + +wait_for_vmshutdown == /\ pc["init"] = "wait_for_vmshutdown" + /\ vmshutdown_exited + /\ pc' = [pc EXCEPT !["init"] = "poweroff_to_kernel"] + /\ UNCHANGED << start_allowed, start_allowed_locked, + shutdown_signal_received, + postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, + postgres_exited_pids, machine_running, + vmshutdown_exited, vmstarter_sh_running, + respawn_current_postgres_pid >> + +poweroff_to_kernel == /\ pc["init"] = "poweroff_to_kernel" + /\ machine_running' = FALSE + /\ pc' = [pc EXCEPT !["init"] = "Done"] + /\ UNCHANGED << start_allowed, start_allowed_locked, + shutdown_signal_received, + postgres_running, postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, postgres_exited_pids, + vmshutdown_exited, vmstarter_sh_running, + respawn_current_postgres_pid >> + +init == init_ \/ 
wait_for_vmshutdown \/ poweroff_to_kernel + +init_r == /\ pc["respawn_vmstart"] = "init_r" + /\ IF ~shutdown_signal_received + THEN /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "respawn_flock_enter"] + ELSE /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "Done"] + /\ UNCHANGED << start_allowed, start_allowed_locked, + shutdown_signal_received, postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, postgres_exited_pids, + machine_running, vmshutdown_exited, + vmstarter_sh_running, respawn_current_postgres_pid >> + +respawn_flock_enter == /\ pc["respawn_vmstart"] = "respawn_flock_enter" + /\ start_allowed_locked = FALSE + /\ start_allowed_locked' = TRUE + /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "respawn_check_start_allowed"] + /\ UNCHANGED << start_allowed, shutdown_signal_received, + postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, + postgres_exited_pids, machine_running, + vmshutdown_exited, vmstarter_sh_running, + respawn_current_postgres_pid >> + +respawn_check_start_allowed == /\ pc["respawn_vmstart"] = "respawn_check_start_allowed" + /\ IF start_allowed + THEN /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "respawn_launch_vmstarter_sh"] + ELSE /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "respawn_not_allowed"] + /\ UNCHANGED << start_allowed, + start_allowed_locked, + shutdown_signal_received, + postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, + postgres_exited_pids, + machine_running, + vmshutdown_exited, + vmstarter_sh_running, + respawn_current_postgres_pid >> + +respawn_launch_vmstarter_sh == /\ pc["respawn_vmstart"] = "respawn_launch_vmstarter_sh" + /\ vmstarter_sh_running' = TRUE + /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "respawn_vmstarter_launch_postgres"] + /\ UNCHANGED << start_allowed, + start_allowed_locked, + shutdown_signal_received, + postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, + postgres_exited_pids, + machine_running, + vmshutdown_exited, + respawn_current_postgres_pid >> + +respawn_vmstarter_launch_postgres == /\ pc["respawn_vmstart"] = "respawn_vmstarter_launch_postgres" + /\ postgres_spawn_pending' = Head(postgres_next_pids) + /\ respawn_current_postgres_pid' = postgres_spawn_pending' + /\ postgres_next_pids' = Tail(postgres_next_pids) + /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "respawn_vmstarter_wait_postgres"] + /\ UNCHANGED << start_allowed, + start_allowed_locked, + shutdown_signal_received, + postgres_running, + postgres_shutdown_request_pending, + postgres_exited_pids, + machine_running, + vmshutdown_exited, + vmstarter_sh_running >> + +respawn_vmstarter_wait_postgres == /\ pc["respawn_vmstart"] = "respawn_vmstarter_wait_postgres" + /\ respawn_current_postgres_pid \in postgres_exited_pids + /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "respawn_vmstarter_sh_exits"] + /\ UNCHANGED << start_allowed, + start_allowed_locked, + shutdown_signal_received, + postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, + postgres_exited_pids, + machine_running, + vmshutdown_exited, + vmstarter_sh_running, + respawn_current_postgres_pid >> + +respawn_vmstarter_sh_exits == /\ pc["respawn_vmstart"] = "respawn_vmstarter_sh_exits" + /\ vmstarter_sh_running' = FALSE + /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "respawn_flock_exit"] + /\ UNCHANGED << start_allowed, + start_allowed_locked, + shutdown_signal_received, + 
postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, + postgres_exited_pids, + machine_running, + vmshutdown_exited, + respawn_current_postgres_pid >> + +respawn_not_allowed == /\ pc["respawn_vmstart"] = "respawn_not_allowed" + /\ TRUE + /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "respawn_flock_exit"] + /\ UNCHANGED << start_allowed, start_allowed_locked, + shutdown_signal_received, + postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, + postgres_exited_pids, machine_running, + vmshutdown_exited, vmstarter_sh_running, + respawn_current_postgres_pid >> + +respawn_flock_exit == /\ pc["respawn_vmstart"] = "respawn_flock_exit" + /\ start_allowed_locked' = FALSE + /\ pc' = [pc EXCEPT !["respawn_vmstart"] = "init_r"] + /\ UNCHANGED << start_allowed, shutdown_signal_received, + postgres_running, postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, postgres_exited_pids, + machine_running, vmshutdown_exited, + vmstarter_sh_running, + respawn_current_postgres_pid >> + +respawn_vmstart == init_r \/ respawn_flock_enter + \/ respawn_check_start_allowed + \/ respawn_launch_vmstarter_sh + \/ respawn_vmstarter_launch_postgres + \/ respawn_vmstarter_wait_postgres + \/ respawn_vmstarter_sh_exits \/ respawn_not_allowed + \/ respawn_flock_exit + +init_p == /\ pc["postgres"] = "init_p" + /\ IF machine_running + THEN /\ pc' = [pc EXCEPT !["postgres"] = "postgres_wait_to_be_launched"] + ELSE /\ pc' = [pc EXCEPT !["postgres"] = "halt"] + /\ UNCHANGED << start_allowed, start_allowed_locked, + shutdown_signal_received, postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, postgres_exited_pids, + machine_running, vmshutdown_exited, + vmstarter_sh_running, respawn_current_postgres_pid >> + +postgres_wait_to_be_launched == /\ pc["postgres"] = "postgres_wait_to_be_launched" + /\ ~machine_running \/ postgres_spawn_pending /= NULL + /\ IF ~machine_running + THEN /\ pc' = [pc EXCEPT !["postgres"] = "halt"] + /\ UNCHANGED << postgres_running, + postgres_spawn_pending >> + ELSE /\ postgres_running' = postgres_spawn_pending + /\ postgres_spawn_pending' = NULL + /\ pc' = [pc EXCEPT !["postgres"] = "postgres_await_shutdown_or_crash"] + /\ UNCHANGED << start_allowed, + start_allowed_locked, + shutdown_signal_received, + postgres_shutdown_request_pending, + postgres_next_pids, + postgres_exited_pids, + machine_running, + vmshutdown_exited, + vmstarter_sh_running, + respawn_current_postgres_pid >> + +postgres_await_shutdown_or_crash == /\ pc["postgres"] = "postgres_await_shutdown_or_crash" + /\ IF Len(postgres_next_pids) > 0 + THEN /\ \/ /\ postgres_shutdown_request_pending = postgres_running + \/ /\ TRUE + ELSE /\ postgres_shutdown_request_pending = postgres_running + /\ postgres_exited_pids' = (postgres_exited_pids \union {postgres_running}) + /\ postgres_running' = NULL + /\ pc' = [pc EXCEPT !["postgres"] = "init_p"] + /\ UNCHANGED << start_allowed, + start_allowed_locked, + shutdown_signal_received, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, + machine_running, + vmshutdown_exited, + vmstarter_sh_running, + respawn_current_postgres_pid >> + +halt == /\ pc["postgres"] = "halt" + /\ TRUE + /\ pc' = [pc EXCEPT !["postgres"] = "Done"] + /\ UNCHANGED << start_allowed, start_allowed_locked, + shutdown_signal_received, postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, 
postgres_next_pids, + postgres_exited_pids, machine_running, + vmshutdown_exited, vmstarter_sh_running, + respawn_current_postgres_pid >> + +postgres == init_p \/ postgres_wait_to_be_launched + \/ postgres_await_shutdown_or_crash \/ halt + +init_v == /\ pc["vmshutdown"] = "init_v" + /\ shutdown_signal_received + /\ pc' = [pc EXCEPT !["vmshutdown"] = "vmshutdown_inhibit_new_starts"] + /\ UNCHANGED << start_allowed, start_allowed_locked, + shutdown_signal_received, postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, postgres_exited_pids, + machine_running, vmshutdown_exited, + vmstarter_sh_running, respawn_current_postgres_pid >> + +vmshutdown_inhibit_new_starts == /\ pc["vmshutdown"] = "vmshutdown_inhibit_new_starts" + /\ start_allowed' = FALSE + /\ pc' = [pc EXCEPT !["vmshutdown"] = "vmshutdown_kill_running_command"] + /\ UNCHANGED << start_allowed_locked, + shutdown_signal_received, + postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, + postgres_exited_pids, + machine_running, + vmshutdown_exited, + vmstarter_sh_running, + respawn_current_postgres_pid >> + +vmshutdown_kill_running_command == /\ pc["vmshutdown"] = "vmshutdown_kill_running_command" + /\ IF start_allowed_locked = TRUE + THEN /\ pc' = [pc EXCEPT !["vmshutdown"] = "vmshutdown_pg_ctl_stop"] + ELSE /\ pc' = [pc EXCEPT !["vmshutdown"] = "vmshutdown_done"] + /\ UNCHANGED << start_allowed, + start_allowed_locked, + shutdown_signal_received, + postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, + postgres_exited_pids, + machine_running, + vmshutdown_exited, + vmstarter_sh_running, + respawn_current_postgres_pid >> + +vmshutdown_pg_ctl_stop == /\ pc["vmshutdown"] = "vmshutdown_pg_ctl_stop" + /\ IF postgres_running /= NULL + THEN /\ postgres_shutdown_request_pending' = postgres_running + ELSE /\ TRUE + /\ UNCHANGED postgres_shutdown_request_pending + /\ pc' = [pc EXCEPT !["vmshutdown"] = "vmshutdown_kill_running_command"] + /\ UNCHANGED << start_allowed, start_allowed_locked, + shutdown_signal_received, + postgres_running, + postgres_spawn_pending, + postgres_next_pids, + postgres_exited_pids, + machine_running, vmshutdown_exited, + vmstarter_sh_running, + respawn_current_postgres_pid >> + +vmshutdown_done == /\ pc["vmshutdown"] = "vmshutdown_done" + /\ vmshutdown_exited' = TRUE + /\ TRUE + /\ pc' = [pc EXCEPT !["vmshutdown"] = "Done"] + /\ UNCHANGED << start_allowed, start_allowed_locked, + shutdown_signal_received, postgres_running, + postgres_spawn_pending, + postgres_shutdown_request_pending, + postgres_next_pids, postgres_exited_pids, + machine_running, vmstarter_sh_running, + respawn_current_postgres_pid >> + +vmshutdown == init_v \/ vmshutdown_inhibit_new_starts + \/ vmshutdown_kill_running_command + \/ vmshutdown_pg_ctl_stop \/ vmshutdown_done + +(* Allow infinite stuttering to prevent deadlock on termination. 
*) +Terminating == /\ \A self \in ProcSet: pc[self] = "Done" + /\ UNCHANGED vars + +Next == init \/ respawn_vmstart \/ postgres \/ vmshutdown + \/ Terminating + +Spec == /\ Init /\ [][Next]_vars + /\ WF_vars(init) + /\ WF_vars(respawn_vmstart) + /\ WF_vars(postgres) + /\ WF_vars(vmshutdown) + +Termination == <>(\A self \in ProcSet: pc[self] = "Done") + +\* END TRANSLATION + +\* TEMPORAL PROPERTIES: +\* If we signal ACPI shutdown, vmstart eventually stops running and never restarts +ShutdownSignalWorks == (shutdown_signal_received ~> ([](~vmstarter_sh_running))) +\* Before we signal ACPI shutdown, respawn works +RespawnBeforeShutdownCanRestartWithoutPendingShutdown == TRUE \* TODO: how to express this? + +============================================================================= +\* Modification History +\* Last modified Mon Sep 25 11:19:20 CEST 2023 by cs +\* Created Sun Sep 24 12:17:50 CEST 2023 by cs diff --git a/go.mod b/go.mod index 219dd64f7..b91fed8c1 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,14 @@ module github.com/neondatabase/autoscaling go 1.20 +// Replace directives from github.com/cilium/cilium. Keep in sync when updating Cilium! +replace ( + github.com/miekg/dns => github.com/cilium/dns v1.1.51-0.20220729113855-5b94b11b46fc + github.com/optiopay/kafka => github.com/cilium/kafka v0.0.0-20180809090225-01ce283b732b + go.universe.tf/metallb => github.com/cilium/metallb v0.1.1-0.20220829170633-5d7dfb1129f7 + sigs.k8s.io/controller-tools => github.com/cilium/controller-tools v0.6.2 +) + replace ( k8s.io/api => k8s.io/api v0.25.11 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.25.11 @@ -41,12 +49,11 @@ require ( github.com/digitalocean/go-qemu v0.0.0-20220826173844-d5f5e3ceed89 github.com/docker/docker v20.10.24+incompatible github.com/docker/libnetwork v0.8.0-dev.2.0.20210525090646-64b7a4574d14 - github.com/elastic/go-sysinfo v1.9.0 github.com/google/uuid v1.3.0 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 github.com/k8snetworkplumbingwg/whereabouts v0.6.1 github.com/kdomanski/iso9660 v0.3.3 - github.com/lib/pq v1.10.7 + github.com/lithammer/shortuuid v3.0.0+incompatible github.com/onsi/ginkgo/v2 v2.6.1 github.com/onsi/gomega v1.24.2 github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 @@ -61,12 +68,13 @@ require ( k8s.io/client-go v0.25.11 k8s.io/klog/v2 v2.80.1 k8s.io/kubernetes v1.25.11 + nhooyr.io/websocket v1.8.7 sigs.k8s.io/controller-runtime v0.13.1 sigs.k8s.io/controller-tools v0.10.0 ) require ( - cloud.google.com/go/compute v1.14.0 // indirect + cloud.google.com/go/compute v1.15.1 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect github.com/Azure/go-autorest v14.2.0+incompatible // indirect @@ -90,7 +98,6 @@ require ( github.com/docker/distribution v2.8.2+incompatible // indirect github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-units v0.5.0 // indirect - github.com/elastic/go-windows v1.0.0 // indirect github.com/emicklei/go-restful/v3 v3.8.0 // indirect github.com/evanphx/json-patch v5.6.0+incompatible // indirect github.com/evanphx/json-patch/v5 v5.6.0 // indirect @@ -116,9 +123,9 @@ require ( github.com/imdario/mergo v0.3.12 // indirect github.com/inconshreveable/mousetrap v1.0.1 // indirect github.com/ishidawataru/sctp v0.0.0-20210707070123-9a39160e9062 // indirect - github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 // indirect github.com/josharian/intern 
v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.10.3 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.11 // indirect github.com/mattn/go-isatty v0.0.14 // indirect @@ -159,7 +166,7 @@ require ( golang.org/x/crypto v0.5.0 // indirect golang.org/x/mod v0.8.0 // indirect golang.org/x/net v0.8.0 // indirect - golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 // indirect + golang.org/x/oauth2 v0.4.0 // indirect golang.org/x/sync v0.1.0 // indirect golang.org/x/sys v0.6.0 // indirect golang.org/x/term v0.6.0 // indirect @@ -168,15 +175,14 @@ require ( golang.org/x/tools v0.6.0 // indirect gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20230106154932-a12b697841d9 // indirect - google.golang.org/grpc v1.51.0 // indirect + google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f // indirect + google.golang.org/grpc v1.53.0 // indirect google.golang.org/protobuf v1.28.1 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - howett.net/plist v0.0.0-20181124034731-591f970eefbb // indirect - k8s.io/apiextensions-apiserver v0.26.1 // indirect + k8s.io/apiextensions-apiserver v0.25.11 // indirect k8s.io/cloud-provider v0.0.0 // indirect k8s.io/component-base v0.25.11 // indirect k8s.io/component-helpers v0.25.11 // indirect diff --git a/go.sum b/go.sum index 40a08697b..67ba25037 100644 --- a/go.sum +++ b/go.sum @@ -13,18 +13,31 @@ cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKV cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= +cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= +cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= +cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= +cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= +cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= +cloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAVY= +cloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM= +cloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY= +cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ= +cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= +cloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4= +cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= 
-cloud.google.com/go/compute v1.14.0 h1:hfm2+FfxVmnRlh6LpB7cg1ZNU+5edAHmW679JePztk0= -cloud.google.com/go/compute v1.14.0/go.mod h1:YfLtxrj9sU4Yxv+sXzZkyPjEyPBZfXHUvjxega5vAdo= +cloud.google.com/go/compute v1.15.1 h1:7UGq3QknM33pw5xATlpzeoomNxsacIVvTqTTvbfajmE= +cloud.google.com/go/compute v1.15.1/go.mod h1:bjjoF/NtFUrkD/urWfdHaKuOPDR5nWIs63rR+SXhcpA= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= +cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= @@ -59,9 +72,12 @@ github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbi github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2yDvg= github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE= +github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= +github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -70,6 +86,12 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5 github.com/alessio/shellescape v1.4.1 h1:V7yhSDDn8LP4lc4jS8pFkt0zCnzVJlG5JXy9BVKJUX0= github.com/alessio/shellescape v1.4.1/go.mod h1:PZAiSCk0LJaZkiCSkPv8qIobYglO3FPpyFjDCtHLS30= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20220418222510-f25a4f6275ed/go.mod h1:F7bn7fEU90QkQ3tnmaTx3LTKLEDqnwWODIYppRQ5hnY= +github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= +github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= +github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= +github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= 
github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiUKYMaxlTDM= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= @@ -78,10 +100,15 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= +github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= +github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= +github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -92,30 +119,51 @@ github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5P github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/cilium/cilium v1.11.16 h1:7e4TAkMZeDJHbEBOoB1sBbp+fhGe0sdJYxiMIW4LIZ4= github.com/cilium/cilium v1.11.16/go.mod h1:79NKzx+ZKD6gcr3Y6RqdsoP4vzUj0CLFJUdQrFBClYw= +github.com/cilium/controller-tools v0.6.2 h1:oIkqAzqncKsm+lQFJVP6n+bqHOVs9nUZ06hgZ4PxlMM= +github.com/cilium/controller-tools v0.6.2/go.mod h1:oaeGpjXn6+ZSEIQkUe/+3I40PNiDYp9aeawbt3xTgJ8= +github.com/cilium/dns v1.1.51-0.20220729113855-5b94b11b46fc/go.mod h1:e3IlAVfNqAllflbibAZEWOXOQ+Ynzk/dDozDxY7XnME= github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4= github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= 
+github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo= +github.com/cockroachdb/errors v1.2.4/go.mod h1:rQD95gz6FARkaKkQXUksEje/d9a6wBJoCr5oaCLELYA= +github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= github.com/containerd/cgroups/v3 v3.0.1 h1:4hfGvu8rfGIwVIDd+nLzn/B9ZXx4BcCjzt5ToenJRaE= github.com/containerd/cgroups/v3 v3.0.1/go.mod h1:/vtwk1VXrtoa5AaZLkypuOJgA/6DyPMZHJPGQNtlHnw= github.com/containernetworking/cni v1.0.1 h1:9OIL/sZmMYDBe+G8svzILAlulUpaDTUjeAbtH/JNLBo= github.com/containernetworking/cni v1.0.1/go.mod h1:AKuhXbN5EzmD4yTNtfSsX3tPcmtrBI6QcRV0NiNt15Y= +github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= +github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-iptables v0.6.0 h1:is9qnZMPYjLd8LYqmm/qlE+wwEgJIkTYdhV3rfZo4jk= github.com/coreos/go-iptables v0.6.0/go.mod h1:Qe8Bv2Xik5FyTXwgIbLAnv2sWSBmvWdFETJConOQ//Q= +github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/digitalocean/go-libvirt v0.0.0-20220804181439-8648fbde413e h1:SCnqm8SjSa0QqRxXbo5YY//S+OryeJioe17nK+iDZpg= github.com/digitalocean/go-libvirt v0.0.0-20220804181439-8648fbde413e/go.mod h1:o129ljs6alsIQTc8d6eweihqpmmrbxZ2g1jhgjhPykI= github.com/digitalocean/go-qemu v0.0.0-20220826173844-d5f5e3ceed89 h1:2/52ma1zkjfR9aIrAX1F9H24rpj+PkCDkAwhQgqVR/A= @@ -133,38 +181,47 @@ github.com/docker/libnetwork 
v0.8.0-dev.2.0.20210525090646-64b7a4574d14/go.mod h github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/elastic/go-sysinfo v1.9.0 h1:usICqY/Nw4Mpn9f4LdtpFrKxXroJDe81GaxxUlCckIo= -github.com/elastic/go-sysinfo v1.9.0/go.mod h1:eBD1wEGVaRnRLGecc9iG1z8eOv5HnEdz9+nWd8UAxcE= -github.com/elastic/go-windows v1.0.0 h1:qLURgZFkkrYyTTkvYpsZIgf83AUsdIHfvlJaqaZ7aSY= -github.com/elastic/go-windows v1.0.0/go.mod h1:TsU0Nrp7/y3+VwE82FoZF8gC/XFg/Elz6CcloAxnPgU= +github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= github.com/emicklei/go-restful/v3 v3.8.0 h1:eCZ8ulSerjdAiaNpF7GxXIE7ZCMo1moN1qX+S609eVw= github.com/emicklei/go-restful/v3 v3.8.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= +github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= +github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.6.0 h1:b91NhWfaz02IuVxO9faSllyAtNXHMPkC5J8sJCLunww= github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4= github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= +github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/fatih/color v1.12.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/flowstack/go-jsonschema v0.1.1/go.mod h1:yL7fNggx1o8rm9RlgXv7hTBWxdBM0rVwpMwimd3F3N0= github.com/form3tech-oss/jwt-go v3.2.3+incompatible 
h1:7ZaBxOI7TMoYBfyA3cQHErNNyAWIKUMIwqxEtgHOs5c= +github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/getkin/kin-openapi v0.76.0/go.mod h1:660oXbgy5JFMKreazJaQTw7o+X00qeSyhcnluiMv+Xg= +github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= +github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= +github.com/gin-gonic/gin v1.6.3 h1:ahKqKTFpO5KTPHxWZjEdPScmYaGtLo8Y4DMHoEsnp14= +github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -176,6 +233,8 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9 github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= +github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= @@ -185,24 +244,45 @@ github.com/go-logr/zapr v1.2.3/go.mod h1:eIauM6P8qSvTw5o2ez6UEAfGjQKrxQTl5EoK+Qa github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY= github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= +github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE69AqPYEJeo/TWfEeg= github.com/go-openapi/jsonreference v0.20.0 h1:MYlu0sBgChmCfJxxUKZ8g1cPWFOB37YSZqewK7OKeyA= github.com/go-openapi/jsonreference v0.20.0/go.mod h1:Ag74Ico3lPc+zR+qjn4XBUmXymS4zJbYVCZmcgkasdo= github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-openapi/swag v0.21.1 h1:wm0rhTb5z7qpJRHBdPOMuY4QjVUMbF6/kwoYeRAOrKU= github.com/go-openapi/swag v0.21.1/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= +github.com/go-playground/assert/v2 v2.0.1/go.mod 
h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q= +github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= +github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no= +github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= +github.com/go-playground/validator/v10 v10.2.0 h1:KgJ0snyC2R9VXYN2rneOtQcw5aHQB1Vv0sFl1UcHBOY= +github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= +github.com/gobuffalo/flect v0.2.3/go.mod h1:vmkQwuZYhN5Pc4ljYQZzP+1sq+NEkK+lh20jmEmX3jc= github.com/gobuffalo/flect v0.3.0 h1:erfPWM+K1rFNIQeRPdeEXxo8yFr/PO17lhRnS8FUrtk= github.com/gobuffalo/flect v0.3.0/go.mod h1:5pf3aGnsvqvCj50AVni7mJJF8ICxGZ8HomberC3pXLE= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee h1:s+21KNqlpePfkah2I+gwHF8xmJWRjooY+5248k6m4A0= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= +github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8= +github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= +github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.6 h1:mkgN1ofwASrYnJ5W6U/BxG15eXXXjirgZc7CLqkcaro= github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= github.com/golang-jwt/jwt/v4 v4.2.0 h1:besgBTC8w8HjP6NzQdxwKH9Z5oQMZ24ThTrHp3cZ8eU= github.com/golang-jwt/jwt/v4 v4.2.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -215,6 +295,8 @@ github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod 
h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= +github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= +github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -230,11 +312,16 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= +github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= +github.com/google/cel-go v0.12.6/go.mod h1:Jk7ljRzLBhkmiAwBoUxB1sZSCVBAzkqPF25olK/iRDw= +github.com/google/gnostic v0.5.7-v3refs/go.mod h1:73MKFl6jIHelAJNaBGFzt3SPtZULs9dYrGFt8OiIsHQ= github.com/google/gnostic v0.6.9 h1:ZK/5VhkoX835RikCHpSUJV9a+S3e1zLh59YnyWeBW+0= github.com/google/gnostic v0.6.9/go.mod h1:Nm8234We1lq6iB9OmlgNv3nH91XLLVZHCDayfA3xq+E= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -244,15 +331,22 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 
v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= +github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= +github.com/google/martian/v3 v3.2.1/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= @@ -260,42 +354,82 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 
h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= +github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= +github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= +github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= +github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= +github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= +github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= +github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= +github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU= github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= +github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc= github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/ishidawataru/sctp v0.0.0-20210707070123-9a39160e9062 h1:G1+wBT0dwjIrBdLy0MIG0i+E4CQxEnedHXdauJEIH6g= github.com/ishidawataru/sctp v0.0.0-20210707070123-9a39160e9062/go.mod h1:co9pwDoBCm1kGxawmb4sPq0cSIOOWNPT4KnHotMP1Zg= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= 
-github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 h1:rp+c0RAYOWj8l6qbCUTSiRLG/iKnW3K3/QfPPuSsBt4= -github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901/go.mod h1:Z86h9688Y0wesXCyonoVr47MasHilkuLMqGhRZ4Hpak= +github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ= +github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 h1:VzM3TYHDgqPkettiP6I6q2jOeQFL4nrJM+UcAc4f6Fs= @@ -304,10 +438,15 @@ github.com/k8snetworkplumbingwg/whereabouts v0.6.1 h1:3pfShDMF9+/7ijzKUPezoBqN2I github.com/k8snetworkplumbingwg/whereabouts v0.6.1/go.mod h1:FbmUjZg27cI6om0IAc+NV5Ur+IKwHyqdLaeR0SGfWJc= github.com/kdomanski/iso9660 v0.3.3 h1:cNwM9L2L1Hzc5hZWGy6fPJ92UyWDccaY69DmEPlfDNY= github.com/kdomanski/iso9660 v0.3.3/go.mod h1:K+UlIGxKgtrdAWyoigPnFbeQLVs/Xudz4iztWFThBwo= +github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.10.3 h1:OP96hzwJVBIHYU52pVTI6CczrxPvrGfgqF9N5eTO0Q8= +github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= @@ -316,24 +455,42 @@ github.com/kr/pty v1.1.1/go.mod 
h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/lib/pq v1.10.7 h1:p7ZhMD+KsSRozJr34udlUrhboJwWAgCg34+/ZZNvZZw= -github.com/lib/pq v1.10.7/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= +github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= +github.com/lithammer/shortuuid v3.0.0+incompatible h1:NcD0xWW/MZYXEHa6ITy6kaXN5nwm/V115vj2YXfhS0w= +github.com/lithammer/shortuuid v3.0.0+incompatible/go.mod h1:FR74pbAuElzOUuenUHTK2Tciko1/vKuIKS9dSkDrA4w= +github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.11 h1:nQ+aFkoE2TMGc0b68U2OKSexC+eq46+XwZzWXHRmPYs= github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI= github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= +github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= +github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= +github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= +github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/mitchellh/mapstructure v1.4.1/go.mod 
h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78= github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= +github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6/go.mod h1:E2VnQOmVuvZB6UYnnDB0qG5Nq/1tD9acaOpo6xmt0Kw= github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -344,21 +501,33 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= +github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.13.0/go.mod h1:+REjRxOmWfHCjfv9TTWB1jD1Frx4XydAD3zm1lskyM0= +github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= +github.com/onsi/ginkgo/v2 v2.1.3/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= +github.com/onsi/ginkgo/v2 v2.1.4/go.mod h1:um6tUpWM/cxCK3/FK8BXqEiUMUwRgSM4JXG47RKZmLU= +github.com/onsi/ginkgo/v2 v2.1.6/go.mod h1:MEH45j8TBi6u9BMogfbp0stKC5cdGjumZj5Y7AG4VIk= github.com/onsi/ginkgo/v2 v2.6.1 h1:1xQPCjcqYw/J5LchOcp4/2q/jzJFjiAOc25chhnDw+Q= github.com/onsi/ginkgo/v2 v2.6.1/go.mod h1:yjiuMwPokqY1XauOgju45q3sJt6VzQ/Fict1LFVcsAo= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.14.0/go.mod h1:cIuvLEne0aoVhAgh/O6ac0Op8WWw9H6eYCriF+tEHG0= +github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= +github.com/onsi/gomega v1.19.0/go.mod 
h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= +github.com/onsi/gomega v1.20.1/go.mod h1:DtrZpjmvpn2mPm4YWQa0/ALMDj9v4YxLgojwPeREyVo= github.com/onsi/gomega v1.24.2 h1:J/tulyYK6JwBldPViHJReihxxZ+22FHs0piGjQAvoUE= github.com/onsi/gomega v1.24.2/go.mod h1:gs3J10IS7Z7r7eXRoNJIrNqU4ToQukCJhFtKrWgHWnk= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -369,13 +538,22 @@ github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3 github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.10.0 h1:rAiKF8hTcgLI3w0DHm6i0ylVVcOrlgR1kK99DRLDhyU= github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= +github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= +github.com/pquerna/cachecontrol v0.1.0/go.mod h1:NrUG3Z7Rdu85UNR3vm7SOsl1nFIeSiQnrHV5K9mBcUI= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= @@ -389,6 +567,8 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1: github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4= github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= +github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= @@ -396,28 +576,54 @@ github.com/prometheus/common v0.32.1/go.mod 
h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+ github.com/prometheus/common v0.37.0 h1:ccBbHCgIiT9uSoFY0vX8H3zsNR5eLt17/RQLUvn8pXE= github.com/prometheus/common v0.37.0/go.mod h1:phzohg0JFMnBEFGxTDbfu3QyL5GI8gTQJFhYO5B3mfA= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.8.0 h1:ODq8ZFEaYeCaZOJlZZdJA2AbQR98dSHSM1KW/You5mo= github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4= +github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZKJ48R1S7H23Ji7oFO5Bw= +github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= +github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= +github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= +github.com/spf13/afero v1.6.0/go.mod 
h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= +github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo= +github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk= +github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g= github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= +github.com/spf13/viper v1.8.1/go.mod h1:o0Pch8wJ9BVSWGQMbra6iw0oQ5oktSIBaujf1rJH9Ns= github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -433,9 +639,16 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= +github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 h1:uruHq4dN7GR16kFc5fp3d1RIYzJW5onx8Ybykw2YQFA= +github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tychoish/fun v0.8.5 h1:8uTFk2fG8mxDyRmqMj6llKE8+vTuQRclUkl0/tyYwAU= github.com/tychoish/fun v0.8.5/go.mod h1:84A+BwGecz23UotmbB4mtvVS5ZcsZpspecduxpwF/XM= +github.com/ugorji/go v1.1.7 h1:/68gy2h+1mWMrwZFeD1kQialdSzAb432dtpeJ42ovdo= +github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= +github.com/ugorji/go/codec v1.1.7 h1:2SvQaVZ1ouYrrKKwoSk2pzd4A9evlKJb9oTL+OaLUSs= +github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/vishvananda/netlink v1.1.1-0.20220125195016-0639e7e787ba h1:MU5oPE25XZhDS8Z0xFG0/1ERBEu5rZIw62TImubLusU= github.com/vishvananda/netlink v1.1.1-0.20220125195016-0639e7e787ba/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= @@ -445,27 +658,44 @@ github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2 github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= 
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= +go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= +go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= +go.etcd.io/etcd/api/v3 v3.5.4/go.mod h1:5GB2vv4A4AOn3yk7MftYGHkUfGtDHnEraIjym4dYz5A= go.etcd.io/etcd/api/v3 v3.5.6 h1:Cy2qx3npLcYqTKqGJzMypnMv2tiRyifZJ17BlWIWA7A= go.etcd.io/etcd/api/v3 v3.5.6/go.mod h1:KFtNaxGDw4Yx/BA4iPPwevUTAuqcsPxzyX8PHydchN8= +go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= +go.etcd.io/etcd/client/pkg/v3 v3.5.4/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= go.etcd.io/etcd/client/pkg/v3 v3.5.6 h1:TXQWYceBKqLp4sa87rcPs11SXxUA/mHwH975v+BDvLU= go.etcd.io/etcd/client/pkg/v3 v3.5.6/go.mod h1:ggrwbk069qxpKPq8/FKkQ3Xq9y39kbFR4LnKszpRXeQ= +go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ= go.etcd.io/etcd/client/v2 v2.305.4 h1:Dcx3/MYyfKcPNLpR4VVQUP5KgYrBeJtktBwEKkw08Ao= +go.etcd.io/etcd/client/v2 v2.305.4/go.mod h1:Ud+VUwIi9/uQHOMA+4ekToJ12lTxlv0zB/+DHwTGEbU= +go.etcd.io/etcd/client/v3 v3.5.4/go.mod h1:ZaRkVgBZC+L+dLCjTcF1hRXpgZXQPOvnA/Ak/gq3kiY= go.etcd.io/etcd/client/v3 v3.5.6 h1:coLs69PWCXE9G4FKquzNaSHrRyMCAXwF+IX1tAPVO8E= go.etcd.io/etcd/client/v3 v3.5.6/go.mod h1:f6GRinRMCsFVv9Ht42EyY7nfsVGwrNO0WEoS2pRKzQk= go.etcd.io/etcd/pkg/v3 v3.5.4 h1:V5Dvl7S39ZDwjkKqJG2BfXgxZ3QREqqKifWQgIw5IM0= +go.etcd.io/etcd/pkg/v3 v3.5.4/go.mod h1:OI+TtO+Aa3nhQSppMbwE4ld3uF1/fqqwbpfndbbrEe0= go.etcd.io/etcd/raft/v3 v3.5.4 h1:YGrnAgRfgXloBNuqa+oBI/aRZMcK/1GS6trJePJ/Gqc= +go.etcd.io/etcd/raft/v3 v3.5.4/go.mod h1:SCuunjYvZFC0fBX0vxMSPjuZmpcSk+XaAcMrD6Do03w= go.etcd.io/etcd/server/v3 v3.5.4 h1:CMAZd0g8Bn5NRhynW6pKhc4FRg41/0QYy3d7aNm9874= +go.etcd.io/etcd/server/v3 v3.5.4/go.mod h1:S5/YTU15KxymM5l3T6b09sNOHPXqGYIZStpuuGbb65c= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= +go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= go.opentelemetry.io/contrib v0.20.0 h1:ubFQUn0VCZ0gPwIoJfBJVpeBlyRMxu8Mm/huKWYd9p0= go.opentelemetry.io/contrib v0.20.0/go.mod h1:G/EtFaa6qaN7+LxqfIAT3GiZa7Wv5DTBUzl5H4LY0Kc= 
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.20.0 h1:sO4WKdPAudZGKPcpZT4MJn6JaDmpyLrMPDGGyA1SttE= @@ -490,26 +720,34 @@ go.opentelemetry.io/otel/trace v0.20.0 h1:1DL6EXUdcg95gukhuRRvLDO/4X5THh/5dIV52l go.opentelemetry.io/otel/trace v0.20.0/go.mod h1:6GjCW8zgDjwGHGa6GkyeB8+/5vjT16gUEi0Nf1iBdgw= go.opentelemetry.io/proto/otlp v0.7.0 h1:rwOQPCuKAKmwGKq2aVNnYIibI6wnV7EvzgfTCzcdGg8= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.10.0 h1:9qC72Qh0+3MqyJbAn8YU5xVq1frD8bn3JtD2oXtafVQ= go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= +go.uber.org/goleak v1.2.0/go.mod h1:XJYK+MuIchqpmGmUSAzotztawfKvYLUIgg7guXrwVUo= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20220131195533-30dcbda58838/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20220315160706-3147a52a75dd/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.5.0 h1:U/0M97KRkSFvyD/3FSmdP5W5swImpNgle/EHFhOsQPE= golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -536,6 +774,7 @@ golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHl golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod 
h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= @@ -545,13 +784,20 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -563,6 +809,7 @@ golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net 
v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -579,13 +826,27 @@ golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= +golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -593,10 +854,21 @@ golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4Iltr golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 
v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= -golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 h1:nt+Q6cXKz4MosCSpnbMtqiQ8Oz0pxTef2B4Vca2lvfk= -golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783/go.mod h1:h4gKUeWbJ4rQPri7E0u6Gs4e9Ri2zaLxzw5DI5XGrYg= +golang.org/x/oauth2 v0.4.0 h1:NF0gk8LVPg1Ml7SSbGyySuoxdsXitj7TvgvuRxIMc/M= +golang.org/x/oauth2 v0.4.0/go.mod h1:RznEsdpjGAINPTOF0UH/t+xJ75L18YO3Ho6Pyn+uRec= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -608,11 +880,15 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -626,6 +902,7 @@ golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -650,27 +927,48 @@ golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys 
v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220422013727-9388b58f7150/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.6.0 h1:clScbb1cHjoCkyRbWwBEUZ5H/tIFu5TAXIqaZD0Gcjw= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -678,32 +976,41 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time 
v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20220210224613-90d013bbcef8/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20220609170525-579cf78fd858 h1:Dpdu/EMxGMFgq0CeYMh4fazTD2vtlZRYE7wyynxJb9U= golang.org/x/time v0.0.0-20220609170525-579cf78fd858/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -723,6 +1030,7 @@ golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjs golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod 
h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200505023115-26f46d2f7ef8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= @@ -730,9 +1038,22 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE= +golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.6-0.20210726203631-07bc1bf47fb2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -757,6 +1078,19 @@ google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0M google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= +google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg= +google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE= +google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= +google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= +google.golang.org/api 
v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= +google.golang.org/api v0.44.0/go.mod h1:EBOGZqzyhtvMDoxwS97ctnh0zUmYY6CxqXsc1AvkYD8= +google.golang.org/api v0.47.0/go.mod h1:Wbvgpq1HddcWVtzsVLyfLp8lDg6AA241LmgIL59tHXo= +google.golang.org/api v0.48.0/go.mod h1:71Pr1vy+TAZRPkPs/xlCf5SsU8WjuAWv1Pfjbtukyy4= +google.golang.org/api v0.50.0/go.mod h1:4bNT5pAuq5ji4SRZm+5QIkjny9JAyVD/3gaSihNefaw= +google.golang.org/api v0.51.0/go.mod h1:t4HdrdoNgyN5cbEfm7Lum0lcLDLiise1F8qDKX00sOU= +google.golang.org/api v0.54.0/go.mod h1:7C4bFFOvVDGXjfDTAsgGwDgAxRDeQ4X8NvUedIt6z3k= +google.golang.org/api v0.55.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= +google.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdrMgI= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -786,6 +1120,7 @@ google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= @@ -795,10 +1130,36 @@ google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7Fc google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201019141844-1ed22bb0c154/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod 
h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= +google.golang.org/genproto v0.0.0-20210513213006-bf773b8c8384/go.mod h1:P3QM42oQyzQSnHPnZ/vqoCdDmzH28fzWByN9asMeM8A= google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= +google.golang.org/genproto v0.0.0-20210604141403-392c879c8b08/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= +google.golang.org/genproto v0.0.0-20210608205507-b6d2f5bf0d7d/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= +google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= +google.golang.org/genproto v0.0.0-20210713002101-d411969a0d9a/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= +google.golang.org/genproto v0.0.0-20210716133855-ce7ef5c701ea/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= +google.golang.org/genproto v0.0.0-20210728212813-7823e685a01f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= +google.golang.org/genproto v0.0.0-20210805201207-89edb61ffb67/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= +google.golang.org/genproto v0.0.0-20210813162853-db860fec028c/go.mod h1:cFeNkxwySK631ADgubI+/XFU/xp8FD5KIVV4rj8UC5w= +google.golang.org/genproto v0.0.0-20210821163610-241b8fcbd6c8/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= +google.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= +google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= +google.golang.org/genproto v0.0.0-20210903162649-d08c68adba83/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= +google.golang.org/genproto v0.0.0-20210924002016-3dee208752a0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20220107163113-42d7afdf6368/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20230106154932-a12b697841d9 h1:3wPBShTLWQnEkZ9VW/HZZ8zT/9LLtleBtq7l8SKtJIA= -google.golang.org/genproto v0.0.0-20230106154932-a12b697841d9/go.mod h1:RGgjbofJ8xD9Sq1VVhDM1Vok1vRONV+rg+CjzG4SZKM= +google.golang.org/genproto v0.0.0-20220502173005-c8bf987b8c21/go.mod h1:RAyBrSAP7Fh3Nc84ghnVLDPuV51xc9agzmm4Ph6i0Q4= +google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f h1:BWUVssLB0HVOSY78gIdvk1dTVYtT1y8SBWtPYuTJ/6w= +google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f/go.mod h1:RGgjbofJ8xD9Sq1VVhDM1Vok1vRONV+rg+CjzG4SZKM= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -811,14 +1172,25 @@ google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKa google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.34.0/go.mod 
h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8= +google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.37.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.37.1/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= +google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzIUK6k= -google.golang.org/grpc v1.51.0 h1:E1eGv1FTqoLIdnBCZufiSHgKjlqG6fKFf6pPWtMTh8U= -google.golang.org/grpc v1.51.0/go.mod h1:wgNDFcnuBGmxLKI/qn4T+m5BtEBYXJPvibbUPsAIPww= +google.golang.org/grpc v1.46.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= +google.golang.org/grpc v1.47.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= +google.golang.org/grpc v1.53.0 h1:LAv2ds7cmFV/XTS3XG1NneeENYrXGmorPxsBbptIjNc= +google.golang.org/grpc v1.53.0/go.mod h1:OnIrk0ipVdj4N5d9IUoFUx72/VlD7+jUsHwZgwSMQpw= +google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -832,6 +1204,7 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= @@ -844,10 +1217,15 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= +gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= +gopkg.in/square/go-jose.v2 v2.2.2/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 
h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= @@ -862,7 +1240,9 @@ gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= +gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= @@ -870,8 +1250,6 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -howett.net/plist v0.0.0-20181124034731-591f970eefbb h1:jhnBjNi9UFpfpl8YZhA9CrOqpnJdvzuiHsl/dnxl11M= -howett.net/plist v0.0.0-20181124034731-591f970eefbb/go.mod h1:vMygbs4qMhSZSc4lCUl2OEE+rDiIIJAIdR4m7MiMcm0= k8s.io/api v0.25.11 h1:4mjYDfE3yp22jrytjH0knwgzjXKkxHX4D01ZCAazvZM= k8s.io/api v0.25.11/go.mod h1:bK4UvD4bthtutNlvensrfBX21PRQ/vs2cIYggHkOOAo= k8s.io/apiextensions-apiserver v0.25.11 h1:qZY0kCt0tW3QHPKcogp3k4zrlZhe9f8H6EJOr7sNRbA= @@ -884,12 +1262,18 @@ k8s.io/client-go v0.25.11 h1:DJQ141UsbNRI6wYSlcYLP5J5BW5Wq7Bgm42Ztq2SW70= k8s.io/client-go v0.25.11/go.mod h1:41Xs7p1SfhoReUnmjjYCfCNWFiq4xSkexwJfbxF2F7A= k8s.io/cloud-provider v0.25.11 h1:t/mMWKvO52IrznQ5dAziigNt+EzXuM9jWfisEmAaaYQ= k8s.io/cloud-provider v0.25.11/go.mod h1:9xL8k1YZsU6dCN3djftvum0y84rwYW+xorF+8LFs5Ho= +k8s.io/code-generator v0.25.11/go.mod h1:FA5a4rk4tMTCgmiDeNdRjml+AGvm72SwZYwD5lBrezY= k8s.io/component-base v0.25.11 h1:3QmISCE9n9CJkVpTA4spQO1IZCrLlOwbKdzSN9dqZZA= k8s.io/component-base v0.25.11/go.mod h1:wFR4pfB+xTc6FBak+RoWRNeTmelGE4XWJP/xVOvn3vM= k8s.io/component-helpers v0.25.11 h1:NO8FqIZd0LgEYiNhzFcwlEa6P8/8lX366r00niFB2XY= k8s.io/component-helpers v0.25.11/go.mod h1:TeIbtyuelY6lnG6F3Uu+/lzMp31TEg/YtyuYWBNTVHY= k8s.io/csi-translation-lib v0.25.11 h1:JgpoBenEAfCjpbfwjCPvL8bI/P9un+BQUV/uNxZnhP0= k8s.io/csi-translation-lib v0.25.11/go.mod h1:Ff2gRYDRoGkoIoosW3jcZ6Q1T0MO+iZEGO21RSVKWbs= +k8s.io/gengo v0.0.0-20210813121822-485abfe95c7c/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= +k8s.io/gengo v0.0.0-20211129171323-c02415ce4185/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= +k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= +k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= +k8s.io/klog/v2 
v2.70.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= k8s.io/klog/v2 v2.80.1 h1:atnLQ121W371wYYFawwYx1aEY2eUfs4l3J72wtgAwV4= k8s.io/klog/v2 v2.80.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= k8s.io/kube-openapi v0.0.0-20220803162953-67bda5d908f1 h1:MQ8BAZPZlWk3S9K4a9NCkIFQtZShWqoha7snGixVgEA= @@ -900,8 +1284,12 @@ k8s.io/kubernetes v1.25.11 h1:vl9UYkjHuWOyk1EAfnzVlakFCziEBMazLthW/YuHb8M= k8s.io/kubernetes v1.25.11/go.mod h1:uokqZvgUrcgwuapBSvrq9+y5TMXsvm68qgiRiidZs2A= k8s.io/mount-utils v0.25.11 h1:WFzlMxcML7xXDHuVDzqcJpl1xF4P6hwrnbHTruNBWno= k8s.io/mount-utils v0.25.11/go.mod h1:IM9QOFh15E1a4Nb6Rcn8FJ9Z1PbBpuyAPCty/qvKSAw= +k8s.io/utils v0.0.0-20210802155522-efc7438f0176/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= +k8s.io/utils v0.0.0-20220728103510-ee6ede2d64ed/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= k8s.io/utils v0.0.0-20221107191617-1a15be271d1d h1:0Smp/HP1OH4Rvhe+4B8nWGERtlqAGSftbSbbmm45oFs= k8s.io/utils v0.0.0-20221107191617-1a15be271d1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +nhooyr.io/websocket v1.8.7 h1:usjR2uOr/zjjkVMy0lW+PPohFok7PCow5sDjLgX4P4g= +nhooyr.io/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= @@ -909,8 +1297,6 @@ sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.37 h1:fAPTNEpzQMOLM sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.37/go.mod h1:vfnxT4FXNT8eGvO+xi/DsyC/qHmdujqwrUa1WSspCsk= sigs.k8s.io/controller-runtime v0.13.1 h1:tUsRCSJVM1QQOOeViGeX3GMT3dQF1eePPw6sEE3xSlg= sigs.k8s.io/controller-runtime v0.13.1/go.mod h1:Zbz+el8Yg31jubvAEyglRZGdLAjplZl+PgtYNI6WNTI= -sigs.k8s.io/controller-tools v0.10.0 h1:0L5DTDTFB67jm9DkfrONgTGmfc/zYow0ZaHyppizU2U= -sigs.k8s.io/controller-tools v0.10.0/go.mod h1:uvr0EW6IsprfB0jpQq6evtKy+hHyHCXNfdWI5ONPx94= sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 h1:iXTIw73aPyC+oRdyqqvVJuloN1p0AC/kzH07hu3NE+k= sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= sigs.k8s.io/structured-merge-diff/v4 v4.2.3 h1:PRbqxJClWWYMNV1dhaG4NsibJbArud9kFxnAMREiWFE= diff --git a/neonvm/apis/neonvm/v1/virtualmachine_types.go b/neonvm/apis/neonvm/v1/virtualmachine_types.go index 6629b7a39..d5cf8b1b6 100644 --- a/neonvm/apis/neonvm/v1/virtualmachine_types.go +++ b/neonvm/apis/neonvm/v1/virtualmachine_types.go @@ -69,11 +69,12 @@ type VirtualMachineSpec struct { // +optional TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds"` - NodeSelector map[string]string `json:"nodeSelector,omitempty"` - Affinity *corev1.Affinity `json:"affinity,omitempty"` - Tolerations []corev1.Toleration `json:"tolerations,omitempty"` - SchedulerName string `json:"schedulerName,omitempty"` - PodResources corev1.ResourceRequirements `json:"podResources,omitempty"` + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + Affinity *corev1.Affinity `json:"affinity,omitempty"` + Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + SchedulerName string `json:"schedulerName,omitempty"` + ServiceAccountName string `json:"serviceAccountName,omitempty"` + PodResources corev1.ResourceRequirements `json:"podResources,omitempty"` // +kubebuilder:default:=Always // +optional @@ -83,6 +84,8 @@ type VirtualMachineSpec struct { Guest Guest `json:"guest"` + 
ExtraInitContainers []corev1.Container `json:"extraInitContainers,omitempty"` + // List of disk that can be mounted by virtual machine. // +optional Disks []Disk `json:"disks,omitempty"` @@ -97,7 +100,7 @@ type VirtualMachineSpec struct { // Use KVM acceleation // +kubebuilder:default:=true // +optional - EnableAcceleration bool `json:"enableAcceleration"` + EnableAcceleration *bool `json:"enableAcceleration,omitempty"` } // +kubebuilder:validation:Enum=Always;OnFailure;Never @@ -308,6 +311,8 @@ type DiskSource struct { type EmptyDiskSource struct { Size resource.Quantity `json:"size"` + // Discard enables the "discard" mount option for the filesystem + Discard bool `json:"discard,omitempty"` } type TmpfsDiskSource struct { diff --git a/neonvm/apis/neonvm/v1/zz_generated.deepcopy.go b/neonvm/apis/neonvm/v1/zz_generated.deepcopy.go index 5131cb359..24ee8e058 100644 --- a/neonvm/apis/neonvm/v1/zz_generated.deepcopy.go +++ b/neonvm/apis/neonvm/v1/zz_generated.deepcopy.go @@ -625,6 +625,13 @@ func (in *VirtualMachineSpec) DeepCopyInto(out *VirtualMachineSpec) { copy(*out, *in) } in.Guest.DeepCopyInto(&out.Guest) + if in.ExtraInitContainers != nil { + in, out := &in.ExtraInitContainers, &out.ExtraInitContainers + *out = make([]corev1.Container, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.Disks != nil { in, out := &in.Disks, &out.Disks *out = make([]Disk, len(*in)) @@ -642,6 +649,11 @@ func (in *VirtualMachineSpec) DeepCopyInto(out *VirtualMachineSpec) { *out = new(bool) **out = **in } + if in.EnableAcceleration != nil { + in, out := &in.EnableAcceleration, &out.EnableAcceleration + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachineSpec. diff --git a/neonvm/config/common/crd/bases/vm.neon.tech_virtualmachines.yaml b/neonvm/config/common/crd/bases/vm.neon.tech_virtualmachines.yaml index b34bf889b..f8dc1e5f9 100644 --- a/neonvm/config/common/crd/bases/vm.neon.tech_virtualmachines.yaml +++ b/neonvm/config/common/crd/bases/vm.neon.tech_virtualmachines.yaml @@ -963,6 +963,10 @@ spec: description: EmptyDisk represents a temporary empty qcow2 disk that shares a vm's lifetime. properties: + discard: + description: Discard enables the "discard" mount option + for the filesystem + type: boolean size: anyOf: - type: integer @@ -1070,6 +1074,1238 @@ spec: default: true description: Use KVM acceleation type: boolean + extraInitContainers: + items: + description: A single application container that you want to run + within a pod. + properties: + args: + description: 'Arguments to the entrypoint. The container image''s + CMD is used if this is not provided. Variable references $(VAR_NAME) + are expanded using the container''s environment. If a variable + cannot be resolved, the reference in the input string will + be unchanged. Double $$ are reduced to a single $, which allows + for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references + will never be expanded, regardless of whether the variable + exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + command: + description: 'Entrypoint array. Not executed within a shell. + The container image''s ENTRYPOINT is used if this is not provided. 
+ Variable references $(VAR_NAME) are expanded using the container''s + environment. If a variable cannot be resolved, the reference + in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: + i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether + the variable exists or not. Cannot be updated. More info: + https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + env: + description: List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must be + a C_IDENTIFIER. + type: string + value: + description: 'Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in + the container and any service environment variables. + If a variable cannot be resolved, the reference in the + input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) + syntax: i.e. "$$(VAR_NAME)" will produce the string + literal "$(VAR_NAME)". Escaped references will never + be expanded, regardless of whether the variable exists + or not. Defaults to "".' + type: string + valueFrom: + description: Source for the environment variable's value. + Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: + https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, + uid?' + type: string + optional: + description: Specify whether the ConfigMap or + its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: 'Selects a field of the pod: supports + metadata.name, metadata.namespace, `metadata.labels['''']`, + `metadata.annotations['''']`, spec.nodeName, + spec.serviceAccountName, status.hostIP, status.podIP, + status.podIPs.' + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the + specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: 'Selects a resource of the container: + only resources limits and requests (limits.cpu, + limits.memory, limits.ephemeral-storage, requests.cpu, + requests.memory and requests.ephemeral-storage) + are currently supported.' 
+ properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of the + exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's + namespace + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: + https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, + uid?' + type: string + optional: + description: Specify whether the Secret or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + envFrom: + description: List of sources to populate environment variables + in the container. The keys defined within a source must be + a C_IDENTIFIER. All invalid keys will be reported as an event + when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take + precedence. Values defined by an Env with a duplicate key + will take precedence. Cannot be updated. + items: + description: EnvFromSource represents the source of a set + of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, + uid?' + type: string + optional: + description: Specify whether the ConfigMap must be + defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend to each + key in the ConfigMap. Must be a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, + uid?' + type: string + optional: + description: Specify whether the Secret must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + image: + description: 'Container image name. More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management + to default or override container images in workload controllers + like Deployments and StatefulSets.' + type: string + imagePullPolicy: + description: 'Image pull policy. One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent + otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' + type: string + lifecycle: + description: Actions that the management system should take + in response to container lifecycle events. Cannot be updated. 
+ properties: + postStart: + description: 'PostStart is called immediately after a container + is created. If the handler fails, the container is terminated + and restarted according to its restart policy. Other management + of the container blocks until the hook completes. More + info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's + filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions + ('|', etc) won't work. To use a shell, you need + to explicitly call out to that shell. Exit status + of 0 is treated as live/healthy and non-zero is + unhealthy. + items: + type: string + type: array + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in + httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name. This will + be canonicalized upon output, so case-variant + names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access + on the container. Number must be in the range + 1 to 65535. Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the + host. Defaults to HTTP. + type: string + required: + - port + type: object + tcpSocket: + description: Deprecated. TCPSocket is NOT supported + as a LifecycleHandler and kept for the backward compatibility. + There are no validation of this field and lifecycle + hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to connect to, + defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access + on the container. Number must be in the range + 1 to 65535. Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: 'PreStop is called immediately before a container + is terminated due to an API request or management event + such as liveness/startup probe failure, preemption, resource + contention, etc. The handler is not called if the container + crashes or exits. The Pod''s termination grace period + countdown begins before the PreStop hook is executed. + Regardless of the outcome of the handler, the container + will eventually terminate within the Pod''s termination + grace period (unless delayed by finalizers). Other management + of the container blocks until the hook completes or until + the termination grace period is reached. 
More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's + filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions + ('|', etc) won't work. To use a shell, you need + to explicitly call out to that shell. Exit status + of 0 is treated as live/healthy and non-zero is + unhealthy. + items: + type: string + type: array + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in + httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name. This will + be canonicalized upon output, so case-variant + names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access + on the container. Number must be in the range + 1 to 65535. Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the + host. Defaults to HTTP. + type: string + required: + - port + type: object + tcpSocket: + description: Deprecated. TCPSocket is NOT supported + as a LifecycleHandler and kept for the backward compatibility. + There are no validation of this field and lifecycle + hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name to connect to, + defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access + on the container. Number must be in the range + 1 to 65535. Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: 'Periodic probe of container liveness. Container + will be restarted if the probe fails. Cannot be updated. More + info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. 
+ format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + This is a beta field and requires enabling GRPCContainerProbe + feature gate. + properties: + port: + description: Port number of the gRPC service. Number + must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to + place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + \n If this is not specified, the default behavior + is defined by gRPC." + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name. This will + be canonicalized upon output, so case-variant + names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs + to terminate gracefully upon probe failure. The grace + period is the duration in seconds after the processes + running in the pod are sent a termination signal and the + time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup + time for your process. If this value is nil, the pod's + terminationGracePeriodSeconds will be used. Otherwise, + this value overrides the value provided by the pod spec. + Value must be non-negative integer. 
The value zero indicates + stop immediately via the kill signal (no opportunity to + shut down). This is a beta field and requires enabling + ProbeTerminationGracePeriod feature gate. Minimum value + is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object + name: + description: Name of the container specified as a DNS_LABEL. + Each container in a pod must have a unique name (DNS_LABEL). + Cannot be updated. + type: string + ports: + description: List of ports to expose from the container. Not + specifying a port here DOES NOT prevent that port from being + exposed. Any port which is listening on the default "0.0.0.0" + address inside a container will be accessible from the network. + Modifying this array with strategic merge patch may corrupt + the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255. + Cannot be updated. + items: + description: ContainerPort represents a network port in a + single container. + properties: + containerPort: + description: Number of port to expose on the pod's IP + address. This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external port to. + type: string + hostPort: + description: Number of port to expose on the host. If + specified, this must be a valid port number, 0 < x < + 65536. If HostNetwork is specified, this must match + ContainerPort. Most containers do not need this. + format: int32 + type: integer + name: + description: If specified, this must be an IANA_SVC_NAME + and unique within the pod. Each named port in a pod + must have a unique name. Name for the port that can + be referred to by services. + type: string + protocol: + default: TCP + description: Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: 'Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe + fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + This is a beta field and requires enabling GRPCContainerProbe + feature gate. + properties: + port: + description: Port number of the gRPC service. 
Number + must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to + place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + \n If this is not specified, the default behavior + is defined by gRPC." + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name. This will + be canonicalized upon output, so case-variant + names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs + to terminate gracefully upon probe failure. The grace + period is the duration in seconds after the processes + running in the pod are sent a termination signal and the + time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup + time for your process. If this value is nil, the pod's + terminationGracePeriodSeconds will be used. Otherwise, + this value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates + stop immediately via the kill signal (no opportunity to + shut down). This is a beta field and requires enabling + ProbeTerminationGracePeriod feature gate. Minimum value + is 1. spec.terminationGracePeriodSeconds is used if unset. 
+ format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object + resources: + description: 'Compute Resources required by this container. + Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute + resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of compute + resources required. If Requests is omitted for a container, + it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + type: object + type: object + securityContext: + description: 'SecurityContext defines the security options the + container should be run with. If set, the fields of SecurityContext + override the equivalent fields of PodSecurityContext. More + info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' + properties: + allowPrivilegeEscalation: + description: 'AllowPrivilegeEscalation controls whether + a process can gain more privileges than its parent process. + This bool directly controls if the no_new_privs flag will + be set on the container process. AllowPrivilegeEscalation + is true always when the container is: 1) run as Privileged + 2) has CAP_SYS_ADMIN Note that this field cannot be set + when spec.os.name is windows.' + type: boolean + capabilities: + description: The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by + the container runtime. Note that this field cannot be + set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + type: object + privileged: + description: Run container in privileged mode. Processes + in privileged containers are essentially equivalent to + root on the host. Defaults to false. Note that this field + cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: procMount denotes the type of proc mount to + use for the containers. The default is DefaultProcMount + which uses the container runtime defaults for readonly + paths and masked paths. This requires the ProcMountType + feature flag to be enabled. Note that this field cannot + be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: Whether this container has a read-only root + filesystem. Default is false. 
Note that this field cannot + be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: The GID to run the entrypoint of the container + process. Uses runtime default if unset. May also be set + in PodSecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence. Note that this field cannot be set when + spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a + non-root user. If true, the Kubelet will validate the + image at runtime to ensure that it does not run as UID + 0 (root) and fail to start the container if it does. If + unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both + SecurityContext and PodSecurityContext, the value specified + in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container + process. Defaults to user specified in image metadata + if unspecified. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. Note + that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a + random SELinux context for each container. May also be + set in PodSecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence. Note that this field cannot be set when + spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: The seccomp options to use by this container. + If seccomp options are provided at both the pod & container + level, the container options override the pod options. + Note that this field cannot be set when spec.os.name is + windows. + properties: + localhostProfile: + description: localhostProfile indicates a profile defined + in a file on the node should be used. The profile + must be preconfigured on the node to work. Must be + a descending path, relative to the kubelet's configured + seccomp profile location. Must only be set if type + is "Localhost". + type: string + type: + description: "type indicates which kind of seccomp profile + will be applied. Valid options are: \n Localhost - + a profile defined in a file on the node should be + used. RuntimeDefault - the container runtime default + profile should be used. Unconfined - no profile should + be applied." + type: string + required: + - type + type: object + windowsOptions: + description: The Windows specific settings applied to all + containers. If unspecified, the options from the PodSecurityContext + will be used. If set in both SecurityContext and PodSecurityContext, + the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is + linux. 
+ properties: + gmsaCredentialSpec: + description: GMSACredentialSpec is where the GMSA admission + webhook (https://github.com/kubernetes-sigs/windows-gmsa) + inlines the contents of the GMSA credential spec named + by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: HostProcess determines if a container should + be run as a 'Host Process' container. This field is + alpha-level and will only be honored by components + that enable the WindowsHostProcessContainers feature + flag. Setting this field without the feature flag + will result in errors when validating the Pod. All + of a Pod's containers must have the same effective + HostProcess value (it is not allowed to have a mix + of HostProcess containers and non-HostProcess containers). In + addition, if HostProcess is true then HostNetwork + must also be set to true. + type: boolean + runAsUserName: + description: The UserName in Windows to run the entrypoint + of the container process. Defaults to the user specified + in image metadata if unspecified. May also be set + in PodSecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence. + type: string + type: object + type: object + startupProbe: + description: 'StartupProbe indicates that the Pod has successfully + initialized. If specified, no other probes are executed until + this completes successfully. If this probe fails, the Pod + will be restarted, just as if the livenessProbe failed. This + can be used to provide different probe parameters at the beginning + of a Pod''s lifecycle, when it might take a long time to load + data or warm a cache, than during steady-state operation. + This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + This is a beta field and requires enabling GRPCContainerProbe + feature gate. + properties: + port: + description: Port number of the gRPC service. Number + must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to + place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + \n If this is not specified, the default behavior + is defined by gRPC." + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. 
+ type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name. This will + be canonicalized upon output, so case-variant + names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs + to terminate gracefully upon probe failure. The grace + period is the duration in seconds after the processes + running in the pod are sent a termination signal and the + time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup + time for your process. If this value is nil, the pod's + terminationGracePeriodSeconds will be used. Otherwise, + this value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates + stop immediately via the kill signal (no opportunity to + shut down). This is a beta field and requires enabling + ProbeTerminationGracePeriod feature gate. Minimum value + is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object + stdin: + description: Whether this container should allocate a buffer + for stdin in the container runtime. If this is not set, reads + from stdin in the container will always result in EOF. Default + is false. 
+ type: boolean + stdinOnce: + description: Whether the container runtime should close the + stdin channel after it has been opened by a single attach. + When stdin is true the stdin stream will remain open across + multiple attach sessions. If stdinOnce is set to true, stdin + is opened on container start, is empty until the first client + attaches to stdin, and then remains open and accepts data + until the client disconnects, at which time stdin is closed + and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin + will never receive an EOF. Default is false + type: boolean + terminationMessagePath: + description: 'Optional: Path at which the file to which the + container''s termination message will be written is mounted + into the container''s filesystem. Message written is intended + to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. + The total message length across all containers will be limited + to 12kb. Defaults to /dev/termination-log. Cannot be updated.' + type: string + terminationMessagePolicy: + description: Indicate how the termination message should be + populated. File will use the contents of terminationMessagePath + to populate the container status message on both success and + failure. FallbackToLogsOnError will use the last chunk of + container log output if the termination message file is empty + and the container exited with an error. The log output is + limited to 2048 bytes or 80 lines, whichever is smaller. Defaults + to File. Cannot be updated. + type: string + tty: + description: Whether this container should allocate a TTY for + itself, also requires 'stdin' to be true. Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block devices to be + used by the container. + items: + description: volumeDevice describes a mapping of a raw block + device within a container. + properties: + devicePath: + description: devicePath is the path inside of the container + that the device will be mapped to. + type: string + name: + description: name must match the name of a persistentVolumeClaim + in the pod + type: string + required: + - devicePath + - name + type: object + type: array + volumeMounts: + description: Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a Volume + within a container. + properties: + mountPath: + description: Path within the container at which the volume + should be mounted. Must not contain ':'. + type: string + mountPropagation: + description: mountPropagation determines how mounts are + propagated from the host to container and the other + way around. When not set, MountPropagationNone is used. + This field is beta in 1.10. + type: string + name: + description: This must match the Name of a Volume. + type: string + readOnly: + description: Mounted read-only if true, read-write otherwise + (false or unspecified). Defaults to false. + type: boolean + subPath: + description: Path within the volume from which the container's + volume should be mounted. Defaults to "" (volume's root). + type: string + subPathExpr: + description: Expanded path within the volume from which + the container's volume should be mounted. Behaves similarly + to SubPath but environment variable references $(VAR_NAME) + are expanded using the container's environment. Defaults + to "" (volume's root). 
SubPathExpr and SubPath are mutually + exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + workingDir: + description: Container's working directory. If not specified, + the container runtime's default will be used, which might + be configured in the container image. Cannot be updated. + type: string + required: + - name + type: object + type: array extraNetwork: description: Extra network interface attached to network provided by Mutlus CNI. @@ -1280,6 +2516,8 @@ spec: type: string service_links: type: boolean + serviceAccountName: + type: string terminationGracePeriodSeconds: default: 5 format: int64 diff --git a/neonvm/config/common/rbac/virtualmachine_editor_role.yaml b/neonvm/config/common/rbac/virtualmachine_editor_role.yaml index 3712a9e56..72f38313c 100644 --- a/neonvm/config/common/rbac/virtualmachine_editor_role.yaml +++ b/neonvm/config/common/rbac/virtualmachine_editor_role.yaml @@ -9,6 +9,8 @@ metadata: app.kubernetes.io/created-by: neonvm app.kubernetes.io/part-of: neonvm app.kubernetes.io/managed-by: kustomize + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-admin: "true" name: virtualmachine-editor-role rules: - apiGroups: diff --git a/neonvm/config/common/rbac/virtualmachine_viewer_role.yaml b/neonvm/config/common/rbac/virtualmachine_viewer_role.yaml index 509d66eb9..9becd630f 100644 --- a/neonvm/config/common/rbac/virtualmachine_viewer_role.yaml +++ b/neonvm/config/common/rbac/virtualmachine_viewer_role.yaml @@ -9,6 +9,9 @@ metadata: app.kubernetes.io/created-by: neonvm app.kubernetes.io/part-of: neonvm app.kubernetes.io/managed-by: kustomize + rbac.authorization.k8s.io/aggregate-to-view: "true" + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-admin: "true" name: virtualmachine-viewer-role rules: - apiGroups: diff --git a/neonvm/config/common/rbac/virtualmachinemigration_editor_role.yaml b/neonvm/config/common/rbac/virtualmachinemigration_editor_role.yaml index c6ed235f8..9bc05af8f 100644 --- a/neonvm/config/common/rbac/virtualmachinemigration_editor_role.yaml +++ b/neonvm/config/common/rbac/virtualmachinemigration_editor_role.yaml @@ -9,6 +9,8 @@ metadata: app.kubernetes.io/created-by: neonvm app.kubernetes.io/part-of: neonvm app.kubernetes.io/managed-by: kustomize + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-admin: "true" name: virtualmachinemigration-editor-role rules: - apiGroups: diff --git a/neonvm/config/common/rbac/virtualmachinemigration_viewer_role.yaml b/neonvm/config/common/rbac/virtualmachinemigration_viewer_role.yaml index f8b7eb1c9..48d97f97d 100644 --- a/neonvm/config/common/rbac/virtualmachinemigration_viewer_role.yaml +++ b/neonvm/config/common/rbac/virtualmachinemigration_viewer_role.yaml @@ -9,6 +9,9 @@ metadata: app.kubernetes.io/created-by: neonvm app.kubernetes.io/part-of: neonvm app.kubernetes.io/managed-by: kustomize + rbac.authorization.k8s.io/aggregate-to-view: "true" + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-admin: "true" name: virtualmachinemigration-viewer-role rules: - apiGroups: diff --git a/neonvm/controllers/virtualmachine_controller.go b/neonvm/controllers/virtualmachine_controller.go index 5888ddd00..fae0ccd60 100644 --- a/neonvm/controllers/virtualmachine_controller.go +++ b/neonvm/controllers/virtualmachine_controller.go @@ -379,7 +379,7 @@ func (r *VirtualMachineReconciler) doReconcile(ctx 
context.Context, virtualmachi if err != nil && apierrors.IsNotFound(err) { // lost runner pod for running VirtualMachine ? r.Recorder.Event(virtualmachine, "Warning", "NotFound", - fmt.Sprintf("runner pod %s not fodund", + fmt.Sprintf("runner pod %s not found", virtualmachine.Status.PodName)) virtualmachine.Status.Phase = vmv1.VmFailed meta.SetStatusCondition(&virtualmachine.Status.Conditions, @@ -409,7 +409,7 @@ func (r *VirtualMachineReconciler) doReconcile(ctx context.Context, virtualmachi virtualmachine.Status.Node = vmRunner.Spec.NodeName // get CPU details from QEMU and update status - cpuSlotsPlugged, _, err := QmpGetCpus(virtualmachine) + cpuSlotsPlugged, _, err := QmpGetCpus(QmpAddr(virtualmachine)) if err != nil { log.Error(err, "Failed to get CPU details from VirtualMachine", "VirtualMachine", virtualmachine.Name) return err @@ -452,7 +452,7 @@ func (r *VirtualMachineReconciler) doReconcile(ctx context.Context, virtualmachi } // get Memory details from hypervisor and update VM status - memorySize, err := QmpGetMemorySize(virtualmachine) + memorySize, err := QmpGetMemorySize(QmpAddr(virtualmachine)) if err != nil { log.Error(err, "Failed to get Memory details from VirtualMachine", "VirtualMachine", virtualmachine.Name) return err @@ -508,12 +508,67 @@ func (r *VirtualMachineReconciler) doReconcile(ctx context.Context, virtualmachi } case vmv1.VmScaling: + // Check that runner pod is still ok + vmRunner := &corev1.Pod{} + err := r.Get(ctx, types.NamespacedName{Name: virtualmachine.Status.PodName, Namespace: virtualmachine.Namespace}, vmRunner) + if err != nil && apierrors.IsNotFound(err) { + // lost runner pod for running VirtualMachine ? + r.Recorder.Event(virtualmachine, "Warning", "NotFound", + fmt.Sprintf("runner pod %s not found", + virtualmachine.Status.PodName)) + virtualmachine.Status.Phase = vmv1.VmFailed + meta.SetStatusCondition(&virtualmachine.Status.Conditions, + metav1.Condition{Type: typeDegradedVirtualMachine, + Status: metav1.ConditionTrue, + Reason: "Reconciling", + Message: fmt.Sprintf("Pod (%s) for VirtualMachine (%s) not found", virtualmachine.Status.PodName, virtualmachine.Name)}) + } else if err != nil { + log.Error(err, "Failed to get runner Pod") + return err + } + + // Update the metadata (including "usage" annotation) before anything else, so that it + // will be correctly set even if the rest of the reconcile operation fails. 
+ if err := updatePodMetadataIfNecessary(ctx, r.Client, virtualmachine, vmRunner); err != nil { + log.Error(err, "Failed to sync pod labels and annotations", "VirtualMachine", virtualmachine.Name) + } + + // runner pod found, check that it's still up: + switch vmRunner.Status.Phase { + case corev1.PodSucceeded: + virtualmachine.Status.Phase = vmv1.VmSucceeded + meta.SetStatusCondition(&virtualmachine.Status.Conditions, + metav1.Condition{Type: typeAvailableVirtualMachine, + Status: metav1.ConditionFalse, + Reason: "Reconciling", + Message: fmt.Sprintf("Pod (%s) for VirtualMachine (%s) succeeded", virtualmachine.Status.PodName, virtualmachine.Name)}) + return nil + case corev1.PodFailed: + virtualmachine.Status.Phase = vmv1.VmFailed + meta.SetStatusCondition(&virtualmachine.Status.Conditions, + metav1.Condition{Type: typeDegradedVirtualMachine, + Status: metav1.ConditionTrue, + Reason: "Reconciling", + Message: fmt.Sprintf("Pod (%s) for VirtualMachine (%s) failed", virtualmachine.Status.PodName, virtualmachine.Name)}) + return nil + case corev1.PodUnknown: + virtualmachine.Status.Phase = vmv1.VmPending + meta.SetStatusCondition(&virtualmachine.Status.Conditions, + metav1.Condition{Type: typeAvailableVirtualMachine, + Status: metav1.ConditionUnknown, + Reason: "Reconciling", + Message: fmt.Sprintf("Pod (%s) for VirtualMachine (%s) in Unknown phase", virtualmachine.Status.PodName, virtualmachine.Name)}) + return nil + default: + // do nothing + } + cpuScaled := false ramScaled := false // do hotplug/unplug CPU // firstly get current state from QEMU - cpuSlotsPlugged, _, err := QmpGetCpus(virtualmachine) + cpuSlotsPlugged, _, err := QmpGetCpus(QmpAddr(virtualmachine)) if err != nil { log.Error(err, "Failed to get CPU details from VirtualMachine", "VirtualMachine", virtualmachine.Name) return err @@ -524,7 +579,7 @@ func (r *VirtualMachineReconciler) doReconcile(ctx context.Context, virtualmachi if specCPU > pluggedCPU { // going to plug one CPU log.Info("Plug one more CPU into VM") - if err := QmpPlugCpu(virtualmachine); err != nil { + if err := QmpPlugCpu(QmpAddr(virtualmachine)); err != nil { return err } r.Recorder.Event(virtualmachine, "Normal", "ScaleUp", @@ -533,7 +588,7 @@ func (r *VirtualMachineReconciler) doReconcile(ctx context.Context, virtualmachi } else if specCPU < pluggedCPU { // going to unplug one CPU log.Info("Unplug one CPU from VM") - if err := QmpUnplugCpu(virtualmachine); err != nil { + if err := QmpUnplugCpu(QmpAddr(virtualmachine)); err != nil { return err } r.Recorder.Event(virtualmachine, "Normal", "ScaleDown", @@ -546,7 +601,7 @@ func (r *VirtualMachineReconciler) doReconcile(ctx context.Context, virtualmachi // do hotplug/unplug Memory // firstly get current state from QEMU - memoryDevices, err := QmpQueryMemoryDevices(virtualmachine) + memoryDevices, err := QmpQueryMemoryDevices(QmpAddr(virtualmachine)) memoryPluggedSlots := *virtualmachine.Spec.Guest.MemorySlots.Min + int32(len(memoryDevices)) if err != nil { log.Error(err, "Failed to get Memory details from VirtualMachine", "VirtualMachine", virtualmachine.Name) @@ -565,7 +620,7 @@ func (r *VirtualMachineReconciler) doReconcile(ctx context.Context, virtualmachi } else if *virtualmachine.Spec.Guest.MemorySlots.Use < memoryPluggedSlots { // going to unplug one Memory Slot log.Info("Unplug one Memory module from VM") - if err := QmpUnplugMemory(virtualmachine); err != nil { + if err := QmpUnplugMemory(QmpAddr(virtualmachine)); err != nil { // special case ! 
// error means VM hadn't memory devices available for unplug // need set .memorySlots.Use back to real value @@ -991,6 +1046,7 @@ func podSpec(virtualmachine *vmv1.VirtualMachine) (*corev1.Pod, error) { NodeSelector: virtualmachine.Spec.NodeSelector, ImagePullSecrets: virtualmachine.Spec.ImagePullSecrets, Tolerations: virtualmachine.Spec.Tolerations, + ServiceAccountName: virtualmachine.Spec.ServiceAccountName, SchedulerName: virtualmachine.Spec.SchedulerName, Affinity: affinity, InitContainers: []corev1.Container{ @@ -1043,6 +1099,14 @@ func podSpec(virtualmachine *vmv1.VirtualMachine) (*corev1.Pod, error) { "-vmspec", base64.StdEncoding.EncodeToString(vmSpecJson), "-vmstatus", base64.StdEncoding.EncodeToString(vmStatusJson), }, + Env: []corev1.EnvVar{{ + Name: "K8S_POD_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }}, VolumeMounts: []corev1.VolumeMount{ { Name: "virtualmachineimages", @@ -1081,12 +1145,16 @@ func podSpec(virtualmachine *vmv1.VirtualMachine) (*corev1.Pod, error) { }, } + // Add any InitContainers that were specified by the spec + pod.Spec.InitContainers = append(pod.Spec.InitContainers, virtualmachine.Spec.ExtraInitContainers...) + // allow access to /dev/kvm and /dev/vhost-net devices by generic-device-plugin for kubelet if pod.Spec.Containers[0].Resources.Limits == nil { pod.Spec.Containers[0].Resources.Limits = corev1.ResourceList{} } pod.Spec.Containers[0].Resources.Limits["neonvm/vhost-net"] = resource.MustParse("1") - if virtualmachine.Spec.EnableAcceleration { + // NB: EnableAcceleration guaranteed non-nil because the k8s API server sets the default for us. + if *virtualmachine.Spec.EnableAcceleration { pod.Spec.Containers[0].Resources.Limits["neonvm/kvm"] = resource.MustParse("1") } diff --git a/neonvm/controllers/virtualmachine_qmp_queries.go b/neonvm/controllers/virtualmachine_qmp_queries.go index 9e7a2f2e2..2b44fb1ba 100644 --- a/neonvm/controllers/virtualmachine_qmp_queries.go +++ b/neonvm/controllers/virtualmachine_qmp_queries.go @@ -81,22 +81,11 @@ type MigrationInfo struct { } `json:"compression"` } -func QmpConnect(virtualmachine *vmv1.VirtualMachine) (*qmp.SocketMonitor, error) { - ip := virtualmachine.Status.PodIP - port := virtualmachine.Spec.QMP - - mon, err := qmp.NewSocketMonitor("tcp", fmt.Sprintf("%s:%d", ip, port), 2*time.Second) - if err != nil { - return nil, err - } - if err := mon.Connect(); err != nil { - return nil, err - } - - return mon, nil +func QmpAddr(vm *vmv1.VirtualMachine) (ip string, port int32) { + return vm.Status.PodIP, vm.Spec.QMP } -func QmpConnectByIP(ip string, port int32) (*qmp.SocketMonitor, error) { +func QmpConnect(ip string, port int32) (*qmp.SocketMonitor, error) { mon, err := qmp.NewSocketMonitor("tcp", fmt.Sprintf("%s:%d", ip, port), 2*time.Second) if err != nil { return nil, err @@ -108,8 +97,8 @@ func QmpConnectByIP(ip string, port int32) (*qmp.SocketMonitor, error) { return mon, nil } -func QmpGetCpus(virtualmachine *vmv1.VirtualMachine) ([]QmpCpuSlot, []QmpCpuSlot, error) { - mon, err := QmpConnect(virtualmachine) +func QmpGetCpus(ip string, port int32) ([]QmpCpuSlot, []QmpCpuSlot, error) { + mon, err := QmpConnect(ip, port) if err != nil { return nil, nil, err } @@ -137,37 +126,8 @@ func QmpGetCpus(virtualmachine *vmv1.VirtualMachine) ([]QmpCpuSlot, []QmpCpuSlot return plugged, empty, nil } -func QmpGetCpusFromRunner(ip string, port int32) ([]QmpCpuSlot, []QmpCpuSlot, error) { - mon, err := QmpConnectByIP(ip, port) - if err != nil { - 
return nil, nil, err - } - defer mon.Disconnect() - - qmpcmd := []byte(`{"execute": "query-hotpluggable-cpus"}`) - raw, err := mon.Run(qmpcmd) - if err != nil { - return nil, nil, err - } - - var result QmpCpus - json.Unmarshal(raw, &result) - - plugged := []QmpCpuSlot{} - empty := []QmpCpuSlot{} - for _, entry := range result.Return { - if entry.QomPath != nil { - plugged = append(plugged, QmpCpuSlot{Core: entry.Props.CoreId, QOM: *entry.QomPath, Type: entry.Type}) - } else { - empty = append(empty, QmpCpuSlot{Core: entry.Props.CoreId, QOM: "", Type: entry.Type}) - } - } - - return plugged, empty, nil -} - -func QmpPlugCpu(virtualmachine *vmv1.VirtualMachine) error { - _, empty, err := QmpGetCpus(virtualmachine) +func QmpPlugCpu(ip string, port int32) error { + _, empty, err := QmpGetCpus(ip, port) if err != nil { return err } @@ -175,7 +135,7 @@ func QmpPlugCpu(virtualmachine *vmv1.VirtualMachine) error { return errors.New("no empty slots for CPU hotplug") } - mon, err := QmpConnect(virtualmachine) + mon, err := QmpConnect(ip, port) if err != nil { return err } @@ -193,35 +153,8 @@ func QmpPlugCpu(virtualmachine *vmv1.VirtualMachine) error { return nil } -func QmpPlugCpuToRunner(ip string, port int32) error { - _, empty, err := QmpGetCpusFromRunner(ip, port) - if err != nil { - return err - } - if len(empty) == 0 { - return errors.New("no empty slots for CPU hotplug") - } - - mon, err := QmpConnectByIP(ip, port) - if err != nil { - return err - } - defer mon.Disconnect() - - // empty list reversed, first cpu slot in the end of list and last cpu slot in the beginning - slot := empty[len(empty)-1] - qmpcmd := []byte(fmt.Sprintf(`{"execute": "device_add", "arguments": {"id": "cpu%d", "driver": "%s", "core-id": %d, "socket-id": 0, "thread-id": 0}}`, slot.Core, slot.Type, slot.Core)) - - _, err = mon.Run(qmpcmd) - if err != nil { - return err - } - - return nil -} - -func QmpUnplugCpu(virtualmachine *vmv1.VirtualMachine) error { - plugged, _, err := QmpGetCpus(virtualmachine) +func QmpUnplugCpu(ip string, port int32) error { + plugged, _, err := QmpGetCpus(ip, port) if err != nil { return err } @@ -239,7 +172,7 @@ func QmpUnplugCpu(virtualmachine *vmv1.VirtualMachine) error { return errors.New("there are no unpluggable CPUs") } - mon, err := QmpConnect(virtualmachine) + mon, err := QmpConnect(ip, port) if err != nil { return err } @@ -257,11 +190,11 @@ func QmpUnplugCpu(virtualmachine *vmv1.VirtualMachine) error { } func QmpSyncCpuToTarget(vm *vmv1.VirtualMachine, migration *vmv1.VirtualMachineMigration) error { - plugged, _, err := QmpGetCpus(vm) + plugged, _, err := QmpGetCpus(QmpAddr(vm)) if err != nil { return err } - pluggedInTarget, _, err := QmpGetCpusFromRunner(migration.Status.TargetPodIP, vm.Spec.QMP) + pluggedInTarget, _, err := QmpGetCpus(migration.Status.TargetPodIP, vm.Spec.QMP) if err != nil { return err } @@ -270,7 +203,7 @@ func QmpSyncCpuToTarget(vm *vmv1.VirtualMachine, migration *vmv1.VirtualMachineM return nil } - target, err := QmpConnectByIP(migration.Status.TargetPodIP, vm.Spec.QMP) + target, err := QmpConnect(migration.Status.TargetPodIP, vm.Spec.QMP) if err != nil { return err } @@ -296,25 +229,8 @@ searchForEmpty: return nil } -func QmpQueryMemoryDevices(virtualmachine *vmv1.VirtualMachine) ([]QmpMemoryDevice, error) { - mon, err := QmpConnect(virtualmachine) - if err != nil { - return nil, err - } - defer mon.Disconnect() - - var result QmpMemoryDevices - cmd := []byte(`{"execute": "query-memory-devices"}`) - raw, err := mon.Run(cmd) - if err != nil { - return nil, 
err - } - json.Unmarshal(raw, &result) - return result.Return, nil -} - -func QmpQueryMemoryDevicesFromRunner(ip string, port int32) ([]QmpMemoryDevice, error) { - mon, err := QmpConnectByIP(ip, port) +func QmpQueryMemoryDevices(ip string, port int32) ([]QmpMemoryDevice, error) { + mon, err := QmpConnect(ip, port) if err != nil { return nil, err } @@ -334,7 +250,7 @@ func QmpPlugMemory(virtualmachine *vmv1.VirtualMachine) error { // slots - number of pluggable memory slots (Max - Min) slots := *virtualmachine.Spec.Guest.MemorySlots.Max - *virtualmachine.Spec.Guest.MemorySlots.Min - memoryDevices, err := QmpQueryMemoryDevices(virtualmachine) + memoryDevices, err := QmpQueryMemoryDevices(QmpAddr(virtualmachine)) if err != nil { return err } @@ -345,7 +261,7 @@ func QmpPlugMemory(virtualmachine *vmv1.VirtualMachine) error { return errors.New("no empty slots for Memory hotplug") } - mon, err := QmpConnect(virtualmachine) + mon, err := QmpConnect(QmpAddr(virtualmachine)) if err != nil { return err } @@ -382,16 +298,16 @@ func QmpPlugMemory(virtualmachine *vmv1.VirtualMachine) error { } func QmpSyncMemoryToTarget(vm *vmv1.VirtualMachine, migration *vmv1.VirtualMachineMigration) error { - memoryDevices, err := QmpQueryMemoryDevices(vm) + memoryDevices, err := QmpQueryMemoryDevices(QmpAddr(vm)) if err != nil { return err } - memoryDevicesInTarget, err := QmpQueryMemoryDevicesFromRunner(migration.Status.TargetPodIP, vm.Spec.QMP) + memoryDevicesInTarget, err := QmpQueryMemoryDevices(migration.Status.TargetPodIP, vm.Spec.QMP) if err != nil { return err } - target, err := QmpConnectByIP(migration.Status.TargetPodIP, vm.Spec.QMP) + target, err := QmpConnect(migration.Status.TargetPodIP, vm.Spec.QMP) if err != nil { return err } @@ -432,13 +348,13 @@ func QmpSyncMemoryToTarget(vm *vmv1.VirtualMachine, migration *vmv1.VirtualMachi } func QmpPlugMemoryToRunner(ip string, port int32, size int64) error { - memoryDevices, err := QmpQueryMemoryDevicesFromRunner(ip, port) + memoryDevices, err := QmpQueryMemoryDevices(ip, port) if err != nil { return err } plugged := int32(len(memoryDevices)) - mon, err := QmpConnectByIP(ip, port) + mon, err := QmpConnect(ip, port) if err != nil { return err } @@ -469,8 +385,8 @@ func QmpPlugMemoryToRunner(ip string, port int32, size int64) error { return nil } -func QmpUnplugMemory(virtualmachine *vmv1.VirtualMachine) error { - memoryDevices, err := QmpQueryMemoryDevices(virtualmachine) +func QmpUnplugMemory(ip string, port int32) error { + memoryDevices, err := QmpQueryMemoryDevices(ip, port) if err != nil { return err } @@ -479,7 +395,7 @@ func QmpUnplugMemory(virtualmachine *vmv1.VirtualMachine) error { return errors.New("there are no unpluggable Memory slots") } - mon, err := QmpConnect(virtualmachine) + mon, err := QmpConnect(ip, port) if err != nil { return err } @@ -517,8 +433,8 @@ func QmpUnplugMemory(virtualmachine *vmv1.VirtualMachine) error { return merr } -func QmpGetMemorySize(virtualmachine *vmv1.VirtualMachine) (*resource.Quantity, error) { - mon, err := QmpConnect(virtualmachine) +func QmpGetMemorySize(ip string, port int32) (*resource.Quantity, error) { + mon, err := QmpConnect(ip, port) if err != nil { return nil, err } @@ -654,9 +570,9 @@ func QmpStartMigration(virtualmachine *vmv1.VirtualMachine, virtualmachinemigrat return nil } -func QmpGetMigrationInfo(virtualmachine *vmv1.VirtualMachine) (MigrationInfo, error) { +func QmpGetMigrationInfo(ip string, port int32) (MigrationInfo, error) { empty := MigrationInfo{} - mon, err := QmpConnect(virtualmachine) + 
mon, err := QmpConnect(ip, port) if err != nil { return empty, err } @@ -674,8 +590,8 @@ func QmpGetMigrationInfo(virtualmachine *vmv1.VirtualMachine) (MigrationInfo, er return result.Return, nil } -func QmpCancelMigration(virtualmachine *vmv1.VirtualMachine) error { - mon, err := QmpConnect(virtualmachine) +func QmpCancelMigration(ip string, port int32) error { + mon, err := QmpConnect(ip, port) if err != nil { return err } @@ -691,7 +607,7 @@ func QmpCancelMigration(virtualmachine *vmv1.VirtualMachine) error { } func QmpQuit(ip string, port int32) error { - mon, err := QmpConnectByIP(ip, port) + mon, err := QmpConnect(ip, port) if err != nil { return err } diff --git a/neonvm/controllers/virtualmachinemigration_controller.go b/neonvm/controllers/virtualmachinemigration_controller.go index 01e251e0a..07f3beee7 100644 --- a/neonvm/controllers/virtualmachinemigration_controller.go +++ b/neonvm/controllers/virtualmachinemigration_controller.go @@ -371,7 +371,7 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c } // retrieve migration statistics - migrationInfo, err := QmpGetMigrationInfo(vm) + migrationInfo, err := QmpGetMigrationInfo(QmpAddr(vm)) if err != nil { log.Error(err, "Failed to get migration info") return ctrl.Result{}, err @@ -475,7 +475,7 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c // seems migration still going on, just update status with migration progress once per second time.Sleep(time.Second) // re-retrieve migration statistics - migrationInfo, err = QmpGetMigrationInfo(vm) + migrationInfo, err = QmpGetMigrationInfo(QmpAddr(vm)) if err != nil { log.Error(err, "Failed to re-get migration info") return ctrl.Result{}, err @@ -576,7 +576,7 @@ func (r *VirtualMachineMigrationReconciler) doFinalizerOperationsForVirtualMachi // try to cancel migration log.Info("Canceling migration") - if err := QmpCancelMigration(vm); err != nil { + if err := QmpCancelMigration(QmpAddr(vm)); err != nil { // inform about error but not return error to avoid stuckness in reconciliation cycle log.Error(err, "Migration canceling failed") } diff --git a/neonvm/runner/main.go b/neonvm/runner/main.go index b4933b0bd..017aa4f00 100644 --- a/neonvm/runner/main.go +++ b/neonvm/runner/main.go @@ -12,7 +12,6 @@ import ( "bytes" "flag" "fmt" - "log" "math" "net" "os" @@ -36,6 +35,7 @@ import ( "github.com/kdomanski/iso9660" "github.com/opencontainers/runtime-spec/specs-go" "github.com/vishvananda/netlink" + "go.uber.org/zap" "k8s.io/apimachinery/pkg/api/resource" vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" @@ -46,11 +46,12 @@ const ( QEMU_BIN = "qemu-system-x86_64" QEMU_IMG_BIN = "qemu-img" kernelPath = "/vm/kernel/vmlinuz" - kernelCmdline = "init=/neonvm/bin/init memhp_default_state=online_movable console=ttyS1 loglevel=7 root=/dev/vda rw" + kernelCmdline = "panic=-1 init=/neonvm/bin/init memhp_default_state=online_movable console=ttyS1 loglevel=7 root=/dev/vda rw" - rootDiskPath = "/vm/images/rootdisk.qcow2" - runtimeDiskPath = "/vm/images/runtime.iso" - mountedDiskPath = "/vm/images" + rootDiskPath = "/vm/images/rootdisk.qcow2" + runtimeDiskPath = "/vm/images/runtime.iso" + mountedDiskPath = "/vm/images" + qmpUnixSocketForSigtermHandler = "/vm/qmp-sigterm.sock" defaultNetworkBridgeName = "br-def" defaultNetworkTapName = "tap-def" @@ -68,6 +69,17 @@ const ( // in microseconds. 
Min 1000 microseconds, max 1 second cgroupPeriod = uint64(100000) cgroupMountPoint = "/sys/fs/cgroup" + + // cpuLimitOvercommitFactor sets the amount above the VM's spec.guest.cpus.use that we set the + // QEMU cgroup's CPU limit to. e.g. if cpuLimitOvercommitFactor = 3 and the VM is using 0.5 + // CPUs, we set the cgroup to limit QEMU+VM to 1.5 CPUs. + // + // This exists because setting the cgroup exactly equal to the VM's CPU value is overly + // pessimistic, results in a lot of unused capacity on the host, and particularly impacts + // operations that parallelize between the VM and QEMU, like heavy disk access. + // + // See also: https://neondb.slack.com/archives/C03TN5G758R/p1693462680623239 + cpuLimitOvercommitFactor = 4 ) var ( @@ -210,7 +222,14 @@ func createISO9660runtime(diskPath string, command []string, args []string, env mounts = append(mounts, fmt.Sprintf(`/neonvm/bin/mkdir -p %s`, disk.MountPath)) switch { case disk.EmptyDisk != nil: - mounts = append(mounts, fmt.Sprintf(`/neonvm/bin/mount $(/neonvm/bin/blkid -L %s) %s`, disk.Name, disk.MountPath)) + opts := "" + if disk.EmptyDisk.Discard { + opts = "-o discard" + } + + mounts = append(mounts, fmt.Sprintf(`/neonvm/bin/mount %s $(/neonvm/bin/blkid -L %s) %s`, opts, disk.Name, disk.MountPath)) + // Note: chmod must be after mount, otherwise it gets overwritten by mount. + mounts = append(mounts, fmt.Sprintf(`/neonvm/bin/chmod 0777 %s`, disk.MountPath)) case disk.ConfigMap != nil || disk.Secret != nil: mounts = append(mounts, fmt.Sprintf(`/neonvm/bin/mount -o ro,mode=0644 $(/neonvm/bin/blkid -L %s) %s`, disk.Name, disk.MountPath)) case disk.Tmpfs != nil: @@ -330,7 +349,7 @@ func createQCOW2(diskName string, diskPath string, diskSize *resource.Quantity, return nil } -func createISO9660FromPath(diskName string, diskPath string, contentPath string) error { +func createISO9660FromPath(logger *zap.Logger, diskName string, diskPath string, contentPath string) error { writer, err := iso9660.NewWriter() if err != nil { return err @@ -370,7 +389,7 @@ func createISO9660FromPath(diskName string, diskPath string, contentPath string) continue } - log.Printf("adding file: %s\n", outputPath) + logger.Info("adding file to ISO9660 disk", zap.String("path", outputPath)) fileToAdd, err := os.Open(fileName) if err != nil { return err @@ -427,28 +446,35 @@ func checkDevTun() bool { } func main() { + logger := zap.Must(zap.NewProduction()).Named("neonvm-runner") + var vmSpecDump string var vmStatusDump string flag.StringVar(&vmSpecDump, "vmspec", vmSpecDump, "Base64 encoded VirtualMachine json specification") flag.StringVar(&vmStatusDump, "vmstatus", vmStatusDump, "Base64 encoded VirtualMachine json status") flag.Parse() + selfPodName, ok := os.LookupEnv("K8S_POD_NAME") + if !ok { + logger.Fatal("environment variable K8S_POD_NAME missing") + } + vmSpecJson, err := base64.StdEncoding.DecodeString(vmSpecDump) if err != nil { - log.Fatalf("Failed to decode VirtualMachine Spec dump: %s", err) + logger.Fatal("Failed to decode VirtualMachine Spec dump", zap.Error(err)) } vmStatusJson, err := base64.StdEncoding.DecodeString(vmStatusDump) if err != nil { - log.Fatalf("Failed to decode VirtualMachine Status dump: %s", err) + logger.Fatal("Failed to decode VirtualMachine Status dump", zap.Error(err)) } vmSpec := &vmv1.VirtualMachineSpec{} if err := json.Unmarshal(vmSpecJson, vmSpec); err != nil { - log.Fatalf("Failed to unmarshal VM Spec: %s", err) + logger.Fatal("Failed to unmarshal VM spec", zap.Error(err)) } vmStatus := &vmv1.VirtualMachineStatus{} if err 
:= json.Unmarshal(vmStatusJson, vmStatus); err != nil { - log.Fatalf("Failed to unmarshal VM Status: %s", err) + logger.Fatal("Failed to unmarshal VM Status", zap.Error(err)) } qemuCPUs := processCPUs(vmSpec.Guest.CPUs) @@ -468,7 +494,7 @@ func main() { // create iso9660 disk with runtime options (command, args, envs, mounts) if err = createISO9660runtime(runtimeDiskPath, vmSpec.Guest.Command, vmSpec.Guest.Args, vmSpec.Guest.Env, vmSpec.Disks); err != nil { - log.Fatalln(err) + logger.Fatal("Failed to create iso9660 disk", zap.Error(err)) } // resize rootDisk image of size specified and new size more than current @@ -478,7 +504,7 @@ func main() { // get current disk size by qemu-img info command qemuImgOut, err := exec.Command(QEMU_IMG_BIN, "info", "--output=json", rootDiskPath).Output() if err != nil { - log.Fatalln(err) + logger.Fatal("could not get root image size", zap.Error(err)) } imageSize := QemuImgOutputPartial{} json.Unmarshal(qemuImgOut, &imageSize) @@ -487,12 +513,12 @@ func main() { // going to resize if !vmSpec.Guest.RootDisk.Size.IsZero() { if vmSpec.Guest.RootDisk.Size.Cmp(*imageSizeQuantity) == 1 { - log.Printf("resizing rootDisk from %s to %s\n", imageSizeQuantity.String(), vmSpec.Guest.RootDisk.Size.String()) + logger.Info(fmt.Sprintf("resizing rootDisk from %s to %s", imageSizeQuantity.String(), vmSpec.Guest.RootDisk.Size.String())) if err := execFg(QEMU_IMG_BIN, "resize", rootDiskPath, fmt.Sprintf("%d", vmSpec.Guest.RootDisk.Size.Value())); err != nil { - log.Fatal(err) + logger.Fatal("Failed to resize rootDisk", zap.Error(err)) } } else { - log.Printf("rootDisk.size (%s) should be more than size in image (%s)\n", vmSpec.Guest.RootDisk.Size.String(), imageSizeQuantity.String()) + logger.Info(fmt.Sprintf("rootDisk.size (%s) is less than than image size (%s)", vmSpec.Guest.RootDisk.Size.String(), imageSizeQuantity.String())) } } @@ -509,6 +535,7 @@ func main() { "-serial", "stdio", "-msg", "timestamp=on", "-qmp", fmt.Sprintf("tcp:0.0.0.0:%d,server,wait=off", vmSpec.QMP), + "-qmp", fmt.Sprintf("unix:%s,server,wait=off", qmpUnixSocketForSigtermHandler), } // disk details @@ -517,18 +544,22 @@ func main() { for _, disk := range vmSpec.Disks { switch { case disk.EmptyDisk != nil: - log.Printf("creating QCOW2 image '%s' with empty ext4 filesystem", disk.Name) + logger.Info("creating QCOW2 image with empty ext4 filesystem", zap.String("diskName", disk.Name)) dPath := fmt.Sprintf("%s/%s.qcow2", mountedDiskPath, disk.Name) if err := createQCOW2(disk.Name, dPath, &disk.EmptyDisk.Size, nil); err != nil { - log.Fatalln(err) + logger.Fatal("Failed to create QCOW2 image", zap.Error(err)) } - qemuCmd = append(qemuCmd, "-drive", fmt.Sprintf("id=%s,file=%s,if=virtio,media=disk,cache=none", disk.Name, dPath)) + discard := "" + if disk.EmptyDisk.Discard { + discard = ",discard=unmap" + } + qemuCmd = append(qemuCmd, "-drive", fmt.Sprintf("id=%s,file=%s,if=virtio,media=disk,cache=none%s", disk.Name, dPath, discard)) case disk.ConfigMap != nil || disk.Secret != nil: dPath := fmt.Sprintf("%s/%s.qcow2", mountedDiskPath, disk.Name) mnt := fmt.Sprintf("/vm/mounts%s", disk.MountPath) - log.Printf("creating iso9660 image '%s' for '%s' from path '%s'", dPath, disk.Name, mnt) - if err := createISO9660FromPath(disk.Name, dPath, mnt); err != nil { - log.Fatalln(err) + logger.Info("creating iso9660 image", zap.String("diskPath", dPath), zap.String("diskName", disk.Name), zap.String("mountPath", mnt)) + if err := createISO9660FromPath(logger, disk.Name, dPath, mnt); err != nil { + logger.Fatal("Failed 
to create ISO9660 image", zap.Error(err)) } qemuCmd = append(qemuCmd, "-drive", fmt.Sprintf("id=%s,file=%s,if=virtio,media=cdrom,cache=none", disk.Name, dPath)) default: @@ -537,9 +568,12 @@ func main() { } // cpu details - if vmSpec.EnableAcceleration && checkKVM() { - log.Println("using KVM acceleration") + // NB: EnableAcceleration guaranteed non-nil because the k8s API server sets the default for us. + if *vmSpec.EnableAcceleration && checkKVM() { + logger.Info("using KVM acceleration") qemuCmd = append(qemuCmd, "-enable-kvm") + } else { + logger.Warn("not using KVM acceleration") } qemuCmd = append(qemuCmd, "-cpu", "max") qemuCmd = append(qemuCmd, "-smp", strings.Join(cpus, ",")) @@ -548,9 +582,9 @@ func main() { qemuCmd = append(qemuCmd, "-m", strings.Join(memory, ",")) // default (pod) net details - macDefault, err := defaultNetwork(defaultNetworkCIDR, vmSpec.Guest.Ports) + macDefault, err := defaultNetwork(logger, defaultNetworkCIDR, vmSpec.Guest.Ports) if err != nil { - log.Fatalf("can not setup default network: %s", err) + logger.Fatal("cannot set up default network", zap.Error(err)) } qemuCmd = append(qemuCmd, "-netdev", fmt.Sprintf("tap,id=default,ifname=%s,script=no,downscript=no,vhost=on", defaultNetworkTapName)) qemuCmd = append(qemuCmd, "-device", fmt.Sprintf("virtio-net-pci,netdev=default,mac=%s", macDefault.String())) @@ -559,7 +593,7 @@ func main() { if vmSpec.ExtraNetwork != nil && vmSpec.ExtraNetwork.Enable { macOverlay, err := overlayNetwork(vmSpec.ExtraNetwork.Interface) if err != nil { - log.Fatalf("can not setup overlay network: %s", err) + logger.Fatal("cannot set up overlay network", zap.Error(err)) } qemuCmd = append(qemuCmd, "-netdev", fmt.Sprintf("tap,id=overlay,ifname=%s,script=no,downscript=no,vhost=on", overlayNetworkTapName)) qemuCmd = append(qemuCmd, "-device", fmt.Sprintf("virtio-net-pci,netdev=overlay,mac=%s", macOverlay.String())) @@ -578,40 +612,56 @@ func main() { qemuCmd = append(qemuCmd, "-incoming", fmt.Sprintf("tcp:0:%d", vmv1.MigrationPort)) } - // leading slash is important - cgroupPath := fmt.Sprintf("/%s-vm-runner", vmStatus.PodName) + selfCgroupPath, err := getSelfCgroupPath(logger) + if err != nil { + logger.Fatal("Failed to get self cgroup path", zap.Error(err)) + } + // Sometimes we'll get just '/' as our cgroup path. If that's the case, we should reset it so + // that the cgroup '/neonvm-qemu-...' still works. + if selfCgroupPath == "/" { + selfCgroupPath = "" + } + // ... but also we should have some uniqueness just in case, so we're not sharing a root level + // cgroup if that *is* what's happening. This *should* only be relevant for local clusters. + // + // We don't want to just use the VM spec's .status.PodName because during migrations that will + // be equal to the source pod, not this one, which may be... somewhat confusing. 
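	// For illustration (editor's note, with made-up example values): if /proc/self/cgroup resolves
	// to something like "/kubepods/burstable/pod<uid>/<container-id>" and this runner pod is named
	// "vm-foo-x7k2p", the QEMU cgroup path computed below becomes
	// ".../<container-id>/neonvm-qemu-vm-foo-x7k2p".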
+ cgroupPath := fmt.Sprintf("%s/neonvm-qemu-%s", selfCgroupPath, selfPodName) + + logger.Info("Determined QEMU cgroup path", zap.String("path", cgroupPath)) - if err := setCgroupLimit(qemuCPUs.use, cgroupPath); err != nil { - log.Fatalf("Failed to set cgroup limit: %s", err) + if err := setCgroupLimit(logger, qemuCPUs.use, cgroupPath); err != nil { + logger.Fatal("Failed to set cgroup limit", zap.Error(err)) } - defer cleanupCgroup(cgroupPath) ctx, cancel := context.WithCancel(context.Background()) wg := sync.WaitGroup{} wg.Add(1) - go terminateQemuOnSigterm(ctx, vmSpec.QMP, &wg) + go terminateQemuOnSigterm(ctx, logger, &wg) wg.Add(1) - go listenForCPUChanges(ctx, vmSpec.RunnerPort, cgroupPath, &wg) + go listenForCPUChanges(ctx, logger, vmSpec.RunnerPort, cgroupPath, &wg) args := append([]string{"-g", fmt.Sprintf("cpu:%s", cgroupPath), QEMU_BIN}, qemuCmd...) - log.Printf("using cgexec args: %v", args) + logger.Info("calling cgexec", zap.Strings("args", args)) if err := execFg("cgexec", args...); err != nil { - log.Printf("Qemu exited: %s", err) + logger.Error("QEMU exited with error", zap.Error(err)) + } else { + logger.Info("QEMU exited without error") } cancel() wg.Wait() } -func handleCPUChange(w http.ResponseWriter, r *http.Request, cgroupPath string) { +func handleCPUChange(logger *zap.Logger, w http.ResponseWriter, r *http.Request, cgroupPath string) { if r.Method != "POST" { - log.Printf("unexpected method: %s\n", r.Method) + logger.Error("unexpected method", zap.String("method", r.Method)) w.WriteHeader(400) return } body, err := io.ReadAll(r.Body) if err != nil { - log.Printf("could not read body: %s\n", err) + logger.Error("could not read body", zap.Error(err)) w.WriteHeader(400) return } @@ -619,16 +669,16 @@ func handleCPUChange(w http.ResponseWriter, r *http.Request, cgroupPath string) parsed := api.VCPUChange{} err = json.Unmarshal(body, &parsed) if err != nil { - log.Printf("could not parse body: %s\n", err) + logger.Error("could not parse body", zap.Error(err)) w.WriteHeader(400) return } // update cgroup - log.Printf("got CPU update %v", parsed.VCPUs.AsFloat64()) - err = setCgroupLimit(parsed.VCPUs, cgroupPath) + logger.Info("got CPU update", zap.Float64("CPU", parsed.VCPUs.AsFloat64())) + err = setCgroupLimit(logger, parsed.VCPUs, cgroupPath) if err != nil { - log.Printf("could not set cgroup limit: %s\n", err) + logger.Error("could not set cgroup limit", zap.Error(err)) w.WriteHeader(500) return } @@ -636,23 +686,23 @@ func handleCPUChange(w http.ResponseWriter, r *http.Request, cgroupPath string) w.WriteHeader(200) } -func handleCPUCurrent(w http.ResponseWriter, r *http.Request, cgroupPath string) { +func handleCPUCurrent(logger *zap.Logger, w http.ResponseWriter, r *http.Request, cgroupPath string) { if r.Method != "GET" { - log.Printf("unexpected method: %s\n", r.Method) + logger.Error("unexpected method", zap.String("method", r.Method)) w.WriteHeader(400) return } cpus, err := getCgroupQuota(cgroupPath) if err != nil { - log.Printf("could not get cgroup quota: %s\n", err) + logger.Error("could not get cgroup quota", zap.Error(err)) w.WriteHeader(500) return } resp := api.VCPUCgroup{VCPUs: *cpus} body, err := json.Marshal(resp) if err != nil { - log.Printf("could not marshal body: %s\n", err) + logger.Error("could not marshal body", zap.Error(err)) w.WriteHeader(500) return } @@ -661,14 +711,17 @@ func handleCPUCurrent(w http.ResponseWriter, r *http.Request, cgroupPath string) w.Write(body) } -func listenForCPUChanges(ctx context.Context, port int32, cgroupPath 
string, wg *sync.WaitGroup) { +func listenForCPUChanges(ctx context.Context, logger *zap.Logger, port int32, cgroupPath string, wg *sync.WaitGroup) { defer wg.Done() mux := http.NewServeMux() + loggerHandlers := logger.Named("http-handlers") + cpuChangeLogger := loggerHandlers.Named("cpu_change") mux.HandleFunc("/cpu_change", func(w http.ResponseWriter, r *http.Request) { - handleCPUChange(w, r, cgroupPath) + handleCPUChange(cpuChangeLogger, w, r, cgroupPath) }) + cpuCurrentLogger := loggerHandlers.Named("cpu_current") mux.HandleFunc("/cpu_current", func(w http.ResponseWriter, r *http.Request) { - handleCPUCurrent(w, r, cgroupPath) + handleCPUCurrent(cpuCurrentLogger, w, r, cgroupPath) }) server := http.Server{ Addr: fmt.Sprintf("0.0.0.0:%d", port), @@ -684,23 +737,141 @@ func listenForCPUChanges(ctx context.Context, port int32, cgroupPath string, wg select { case err := <-errChan: if errors.Is(err, http.ErrServerClosed) { - log.Println("cpu_change server closed") + logger.Info("cpu_change server closed") } else if err != nil { - log.Fatalf("error starting server: %s\n", err) + logger.Fatal("cpu_change exited with error", zap.Error(err)) } case <-ctx.Done(): err := server.Shutdown(context.Background()) - log.Printf("shut down cpu_change server: %v", err) + logger.Info("shut down cpu_change server", zap.Error(err)) + } +} + +func getSelfCgroupPath(logger *zap.Logger) (string, error) { + // There's some fun stuff here. For general information, refer to `man 7 cgroups` - specifically + // the section titled "/proc files" - for "/proc/cgroups" and "/proc/pid/cgroup". + // + // In general, the idea is this: If we start QEMU outside of the cgroup for the container we're + // running in, we run into multiple problems - it won't show up in metrics, and we'll have to + // clean up the cgroup ourselves. (not good!). + // + // So we'd like to start it in the same cgroup - the question is just how to find the name of + // the cgroup we're running in. Thankfully, this is visible in `/proc/self/cgroup`! + // The only difficulty is the file format. + // + // In cgroup v1 (which is what we have on EKS [as of 2023-07]), the contents of + // /proc//cgroup tend to look like: + // + // 11:cpuset:/path/to/cgroup + // 10:perf_event:/path/to/cgroup + // 9:hugetlb:/path/to/cgroup + // 8:blkio:/path/to/cgroup + // 7:pids:/path/to/cgroup + // 6:freezer:/path/to/cgroup + // 5:memory:/path/to/cgroup + // 4:net_cls,net_prio:/path/to/cgroup + // 3:cpu,cpuacct:/path/to/cgroup + // 2:devices:/path/to/cgroup + // 1:name=systemd:/path/to/cgroup + // + // For cgroup v2, we have: + // + // 0::/path/to/cgroup + // + // The file format is defined to have 3 fields, separated by colons. The first field gives the + // Hierarchy ID, which is guaranteed to be 0 if the cgroup is part of a cgroup v2 ("unified") + // hierarchy. + // The second field is a comma-separated list of the controllers. Or, if it's cgroup v2, nothing. + // The third field is the "pathname" of the cgroup *in its hierarchy*, relative to the mount + // point of the hierarchy. + // + // So we're looking for EITHER: + // 1. an entry like ':,cpu,:/path/to/cgroup (cgroup v1); OR + // 2. an entry like '0::/path/to/cgroup', and we'll return the path (cgroup v2) + // We primarily care about the 'cpu' controller, so for cgroup v1, we'll search for that instead + // of e.g. "name=systemd", although it *really* shouldn't matter because the paths will be the + // same anyways. 
+ // + // Now: Technically it's possible to run a "hybrid" system with both cgroup v1 and v2 + // hierarchies. If this is the case, it's possible for /proc/self/cgroup to show *some* v1 + // hierarchies attached, in addition to the v2 "unified" hierarchy, for the same cgroup. To + // handle this, we should look for a cgroup v1 "cpu" controller, and if we can't find it, try + // for the cgroup v2 unified entry. + // + // As far as I (@sharnoff) can tell, the only case where that might actually get messed up is if + // the CPU controller isn't available for the cgroup we're running in, in which case there's + // nothing we can do about it! (other than e.g. using a cgroup higher up the chain, which would + // be really bad tbh). + + // --- + // On to the show! + + procSelfCgroupContents, err := os.ReadFile("/proc/self/cgroup") + if err != nil { + return "", fmt.Errorf("failed to read /proc/self/cgroup: %w", err) + } + logger.Info("Read /proc/self/cgroup", zap.String("contents", string(procSelfCgroupContents))) + + // Collect all candidate paths from the lines of the file. If there isn't exactly one, + // something's wrong and we should make an error. + var v1Candidates []string + var v2Candidates []string + for lineno, line := range strings.Split(string(procSelfCgroupContents), "\n") { + if line == "" { + continue + } + + // Split into the three ':'-delimited fields + fields := strings.Split(line, ":") + if len(fields) != 3 { + return "", fmt.Errorf("line %d of /proc/self/cgroup did not have 3 colon-delimited fields", lineno+1) + } + + id := fields[0] + controllers := fields[1] + path := fields[2] + if id == "0" { + v2Candidates = append(v2Candidates, path) + continue + } + + // It's not cgroup v2, otherwise id would have been 0. So, check if the comma-separated list + // of controllers contains 'cpu' as an entry. + for _, c := range strings.Split(controllers, ",") { + if c == "cpu" { + v1Candidates = append(v1Candidates, path) + break // ... and then continue to the next loop iteration + } + } + } + + var errMsg string + + // Check v1, then v2 + if len(v1Candidates) == 1 { + return v1Candidates[0], nil + } else if len(v1Candidates) != 0 { + errMsg = "More than one applicable cgroup v1 entry in /proc/self/cgroup" + } else if len(v2Candidates) == 1 { + return v2Candidates[0], nil + } else if len(v2Candidates) != 0 { + errMsg = "More than one applicable cgroup v2 entry in /proc/self/cgroup" + } else { + errMsg = "Couldn't find applicable entry in /proc/self/cgroup" } + + return "", errors.New(errMsg) } -func setCgroupLimit(r vmv1.MilliCPU, cgroupPath string) error { +func setCgroupLimit(logger *zap.Logger, r vmv1.MilliCPU, cgroupPath string) error { + r *= cpuLimitOvercommitFactor + isV2 := cgroups.Mode() == cgroups.Unified period := cgroupPeriod // quota may be greater than period if the cgroup is allowed // to use more than 100% of a CPU. 
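	// Worked example (editor's note, using the constants defined above): a VM with
	// spec.guest.cpus.use = 500m gives r = 500 MilliCPU; after the overcommit factor of 4 this
	// becomes r = 2000, so quota = 2000/1000 * 100000 = 200000 against a period of 100000,
	// i.e. QEMU plus the VM may use up to 2 full CPUs. getCgroupQuota divides by the same
	// factor on the way back out, so it reports 500m again.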
quota := int64(float64(r) / float64(1000) * float64(cgroupPeriod)) - log.Printf("setting cgroup to %v %v\n", quota, period) + logger.Info(fmt.Sprintf("setting cgroup CPU limit %v %v", quota, period)) if isV2 { resources := cgroup2.Resources{ CPU: &cgroup2.CPU{ @@ -726,23 +897,6 @@ func setCgroupLimit(r vmv1.MilliCPU, cgroupPath string) error { return nil } -func cleanupCgroup(cgroupPath string) error { - isV2 := cgroups.Mode() == cgroups.Unified - if isV2 { - control, err := cgroup2.Load(cgroupPath) - if err != nil { - return err - } - return control.Delete() - } else { - control, err := cgroup1.Load(cgroup1.StaticPath(cgroupPath)) - if err != nil { - return err - } - return control.Delete() - } -} - func getCgroupQuota(cgroupPath string) (*vmv1.MilliCPU, error) { isV2 := cgroups.Mode() == cgroups.Unified var path string @@ -765,6 +919,7 @@ func getCgroupQuota(cgroupPath string) (*vmv1.MilliCPU, error) { return nil, err } cpu := vmv1.MilliCPU(uint32(quota * 1000 / cgroupPeriod)) + cpu /= cpuLimitOvercommitFactor return &cpu, nil } @@ -793,9 +948,11 @@ func processCPUs(cpus vmv1.CPUs) QemuCPUs { } } -func terminateQemuOnSigterm(ctx context.Context, qmpPort int32, wg *sync.WaitGroup) { +func terminateQemuOnSigterm(ctx context.Context, logger *zap.Logger, wg *sync.WaitGroup) { + logger = logger.Named("terminate-qemu-on-sigterm") + defer wg.Done() - log.Println("watching OS signals") + logger.Info("watching OS signals") c := make(chan os.Signal, 1) // we need to reserve to buffer size 1, so the notifier are not blocked signal.Notify(c, os.Interrupt, syscall.SIGTERM) select { @@ -803,16 +960,16 @@ func terminateQemuOnSigterm(ctx context.Context, qmpPort int32, wg *sync.WaitGro case <-ctx.Done(): } - log.Println("got signal, sending powerdown command to QEMU") + logger.Info("got signal, sending powerdown command to QEMU") - mon, err := qmp.NewSocketMonitor("tcp", fmt.Sprintf("127.0.0.1:%d", qmpPort), 2*time.Second) + mon, err := qmp.NewSocketMonitor("unix", qmpUnixSocketForSigtermHandler, 2*time.Second) if err != nil { - log.Println(err) + logger.Error("failed to connect to QEMU monitor", zap.Error(err)) return } if err := mon.Connect(); err != nil { - log.Println(err) + logger.Error("failed to start monitor connection", zap.Error(err)) return } defer mon.Disconnect() @@ -820,11 +977,11 @@ func terminateQemuOnSigterm(ctx context.Context, qmpPort int32, wg *sync.WaitGro qmpcmd := []byte(`{"execute": "system_powerdown"}`) _, err = mon.Run(qmpcmd) if err != nil { - log.Println(err) + logger.Error("failed to execute system_powerdown command", zap.Error(err)) return } - log.Println("system_powerdown command sent to QEMU") + logger.Info("system_powerdown command sent to QEMU") return } @@ -867,25 +1024,28 @@ func execFg(name string, arg ...string) error { return nil } -func defaultNetwork(cidr string, ports []vmv1.Port) (mac.MAC, error) { +func defaultNetwork(logger *zap.Logger, cidr string, ports []vmv1.Port) (mac.MAC, error) { // gerenare random MAC for default Guest interface mac, err := mac.GenerateRandMAC() if err != nil { + logger.Fatal("could not generate random MAC", zap.Error(err)) return nil, err } // create an configure linux bridge - log.Printf("setup bridge interface %s", defaultNetworkBridgeName) + logger.Info("setup bridge interface", zap.String("name", defaultNetworkBridgeName)) bridge := &netlink.Bridge{ LinkAttrs: netlink.LinkAttrs{ Name: defaultNetworkBridgeName, }, } if err := netlink.LinkAdd(bridge); err != nil { + logger.Fatal("could not create bridge interface", zap.Error(err)) 
return nil, err } ipPod, ipVm, mask, err := calcIPs(cidr) if err != nil { + logger.Fatal("could not parse IP", zap.Error(err)) return nil, err } bridgeAddr := &netlink.Addr{ @@ -895,15 +1055,17 @@ func defaultNetwork(cidr string, ports []vmv1.Port) (mac.MAC, error) { }, } if err := netlink.AddrAdd(bridge, bridgeAddr); err != nil { + logger.Fatal("could not parse IP", zap.Error(err)) return nil, err } if err := netlink.LinkSetUp(bridge); err != nil { + logger.Fatal("could not set up bridge", zap.Error(err)) return nil, err } // create an configure TAP interface if !checkDevTun() { - log.Printf("create /dev/net/tun") + logger.Info("create /dev/net/tun") if err := execFg("mkdir", "-p", "/dev/net"); err != nil { return nil, err } @@ -915,7 +1077,7 @@ func defaultNetwork(cidr string, ports []vmv1.Port) (mac.MAC, error) { } } - log.Printf("setup tap interface %s", defaultNetworkTapName) + logger.Info("setup tap interface", zap.String("name", defaultNetworkTapName)) tap := &netlink.Tuntap{ LinkAttrs: netlink.LinkAttrs{ Name: defaultNetworkTapName, @@ -924,30 +1086,35 @@ func defaultNetwork(cidr string, ports []vmv1.Port) (mac.MAC, error) { Flags: netlink.TUNTAP_DEFAULTS, } if err := netlink.LinkAdd(tap); err != nil { + logger.Error("could not add tap device", zap.Error(err)) return nil, err } if err := netlink.LinkSetMaster(tap, bridge); err != nil { + logger.Error("could not set up tap as master", zap.Error(err)) return nil, err } if err := netlink.LinkSetUp(tap); err != nil { + logger.Error("could not set up tap device", zap.Error(err)) return nil, err } // setup masquerading outgoing (from VM) traffic - log.Println("setup masquerading for outgoing traffic") + logger.Info("setup masquerading for outgoing traffic") if err := execFg("iptables", "-t", "nat", "-A", "POSTROUTING", "-o", "eth0", "-j", "MASQUERADE"); err != nil { + logger.Error("could not setup masquerading for outgoing traffic", zap.Error(err)) return nil, err } // pass incoming traffic to .Guest.Spec.Ports into VM for _, port := range ports { - log.Printf("setup DNAT for incoming traffic, port %d", port.Port) + logger.Info(fmt.Sprintf("setup DNAT for incoming traffic, port %d", port.Port)) iptablesArgs := []string{ "-t", "nat", "-A", "PREROUTING", "-i", "eth0", "-p", fmt.Sprint(port.Protocol), "--dport", fmt.Sprint(port.Port), "-j", "DNAT", "--to", fmt.Sprintf("%s:%d", ipVm.String(), port.Port), } if err := execFg("iptables", iptablesArgs...); err != nil { + logger.Error("could not set up DNAT for incoming traffic", zap.Error(err)) return nil, err } } @@ -955,13 +1122,14 @@ func defaultNetwork(cidr string, ports []vmv1.Port) (mac.MAC, error) { // get dns details from /etc/resolv.conf resolvConf, err := getResolvConf() if err != nil { + logger.Error("could not get DNS details", zap.Error(err)) return nil, err } dns := getNameservers(resolvConf.Content, types.IP)[0] dnsSearch := strings.Join(getSearchDomains(resolvConf.Content), ",") // prepare dnsmask command line (instead of config file) - log.Printf("run dnsmqsq for interface %s", defaultNetworkBridgeName) + logger.Info("run dnsmasq for interface", zap.String("name", defaultNetworkBridgeName)) dnsMaskCmd := []string{ "--port=0", "--bind-interfaces", @@ -977,6 +1145,7 @@ func defaultNetwork(cidr string, ports []vmv1.Port) (mac.MAC, error) { // run dnsmasq for default Guest interface if err := execFg("dnsmasq", dnsMaskCmd...); err != nil { + logger.Error("could not run dnsmasq", zap.Error(err)) return nil, err } diff --git a/neonvm/tools/vm-builder-generic/main.go 
b/neonvm/tools/vm-builder-generic/main.go index d7110d668..a13f1066c 100644 --- a/neonvm/tools/vm-builder-generic/main.go +++ b/neonvm/tools/vm-builder-generic/main.go @@ -44,12 +44,14 @@ RUN set -e \ su-exec \ e2fsprogs-extra \ blkid \ + flock \ && mv /sbin/acpid /neonvm/bin/ \ && mv /sbin/udevd /neonvm/bin/ \ && mv /sbin/agetty /neonvm/bin/ \ && mv /sbin/su-exec /neonvm/bin/ \ && mv /usr/sbin/resize2fs /neonvm/bin/resize2fs \ && mv /sbin/blkid /neonvm/bin/blkid \ + && mv /usr/bin/flock /neonvm/bin/flock \ && mkdir -p /neonvm/lib \ && cp -f /lib/ld-musl-x86_64.so.1 /neonvm/lib/ \ && cp -f /lib/libblkid.so.1.1.0 /neonvm/lib/libblkid.so.1 \ @@ -77,9 +79,9 @@ RUN set -e \ ADD inittab /neonvm/bin/inittab ADD vminit /neonvm/bin/vminit ADD vmstart /neonvm/bin/vmstart +ADD vmshutdown /neonvm/bin/vmshutdown ADD vmacpi /neonvm/acpi/vmacpi -ADD powerdown /neonvm/bin/powerdown -RUN chmod +rx /neonvm/bin/vminit /neonvm/bin/vmstart /neonvm/bin/powerdown +RUN chmod +rx /neonvm/bin/vminit /neonvm/bin/vmstart /neonvm/bin/vmshutdown FROM vm-runtime AS builder ARG DISK_SIZE @@ -143,26 +145,37 @@ fi /neonvm/bin/chmod +x /neonvm/bin/vmstarter.sh -/neonvm/bin/su-exec {{.User}} /neonvm/bin/sh /neonvm/bin/vmstarter.sh +/neonvm/bin/flock -o /neonvm/vmstart.lock -c 'test -e /neonvm/vmstart.allowed && /neonvm/bin/su-exec {{.User}} /neonvm/bin/sh /neonvm/bin/vmstarter.sh' ` scriptInitTab = ` ::sysinit:/neonvm/bin/vminit +::once:/neonvm/bin/touch /neonvm/vmstart.allowed ::respawn:/neonvm/bin/udhcpc -t 1 -T 1 -A 1 -f -i eth0 -O 121 -O 119 -s /neonvm/bin/udhcpc.script ::respawn:/neonvm/bin/udevd ::respawn:/neonvm/bin/acpid -f -c /neonvm/acpi ::respawn:/neonvm/bin/vmstart ttyS0::respawn:/neonvm/bin/agetty --8bits --local-line --noissue --noclear --noreset --host console --login-program /neonvm/bin/login --login-pause --autologin root 115200 ttyS0 linux +::shutdown:/neonvm/bin/vmshutdown ` scriptVmAcpi = ` event=button/power -action=/neonvm/bin/powerdown +action=/neonvm/bin/poweroff ` - scriptPowerDown = `#!/neonvm/bin/sh - -/neonvm/bin/poweroff + scriptVmShutdown = `#!/neonvm/bin/sh +rm /neonvm/vmstart.allowed +if [ -e /neonvm/vmstart.allowed ]; then + echo "Error: could not remove vmstart.allowed marker, might hang indefinitely during shutdown" 1>&2 +fi +# we inhibited new command starts, but there may still be a command running +while ! /neonvm/bin/flock -n /neonvm/vmstart.lock true; do + # TODO: should be sufficient to keep track of the vmstarter.sh pid and signal it. + echo "Warning: no generic mechanism to signal graceful shutdown request to vmstarter.sh" 1>&2 + exit 2 +done +echo "vmstart workload shut down cleanly" 1>&2 ` scriptVmInit = `#!/neonvm/bin/sh @@ -187,6 +200,18 @@ chmod 1777 /dev/shm mount -t proc proc /proc mount -t sysfs sysfs /sys mount -t cgroup2 cgroup2 /sys/fs/cgroup + +# Allow all users to move processes to/from the root cgroup. +# +# This is required in order to be able to 'cgexec' anything, if the entrypoint is not being run as +# root, because moving tasks betweeen one cgroup and another *requires write access to the +# cgroup.procs file of the common ancestor*, and because the entrypoint isn't already in a cgroup, +# any new tasks are automatically placed in the top-level cgroup. +# +# This *would* be bad for security, if we relied on cgroups for security; but instead because they +# are just used for cooperative signaling, this should be mostly ok. 
+chmod go+w /sys/fs/cgroup/cgroup.procs + mount -t devpts -o noexec,nosuid devpts /dev/pts mount -t tmpfs -o noexec,nosuid,nodev shm-tmpfs /dev/shm @@ -366,9 +391,9 @@ func main() { }{ {"Dockerfile", dockerfileVmBuilder}, {"vmstart", scriptVmStart}, + {"vmshutdown", scriptVmShutdown}, {"inittab", scriptInitTab}, {"vmacpi", scriptVmAcpi}, - {"powerdown", scriptPowerDown}, {"vminit", scriptVmInit}, } diff --git a/neonvm/tools/vm-builder/main.go b/neonvm/tools/vm-builder/main.go index 3f1045202..58ef05a3e 100644 --- a/neonvm/tools/vm-builder/main.go +++ b/neonvm/tools/vm-builder/main.go @@ -22,16 +22,14 @@ import ( // vm-builder --src alpine:3.16 --dst vm-alpine:dev --file vm-alpine.qcow2 -var entrypointPrefix = []string{"/usr/bin/cgexec", "-g", "memory:neon-postgres"} - const ( dockerfileVmBuilder = ` -FROM {{.InformantImage}} as informant +FROM {{.MonitorImage}} as monitor # Build cgroup-tools # # At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically -# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-informant +# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor # requires cgroup v2, so we'll build cgroup-tools ourselves. FROM debian:bullseye-slim as libcgroup-builder ENV LIBCGROUP_VERSION v2.0.3 @@ -89,7 +87,7 @@ RUN set -e \ FROM {{.RootDiskImage}} AS rootdisk USER root -RUN adduser --system --disabled-login --no-create-home --home /nonexistent --gecos "informant user" --shell /bin/false vm-informant +RUN adduser --system --disabled-login --no-create-home --home /nonexistent --gecos "monitor user" --shell /bin/false vm-monitor # tweak nofile limits RUN set -e \ @@ -108,7 +106,7 @@ RUN set -e \ USER postgres -COPY --from=informant /usr/bin/vm-informant /usr/local/bin/vm-informant +COPY --from=monitor /usr/bin/vm-monitor /usr/local/bin/vm-monitor COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/ COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/ COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/ @@ -133,12 +131,14 @@ RUN set -e \ su-exec \ e2fsprogs-extra \ blkid \ + flock \ && mv /sbin/acpid /neonvm/bin/ \ && mv /sbin/udevd /neonvm/bin/ \ && mv /sbin/agetty /neonvm/bin/ \ && mv /sbin/su-exec /neonvm/bin/ \ && mv /usr/sbin/resize2fs /neonvm/bin/resize2fs \ && mv /sbin/blkid /neonvm/bin/blkid \ + && mv /usr/bin/flock /neonvm/bin/flock \ && mkdir -p /neonvm/lib \ && cp -f /lib/ld-musl-x86_64.so.1 /neonvm/lib/ \ && cp -f /lib/libblkid.so.1.1.0 /neonvm/lib/libblkid.so.1 \ @@ -171,10 +171,10 @@ RUN set -e \ ADD inittab /neonvm/bin/inittab ADD vminit /neonvm/bin/vminit ADD vmstart /neonvm/bin/vmstart +ADD vmshutdown /neonvm/bin/vmshutdown ADD vmacpi /neonvm/acpi/vmacpi ADD vector.yaml /neonvm/config/vector.yaml -ADD powerdown /neonvm/bin/powerdown -RUN chmod +rx /neonvm/bin/vminit /neonvm/bin/vmstart /neonvm/bin/powerdown +RUN chmod +rx /neonvm/bin/vminit /neonvm/bin/vmstart /neonvm/bin/vmshutdown FROM vm-runtime AS builder ARG DISK_SIZE @@ -235,32 +235,42 @@ fi /neonvm/bin/chmod +x /neonvm/bin/vmstarter.sh -/neonvm/bin/su-exec {{.User}} /neonvm/bin/sh /neonvm/bin/vmstarter.sh +/neonvm/bin/flock -o /neonvm/vmstart.lock -c 'test -e /neonvm/vmstart.allowed && /neonvm/bin/su-exec {{.User}} /neonvm/bin/sh /neonvm/bin/vmstarter.sh' ` scriptInitTab = ` ::sysinit:/neonvm/bin/vminit ::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664 +::once:/neonvm/bin/touch /neonvm/vmstart.allowed ::respawn:/neonvm/bin/udhcpc -t 1 -T 1 -A 1 -f -i eth0 -O 121 
-O 119 -s /neonvm/bin/udhcpc.script ::respawn:/neonvm/bin/udevd ::respawn:/neonvm/bin/acpid -f -c /neonvm/acpi ::respawn:/neonvm/bin/vector -c /neonvm/config/vector.yaml --config-dir /etc/vector ::respawn:/neonvm/bin/vmstart -::respawn:su -p vm-informant -c '/usr/local/bin/vm-informant --auto-restart --cgroup=neon-postgres{{if .FileCache}} --pgconnstr="dbname=postgres user=cloud_admin sslmode=disable"{{end}}' +{{if .EnableMonitor}} +::respawn:su -p vm-monitor -c 'RUST_LOG=info /usr/local/bin/vm-monitor --addr "0.0.0.0:10301" --cgroup=neon-postgres{{if .FileCache}} --pgconnstr="host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable"{{end}}' +{{end}} ::respawn:su -p nobody -c '/usr/local/bin/pgbouncer /etc/pgbouncer.ini' ::respawn:su -p nobody -c 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres" /bin/postgres_exporter --auto-discover-databases --exclude-databases=template0,template1' ttyS0::respawn:/neonvm/bin/agetty --8bits --local-line --noissue --noclear --noreset --host console --login-program /neonvm/bin/login --login-pause --autologin root 115200 ttyS0 linux +::shutdown:/neonvm/bin/vmshutdown ` scriptVmAcpi = ` event=button/power -action=/neonvm/bin/powerdown +action=/neonvm/bin/poweroff ` - scriptPowerDown = `#!/neonvm/bin/sh - -su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10' -/neonvm/bin/poweroff + scriptVmShutdown = `#!/neonvm/bin/sh +rm /neonvm/vmstart.allowed +if [ -e /neonvm/vmstart.allowed ]; then + echo "Error: could not remove vmstart.allowed marker, might hang indefinitely during shutdown" 1>&2 +fi +# we inhibited new command starts, but there may still be a command running +while ! /neonvm/bin/flock -n /neonvm/vmstart.lock true; do + su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10' +done +echo "vmstart workload shut down cleanly" 1>&2 ` scriptVmInit = `#!/neonvm/bin/sh @@ -285,6 +295,18 @@ chmod 1777 /dev/shm mount -t proc proc /proc mount -t sysfs sysfs /sys mount -t cgroup2 cgroup2 /sys/fs/cgroup + +# Allow all users to move processes to/from the root cgroup. +# +# This is required in order to be able to 'cgexec' anything, if the entrypoint is not being run as +# root, because moving tasks betweeen one cgroup and another *requires write access to the +# cgroup.procs file of the common ancestor*, and because the entrypoint isn't already in a cgroup, +# any new tasks are automatically placed in the top-level cgroup. +# +# This *would* be bad for security, if we relied on cgroups for security; but instead because they +# are just used for cooperative signaling, this should be mostly ok. 
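# For illustration (editor's note): without the chmod below, a non-root entrypoint that runs e.g.
# `cgexec -g memory:neon-postgres <cmd>` would fail, because moving the child into that cgroup
# means writing its PID into a cgroup.procs file, and that write is only permitted with write
# access to the common ancestor's cgroup.procs -- here, the root cgroup's.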
+chmod go+w /sys/fs/cgroup/cgroup.procs + mount -t devpts -o noexec,nosuid devpts /dev/pts mount -t tmpfs -o noexec,nosuid,nodev shm-tmpfs /dev/shm @@ -329,7 +351,7 @@ sinks: group neon-postgres { perm { admin { - uid = vm-informant; + uid = {{.CgroupUID}}; } task { gid = users; @@ -357,18 +379,20 @@ default_pool_size=16 ) var ( - Version string - VMInformant string - - srcImage = flag.String("src", "", `Docker image used as source for virtual machine disk image: --src=alpine:3.16`) - dstImage = flag.String("dst", "", `Docker image with resulting disk image: --dst=vm-alpine:3.16`) - size = flag.String("size", "1G", `Size for disk image: --size=1G`) - outFile = flag.String("file", "", `Save disk image as file: --file=vm-alpine.qcow2`) - quiet = flag.Bool("quiet", false, `Show less output from the docker build process`) - forcePull = flag.Bool("pull", false, `Pull src image even if already present locally`) - informant = flag.String("informant", VMInformant, `vm-informant docker image`) - fileCache = flag.Bool("enable-file-cache", false, `enables the vm-informant's file cache integration`) - version = flag.Bool("version", false, `Print vm-builder version`) + Version string + VMMonitor string + + srcImage = flag.String("src", "", `Docker image used as source for virtual machine disk image: --src=alpine:3.16`) + dstImage = flag.String("dst", "", `Docker image with resulting disk image: --dst=vm-alpine:3.16`) + size = flag.String("size", "1G", `Size for disk image: --size=1G`) + outFile = flag.String("file", "", `Save disk image as file: --file=vm-alpine.qcow2`) + quiet = flag.Bool("quiet", false, `Show less output from the docker build process`) + forcePull = flag.Bool("pull", false, `Pull src image even if already present locally`) + monitor = flag.String("monitor", VMMonitor, `vm-monitor docker image`) + enableMonitor = flag.Bool("enable-monitor", false, `start the vm-monitor during VM startup`) + fileCache = flag.Bool("enable-file-cache", false, `enables the vm-monitor's file cache integration`) + cgroupUID = flag.String("cgroup-uid", "vm-monitor", `specifies the user that owns the neon-postgres cgroup`) + version = flag.Bool("version", false, `Print vm-builder version`) ) type dockerMessage struct { @@ -419,13 +443,15 @@ func AddTemplatedFileToTar(tw *tar.Writer, tmplArgs any, filename string, tmplSt } type TemplatesContext struct { - User string - Entrypoint []string - Cmd []string - Env []string - RootDiskImage string - InformantImage string - FileCache bool + User string + Entrypoint []string + Cmd []string + Env []string + RootDiskImage string + MonitorImage string + FileCache bool + EnableMonitor bool + CgroupUID string } func main() { @@ -507,12 +533,14 @@ func main() { } tmplArgs := TemplatesContext{ - Entrypoint: append(entrypointPrefix, imageSpec.Config.Entrypoint...), - Cmd: imageSpec.Config.Cmd, - Env: imageSpec.Config.Env, - RootDiskImage: *srcImage, - InformantImage: *informant, - FileCache: *fileCache, + Entrypoint: imageSpec.Config.Entrypoint, + Cmd: imageSpec.Config.Cmd, + Env: imageSpec.Config.Env, + RootDiskImage: *srcImage, + MonitorImage: *monitor, + FileCache: *fileCache, + EnableMonitor: *enableMonitor, + CgroupUID: *cgroupUID, } if len(imageSpec.Config.User) != 0 { @@ -531,9 +559,9 @@ func main() { }{ {"Dockerfile", dockerfileVmBuilder}, {"vmstart", scriptVmStart}, + {"vmshutdown", scriptVmShutdown}, {"inittab", scriptInitTab}, {"vmacpi", scriptVmAcpi}, - {"powerdown", scriptPowerDown}, {"vminit", scriptVmInit}, {"cgconfig.conf", configCgroup}, {"vector.yaml", 
configVector}, diff --git a/pkg/agent/billing/billing.go b/pkg/agent/billing/billing.go index 7243c77b4..6a72476a0 100644 --- a/pkg/agent/billing/billing.go +++ b/pkg/agent/billing/billing.go @@ -19,12 +19,14 @@ import ( ) type Config struct { - URL string `json:"url"` - CPUMetricName string `json:"cpuMetricName"` - ActiveTimeMetricName string `json:"activeTimeMetricName"` - CollectEverySeconds uint `json:"collectEverySeconds"` - PushEverySeconds uint `json:"pushEverySeconds"` - PushTimeoutSeconds uint `json:"pushTimeoutSeconds"` + URL string `json:"url"` + CPUMetricName string `json:"cpuMetricName"` + ActiveTimeMetricName string `json:"activeTimeMetricName"` + CollectEverySeconds uint `json:"collectEverySeconds"` + AccumulateEverySeconds uint `json:"accumulateEverySeconds"` + PushEverySeconds uint `json:"pushEverySeconds"` + PushRequestTimeoutSeconds uint `json:"pushRequestTimeoutSeconds"` + MaxBatchSize uint `json:"maxBatchSize"` } type metricsState struct { @@ -85,8 +87,8 @@ func RunBillingMetricsCollector( defer collectTicker.Stop() // Offset by half a second, so it's a bit more deterministic. time.Sleep(500 * time.Millisecond) - pushTicker := time.NewTicker(time.Second * time.Duration(conf.PushEverySeconds)) - defer pushTicker.Stop() + accumulateTicker := time.NewTicker(time.Second * time.Duration(conf.AccumulateEverySeconds)) + defer accumulateTicker.Stop() state := metricsState{ historical: make(map[metricsKey]vmMetricsHistory), @@ -95,8 +97,25 @@ func RunBillingMetricsCollector( pushWindowStart: time.Now(), } - state.collect(conf, store, metrics) - batch := client.NewBatch() + queueWriter, queueReader := newEventQueue[*billing.IncrementalEvent](metrics.queueSizeCurrent) + + // Start the sender + signalDone, thisThreadFinished := util.NewCondChannelPair() + defer signalDone.Send() + sender := eventSender{ + client: client, + config: conf, + metrics: metrics, + queue: queueReader, + collectorFinished: thisThreadFinished, + lastSendDuration: 0, + } + go sender.senderLoop(logger.Named("send")) + + // The rest of this function is to do with collection + logger = logger.Named("collect") + + state.collect(logger, conf, store, metrics) for { select { @@ -106,40 +125,17 @@ func RunBillingMetricsCollector( err := errors.New("VM store stopped but background context is still live") logger.Panic("Validation check failed", zap.Error(err)) } - state.collect(conf, store, metrics) - case <-pushTicker.C: + state.collect(logger, conf, store, metrics) + case <-accumulateTicker.C: logger.Info("Creating billing batch") - state.drainAppendToBatch(logger, conf, batch) - metrics.batchSizeCurrent.Set(float64(batch.Count())) - logger.Info("Pushing billing events", zap.Int("count", batch.Count())) - _ = logger.Sync() // Sync before making the network request, so we guarantee logs for the action - if err := pushBillingEvents(conf, batch); err != nil { - metrics.sendErrorsTotal.Inc() - logger.Error("Failed to push billing events", zap.Error(err)) - continue - } - // Sending was successful; clear the batch. - // - // Don't reset metrics.batchSizeCurrent because it stores the *most recent* batch size. - // (The "current" suffix refers to the fact the metric is a gague, not a counter) - batch = client.NewBatch() + state.drainEnqueue(logger, conf, client.Hostname(), queueWriter) case <-backgroundCtx.Done(): - // If we're being shut down, push the latests events we have before returning. 
- logger.Info("Creating final billing batch") - state.drainAppendToBatch(logger, conf, batch) - metrics.batchSizeCurrent.Set(float64(batch.Count())) - logger.Info("Pushing final billing events", zap.Int("count", batch.Count())) - _ = logger.Sync() // Sync before making the network request, so we guarantee logs for the action - if err := pushBillingEvents(conf, batch); err != nil { - metrics.sendErrorsTotal.Inc() - logger.Error("Failed to push billing events", zap.Error(err)) - } return } } } -func (s *metricsState) collect(conf *Config, store VMStoreForNode, metrics PromMetrics) { +func (s *metricsState) collect(logger *zap.Logger, conf *Config, store VMStoreForNode, metrics PromMetrics) { now := time.Now() metricsBatch := metrics.forBatch() @@ -147,9 +143,14 @@ func (s *metricsState) collect(conf *Config, store VMStoreForNode, metrics PromM old := s.present s.present = make(map[metricsKey]vmMetricsInstant) - vmsOnThisNode := store.ListIndexed(func(i *VMNodeIndex) []*vmapi.VirtualMachine { - return i.List() - }) + var vmsOnThisNode []*vmapi.VirtualMachine + if store.Failing() { + logger.Error("VM store is currently stopped. No events will be recorded") + } else { + vmsOnThisNode = store.ListIndexed(func(i *VMNodeIndex) []*vmapi.VirtualMachine { + return i.List() + }) + } for _, vm := range vmsOnThisNode { endpointID, isEndpoint := vm.Labels[EndpointLabel] metricsBatch.inc(isEndpointFlag(isEndpoint), autoscalingEnabledFlag(api.HasAutoscalingEnabled(vm)), vm.Status.Phase) @@ -248,9 +249,10 @@ func (s *metricsTimeSlice) tryMerge(next metricsTimeSlice) bool { return merged } -func logAddedEvent(logger *zap.Logger, event billing.IncrementalEvent) billing.IncrementalEvent { +func logAddedEvent(logger *zap.Logger, event *billing.IncrementalEvent) *billing.IncrementalEvent { logger.Info( "Adding event to batch", + zap.String("IdempotencyKey", event.IdempotencyKey), zap.String("EndpointID", event.EndpointID), zap.String("MetricName", event.MetricName), zap.Int("Value", event.Value), @@ -258,42 +260,35 @@ func logAddedEvent(logger *zap.Logger, event billing.IncrementalEvent) billing.I return event } -// drainAppendToBatch clears the current history, adding it as events to the batch -func (s *metricsState) drainAppendToBatch(logger *zap.Logger, conf *Config, batch *billing.Batch) { +// drainEnqueue clears the current history, adding it as events to the queue +func (s *metricsState) drainEnqueue(logger *zap.Logger, conf *Config, hostname string, queue eventQueuePusher[*billing.IncrementalEvent]) { now := time.Now() for key, history := range s.historical { history.finalizeCurrentTimeSlice() - batch.AddIncrementalEvent(logAddedEvent(logger, billing.IncrementalEvent{ + queue.enqueue(logAddedEvent(logger, billing.Enrich(hostname, &billing.IncrementalEvent{ MetricName: conf.CPUMetricName, - Type: "", // set in batch method - IdempotencyKey: "", // set in batch method + Type: "", // set by billing.Enrich + IdempotencyKey: "", // set by billing.Enrich EndpointID: key.endpointID, // TODO: maybe we should store start/stop time in the vmMetricsHistory object itself? // That way we can be aligned to collection, rather than pushing. 
StartTime: s.pushWindowStart, StopTime: now, Value: int(math.Round(history.total.cpu)), - })) - batch.AddIncrementalEvent(logAddedEvent(logger, billing.IncrementalEvent{ + }))) + queue.enqueue(logAddedEvent(logger, billing.Enrich(hostname, &billing.IncrementalEvent{ MetricName: conf.ActiveTimeMetricName, - Type: "", // set in batch method - IdempotencyKey: "", // set in batch method + Type: "", // set by billing.Enrich + IdempotencyKey: "", // set by billing.Enrich EndpointID: key.endpointID, StartTime: s.pushWindowStart, StopTime: now, Value: int(math.Round(history.total.activeTime.Seconds())), - })) + }))) } s.pushWindowStart = now s.historical = make(map[metricsKey]vmMetricsHistory) } - -func pushBillingEvents(conf *Config, batch *billing.Batch) error { - ctx, cancel := context.WithTimeout(context.TODO(), time.Second*time.Duration(conf.PushTimeoutSeconds)) - defer cancel() - - return batch.Send(ctx) -} diff --git a/pkg/agent/billing/prommetrics.go b/pkg/agent/billing/prommetrics.go index be973ebca..9e7a3f503 100644 --- a/pkg/agent/billing/prommetrics.go +++ b/pkg/agent/billing/prommetrics.go @@ -13,7 +13,8 @@ import ( type PromMetrics struct { vmsProcessedTotal *prometheus.CounterVec vmsCurrent *prometheus.GaugeVec - batchSizeCurrent prometheus.Gauge + queueSizeCurrent prometheus.Gauge + lastSendDuration prometheus.Gauge sendErrorsTotal prometheus.Counter } @@ -33,10 +34,16 @@ func NewPromMetrics() PromMetrics { }, []string{"is_endpoint", "autoscaling_enabled", "phase"}, ), - batchSizeCurrent: prometheus.NewGauge( + queueSizeCurrent: prometheus.NewGauge( prometheus.GaugeOpts{ - Name: "autoscaling_agent_billing_batch_size", - Help: "Size of the billing subsystem's most recent batch", + Name: "autoscaling_agent_billing_queue_size", + Help: "Size of the billing subsystem's queue of unsent events", + }, + ), + lastSendDuration: prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "autoscaling_agent_billing_last_send_duration_seconds", + Help: "Duration, in seconds, that it took to send the latest set of billing events (or current time if ongoing)", }, ), sendErrorsTotal: prometheus.NewCounter( @@ -51,7 +58,7 @@ func NewPromMetrics() PromMetrics { func (m PromMetrics) MustRegister(reg *prometheus.Registry) { reg.MustRegister(m.vmsProcessedTotal) reg.MustRegister(m.vmsCurrent) - reg.MustRegister(m.batchSizeCurrent) + reg.MustRegister(m.queueSizeCurrent) reg.MustRegister(m.sendErrorsTotal) } diff --git a/pkg/agent/billing/queue.go b/pkg/agent/billing/queue.go new file mode 100644 index 000000000..1e3f5db00 --- /dev/null +++ b/pkg/agent/billing/queue.go @@ -0,0 +1,79 @@ +package billing + +// Implementation of the event queue for mediating event generation and event sending. +// +// The "public" (ish - it's all one package) types are eventQueuePuller and eventQueuePusher, two +// halves of the same queue. Each half is only safe for use from a single thread, but *together* +// they can be used in separate threads. + +import ( + "sync" + + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/exp/slices" + + "github.com/neondatabase/autoscaling/pkg/util" +) + +// this is generic just so there's less typing - "billing.IncrementalEvent" is long! 
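// Intended usage, roughly (editor's sketch; maxBatchSize and sendSucceeded are illustrative
// placeholders, and the real sender lives in send.go):
//
//	push, pull := newEventQueue[*billing.IncrementalEvent](metrics.queueSizeCurrent)
//
//	// collector goroutine:
//	push.enqueue(event)
//
//	// sender goroutine:
//	chunk := pull.get(maxBatchSize) // peek at up to maxBatchSize queued events
//	if sendSucceeded(chunk) {
//		pull.drop(len(chunk)) // remove them only once the send has succeeded
//	}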
+type eventQueueInternals[E any] struct { + mu sync.Mutex + items []E + sizeGauge prometheus.Gauge +} + +type eventQueuePuller[E any] struct { + internals *eventQueueInternals[E] +} + +type eventQueuePusher[E any] struct { + internals *eventQueueInternals[E] +} + +func newEventQueue[E any](sizeGauge prometheus.Gauge) (eventQueuePusher[E], eventQueuePuller[E]) { + internals := &eventQueueInternals[E]{ + mu: sync.Mutex{}, + items: nil, + sizeGauge: sizeGauge, + } + return eventQueuePusher[E]{internals}, eventQueuePuller[E]{internals} +} + +// NB: must hold mu +func (qi *eventQueueInternals[E]) updateGauge() { + qi.sizeGauge.Set(float64(len(qi.items))) +} + +func (q eventQueuePusher[E]) enqueue(events ...E) { + q.internals.mu.Lock() + defer q.internals.mu.Unlock() + + q.internals.items = append(q.internals.items, events...) + q.internals.updateGauge() +} + +func (q eventQueuePuller[E]) size() int { + q.internals.mu.Lock() + defer q.internals.mu.Unlock() + + return len(q.internals.items) +} + +func (q eventQueuePuller[E]) get(limit int) []E { + q.internals.mu.Lock() + defer q.internals.mu.Unlock() + + count := util.Min(limit, len(q.internals.items)) + // NOTE: this kind of access escaping the mutex is only sound because this access is only + // granted to the puller, and there's only one puller, and it isn't sound to use the output of a + // previous get() after calling drop(). + return q.internals.items[:count] +} + +func (q eventQueuePuller[E]) drop(count int) { + q.internals.mu.Lock() + defer q.internals.mu.Unlock() + + q.internals.items = slices.Replace(q.internals.items, 0, count) + q.internals.updateGauge() +} diff --git a/pkg/agent/billing/send.go b/pkg/agent/billing/send.go new file mode 100644 index 000000000..6a1751fe6 --- /dev/null +++ b/pkg/agent/billing/send.go @@ -0,0 +1,161 @@ +package billing + +// Logic responsible for sending billing events by repeatedly pulling from the eventQueue + +import ( + "context" + "time" + + "go.uber.org/zap" + + "github.com/neondatabase/autoscaling/pkg/billing" + "github.com/neondatabase/autoscaling/pkg/util" +) + +type eventSender struct { + client billing.Client + config *Config + metrics PromMetrics + queue eventQueuePuller[*billing.IncrementalEvent] + collectorFinished util.CondChannelReceiver + + // lastSendDuration tracks the "real" last full duration of (eventSender).sendAllCurrentEvents(). + // + // It's separate from metrics.lastSendDuration because (a) we'd like to include the duration of + // ongoing calls to sendAllCurrentEvents, but (b) we don't want the bias towards lower durations + // that comes with that. + // + // Here's some more detail: + // + // To make sure that long-running sendAllCurrentEvents() loops show up in the metrics while + // they're still running, we want to periodically update metrics.lastSendDuration before the + // loop has finished. A side-effect of doing this naively is that the gauge will sometimes + // return durations that are much shorter than the *actual* previous send loop duration. + // + // In order to fix this, we store that *actual* previous duration in this field, but and only + // update the metric when either (a) the loop is done, or (b) the duration so far is already + // longer than the previous one. + // + // This means that we remove the bias towards shorter durations, at the expense of sometimes + // returning higher durations for too long. IMO that's ok, and we'd rather have our metrics give + // a pessimistic but more accurate view. 
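The pusher/puller split above is easiest to see with a small usage sketch. The following is a hypothetical package-internal test (not part of this change; the test name and gauge name are made up) showing the intended flow: the collector enqueues through the pusher, the sender peeks with `get`, and only `drop`s items once they have actually been sent.

```go
package billing

import (
	"testing"

	"github.com/prometheus/client_golang/prometheus"
)

// TestEventQueueSketch is a hypothetical test demonstrating the two halves of the queue.
func TestEventQueueSketch(t *testing.T) {
	gauge := prometheus.NewGauge(prometheus.GaugeOpts{Name: "example_queue_size"})
	push, pull := newEventQueue[int](gauge)

	// collector side: add events
	push.enqueue(1, 2, 3)
	if pull.size() != 3 {
		t.Fatalf("expected 3 queued items, got %d", pull.size())
	}

	// sender side: peek at up to 2 items without removing them ...
	chunk := pull.get(2)
	// ... send chunk somewhere ...
	// ... and only drop them once the send succeeded.
	pull.drop(len(chunk))

	if pull.size() != 1 {
		t.Fatalf("expected 1 remaining item, got %d", pull.size())
	}
}
```

As the comment in `get` notes, the slice returned by `get` must not be reused after `drop` has been called.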
+ lastSendDuration time.Duration +} + +func (s eventSender) senderLoop(logger *zap.Logger) { + ticker := time.NewTicker(time.Second * time.Duration(s.config.PushEverySeconds)) + defer ticker.Stop() + + for { + final := false + + select { + case <-s.collectorFinished.Recv(): + logger.Info("Received notification that collector finished") + final = true + case <-ticker.C: + } + + s.sendAllCurrentEvents(logger) + + if final { + logger.Info("Ending events sender loop") + return + } + } +} + +func (s eventSender) sendAllCurrentEvents(logger *zap.Logger) { + logger.Info("Pushing all available events") + + if s.queue.size() == 0 { + logger.Info("No billing events to push") + s.lastSendDuration = 0 + s.metrics.lastSendDuration.Set(1e-6) // small value, to indicate that nothing happened + return + } + + total := 0 + startTime := time.Now() + + // while there's still events in the queue, send them + // + // If events are being added to the queue faster than we can send them, this loop will not + // terminate. For the most part, that's ok: worst-case, we miss the collectorFinished + // notification, which isn't the end of the world. Any long-running call to this function will + // be reported by s.metrics.lastSendDuration as we go (provided the request timeout isn't too + // long). + for { + if size := s.queue.size(); size != 0 { + logger.Info("Current queue size is non-zero", zap.Int("queueSize", size)) + } + + chunk := s.queue.get(int(s.config.MaxBatchSize)) + count := len(chunk) + if count == 0 { + totalTime := time.Since(startTime) + s.lastSendDuration = totalTime + s.metrics.lastSendDuration.Set(totalTime.Seconds()) + + logger.Info( + "All available events have been sent", + zap.Int("total", total), + zap.Duration("totalTime", totalTime), + ) + return + } + + traceID := s.client.GenerateTraceID() + + logger.Info( + "Pushing billing events", + zap.String("traceID", string(traceID)), + zap.Int("count", count), + ) + + reqStart := time.Now() + err := func() error { + reqCtx, cancel := context.WithTimeout(context.TODO(), time.Second*time.Duration(s.config.PushRequestTimeoutSeconds)) + defer cancel() + + return billing.Send(reqCtx, s.client, traceID, chunk) + }() + reqDuration := time.Since(reqStart) + + if err != nil { + // Something went wrong and we're going to abandon attempting to push any further + // events. + logger.Error( + "Failed to push billing events", + zap.String("traceID", string(traceID)), + zap.Int("count", count), + zap.Duration("after", reqDuration), + zap.Int("total", total), + zap.Duration("totalTime", time.Since(startTime)), + zap.Error(err), + ) + s.metrics.sendErrorsTotal.Inc() + s.lastSendDuration = 0 + s.metrics.lastSendDuration.Set(0.0) // use 0 as a flag that something went wrong; there's no valid time here. 
+ return + } + + s.queue.drop(count) // mark len(chunk) as successfully processed + total += len(chunk) + currentTotalTime := time.Since(startTime) + + logger.Info( + "Successfully pushed some billing events", + zap.String("traceID", string(traceID)), + zap.Int("count", count), + zap.Duration("after", reqDuration), + zap.Int("total", total), + zap.Duration("totalTime", currentTotalTime), + ) + + if currentTotalTime > s.lastSendDuration { + s.lastSendDuration = currentTotalTime + s.metrics.lastSendDuration.Set(currentTotalTime.Seconds()) + } + } +} diff --git a/pkg/agent/config.go b/pkg/agent/config.go index 8ab7d0329..ae13327e2 100644 --- a/pkg/agent/config.go +++ b/pkg/agent/config.go @@ -14,12 +14,41 @@ import ( type Config struct { DumpState *DumpStateConfig `json:"dumpState"` Scaling ScalingConfig `json:"scaling"` - Informant InformantConfig `json:"informant"` Metrics MetricsConfig `json:"metrics"` Scheduler SchedulerConfig `json:"scheduler"` + Monitor MonitorConfig `json:"monitor"` Billing *billing.Config `json:"billing,omitempty"` } +type MonitorConfig struct { + ResponseTimeoutSeconds uint `json:"responseTimeoutSeconds"` + // ConnectionTimeoutSeconds gives how long we may take to connect to the + // monitor before cancelling. + ConnectionTimeoutSeconds uint `json:"connectionTimeoutSeconds"` + // ConnectionRetryMinWaitSeconds gives the minimum amount of time we must wait between attempts + // to connect to the vm-monitor, regardless of whether they're successful. + ConnectionRetryMinWaitSeconds uint `json:"connectionRetryMinWaitSeconds"` + // ServerPort is the port that the dispatcher serves from + ServerPort uint16 `json:"serverPort"` + // UnhealthyAfterSilenceDurationSeconds gives the duration, in seconds, after which failing to + // receive a successful request from the monitor indicates that it is probably unhealthy. + UnhealthyAfterSilenceDurationSeconds uint `json:"unhealthyAfterSilenceDurationSeconds"` + // UnhealthyStartupGracePeriodSeconds gives the duration, in seconds, after which we will no + // longer excuse total VM monitor failures - i.e. when unhealthyAfterSilenceDurationSeconds + // kicks in. + UnhealthyStartupGracePeriodSeconds uint `json:"unhealthyStartupGracePeriodSeconds"` + // MaxHealthCheckSequentialFailuresSeconds gives the duration, in seconds, after which we + // should restart the connection to the vm-monitor if health checks aren't succeeding. + MaxHealthCheckSequentialFailuresSeconds uint `json:"maxHealthCheckSequentialFailuresSeconds"` + + // RetryFailedRequestSeconds gives the duration, in seconds, that we must wait before retrying a + // request that previously failed. 
+ RetryFailedRequestSeconds uint `json:"retryFailedRequestSeconds"` + // RetryDeniedDownscaleSeconds gives the duration, in seconds, that we must wait before retrying + // a downscale request that was previously denied + RetryDeniedDownscaleSeconds uint `json:"retryDeniedDownscaleSeconds"` +} + // DumpStateConfig configures the endpoint to dump all internal state type DumpStateConfig struct { // Port is the port to serve on @@ -38,54 +67,10 @@ type ScalingConfig struct { DefaultConfig api.ScalingConfig `json:"defaultConfig"` } -type InformantConfig struct { - // ServerPort is the port that the VM informant serves from - ServerPort uint16 `json:"serverPort"` - - // RetryServerMinWaitSeconds gives the minimum duration, in seconds, that we must wait between the - // start of one InformantServer and the next - // - // This "minimum wait" is only used when thethe - RetryServerMinWaitSeconds uint `json:"retryServerMinWaitSeconds"` - // RetryServerNormalWaitSeconds gives the typical duration, in seconds, that we wait between an - // InformantServer failing and our retry. - RetryServerNormalWaitSeconds uint `json:"retryServerNormalWaitSeconds"` - // RegisterRetrySeconds gives the duration, in seconds, to wait between retrying a failed - // register request. - RegisterRetrySeconds uint `json:"registerRetrySeconds"` - - // RetryFailedRequestSeconds gives the duration, in seconds, that we must wait before retrying a - // request that previously failed. - RetryFailedRequestSeconds uint `json:"retryFailedRequestSeconds"` - // RetryDeniedDownscaleSeconds gives the duration, in seconds, that we must wait before retrying - // a downscale request that was previously denied - RetryDeniedDownscaleSeconds uint `json:"retryDeniedDownscaleSeconds"` - - // RequestTimeoutSeconds gives the timeout for any individual request to the informant, except - // for those with separately-defined values below. - RequestTimeoutSeconds uint `json:"requestTimeoutSeconds"` - // RegisterTimeoutSeconds gives the timeout duration, in seconds, for a register request. - // - // This is a separate field from RequestTimeoutSeconds because registering may require that the - // informant suspend a previous agent, which could take longer. - RegisterTimeoutSeconds uint `json:"registerTimeoutSeconds"` - // DownscaleTimeoutSeconds gives the timeout duration, in seconds, for a downscale request. - // - // This is a separate field from RequestTimeoutSeconds it's possible that downscaling may - // require some non-trivial work that we want to allow to complete. - DownscaleTimeoutSeconds uint `json:"downscaleTimeoutSeconds"` - - // UnhealthyAfterSilenceDurationSeconds gives the duration, in seconds, after which failing to - // receive a successful request from the informant indicates that it is probably unhealthy. - UnhealthyAfterSilenceDurationSeconds uint `json:"unhealthyAfterSilenceDurationSeconds"` - // UnhealthyStartupGracePeriodSeconds gives the duration, in seconds, after which we will no - // longer excuse total VM informant failures - i.e. when unhealthyAfterSilenceDurationSeconds - // kicks in. - UnhealthyStartupGracePeriodSeconds uint `json:"unhealthyStartupGracePeriodSeconds"` -} - // MetricsConfig defines a few parameters for metrics requests to the VM type MetricsConfig struct { + // Port is the port that VMs are expected to provide metrics on + Port uint16 `json:"port"` // LoadMetricPrefix is the prefix at the beginning of the load metrics that we use. 
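For reference, here is a minimal sketch of the new monitor config block, written as a Go literal and marshalled to show the camelCase keys that the struct tags above define. The numeric values are placeholders rather than recommended defaults, and each of them must be non-zero to pass the validation changes further down.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/neondatabase/autoscaling/pkg/agent"
)

func main() {
	// Placeholder values, for illustration only.
	monitor := agent.MonitorConfig{
		ResponseTimeoutSeconds:                  5,
		ConnectionTimeoutSeconds:                4,
		ConnectionRetryMinWaitSeconds:           5,
		ServerPort:                              10301,
		UnhealthyAfterSilenceDurationSeconds:    20,
		UnhealthyStartupGracePeriodSeconds:      20,
		MaxHealthCheckSequentialFailuresSeconds: 30,
		RetryFailedRequestSeconds:               3,
		RetryDeniedDownscaleSeconds:             30,
	}
	out, err := json.MarshalIndent(monitor, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // prints the camelCase keys defined by the struct tags
}
```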
For // node_exporter, this is "node_", and for vector it's "host_" LoadMetricPrefix string `json:"loadMetricPrefix"` @@ -141,35 +126,34 @@ func (c *Config) validate() error { zeroTmpl = "field %q cannot be zero" ) + erc.Whenf(ec, c.Billing != nil && c.Billing.ActiveTimeMetricName == "", emptyTmpl, ".billing.activeTimeMetricName") + erc.Whenf(ec, c.Billing != nil && c.Billing.CPUMetricName == "", emptyTmpl, ".billing.cpuMetricName") + erc.Whenf(ec, c.Billing != nil && c.Billing.CollectEverySeconds == 0, zeroTmpl, ".billing.collectEverySeconds") + erc.Whenf(ec, c.Billing != nil && c.Billing.AccumulateEverySeconds == 0, zeroTmpl, ".billing.accumulateEverySeconds") + erc.Whenf(ec, c.Billing != nil && c.Billing.PushEverySeconds == 0, zeroTmpl, ".billing.pushEverySeconds") + erc.Whenf(ec, c.Billing != nil && c.Billing.PushRequestTimeoutSeconds == 0, zeroTmpl, ".billing.pushRequestTimeoutSeconds") + erc.Whenf(ec, c.Billing != nil && c.Billing.MaxBatchSize == 0, zeroTmpl, ".billing.maxBatchSize") + erc.Whenf(ec, c.Billing != nil && c.Billing.URL == "", emptyTmpl, ".billing.url") erc.Whenf(ec, c.DumpState != nil && c.DumpState.Port == 0, zeroTmpl, ".dumpState.port") erc.Whenf(ec, c.DumpState != nil && c.DumpState.TimeoutSeconds == 0, zeroTmpl, ".dumpState.timeoutSeconds") + erc.Whenf(ec, c.Metrics.Port == 0, zeroTmpl, ".metrics.port") + erc.Whenf(ec, c.Metrics.LoadMetricPrefix == "", emptyTmpl, ".metrics.loadMetricPrefix") + erc.Whenf(ec, c.Metrics.SecondsBetweenRequests == 0, zeroTmpl, ".metrics.secondsBetweenRequests") erc.Whenf(ec, c.Scaling.RequestTimeoutSeconds == 0, zeroTmpl, ".scaling.requestTimeoutSeconds") + erc.Whenf(ec, c.Monitor.ResponseTimeoutSeconds == 0, zeroTmpl, ".monitor.responseTimeoutSeconds") + erc.Whenf(ec, c.Monitor.ConnectionTimeoutSeconds == 0, zeroTmpl, ".monitor.connectionTimeoutSeconds") + erc.Whenf(ec, c.Monitor.ConnectionRetryMinWaitSeconds == 0, zeroTmpl, ".monitor.connectionRetryMinWaitSeconds") + erc.Whenf(ec, c.Monitor.ServerPort == 0, zeroTmpl, ".monitor.serverPort") + erc.Whenf(ec, c.Monitor.UnhealthyAfterSilenceDurationSeconds == 0, zeroTmpl, ".monitor.unhealthyAfterSilenceDurationSeconds") + erc.Whenf(ec, c.Monitor.UnhealthyStartupGracePeriodSeconds == 0, zeroTmpl, ".monitor.unhealthyStartupGracePeriodSeconds") + erc.Whenf(ec, c.Monitor.MaxHealthCheckSequentialFailuresSeconds == 0, zeroTmpl, ".monitor.maxHealthCheckSequentialFailuresSeconds") + erc.Whenf(ec, c.Monitor.RetryFailedRequestSeconds == 0, zeroTmpl, ".monitor.retryFailedRequestSeconds") + erc.Whenf(ec, c.Monitor.RetryDeniedDownscaleSeconds == 0, zeroTmpl, ".monitor.retryDeniedDownscaleSeconds") // add all errors if there are any: https://github.com/neondatabase/autoscaling/pull/195#discussion_r1170893494 ec.Add(c.Scaling.DefaultConfig.Validate()) - erc.Whenf(ec, c.Informant.ServerPort == 0, zeroTmpl, ".informant.serverPort") - erc.Whenf(ec, c.Informant.RetryServerMinWaitSeconds == 0, zeroTmpl, ".informant.retryServerMinWaitSeconds") - erc.Whenf(ec, c.Informant.RetryServerNormalWaitSeconds == 0, zeroTmpl, ".informant.retryServerNormalWaitSeconds") - erc.Whenf(ec, c.Informant.RegisterRetrySeconds == 0, zeroTmpl, ".informant.registerRetrySeconds") - erc.Whenf(ec, c.Informant.RetryFailedRequestSeconds == 0, zeroTmpl, ".informant.retryFailedRequestSeconds") - erc.Whenf(ec, c.Informant.RetryDeniedDownscaleSeconds == 0, zeroTmpl, ".informant.retryDeniedDownscaleSeconds") - erc.Whenf(ec, c.Informant.RequestTimeoutSeconds == 0, zeroTmpl, ".informant.requestTimeoutSeconds") - erc.Whenf(ec, 
c.Informant.RegisterTimeoutSeconds == 0, zeroTmpl, ".informant.registerTimeoutSeconds") - erc.Whenf(ec, c.Informant.DownscaleTimeoutSeconds == 0, zeroTmpl, ".informant.downscaleTimeoutSeconds") - erc.Whenf(ec, c.Informant.UnhealthyAfterSilenceDurationSeconds == 0, zeroTmpl, ".informant.unhealthyAfterSilenceDurationSeconds") - erc.Whenf(ec, c.Informant.UnhealthyStartupGracePeriodSeconds == 0, zeroTmpl, ".informant.unhealthyStartupGracePeriodSeconds") - erc.Whenf(ec, c.Metrics.LoadMetricPrefix == "", emptyTmpl, ".metrics.loadMetricPrefix") - erc.Whenf(ec, c.Metrics.RequestTimeoutSeconds == 0, zeroTmpl, ".metrics.requestTimeoutSeconds") - erc.Whenf(ec, c.Metrics.SecondsBetweenRequests == 0, zeroTmpl, ".metrics.secondsBetweenRequests") - erc.Whenf(ec, c.Scheduler.SchedulerName == "", emptyTmpl, ".scheduler.schedulerName") - // note: c.Scheduler.RequestTimeoutSeconds == 0 is valid - erc.Whenf(ec, c.Scheduler.RequestAtLeastEverySeconds == 0, zeroTmpl, ".scheduler.requestAtLeastEverySeconds") erc.Whenf(ec, c.Scheduler.RequestPort == 0, zeroTmpl, ".scheduler.requestPort") - erc.Whenf(ec, c.Billing != nil && c.Billing.URL == "", emptyTmpl, ".billing.url") - erc.Whenf(ec, c.Billing != nil && c.Billing.CPUMetricName == "", emptyTmpl, ".billing.cpuMetricName") - erc.Whenf(ec, c.Billing != nil && c.Billing.ActiveTimeMetricName == "", emptyTmpl, ".billing.activeTimeMetricName") - erc.Whenf(ec, c.Billing != nil && c.Billing.CollectEverySeconds == 0, zeroTmpl, ".billing.collectEverySeconds") - erc.Whenf(ec, c.Billing != nil && c.Billing.PushEverySeconds == 0, zeroTmpl, ".billing.pushEverySeconds") - erc.Whenf(ec, c.Billing != nil && c.Billing.PushTimeoutSeconds == 0, zeroTmpl, ".billing.pushTimeoutSeconds") + erc.Whenf(ec, c.Scheduler.RequestTimeoutSeconds == 0, zeroTmpl, ".scheduler.requestTimeoutSeconds") + erc.Whenf(ec, c.Scheduler.SchedulerName == "", emptyTmpl, ".scheduler.schedulerName") return ec.Resolve() } diff --git a/pkg/agent/core/action.go b/pkg/agent/core/action.go index 990064d62..7314f3894 100644 --- a/pkg/agent/core/action.go +++ b/pkg/agent/core/action.go @@ -7,11 +7,11 @@ import ( ) type ActionSet struct { - Wait *ActionWait `json:"wait,omitempty"` - PluginRequest *ActionPluginRequest `json:"pluginRequest,omitempty"` - NeonVMRequest *ActionNeonVMRequest `json:"neonvmRequest,omitempty"` - InformantDownscale *ActionInformantDownscale `json:"informantDownscale,omitempty"` - InformantUpscale *ActionInformantUpscale `json:"informantUpscale,omitempty"` + Wait *ActionWait `json:"wait,omitempty"` + PluginRequest *ActionPluginRequest `json:"pluginRequest,omitempty"` + NeonVMRequest *ActionNeonVMRequest `json:"neonvmRequest,omitempty"` + MonitorDownscale *ActionMonitorDownscale `json:"monitorDownscale,omitempty"` + MonitorUpscale *ActionMonitorUpscale `json:"monitorUpscale,omitempty"` } type ActionWait struct { @@ -29,12 +29,12 @@ type ActionNeonVMRequest struct { Target api.Resources `json:"target"` } -type ActionInformantDownscale struct { +type ActionMonitorDownscale struct { Current api.Resources `json:"current"` Target api.Resources `json:"target"` } -type ActionInformantUpscale struct { +type ActionMonitorUpscale struct { Current api.Resources `json:"current"` Target api.Resources `json:"target"` } diff --git a/pkg/agent/core/dumpstate.go b/pkg/agent/core/dumpstate.go index b36874a6a..03c06dce6 100644 --- a/pkg/agent/core/dumpstate.go +++ b/pkg/agent/core/dumpstate.go @@ -19,23 +19,23 @@ func shallowCopy[T any](ptr *T) *T { // StateDump provides introspection into the current values of the 
fields of State type StateDump struct { - Config Config `json:"config"` - VM api.VmInfo `json:"vm"` - Plugin pluginStateDump `json:"plugin"` - Informant informantStateDump `json:"informant"` - NeonVM neonvmStateDump `json:"neonvm"` - Metrics *api.Metrics `json:"metrics"` + Config Config `json:"config"` + VM api.VmInfo `json:"vm"` + Plugin pluginStateDump `json:"plugin"` + Monitor monitorStateDump `json:"monitor"` + NeonVM neonvmStateDump `json:"neonvm"` + Metrics *api.Metrics `json:"metrics"` } // Dump produces a JSON-serializable representation of the State func (s *State) Dump() StateDump { return StateDump{ - Config: s.config, - VM: s.vm, - Plugin: s.plugin.dump(), - Informant: s.informant.dump(), - NeonVM: s.neonvm.dump(), - Metrics: shallowCopy(s.metrics), + Config: s.config, + VM: s.vm, + Plugin: s.plugin.dump(), + Monitor: s.monitor.dump(), + NeonVM: s.neonvm.dump(), + Metrics: shallowCopy(s.metrics), } } @@ -69,17 +69,17 @@ func (s *pluginState) dump() pluginStateDump { } } -type informantStateDump struct { - Active bool `json:"active"` - OngoingRequest *OngoingInformantRequestDump `json:"ongoingRequest"` - RequestedUpscale *requestedUpscaleDump `json:"requestedUpscale"` - DeniedDownscale *deniedDownscaleDump `json:"deniedDownscale"` - Approved *api.Resources `json:"approved"` - DownscaleFailureAt *time.Time `json:"downscaleFailureAt"` - UpscaleFailureAt *time.Time `json:"upscaleFailureAt"` +type monitorStateDump struct { + Active bool `json:"active"` + OngoingRequest *OngoingMonitorRequestDump `json:"ongoingRequest"` + RequestedUpscale *requestedUpscaleDump `json:"requestedUpscale"` + DeniedDownscale *deniedDownscaleDump `json:"deniedDownscale"` + Approved *api.Resources `json:"approved"` + DownscaleFailureAt *time.Time `json:"downscaleFailureAt"` + UpscaleFailureAt *time.Time `json:"upscaleFailureAt"` } -type OngoingInformantRequestDump struct { - Kind informantRequestKind `json:"kind"` +type OngoingMonitorRequestDump struct { + Kind monitorRequestKind `json:"kind"` } type requestedUpscaleDump struct { At time.Time `json:"at"` @@ -91,7 +91,7 @@ type deniedDownscaleDump struct { Requested api.Resources `json:"requested"` } -func (s *informantState) dump() informantStateDump { +func (s *monitorState) dump() monitorStateDump { var requestedUpscale *requestedUpscaleDump if s.requestedUpscale != nil { requestedUpscale = &requestedUpscaleDump{ @@ -109,14 +109,14 @@ func (s *informantState) dump() informantStateDump { } } - var ongoingRequest *OngoingInformantRequestDump + var ongoingRequest *OngoingMonitorRequestDump if s.ongoingRequest != nil { - ongoingRequest = &OngoingInformantRequestDump{ + ongoingRequest = &OngoingMonitorRequestDump{ Kind: s.ongoingRequest.kind, } } - return informantStateDump{ + return monitorStateDump{ Active: s.active, OngoingRequest: ongoingRequest, RequestedUpscale: requestedUpscale, diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 287a92a48..637c8eb01 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -13,7 +13,7 @@ package core // // That said, there's still some tricky semantics we want to maintain. Internally, the // autoscaler-agent must be designed around eventual consistency, but the API we expose to the -// vm-informant is strictly synchonous. As such, there's some subtle logic to make sure that we're +// vm-monitor is strictly synchonous. As such, there's some subtle logic to make sure that we're // not violating our own guarantees. 
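One way to see the effect of the dumpstate.go rename above is to serialize a dump: the JSON now carries a "monitor" object (per the struct tags) where it previously had "informant". The helper below is hypothetical and included only for illustration.

```go
package example

import (
	"encoding/json"
	"fmt"

	"github.com/neondatabase/autoscaling/pkg/agent/core"
)

// printStateDump is a hypothetical helper: the output now contains a "monitor" key,
// with fields like "active" and "ongoingRequest", instead of the old "informant" key.
func printStateDump(state *core.State) {
	data, err := json.MarshalIndent(state.Dump(), "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data))
}
```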
// // --- @@ -40,12 +40,12 @@ type Config struct { // plugin, even if nothing's changed. PluginRequestTick time.Duration - // InformantDeniedDownscaleCooldown gives the time we must wait between making duplicate - // downscale requests to the vm-informant where the previous failed. - InformantDeniedDownscaleCooldown time.Duration + // MonitorDeniedDownscaleCooldown gives the time we must wait between making duplicate + // downscale requests to the vm-monitor where the previous failed. + MonitorDeniedDownscaleCooldown time.Duration - // InformantRetryWait gives the amount of time to wait to retry after a *failed* request. - InformantRetryWait time.Duration + // MonitorRetryWait gives the amount of time to wait to retry after a *failed* request. + MonitorRetryWait time.Duration // Warn provides an outlet for (*State).Next() to give warnings about conditions that are // impeding its ability to execute. (e.g. "wanted to do X but couldn't because of Y") @@ -68,8 +68,8 @@ type State struct { // plugin records all state relevant to communications with the scheduler plugin plugin pluginState - // informant records all state relevant to communications with the vm-informant - informant informantState + // monitor records all state relevant to communications with the vm-monitor + monitor monitorState // neonvm records all state relevant to the NeonVM k8s API neonvm neonvmState @@ -98,15 +98,15 @@ type pluginRequested struct { resources api.Resources } -type informantState struct { - // active is true iff the agent is currently "confirmed" and not "suspended" by the informant. +type monitorState struct { + // active is true iff the agent is currently "confirmed" and not "suspended" by the monitor. // Otherwise, we shouldn't be making any kind of scaling requests. active bool - ongoingRequest *ongoingInformantRequest + ongoingRequest *ongoingMonitorRequest // requestedUpscale, if not nil, stores the most recent *unresolved* upscaling requested by the - // vm-informant, along with the time at which it occurred. + // vm-monitor, along with the time at which it occurred. requestedUpscale *requestedUpscale // deniedDownscale, if not nil, stores the result of the lastest denied /downscale request. @@ -120,15 +120,15 @@ type informantState struct { upscaleFailureAt *time.Time } -type ongoingInformantRequest struct { - kind informantRequestKind +type ongoingMonitorRequest struct { + kind monitorRequestKind } -type informantRequestKind string +type monitorRequestKind string const ( - informantRequestKindDownscale informantRequestKind = "downscale" - informantRequestKindUpscale informantRequestKind = "upscale" + monitorRequestKindDownscale monitorRequestKind = "downscale" + monitorRequestKindUpscale monitorRequestKind = "upscale" ) type requestedUpscale struct { @@ -160,7 +160,7 @@ func NewState(vm api.VmInfo, config Config) *State { lastRequest: nil, permit: nil, }, - informant: informantState{ + monitor: monitorState{ active: false, ongoingRequest: nil, requestedUpscale: nil, @@ -185,30 +185,12 @@ func (s *State) NextActions(now time.Time) ActionSet { using := s.vm.Using() - var desiredResources api.Resources + desiredResources := s.DesiredResourcesFromMetricsOrRequestedUpscaling() - if s.informant.active { - desiredResources = s.desiredResourcesFromMetricsOrRequestedUpscaling() - } else { - // If we're not deemed "active" by the informant, then we shouldn't be making any kind of - // scaling requests on its behalf. 
- // - // We'll still talk to the scheduler to inform it about the current resource usage though, - // to mitigate any reliability issues - much of the informant is built (as of 2023-07-09) - // under the assumption that we could, in theory, have multiple autoscaler-agents on the - // same node at the same time. That's... not really true, so an informant that isn't - // "active" is more likely to just be crash-looping due to a bug. - // - // *In theory* if we had mutliple autoscaler-agents talking to a single informant, this - // would be incorrect; we'd override another one's scaling requests. But this should be - // fine. - desiredResources = using - } - - desiredResourcesApprovedByInformant := s.boundResourcesByInformantApproved(desiredResources) + desiredResourcesApprovedByMonitor := s.boundResourcesByMonitorApproved(desiredResources) desiredResourcesApprovedByPlugin := s.boundResourcesByPluginApproved(desiredResources) - // NB: informant approved provides a lower bound - approvedDesiredResources := desiredResourcesApprovedByPlugin.Max(desiredResourcesApprovedByInformant) + // NB: monitor approved provides a lower bound + approvedDesiredResources := desiredResourcesApprovedByPlugin.Max(desiredResourcesApprovedByMonitor) ongoingNeonVMRequest := s.neonvm.ongoingRequested != nil @@ -248,7 +230,7 @@ func (s *State) NextActions(now time.Time) ActionSet { // ... Otherwise, we should try requesting something new form it. actions.PluginRequest = &ActionPluginRequest{ LastPermit: s.plugin.permit, - Target: desiredResourcesApprovedByInformant, + Target: desiredResourcesApprovedByMonitor, Metrics: s.metrics, } } @@ -285,63 +267,63 @@ func (s *State) NextActions(now time.Time) ActionSet { } } - // We should make an upscale request to the informant if we've upscaled and the informant + // We should make an upscale request to the monitor if we've upscaled and the monitor // doesn't know about it. 
- wantInformantUpscaleRequest := s.informant.approved != nil && *s.informant.approved != desiredResources.Max(*s.informant.approved) + wantMonitorUpscaleRequest := s.monitor.approved != nil && *s.monitor.approved != desiredResources.Max(*s.monitor.approved) // However, we may need to wait before retrying (or for any ongoing requests to finish) - makeInformantUpscaleRequest := wantInformantUpscaleRequest && - s.informant.active && - s.informant.ongoingRequest == nil && - (s.informant.upscaleFailureAt == nil || - now.Sub(*s.informant.upscaleFailureAt) >= s.config.InformantRetryWait) - if wantInformantUpscaleRequest { - if makeInformantUpscaleRequest { - actions.InformantUpscale = &ActionInformantUpscale{ - Current: *s.informant.approved, - Target: desiredResources.Max(*s.informant.approved), + makeMonitorUpscaleRequest := wantMonitorUpscaleRequest && + s.monitor.active && + s.monitor.ongoingRequest == nil && + (s.monitor.upscaleFailureAt == nil || + now.Sub(*s.monitor.upscaleFailureAt) >= s.config.MonitorRetryWait) + if wantMonitorUpscaleRequest { + if makeMonitorUpscaleRequest { + actions.MonitorUpscale = &ActionMonitorUpscale{ + Current: *s.monitor.approved, + Target: desiredResources.Max(*s.monitor.approved), } - } else if !s.informant.active { + } else if !s.monitor.active { s.config.Warn("Wanted to send informant upscale request, but not active") - } else if s.informant.ongoingRequest != nil && s.informant.ongoingRequest.kind != informantRequestKindUpscale { - s.config.Warn("Wanted to send informant upscale request, but waiting other ongoing %s request", s.informant.ongoingRequest.kind) - } else if s.informant.ongoingRequest == nil { + } else if s.monitor.ongoingRequest != nil && s.monitor.ongoingRequest.kind != monitorRequestKindUpscale { + s.config.Warn("Wanted to send informant upscale request, but waiting other ongoing %s request", s.monitor.ongoingRequest.kind) + } else if s.monitor.ongoingRequest == nil { s.config.Warn("Wanted to send informant upscale request, but waiting on retry rate limit") } } - // We should make a downscale request to the informant if we want to downscale but haven't been + // We should make a downscale request to the monitor if we want to downscale but haven't been // approved for it. 
- var resourcesForInformantDownscale api.Resources - if s.informant.approved != nil { - resourcesForInformantDownscale = desiredResources.Min(*s.informant.approved) + var resourcesForMonitorDownscale api.Resources + if s.monitor.approved != nil { + resourcesForMonitorDownscale = desiredResources.Min(*s.monitor.approved) } else { - resourcesForInformantDownscale = desiredResources.Min(using) + resourcesForMonitorDownscale = desiredResources.Min(using) } - wantInformantDownscaleRequest := s.informant.approved != nil && *s.informant.approved != resourcesForInformantDownscale - if s.informant.approved == nil && resourcesForInformantDownscale != using { + wantMonitorDownscaleRequest := s.monitor.approved != nil && *s.monitor.approved != resourcesForMonitorDownscale + if s.monitor.approved == nil && resourcesForMonitorDownscale != using { s.config.Warn("Wanted to send informant downscale request, but haven't yet gotten information about its resources") } // However, we may need to wait before retrying (or for any ongoing requests to finish) - makeInformantDownscaleRequest := wantInformantDownscaleRequest && - s.informant.active && - s.informant.ongoingRequest == nil && - (s.informant.deniedDownscale == nil || - s.informant.deniedDownscale.requested != desiredResources.Min(using) || - now.Sub(s.informant.deniedDownscale.at) >= s.config.InformantDeniedDownscaleCooldown) && - (s.informant.downscaleFailureAt == nil || - now.Sub(*s.informant.downscaleFailureAt) >= s.config.InformantRetryWait) - - if wantInformantDownscaleRequest { - if makeInformantDownscaleRequest { - actions.InformantDownscale = &ActionInformantDownscale{ - Current: *s.informant.approved, - Target: resourcesForInformantDownscale, + makeMonitorDownscaleRequest := wantMonitorDownscaleRequest && + s.monitor.active && + s.monitor.ongoingRequest == nil && + (s.monitor.deniedDownscale == nil || + s.monitor.deniedDownscale.requested != desiredResources.Min(using) || + now.Sub(s.monitor.deniedDownscale.at) >= s.config.MonitorDeniedDownscaleCooldown) && + (s.monitor.downscaleFailureAt == nil || + now.Sub(*s.monitor.downscaleFailureAt) >= s.config.MonitorRetryWait) + + if wantMonitorDownscaleRequest { + if makeMonitorDownscaleRequest { + actions.MonitorDownscale = &ActionMonitorDownscale{ + Current: *s.monitor.approved, + Target: resourcesForMonitorDownscale, } - } else if !s.informant.active { + } else if !s.monitor.active { s.config.Warn("Wanted to send informant downscale request, but not active") - } else if s.informant.ongoingRequest != nil && s.informant.ongoingRequest.kind != informantRequestKindDownscale { - s.config.Warn("Wanted to send informant downscale request, but waiting on other ongoing %s request", s.informant.ongoingRequest.kind) - } else if s.informant.ongoingRequest == nil { + } else if s.monitor.ongoingRequest != nil && s.monitor.ongoingRequest.kind != monitorRequestKindDownscale { + s.config.Warn("Wanted to send informant downscale request, but waiting on other ongoing %s request", s.monitor.ongoingRequest.kind) + } else if s.monitor.ongoingRequest == nil { s.config.Warn("Wanted to send informant downscale request, but waiting on retry rate limit") } } @@ -349,7 +331,7 @@ func (s *State) NextActions(now time.Time) ActionSet { // --- and that's all the request types! --- // If there's anything waiting, we should also note how long we should wait for. - // There's two components we could be waiting on: the scheduler plugin, and the vm-informant. 
+ // There's two components we could be waiting on: the scheduler plugin, and the vm-monitor. maximumDuration := time.Duration(int64(uint64(1)<<63 - 1)) requiredWait := maximumDuration @@ -365,28 +347,28 @@ func (s *State) NextActions(now time.Time) ActionSet { requiredWait = util.Min(requiredWait, now.Sub(s.plugin.lastRequest.at)) } - // For the vm-informant: + // For the vm-monitor: // if we wanted to make EITHER a downscale or upscale request, but we previously couldn't - // because of retry timeouts, we should wait for s.config.InformantRetryWait before trying + // because of retry timeouts, we should wait for s.config.MonitorRetryWait before trying // again. // OR if we wanted to downscale but got denied, we should wait for - // s.config.InformantDownscaleCooldown before retrying. - if s.informant.ongoingRequest == nil { + // s.config.MonitorDownscaleCooldown before retrying. + if s.monitor.ongoingRequest == nil { // Retry upscale on failure - if wantInformantUpscaleRequest && s.informant.upscaleFailureAt != nil { - if wait := now.Sub(*s.informant.upscaleFailureAt); wait >= s.config.InformantRetryWait { + if wantMonitorUpscaleRequest && s.monitor.upscaleFailureAt != nil { + if wait := now.Sub(*s.monitor.upscaleFailureAt); wait >= s.config.MonitorRetryWait { requiredWait = util.Min(requiredWait, wait) } } // Retry downscale on failure - if wantInformantDownscaleRequest && s.informant.downscaleFailureAt != nil { - if wait := now.Sub(*s.informant.downscaleFailureAt); wait >= s.config.InformantRetryWait { + if wantMonitorDownscaleRequest && s.monitor.downscaleFailureAt != nil { + if wait := now.Sub(*s.monitor.downscaleFailureAt); wait >= s.config.MonitorRetryWait { requiredWait = util.Min(requiredWait, wait) } } // Retry downscale if denied - if wantInformantDownscaleRequest && s.informant.deniedDownscale != nil && resourcesForInformantDownscale == s.informant.deniedDownscale.requested { - if wait := now.Sub(s.informant.deniedDownscale.at); wait >= s.config.InformantDeniedDownscaleCooldown { + if wantMonitorDownscaleRequest && s.monitor.deniedDownscale != nil && resourcesForMonitorDownscale == s.monitor.deniedDownscale.requested { + if wait := now.Sub(s.monitor.deniedDownscale.at); wait >= s.config.MonitorDeniedDownscaleCooldown { requiredWait = util.Min(requiredWait, wait) } } @@ -408,7 +390,7 @@ func (s *State) scalingConfig() api.ScalingConfig { } } -func (s *State) desiredResourcesFromMetricsOrRequestedUpscaling() api.Resources { +func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling() api.Resources { // There's some annoying edge cases that this function has to be able to handle properly. For // the sake of completeness, they are: // @@ -445,9 +427,9 @@ func (s *State) desiredResourcesFromMetricsOrRequestedUpscaling() api.Resources // resources for the desired "goal" compute units var goalResources api.Resources - // If there's no constraints from s.metrics or s.informant.requestedUpscale, then we'd prefer to + // If there's no constraints from s.metrics or s.monitor.requestedUpscale, then we'd prefer to // keep things as-is, rather than scaling down (because otherwise goalCU = 0). - if s.metrics == nil && s.informant.requestedUpscale == nil { + if s.metrics == nil && s.monitor.requestedUpscale == nil { goalResources = s.vm.Using() } else { goalResources = s.plugin.computeUnit.Mul(uint16(goalCU)) @@ -476,12 +458,12 @@ func (s *State) desiredResourcesFromMetricsOrRequestedUpscaling() api.Resources // NB: we could just use s.plugin.computeUnit, but that's sometimes nil. 
This way, it's clear that // it's the caller's responsibility to ensure that s.plugin.computeUnit != nil. func (s *State) requiredCUForRequestedUpscaling(computeUnit api.Resources) uint32 { - if s.informant.requestedUpscale == nil { + if s.monitor.requestedUpscale == nil { return 0 } var required uint32 - requested := s.informant.requestedUpscale.requested + requested := s.monitor.requestedUpscale.requested // note: floor(x / M) + 1 gives the minimum integer value greater than x / M. @@ -495,10 +477,10 @@ func (s *State) requiredCUForRequestedUpscaling(computeUnit api.Resources) uint3 return required } -func (s *State) boundResourcesByInformantApproved(resources api.Resources) api.Resources { +func (s *State) boundResourcesByMonitorApproved(resources api.Resources) api.Resources { var lowerBound api.Resources - if s.informant.approved != nil { - lowerBound = *s.informant.approved + if s.monitor.approved != nil { + lowerBound = *s.monitor.approved } else { lowerBound = s.vm.Using() } @@ -598,17 +580,17 @@ func (h PluginHandle) RequestSuccessful(now time.Time, resp api.PluginResponse) return nil } -// InformantHandle provides write access to the vm-informant pieces of an UpdateState -type InformantHandle struct { +// MonitorHandle provides write access to the vm-monitor pieces of an UpdateState +type MonitorHandle struct { s *State } -func (s *State) Informant() InformantHandle { - return InformantHandle{s} +func (s *State) Monitor() MonitorHandle { + return MonitorHandle{s} } -func (h InformantHandle) Reset() { - h.s.informant = informantState{ +func (h MonitorHandle) Reset() { + h.s.monitor = monitorState{ active: false, ongoingRequest: nil, requestedUpscale: nil, @@ -619,61 +601,56 @@ func (h InformantHandle) Reset() { } } -func (h InformantHandle) Active(active bool) { - h.s.informant.active = active -} - -func (h InformantHandle) SuccessfullyRegistered() { - using := h.s.vm.Using() - h.s.informant.approved = &using // TODO: this is racy (although... informant synchronization should help *some* with this?) 
+func (h MonitorHandle) Active(active bool) { + h.s.monitor.active = active } -func (h InformantHandle) UpscaleRequested(now time.Time, resources api.MoreResources) { - h.s.informant.requestedUpscale = &requestedUpscale{ +func (h MonitorHandle) UpscaleRequested(now time.Time, resources api.MoreResources) { + h.s.monitor.requestedUpscale = &requestedUpscale{ at: now, - base: h.s.vm.Using(), // TODO: this is racy (maybe the resources were different when the informant originally made the request) + base: h.s.vm.Using(), // TODO: this is racy (maybe the resources were different when the monitor originally made the request) requested: resources, } } -func (h InformantHandle) StartingUpscaleRequest(now time.Time) { - h.s.informant.ongoingRequest = &ongoingInformantRequest{kind: informantRequestKindUpscale} - h.s.informant.upscaleFailureAt = nil +func (h MonitorHandle) StartingUpscaleRequest(now time.Time) { + h.s.monitor.ongoingRequest = &ongoingMonitorRequest{kind: monitorRequestKindUpscale} + h.s.monitor.upscaleFailureAt = nil } -func (h InformantHandle) UpscaleRequestSuccessful(now time.Time, resources api.Resources) { - h.s.informant.ongoingRequest = nil - h.s.informant.approved = &resources +func (h MonitorHandle) UpscaleRequestSuccessful(now time.Time, resources api.Resources) { + h.s.monitor.ongoingRequest = nil + h.s.monitor.approved = &resources } -func (h InformantHandle) UpscaleRequestFailed(now time.Time) { - h.s.informant.ongoingRequest = nil - h.s.informant.upscaleFailureAt = &now +func (h MonitorHandle) UpscaleRequestFailed(now time.Time) { + h.s.monitor.ongoingRequest = nil + h.s.monitor.upscaleFailureAt = &now } -func (h InformantHandle) StartingDownscaleRequest(now time.Time) { - h.s.informant.ongoingRequest = &ongoingInformantRequest{kind: informantRequestKindDownscale} - h.s.informant.downscaleFailureAt = nil +func (h MonitorHandle) StartingDownscaleRequest(now time.Time) { + h.s.monitor.ongoingRequest = &ongoingMonitorRequest{kind: monitorRequestKindDownscale} + h.s.monitor.downscaleFailureAt = nil } -func (h InformantHandle) DownscaleRequestAllowed(now time.Time, requested api.Resources) { - h.s.informant.ongoingRequest = nil - h.s.informant.approved = &requested - h.s.informant.deniedDownscale = nil +func (h MonitorHandle) DownscaleRequestAllowed(now time.Time, requested api.Resources) { + h.s.monitor.ongoingRequest = nil + h.s.monitor.approved = &requested + h.s.monitor.deniedDownscale = nil } -// Downscale request was successful but the informant denied our request. -func (h InformantHandle) DownscaleRequestDenied(now time.Time, requested api.Resources) { - h.s.informant.ongoingRequest = nil - h.s.informant.deniedDownscale = &deniedDownscale{ +// Downscale request was successful but the monitor denied our request. 
+func (h MonitorHandle) DownscaleRequestDenied(now time.Time, requested api.Resources) { + h.s.monitor.ongoingRequest = nil + h.s.monitor.deniedDownscale = &deniedDownscale{ at: now, requested: requested, } } -func (h InformantHandle) DownscaleRequestFailed(now time.Time) { - h.s.informant.ongoingRequest = nil - h.s.informant.downscaleFailureAt = &now +func (h MonitorHandle) DownscaleRequestFailed(now time.Time) { + h.s.monitor.ongoingRequest = nil + h.s.monitor.downscaleFailureAt = &now } type NeonVMHandle struct { diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go new file mode 100644 index 000000000..8abc19c7e --- /dev/null +++ b/pkg/agent/core/state_test.go @@ -0,0 +1,83 @@ +package core_test + +import ( + "testing" + "time" + + "k8s.io/apimachinery/pkg/api/resource" + + "github.com/neondatabase/autoscaling/pkg/agent/core" + "github.com/neondatabase/autoscaling/pkg/api" +) + +func Test_desiredVMState(t *testing.T) { + cases := []struct { + name string + + // helpers for setting fields (ish) of State: + metrics api.Metrics + vmUsing api.Resources + requestedUpscale api.MoreResources + + // expected output from (*State).DesiredResourcesFromMetricsOrRequestedUpscaling() + expected api.Resources + }{ + { + name: "BasicScaleup", + metrics: api.Metrics{ + LoadAverage1Min: 0.30, + LoadAverage5Min: 0.0, // unused + MemoryUsageBytes: 0.0, + }, + vmUsing: api.Resources{VCPU: 250, Mem: 1}, + requestedUpscale: api.MoreResources{Cpu: false, Memory: false}, + + expected: api.Resources{VCPU: 500, Mem: 2}, + }, + } + + for _, c := range cases { + state := core.NewState( + api.VmInfo{ + Name: "test", + Namespace: "test", + Cpu: api.VmCpuInfo{ + Min: 250, + Use: c.vmUsing.VCPU, + Max: 1000, + }, + Mem: api.VmMemInfo{ + SlotSize: resource.NewQuantity(1<<30 /* 1 Gi */, resource.BinarySI), // unused, doesn't actually matter. + Min: 1, + Use: c.vmUsing.Mem, + Max: 4, + }, + // remaining fields are also unused: + ScalingConfig: nil, + AlwaysMigrate: false, + ScalingEnabled: true, + }, + core.Config{ + DefaultScalingConfig: api.ScalingConfig{ + LoadAverageFractionTarget: 0.5, + MemoryUsageFractionTarget: 0.5, + }, + // these don't really matter, because we're not using (*State).NextActions() + PluginRequestTick: time.Second, + MonitorDeniedDownscaleCooldown: time.Second, + MonitorRetryWait: time.Second, + Warn: nil, + }, + ) + + // set the metrics + state.UpdateMetrics(c.metrics) + + t.Run(c.name, func(t *testing.T) { + actual := state.DesiredResourcesFromMetricsOrRequestedUpscaling() + if actual != c.expected { + t.Errorf("expected output %+v but got %+v", c.expected, actual) + } + }) + } +} diff --git a/pkg/agent/dispatcher.go b/pkg/agent/dispatcher.go new file mode 100644 index 000000000..ac61f2567 --- /dev/null +++ b/pkg/agent/dispatcher.go @@ -0,0 +1,678 @@ +package agent + +// The Dispatcher is our interface with the monitor. We interact via a websocket +// connection through a simple RPC-style protocol. + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "sync" + "sync/atomic" + "time" + + "go.uber.org/zap" + "nhooyr.io/websocket" + "nhooyr.io/websocket/wsjson" + + "github.com/neondatabase/autoscaling/pkg/api" + "github.com/neondatabase/autoscaling/pkg/util" +) + +const ( + MinMonitorProtocolVersion api.MonitorProtoVersion = api.MonitorProtoV1_0 + MaxMonitorProtocolVersion api.MonitorProtoVersion = api.MonitorProtoV1_0 +) + +// This struct represents the result of a dispatcher.Call. 
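Taken together, the MonitorHandle methods above are meant to be driven in a simple sequence around each request. The sketch below is a simplified, hypothetical caller (the function name and the send callback are assumptions; the real runner logic is more involved) showing that sequence for a downscale request.

```go
package example

import (
	"time"

	"github.com/neondatabase/autoscaling/pkg/agent/core"
	"github.com/neondatabase/autoscaling/pkg/api"
)

// doDownscale sketches the intended MonitorHandle call sequence: mark the request as
// started, perform it via the supplied callback, then record exactly one outcome.
func doDownscale(state *core.State, target api.Resources, send func() (denied bool, err error)) {
	h := state.Monitor()
	h.StartingDownscaleRequest(time.Now())

	denied, err := send() // e.g. a downscale request sent through the dispatcher

	switch {
	case err != nil:
		h.DownscaleRequestFailed(time.Now())
	case denied:
		h.DownscaleRequestDenied(time.Now(), target)
	default:
		h.DownscaleRequestAllowed(time.Now(), target)
	}
}
```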
Because the SignalSender +// passed in can only be generic over one type - we have this mock enum. Only +// one field should ever be non-nil, and it should always be clear which field +// is readable. For example, the caller of dispatcher.call(HealthCheck { .. }) +// should only read the healthcheck field. +type MonitorResult struct { + Result *api.DownscaleResult + Confirmation *api.UpscaleConfirmation + HealthCheck *api.HealthCheck +} + +// The Dispatcher is the main object managing the websocket connection to the +// monitor. For more information on the protocol, see pkg/api/types.go +type Dispatcher struct { + // The underlying connection we are managing + conn *websocket.Conn + + // When someone sends a message, the dispatcher will attach a transaction id + // to it so that it knows when a response is back. When it receives a message + // with the same transaction id, it knows that that is the repsonse to the original + // message and will send it down the SignalSender so the original sender can use it. + waiters map[uint64]util.SignalSender[waiterResult] + + // lock guards mutating the waiters, exitError, and (closing) exitSignal field. + // conn and lastTransactionID are all thread safe. + // runner, exit, and protoVersion are never modified. + lock sync.Mutex + + // The runner that this dispatcher is part of + runner *Runner + + exit func(status websocket.StatusCode, err error) + + exitError error + exitSignal chan struct{} + + // lastTransactionID is the last transaction id. When we need a new one + // we simply bump it and take the new number. + // + // In order to prevent collisions between the IDs generated here vs by + // the monitor, we only generate even IDs, and the monitor only generates + // odd ones. So generating a new value is done by adding 2. + lastTransactionID atomic.Uint64 + + protoVersion api.MonitorProtoVersion +} + +type waiterResult struct { + err error + res *MonitorResult +} + +// Create a new Dispatcher, establishing a connection with the vm-monitor and setting up all the +// background threads to manage the connection. +func NewDispatcher( + ctx context.Context, + logger *zap.Logger, + addr string, + runner *Runner, + sendUpscaleRequested func(request api.MoreResources, withLock func()), +) (_finalDispatcher *Dispatcher, _ error) { + // Create a new root-level context for this Dispatcher so that we can cancel if need be + ctx, cancelRootContext := context.WithCancel(ctx) + defer func() { + // cancel on failure or panic + if _finalDispatcher == nil { + cancelRootContext() + } + }() + + connectTimeout := time.Second * time.Duration(runner.global.config.Monitor.ConnectionTimeoutSeconds) + conn, protoVersion, err := connectToMonitor(ctx, logger, addr, connectTimeout) + if err != nil { + return nil, err + } + + disp := &Dispatcher{ + conn: conn, + waiters: make(map[uint64]util.SignalSender[waiterResult]), + runner: runner, + lock: sync.Mutex{}, + exit: nil, // set below + exitError: nil, + exitSignal: make(chan struct{}), + lastTransactionID: atomic.Uint64{}, // Note: initialized to 0, so it's even, as required. 
+ protoVersion: *protoVersion, + } + disp.exit = func(status websocket.StatusCode, err error) { + disp.lock.Lock() + defer disp.lock.Unlock() + + if disp.Exited() { + return + } + + close(disp.exitSignal) + disp.exitError = err + cancelRootContext() + + var closeReason string + if err != nil { + closeReason = err.Error() + } else { + closeReason = "normal exit" + } + + // Run the actual websocket closing in a separate goroutine so we don't block while holding + // the lock. It can take up to 10s to close: + // + // > [Close] will write a WebSocket close frame with a timeout of 5s and then wait 5s for + // > the peer to send a close frame. + // + // This *potentially* runs us into race issues, but those are probably less bad to deal + // with, tbh. + go disp.conn.Close(status, closeReason) + } + + go func() { + <-ctx.Done() + disp.exit(websocket.StatusNormalClosure, nil) + }() + + msgHandlerLogger := logger.Named("message-handler") + runner.spawnBackgroundWorker(ctx, msgHandlerLogger, "vm-monitor message handler", func(c context.Context, l *zap.Logger) { + disp.run(c, l, sendUpscaleRequested) + }) + runner.spawnBackgroundWorker(ctx, logger.Named("health-checks"), "vm-monitor health checks", func(ctx context.Context, logger *zap.Logger) { + timeout := time.Second * time.Duration(runner.global.config.Monitor.ResponseTimeoutSeconds) + // FIXME: make this duration configurable + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + // if we've had sequential failures for more than + var firstSequentialFailure *time.Time + continuedFailureAbortTimeout := time.Second * time.Duration(runner.global.config.Monitor.MaxHealthCheckSequentialFailuresSeconds) + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + } + + _, err := disp.Call(ctx, logger, timeout, "HealthCheck", api.HealthCheck{}) + if err != nil { + logger.Warn("vm-monitor health check failed", zap.Error(err)) + + if firstSequentialFailure == nil { + now := time.Now() + firstSequentialFailure = &now + } else if since := time.Since(*firstSequentialFailure); since > continuedFailureAbortTimeout { + err := fmt.Errorf("vm-monitor has been failing health checks for at least %s", continuedFailureAbortTimeout) + logger.Error(fmt.Sprintf("%s, triggering connection restart", err.Error())) + disp.exit(websocket.StatusInternalError, err) + } + } else { + // health check was successful, so reset the sequential failures count + firstSequentialFailure = nil + + runner.status.update(runner.global, func(s podStatus) podStatus { + now := time.Now() + s.lastSuccessfulMonitorComm = &now + return s + }) + } + } + }) + return disp, nil +} + +func connectToMonitor( + ctx context.Context, + logger *zap.Logger, + addr string, + timeout time.Duration, +) (_ *websocket.Conn, _ *api.MonitorProtoVersion, finalErr error) { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + logger.Info("Connecting to vm-monitor via websocket", zap.String("addr", addr)) + + // We do not need to close the response body according to docs. + // Doing so causes memory bugs. 
+ c, _, err := websocket.Dial(ctx, addr, nil) //nolint:bodyclose // see comment above + if err != nil { + return nil, nil, fmt.Errorf("error establishing websocket connection to %s: %w", addr, err) + } + + // If we return early, make sure we close the websocket + var failureReason websocket.StatusCode + defer func() { + if finalErr != nil { + if failureReason == 0 { + failureReason = websocket.StatusInternalError + } + c.Close(failureReason, finalErr.Error()) + } + }() + + versionRange := api.VersionRange[api.MonitorProtoVersion]{ + Min: MinMonitorProtocolVersion, + Max: MaxMonitorProtocolVersion, + } + logger.Info("Sending protocol version range", zap.Any("range", versionRange)) + + // Figure out protocol version + err = wsjson.Write(ctx, c, versionRange) + if err != nil { + return nil, nil, fmt.Errorf("error sending protocol range to monitor: %w", err) + } + + logger.Info("Reading monitor version response") + var resp api.MonitorProtocolResponse + err = wsjson.Read(ctx, c, &resp) + if err != nil { + logger.Error("Failed to read monitor response", zap.Error(err)) + failureReason = websocket.StatusProtocolError + return nil, nil, fmt.Errorf("Error reading vm-monitor response during protocol handshake: %w", err) + } + + logger.Info("Got monitor version response", zap.Any("response", resp)) + if resp.Error != nil { + logger.Error("Got error response from vm-monitor", zap.Any("response", resp), zap.String("error", *resp.Error)) + failureReason = websocket.StatusProtocolError + return nil, nil, fmt.Errorf("Monitor returned error during protocol handshake: %q", *resp.Error) + } + + logger.Info("negotiated protocol version with monitor", zap.Any("response", resp), zap.String("version", resp.Version.String())) + return c, &resp.Version, nil +} + +// ExitSignal returns a channel that is closed when the Dispatcher is no longer running +func (disp *Dispatcher) ExitSignal() <-chan struct{} { + return disp.exitSignal +} + +// Exited returns whether the Dispatcher is no longer running +// +// Exited will return true iff the channel returned by ExitSignal is closed. +func (disp *Dispatcher) Exited() bool { + select { + case <-disp.exitSignal: + return true + default: + return false + } +} + +// ExitError returns the error that caused the dispatcher to exit, if there was one +func (disp *Dispatcher) ExitError() error { + disp.lock.Lock() + defer disp.lock.Unlock() + return disp.exitError +} + +// Send a message down the connection. Only call this method with types that +// SerializeMonitorMessage can handle. +func (disp *Dispatcher) send(ctx context.Context, logger *zap.Logger, id uint64, message any) error { + data, err := api.SerializeMonitorMessage(message, id) + if err != nil { + return fmt.Errorf("error serializing message: %w", err) + } + // wsjson.Write serializes whatever is passed in, and go serializes []byte + // by base64 encoding it, so use RawMessage to avoid serializing to []byte + // (done by SerializeMonitorMessage), and then base64 encoding again + raw := json.RawMessage(data) + logger.Info("sending message to monitor", zap.ByteString("message", raw)) + return wsjson.Write(ctx, disp.conn, &raw) +} + +// registerWaiter registers a util.SignalSender to get notified when a +// message with the given id arrives. +func (disp *Dispatcher) registerWaiter(id uint64, sender util.SignalSender[waiterResult]) { + disp.lock.Lock() + defer disp.lock.Unlock() + disp.waiters[id] = sender +} + +// unregisterWaiter deletes a preexisting waiter without interacting with it. 
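The health-check worker above already uses this request path; as a more isolated sketch, a one-off call could look like the following (written as if inside pkg/agent; the helper name and the 5-second timeout are arbitrary). Only the result field matching the request type is expected to be populated.

```go
package agent

import (
	"context"
	"errors"
	"time"

	"go.uber.org/zap"

	"github.com/neondatabase/autoscaling/pkg/api"
)

// checkMonitorHealth is a hypothetical helper showing the request/response pattern:
// Call blocks until the monitor replies or the timeout fires.
func checkMonitorHealth(ctx context.Context, logger *zap.Logger, disp *Dispatcher) error {
	res, err := disp.Call(ctx, logger, 5*time.Second, "HealthCheck", api.HealthCheck{})
	if err != nil {
		return err
	}
	if res.HealthCheck == nil {
		return errors.New("expected a HealthCheck response")
	}
	return nil
}
```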
+func (disp *Dispatcher) unregisterWaiter(id uint64) { + disp.lock.Lock() + defer disp.lock.Unlock() + delete(disp.waiters, id) +} + +// Make a request to the monitor and wait for a response. The value passed as message must be a +// valid value to send to the monitor. See the docs for SerializeMonitorMessage for more. +// +// This function must NOT be called while holding disp.runner.lock. +func (disp *Dispatcher) Call( + ctx context.Context, + logger *zap.Logger, + timeout time.Duration, + messageType string, + message any, +) (*MonitorResult, error) { + id := disp.lastTransactionID.Add(2) + sender, receiver := util.NewSingleSignalPair[waiterResult]() + + status := "internal error" + defer func() { + disp.runner.global.metrics.monitorRequestsOutbound.WithLabelValues(messageType, status).Inc() + }() + + // register the waiter *before* sending, so that we avoid a potential race where we'd get a + // reply to the message before being ready to receive it. + disp.registerWaiter(id, sender) + err := disp.send(ctx, logger, id, message) + if err != nil { + logger.Error("failed to send message", zap.Any("message", message), zap.Error(err)) + disp.unregisterWaiter(id) + status = "[error: failed to send]" + return nil, err + } + + timer := time.NewTimer(timeout) + defer timer.Stop() + + select { + case result := <-receiver.Recv(): + if result.err != nil { + status = fmt.Sprintf("[error: %s]", result.err) + return nil, errors.New("monitor experienced an internal error") + } + + status = "ok" + return result.res, nil + case <-timer.C: + err := fmt.Errorf("timed out waiting %v for monitor response", timeout) + disp.unregisterWaiter(id) + status = "[error: timed out waiting for response]" + return nil, err + } +} + +func extractField[T any](data map[string]interface{}, key string) (*T, error) { + field, ok := data[key] + if !ok { + return nil, fmt.Errorf("data had no key %q", key) + } + + coerced, ok := field.(T) + if !ok { + return nil, fmt.Errorf("data[%q] was not of type %T", key, *new(T)) + } + + return &coerced, nil +} + +type messageHandlerFuncs struct { + handleUpscaleRequest func(api.UpscaleRequest) + handleUpscaleConfirmation func(api.UpscaleConfirmation, uint64) error + handleDownscaleResult func(api.DownscaleResult, uint64) error + handleMonitorError func(api.InternalError, uint64) error + handleHealthCheck func(api.HealthCheck, uint64) error +} + +// Handle messages from the monitor. Make sure that all message types the monitor +// can send are included in the inner switch statement. +func (disp *Dispatcher) HandleMessage( + ctx context.Context, + logger *zap.Logger, + handlers messageHandlerFuncs, +) error { + // Deserialization has several steps: + // 1. Deserialize into an unstructured map[string]interface{} + // 2. Read the `type` field to know the type of the message + // 3. Then try to to deserialize again, but into that specific type + // 4. All message also come with an integer id under the key `id` + + // wsjson.Read tries to deserialize the message. If we were to read to a + // []byte, it would base64 encode it as part of deserialization. 
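The base64 point made in the comments around send and HandleMessage comes straight from encoding/json: a []byte is marshalled as a base64 string, while json.RawMessage is embedded verbatim. A standalone demonstration, unrelated to this codebase:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Shows why the dispatcher uses json.RawMessage instead of []byte: plain byte slices
// get base64-encoded by encoding/json, raw messages are passed through untouched.
func main() {
	payload := []byte(`{"type":"HealthCheck","id":4}`)

	asBytes, _ := json.Marshal(payload)
	asRaw, _ := json.Marshal(json.RawMessage(payload))

	fmt.Println(string(asBytes)) // a quoted base64 string ("eyJ0eXBl...")
	fmt.Println(string(asRaw))   // {"type":"HealthCheck","id":4}
}
```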
json.RawMessage + // avoids this, and we manually deserialize later + var message json.RawMessage + if err := wsjson.Read(ctx, disp.conn, &message); err != nil { + return fmt.Errorf("Error receiving message: %w", err) + } + logger.Info("(pre-decoding): received a message", zap.ByteString("message", message)) + + var unstructured map[string]interface{} + if err := json.Unmarshal(message, &unstructured); err != nil { + return fmt.Errorf("Error deserializing message: %q", string(message)) + } + + typeStr, err := extractField[string](unstructured, "type") + if err != nil { + return fmt.Errorf("Error extracting 'type' field: %w", err) + } + + // Go thinks all json numbers are float64, so we first deserialize to that to + // avoid the type error, then cast to uint64 + f, err := extractField[float64](unstructured, "id") + if err != nil { + return fmt.Errorf("Error extracting 'id' field: %w", err) + } + id := uint64(*f) + + var rootErr error + + // now that we have the waiter's ID, make sure that if there's some failure past this point, we + // propagate that along to the monitor and remove it + defer func() { + // speculatively determine the root error, to send that along to the instance of Call + // waiting for it. + var err error + + panicPayload := recover() + if panicPayload != nil { + err = errors.New("panicked") + } else if rootErr != nil { + err = rootErr + } else { + // if HandleMessage bailed without panicking or setting rootErr, but *also* without + // sending a message to the waiter, we should make sure that *something* gets sent, so + // the message doesn't just time out. But we don't have more information, so the error + // is still just "unknown". + err = errors.New("unknown") + } + + disp.lock.Lock() + defer disp.lock.Unlock() + if sender, ok := disp.waiters[id]; ok { + sender.Send(waiterResult{err: err, res: nil}) + delete(disp.waiters, id) + } else if rootErr != nil { + // we had some error while handling the message with this ID, and there wasn't a + // corresponding waiter. 
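The `float64` detour above exists because `encoding/json` decodes every JSON number into `float64` when the target is `interface{}`. A small sketch of the same two-stage decode; the message literal and the concrete struct are stand-ins for the real `api` types:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	msg := []byte(`{"type":"UpscaleConfirmation","id":7}`)

	// Stage 1: decode into an unstructured map just to peek at "type" and "id".
	var unstructured map[string]interface{}
	if err := json.Unmarshal(msg, &unstructured); err != nil {
		panic(err)
	}

	typeStr := unstructured["type"].(string)
	// With an interface{} target, every JSON number comes back as float64,
	// hence the explicit cast before using the id as a uint64.
	id := uint64(unstructured["id"].(float64))
	fmt.Println(typeStr, id) // UpscaleConfirmation 7

	// Stage 2: decode the same bytes again, into the concrete type for "type".
	var conf struct {
		ID uint64 `json:"id"`
	}
	if err := json.Unmarshal(msg, &conf); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", conf) // {ID:7}
}
```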
We should make note of this in the metrics: + status := fmt.Sprintf("[error: %s]", rootErr) + disp.runner.global.metrics.monitorRequestsInbound.WithLabelValues(*typeStr, status) + } + + // resume panicking if we were before + if panicPayload != nil { + panic(panicPayload) + } + }() + + // Helper function to handle common unmarshalling logic + unmarshal := func(value any) error { + if err := json.Unmarshal(message, value); err != nil { + rootErr = errors.New("Failed unmarshaling JSON") + err := fmt.Errorf("Error unmarshaling %s: %w", *typeStr, err) + logger.Error(rootErr.Error(), zap.Error(err)) + // we're already on the error path anyways + _ = disp.send(ctx, logger, id, api.InvalidMessage{Error: err.Error()}) + return err + } + + return nil + } + + switch *typeStr { + case "UpscaleRequest": + var req api.UpscaleRequest + if err := unmarshal(&req); err != nil { + return err + } + handlers.handleUpscaleRequest(req) + return nil + case "UpscaleConfirmation": + var confirmation api.UpscaleConfirmation + if err := unmarshal(&confirmation); err != nil { + return err + } + return handlers.handleUpscaleConfirmation(confirmation, id) + case "DownscaleResult": + var res api.DownscaleResult + if err := unmarshal(&res); err != nil { + return err + } + return handlers.handleDownscaleResult(res, id) + case "InternalError": + var monitorErr api.InternalError + if err := unmarshal(&monitorErr); err != nil { + return err + } + return handlers.handleMonitorError(monitorErr, id) + case "HealthCheck": + var healthCheck api.HealthCheck + if err := unmarshal(&healthCheck); err != nil { + return err + } + return handlers.handleHealthCheck(healthCheck, id) + case "InvalidMessage": + var warning api.InvalidMessage + if err := unmarshal(&warning); err != nil { + return err + } + logger.Warn("Received notification we sent an invalid message", zap.Any("warning", warning)) + return nil + default: + rootErr = errors.New("Received unknown message type") + return disp.send( + ctx, + logger, + id, + api.InvalidMessage{Error: fmt.Sprintf("Received message of unknown type: %q", *typeStr)}, + ) + } +} + +// Long running function that orchestrates all requests/responses. +func (disp *Dispatcher) run(ctx context.Context, logger *zap.Logger, upscaleRequester func(_ api.MoreResources, withLock func())) { + logger.Info("Starting message handler") + + // Utility for logging + returning an error when we get a message with an + // id we're unaware of. Note: unknownMessage is not a message type. + handleUnkownMessage := func(messageType string, id uint64) error { + fmtString := "Received %s with id %d but no record of previous message with that id" + msg := fmt.Sprintf(fmtString, messageType, id) + logger.Warn(msg, zap.Uint64("id", id)) + return disp.send(ctx, logger, id, api.InvalidMessage{Error: msg}) + } + + // Does not take a message id because we don't know when the agent will + // upscale. The monitor will get the result back as a NotifyUpscale message + // from us, with a new id. + handleUpscaleRequest := func(req api.UpscaleRequest) { + // TODO: it shouldn't be this function's responsibility to update metrics. 
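The deferred block above guarantees that a registered waiter always hears *something*, even if handling panicked, and then re-raises the panic so it isn't swallowed. A stripped-down sketch of that recover, notify, re-panic shape, with generic names rather than the dispatcher's actual fields:

```go
package main

import (
	"errors"
	"fmt"
)

// handle runs fn, guaranteeing that cleanup sees either fn's error or a
// placeholder for a panic, and that any panic keeps propagating afterwards.
func handle(fn func() error, cleanup func(error)) error {
	var rootErr error

	defer func() {
		err := rootErr
		panicPayload := recover()
		if panicPayload != nil {
			err = errors.New("panicked")
		} else if err == nil {
			// Nothing went wrong that we know of, but we still have to say
			// *something* so a waiter doesn't just time out.
			err = errors.New("unknown")
		}

		cleanup(err)

		// Resume panicking if we were before, so the caller still sees it.
		if panicPayload != nil {
			panic(panicPayload)
		}
	}()

	rootErr = fn()
	return rootErr
}

func main() {
	_ = handle(
		func() error { return errors.New("bad message") },
		func(err error) { fmt.Println("cleanup saw:", err) },
	)
}
```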
+ defer func() { + disp.runner.global.metrics.monitorRequestsInbound.WithLabelValues("UpscaleRequest", "ok") + }() + + resourceReq := api.MoreResources{ + Cpu: false, + Memory: true, + } + + upscaleRequester(resourceReq, func() { + logger.Info("Updating requested upscale", zap.Any("requested", resourceReq)) + }) + } + handleUpscaleConfirmation := func(_ api.UpscaleConfirmation, id uint64) error { + disp.lock.Lock() + defer disp.lock.Unlock() + + sender, ok := disp.waiters[id] + if ok { + logger.Info("vm-monitor confirmed upscale", zap.Uint64("id", id)) + sender.Send(waiterResult{ + err: nil, + res: &MonitorResult{ + Confirmation: &api.UpscaleConfirmation{}, + Result: nil, + HealthCheck: nil, + }, + }) + // Don't forget to delete the waiter + delete(disp.waiters, id) + return nil + } else { + return handleUnkownMessage("UpscaleConfirmation", id) + } + } + handleDownscaleResult := func(res api.DownscaleResult, id uint64) error { + disp.lock.Lock() + defer disp.lock.Unlock() + + sender, ok := disp.waiters[id] + if ok { + logger.Info("vm-monitor returned downscale result", zap.Uint64("id", id), zap.Any("result", res)) + sender.Send(waiterResult{ + err: nil, + res: &MonitorResult{ + Result: &res, + Confirmation: nil, + HealthCheck: nil, + }, + }) + // Don't forget to delete the waiter + delete(disp.waiters, id) + return nil + } else { + return handleUnkownMessage("DownscaleResult", id) + } + } + handleMonitorError := func(err api.InternalError, id uint64) error { + disp.lock.Lock() + defer disp.lock.Unlock() + + sender, ok := disp.waiters[id] + if ok { + logger.Warn( + "vm-monitor experienced an internal error", + zap.Uint64("id", id), + zap.String("error", err.Error), + ) + // Indicate to the receiver that an error occured + sender.Send(waiterResult{ + err: errors.New("vm-monitor internal error"), + res: nil, + }) + // Don't forget to delete the waiter + delete(disp.waiters, id) + return nil + } else { + return handleUnkownMessage("MonitorError", id) + } + } + handleHealthCheck := func(confirmation api.HealthCheck, id uint64) error { + disp.lock.Lock() + defer disp.lock.Unlock() + + sender, ok := disp.waiters[id] + if ok { + logger.Info("vm-monitor responded to health check", zap.Uint64("id", id)) + // Indicate to the receiver that an error occured + sender.Send(waiterResult{ + err: nil, + res: &MonitorResult{ + HealthCheck: &api.HealthCheck{}, + Result: nil, + Confirmation: nil, + }, + }) + // Don't forget to delete the waiter + delete(disp.waiters, id) + return nil + } else { + return handleUnkownMessage("HealthCheck", id) + } + } + + handlers := messageHandlerFuncs{ + handleUpscaleRequest: handleUpscaleRequest, + handleUpscaleConfirmation: handleUpscaleConfirmation, + handleDownscaleResult: handleDownscaleResult, + handleMonitorError: handleMonitorError, + handleHealthCheck: handleHealthCheck, + } + + for { + err := disp.HandleMessage(ctx, logger, handlers) + if err != nil { + if ctx.Err() != nil { + // The context is already cancelled, so this error is mostly likely + // expected. For example, if the context is cancelled because the + // runner exited, we should expect to fail to read off the connection, + // which is closed by the server exit. 
+ logger.Warn("Error handling message", zap.Error(err)) + } else { + logger.Error("Error handling message, shutting down connection", zap.Error(err)) + err = fmt.Errorf("Error handling message: %w", err) + // note: in theory we *could* be more descriptive with these statuses, but the only + // consumer of this API is the vm-monitor, and it doesn't check those. + disp.exit(websocket.StatusInternalError, err) + } + return + } + } +} diff --git a/pkg/agent/execbridge.go b/pkg/agent/execbridge.go index 9eba23565..50d56a64c 100644 --- a/pkg/agent/execbridge.go +++ b/pkg/agent/execbridge.go @@ -16,9 +16,9 @@ import ( ) var ( - _ executor.PluginInterface = (*execPluginInterface)(nil) - _ executor.NeonVMInterface = (*execNeonVMInterface)(nil) - _ executor.InformantInterface = (*execInformantInterface)(nil) + _ executor.PluginInterface = (*execPluginInterface)(nil) + _ executor.NeonVMInterface = (*execNeonVMInterface)(nil) + _ executor.MonitorInterface = (*execMonitorInterface)(nil) ) ///////////////////////////////////////////////////////////// @@ -119,46 +119,53 @@ func (iface *execNeonVMInterface) Request(ctx context.Context, logger *zap.Logge } //////////////////////////////////////////////////// -// Informant-related interface and implementation // +// Monitor-related interface and implementation // //////////////////////////////////////////////////// -type execInformantInterface struct { - runner *Runner - core *executor.ExecutorCore +type execMonitorInterface struct { + runner *Runner + core *executor.ExecutorCore + requestLock util.ChanMutex } -func makeInformantInterface(r *Runner, core *executor.ExecutorCore) *execInformantInterface { - return &execInformantInterface{runner: r, core: core} +func makeMonitorInterface(r *Runner, core *executor.ExecutorCore) *execMonitorInterface { + return &execMonitorInterface{runner: r, core: core, requestLock: util.NewChanMutex()} } -// EmptyID implements executor.InformantInterface -func (iface *execInformantInterface) EmptyID() string { +// EmptyID implements executor.MonitorInterface +func (iface *execMonitorInterface) EmptyID() string { return "" } -func (iface *execInformantInterface) GetHandle() executor.InformantHandle { - server := iface.runner.server.Load() +func (iface *execMonitorInterface) GetHandle() executor.MonitorHandle { + dispatcher := iface.runner.monitor.Load() - if server == nil || server.ExitStatus() != nil { + if dispatcher == nil || dispatcher.Exited() { return nil } - return &execInformantHandle{server: server} + return &execMonitorHandle{ + runner: iface.runner, + dispatcher: dispatcher, + requestLock: iface.requestLock, + } } -type execInformantHandle struct { - server *InformantServer +type execMonitorHandle struct { + runner *Runner + dispatcher *Dispatcher + requestLock util.ChanMutex } -func (h *execInformantHandle) ID() string { - return h.server.desc.AgentID.String() +func (h *execMonitorHandle) ID() string { + panic("todo") } -func (h *execInformantHandle) RequestLock() util.ChanMutex { - return h.server.requestLock +func (h *execMonitorHandle) RequestLock() util.ChanMutex { + return h.requestLock } -func (h *execInformantHandle) Downscale( +func (h *execMonitorHandle) Downscale( ctx context.Context, logger *zap.Logger, current api.Resources, @@ -167,33 +174,33 @@ func (h *execInformantHandle) Downscale( // Check validity of the message we're sending if target.HasFieldGreaterThan(current) { innerMsg := fmt.Errorf("%+v has field greater than %+v", target, current) - panic(fmt.Errorf("(*execInformantHandle).Downscale() 
called with target greater than current: %w", innerMsg)) + panic(fmt.Errorf("(*execMonitorHandle).Downscale() called with target greater than current: %w", innerMsg)) } - h.server.runner.recordResourceChange(current, target, h.server.runner.global.metrics.informantRequestedChange) + h.runner.recordResourceChange(current, target, h.runner.global.metrics.monitorRequestedChange) - result, err := h.server.Downscale(ctx, logger, target) + result, err := doMonitorDownscale(ctx, logger, h.dispatcher, target) if err != nil && result.Ok { - h.server.runner.recordResourceChange(current, target, h.server.runner.global.metrics.informantApprovedChange) + h.runner.recordResourceChange(current, target, h.runner.global.metrics.monitorApprovedChange) } return result, err } -func (h *execInformantHandle) Upscale(ctx context.Context, logger *zap.Logger, current, target api.Resources) error { +func (h *execMonitorHandle) Upscale(ctx context.Context, logger *zap.Logger, current, target api.Resources) error { // Check validity of the message we're sending if target.HasFieldLessThan(current) { innerMsg := fmt.Errorf("%+v has field less than %+v", target, current) - panic(fmt.Errorf("(*execInformantHandle).Upscale() called with target less than current: %w", innerMsg)) + panic(fmt.Errorf("(*execMonitorHandle).Upscale() called with target less than current: %w", innerMsg)) } - h.server.runner.recordResourceChange(current, target, h.server.runner.global.metrics.informantRequestedChange) + h.runner.recordResourceChange(current, target, h.runner.global.metrics.monitorRequestedChange) - err := h.server.Upscale(ctx, logger, target) + err := doMonitorUpscale(ctx, logger, h.dispatcher, target) if err != nil { - h.server.runner.recordResourceChange(current, target, h.server.runner.global.metrics.informantApprovedChange) + h.runner.recordResourceChange(current, target, h.runner.global.metrics.monitorApprovedChange) } return err diff --git a/pkg/agent/executor/core.go b/pkg/agent/executor/core.go index 5ae6cfd10..81c9573ec 100644 --- a/pkg/agent/executor/core.go +++ b/pkg/agent/executor/core.go @@ -31,9 +31,9 @@ type ExecutorCore struct { } type ClientSet struct { - Plugin PluginInterface - NeonVM NeonVMInterface - Informant InformantInterface + Plugin PluginInterface + NeonVM NeonVMInterface + Monitor MonitorInterface } func NewExecutorCore(stateLogger *zap.Logger, vm api.VmInfo, config core.Config) *ExecutorCore { @@ -123,33 +123,23 @@ func (c ExecutorCoreUpdater) SchedulerGone(withLock func()) { }) } -func (c ExecutorCoreUpdater) ResetInformant(withLock func()) { +func (c ExecutorCoreUpdater) ResetMonitor(withLock func()) { c.core.update(func(state *core.State) { - state.Informant().Reset() + state.Monitor().Reset() withLock() }) } func (c ExecutorCoreUpdater) UpscaleRequested(resources api.MoreResources, withLock func()) { c.core.update(func(state *core.State) { - state.Informant().UpscaleRequested(time.Now(), resources) + state.Monitor().UpscaleRequested(time.Now(), resources) withLock() }) } -func (c ExecutorCoreUpdater) InformantRegistered(active bool, withLock func()) { +func (c ExecutorCoreUpdater) MonitorActive(active bool, withLock func()) { c.core.update(func(state *core.State) { - state.Informant().SuccessfullyRegistered() - if active { - state.Informant().Active(active) - } - withLock() - }) -} - -func (c ExecutorCoreUpdater) InformantActive(active bool, withLock func()) { - c.core.update(func(state *core.State) { - state.Informant().Active(active) + state.Monitor().Active(active) withLock() }) } diff --git 
a/pkg/agent/executor/exec_informant.go b/pkg/agent/executor/exec_monitor.go similarity index 66% rename from pkg/agent/executor/exec_informant.go rename to pkg/agent/executor/exec_monitor.go index 6f758d079..817a6213c 100644 --- a/pkg/agent/executor/exec_informant.go +++ b/pkg/agent/executor/exec_monitor.go @@ -12,19 +12,19 @@ import ( "github.com/neondatabase/autoscaling/pkg/util" ) -type InformantInterface interface { +type MonitorInterface interface { EmptyID() string - GetHandle() InformantHandle + GetHandle() MonitorHandle } -type InformantHandle interface { +type MonitorHandle interface { ID() string RequestLock() util.ChanMutex Downscale(_ context.Context, _ *zap.Logger, current, target api.Resources) (*api.DownscaleResult, error) Upscale(_ context.Context, _ *zap.Logger, current, target api.Resources) error } -func (c *ExecutorCoreWithClients) DoInformantDownscales(ctx context.Context, logger *zap.Logger) { +func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logger *zap.Logger) { var ( updates util.BroadcastReceiver = c.updates.NewReceiver() requestLock util.ChanMutex = util.NewChanMutex() @@ -42,10 +42,10 @@ func (c *ExecutorCoreWithClients) DoInformantDownscales(ctx context.Context, log // meant to be called while holding c's lock idUnchanged := func(current string) bool { - if h := c.clients.Informant.GetHandle(); h != nil { + if h := c.clients.Monitor.GetHandle(); h != nil { return current == h.ID() } else { - return current == c.clients.Informant.EmptyID() + return current == c.clients.Monitor.EmptyID() } } @@ -63,7 +63,7 @@ func (c *ExecutorCoreWithClients) DoInformantDownscales(ctx context.Context, log } // Wait until we're supposed to make a request. - if last.actions.InformantDownscale == nil { + if last.actions.MonitorDownscale == nil { select { case <-ctx.Done(): return @@ -73,12 +73,12 @@ func (c *ExecutorCoreWithClients) DoInformantDownscales(ctx context.Context, log } } - action := *last.actions.InformantDownscale + action := *last.actions.MonitorDownscale - informant := c.clients.Informant.GetHandle() + monitor := c.clients.Monitor.GetHandle() - if informant != nil { - requestLock = informant.RequestLock() + if monitor != nil { + requestLock = monitor.RequestLock() // Try to acquire the request lock, but if something happens while we're waiting, we'll // abort & retry on the next loop iteration (or maybe not, if last.actions changed). @@ -95,16 +95,16 @@ func (c *ExecutorCoreWithClients) DoInformantDownscales(ctx context.Context, log var startTime time.Time c.update(func(state *core.State) { - logger.Info("Starting informant downscale request", zap.Any("action", action)) + logger.Info("Starting vm-monitor downscale request", zap.Any("action", action)) startTime = time.Now() - state.Informant().StartingDownscaleRequest(startTime) + state.Monitor().StartingDownscaleRequest(startTime) }) - result, err := doSingleInformantDownscaleRequest(ctx, ifaceLogger, informant, action) + result, err := doSingleMonitorDownscaleRequest(ctx, ifaceLogger, monitor, action) endTime := time.Now() c.update(func(state *core.State) { - unchanged := idUnchanged(informant.ID()) + unchanged := idUnchanged(monitor.ID()) logFields := []zap.Field{ zap.Any("action", action), zap.Duration("duration", endTime.Sub(startTime)), @@ -112,9 +112,9 @@ func (c *ExecutorCoreWithClients) DoInformantDownscales(ctx context.Context, log } if err != nil { - logger.Error("Informant downscale request failed", append(logFields, zap.Error(err))...) 
+ logger.Error("vm-monitor downscale request failed", append(logFields, zap.Error(err))...) if unchanged { - state.Informant().DownscaleRequestFailed(endTime) + state.Monitor().DownscaleRequestFailed(endTime) } return } @@ -122,34 +122,34 @@ func (c *ExecutorCoreWithClients) DoInformantDownscales(ctx context.Context, log logFields = append(logFields, zap.Any("response", result)) if !result.Ok { - logger.Warn("Informant denied downscale", logFields...) + logger.Warn("vm-monitor denied downscale", logFields...) if unchanged { - state.Informant().DownscaleRequestDenied(endTime, action.Target) + state.Monitor().DownscaleRequestDenied(endTime, action.Target) } } else { - logger.Info("Informant approved downscale", logFields...) + logger.Info("vm-monitor approved downscale", logFields...) if unchanged { - state.Informant().DownscaleRequestAllowed(endTime, action.Target) + state.Monitor().DownscaleRequestAllowed(endTime, action.Target) } } }) } } -func doSingleInformantDownscaleRequest( +func doSingleMonitorDownscaleRequest( ctx context.Context, logger *zap.Logger, - iface InformantHandle, - action core.ActionInformantDownscale, + iface MonitorHandle, + action core.ActionMonitorDownscale, ) (*api.DownscaleResult, error) { if iface == nil { - return nil, errors.New("No currently active informant") + return nil, errors.New("No currently active vm-monitor connection") } return iface.Downscale(ctx, logger, action.Current, action.Target) } -func (c *ExecutorCoreWithClients) DoInformantUpscales(ctx context.Context, logger *zap.Logger) { +func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger *zap.Logger) { var ( updates util.BroadcastReceiver = c.updates.NewReceiver() requestLock util.ChanMutex = util.NewChanMutex() @@ -167,10 +167,10 @@ func (c *ExecutorCoreWithClients) DoInformantUpscales(ctx context.Context, logge // meant to be called while holding c's lock idUnchanged := func(current string) bool { - if h := c.clients.Informant.GetHandle(); h != nil { + if h := c.clients.Monitor.GetHandle(); h != nil { return current == h.ID() } else { - return current == c.clients.Informant.EmptyID() + return current == c.clients.Monitor.EmptyID() } } @@ -188,7 +188,7 @@ func (c *ExecutorCoreWithClients) DoInformantUpscales(ctx context.Context, logge } // Wait until we're supposed to make a request. - if last.actions.InformantUpscale == nil { + if last.actions.MonitorUpscale == nil { select { case <-ctx.Done(): return @@ -198,12 +198,12 @@ func (c *ExecutorCoreWithClients) DoInformantUpscales(ctx context.Context, logge } } - action := *last.actions.InformantUpscale + action := *last.actions.MonitorUpscale - informant := c.clients.Informant.GetHandle() + monitor := c.clients.Monitor.GetHandle() - if informant != nil { - requestLock = informant.RequestLock() + if monitor != nil { + requestLock = monitor.RequestLock() // Try to acquire the request lock, but if something happens while we're waiting, we'll // abort & retry on the next loop iteration (or maybe not, if last.actions changed). 
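Both executor loops share one shape: wait until the core state yields an action, perform the request outside any state lock, and record the outcome only if the vm-monitor connection is still the one the request was sent to (the `idUnchanged` guard). A schematic version with plain channels; the types are placeholders, not the real `core.State` or executor interfaces:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

type action struct{ target int }

type state struct {
	pending   *action // nil means "nothing to do right now"
	monitorID string
}

func run(ctx context.Context, updates <-chan state, currentID func() string) {
	for {
		var st state
		select {
		case <-ctx.Done():
			return
		case st = <-updates:
		}

		if st.pending == nil {
			continue // wait for the next update
		}

		id := st.monitorID
		fmt.Printf("performing request for target=%d against monitor %q\n", st.pending.target, id)
		// ... the actual request would happen here, without holding any state lock ...

		// Only record the outcome if the same monitor connection is still current;
		// otherwise the result belongs to a connection that has since been replaced.
		if currentID() == id {
			fmt.Println("recording result")
		} else {
			fmt.Println("monitor changed mid-request; dropping result")
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()

	updates := make(chan state, 1)
	updates <- state{pending: &action{target: 2}, monitorID: "conn-1"}

	run(ctx, updates, func() string { return "conn-1" })
}
```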
@@ -220,16 +220,16 @@ func (c *ExecutorCoreWithClients) DoInformantUpscales(ctx context.Context, logge var startTime time.Time c.update(func(state *core.State) { - logger.Info("Starting informant upscale request", zap.Any("action", action)) + logger.Info("Starting vm-monitor upscale request", zap.Any("action", action)) startTime = time.Now() - state.Informant().StartingUpscaleRequest(startTime) + state.Monitor().StartingUpscaleRequest(startTime) }) - err := doSingleInformantUpscaleRequest(ctx, ifaceLogger, informant, action) + err := doSingleMonitorUpscaleRequest(ctx, ifaceLogger, monitor, action) endTime := time.Now() c.update(func(state *core.State) { - unchanged := idUnchanged(informant.ID()) + unchanged := idUnchanged(monitor.ID()) logFields := []zap.Field{ zap.Any("action", action), zap.Duration("duration", endTime.Sub(startTime)), @@ -237,29 +237,29 @@ func (c *ExecutorCoreWithClients) DoInformantUpscales(ctx context.Context, logge } if err != nil { - logger.Error("Informant upscale request failed", append(logFields, zap.Error(err))...) + logger.Error("vm-monitor upscale request failed", append(logFields, zap.Error(err))...) if unchanged { - state.Informant().UpscaleRequestFailed(endTime) + state.Monitor().UpscaleRequestFailed(endTime) } return } - logger.Info("Informant upscale request successful", logFields...) + logger.Info("vm-monitor upscale request successful", logFields...) if unchanged { - state.Informant().UpscaleRequestSuccessful(endTime, action.Target) + state.Monitor().UpscaleRequestSuccessful(endTime, action.Target) } }) } } -func doSingleInformantUpscaleRequest( +func doSingleMonitorUpscaleRequest( ctx context.Context, logger *zap.Logger, - iface InformantHandle, - action core.ActionInformantUpscale, + iface MonitorHandle, + action core.ActionMonitorUpscale, ) error { if iface == nil { - return errors.New("No currently active informant") + return errors.New("No currently active vm-monitor connection") } return iface.Upscale(ctx, logger, action.Current, action.Target) diff --git a/pkg/agent/globalstate.go b/pkg/agent/globalstate.go index db877e634..34e17996a 100644 --- a/pkg/agent/globalstate.go +++ b/pkg/agent/globalstate.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "strconv" "sync" "sync/atomic" "time" @@ -115,14 +116,22 @@ func (s *agentState) handleEvent(ctx context.Context, logger *zap.Logger, event switch event.kind { case vmEventDeleted: state.stop() - delete(s.pods, podName) + // mark the status as deleted, so that it gets removed from metrics. 
+ state.status.update(s, func(stat podStatus) podStatus { + stat.deleted = true + delete(s.pods, podName) // Do the removal while synchronized, because we can :) + return stat + }) case vmEventUpdated: - state.status.mu.Lock() - defer state.status.mu.Unlock() - - state.status.vmInfo = event.vmInfo - state.status.endpointID = event.endpointID - state.vmInfoUpdated.Send() + state.status.update(s, func(stat podStatus) podStatus { + now := time.Now() + stat.vmInfo = event.vmInfo + stat.endpointID = event.endpointID + stat.endpointAssignedAt = &now + state.vmInfoUpdated.Send() + + return stat + }) case vmEventAdded: s.handleVMEventAdded(ctx, event, podName) default: @@ -137,17 +146,28 @@ func (s *agentState) handleVMEventAdded( ) { runnerCtx, cancelRunnerContext := context.WithCancel(ctx) - status := &podStatus{ - mu: sync.Mutex{}, - endState: nil, - previousEndStates: nil, - vmInfo: event.vmInfo, - endpointID: event.endpointID, - - startTime: time.Now(), - lastSuccessfulInformantComm: nil, + now := time.Now() + + status := &lockedPodStatus{ + mu: sync.Mutex{}, + podStatus: podStatus{ + deleted: false, + endState: nil, + previousEndStates: nil, + vmInfo: event.vmInfo, + endpointID: event.endpointID, + endpointAssignedAt: &now, + state: "", // Explicitly set state to empty so that the initial state update does no decrement + stateUpdatedAt: now, + + startTime: now, + lastSuccessfulMonitorComm: nil, + }, } + // Empty update to trigger updating metrics and state. + status.update(s, func(s podStatus) podStatus { return s }) + restartCount := 0 runner := s.newRunner(event.vmInfo, podName, event.podIP, restartCount) runner.status = status @@ -183,7 +203,7 @@ func (s *agentState) TriggerRestartIfNecessary(runnerCtx context.Context, logger // 2. Wait for a random amount of time (between RunnerRestartMinWaitSeconds and RunnerRestartMaxWaitSeconds) // 3. 
Restart the Runner (if it still should be restarted) - status, ok := func() (*podStatus, bool) { + status, ok := func() (*lockedPodStatus, bool) { s.lock.Lock() defer s.lock.Unlock() // note: pod.status has a separate lock, so we're ok to release s.lock @@ -248,7 +268,7 @@ func (s *agentState) TriggerRestartIfNecessary(runnerCtx context.Context, logger r := util.NewTimeRange(time.Second, RunnerRestartMinWaitSeconds, RunnerRestartMaxWaitSeconds) waitDuration = r.Random() logger.Info( - "Runner was not runnign for long, restarting after delay", + "Runner was not running for long, restarting after delay", zap.Duration("totalRuntime", totalRuntime), zap.Duration("delay", waitDuration), ) @@ -287,34 +307,34 @@ func (s *agentState) TriggerRestartIfNecessary(runnerCtx context.Context, logger return } - pod.status.mu.Lock() - defer pod.status.mu.Unlock() - - // Runner was already restarted - if pod.status.endState == nil { - addedInfo := "this generally shouldn't happen, but could if there's a new pod with the same name" - logCancel(logger.Warn, fmt.Errorf("Runner was already restarted (%s)", addedInfo)) - return - } + pod.status.update(s, func(status podStatus) podStatus { + // Runner was already restarted + if status.endState == nil { + addedInfo := "this generally shouldn't happen, but could if there's a new pod with the same name" + logCancel(logger.Warn, fmt.Errorf("Runner was already restarted (%s)", addedInfo)) + return status + } - logger.Info("Restarting runner", zap.String("exitKind", string(exitKind)), zap.Duration("delay", time.Since(endTime))) - s.metrics.runnerRestarts.Inc() + logger.Info("Restarting runner", zap.String("exitKind", string(exitKind)), zap.Duration("delay", time.Since(endTime))) + s.metrics.runnerRestarts.Inc() - restartCount := len(pod.status.previousEndStates) + 1 - runner := s.newRunner(pod.status.vmInfo, podName, podIP, restartCount) - runner.status = pod.status + restartCount := len(status.previousEndStates) + 1 + runner := s.newRunner(status.vmInfo, podName, podIP, restartCount) + runner.status = pod.status - txVMUpdate, rxVMUpdate := util.NewCondChannelPair() - // note: pod is *podState, so we don't need to re-assign to the map. - pod.vmInfoUpdated = txVMUpdate - pod.runner = runner + txVMUpdate, rxVMUpdate := util.NewCondChannelPair() + // note: pod is *podState, so we don't need to re-assign to the map. 
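The restart path sleeps a randomized delay between the configured minimum and maximum before re-spawning the Runner, which avoids synchronized restart storms across many VMs. `util.NewTimeRange(...).Random()` isn't defined in this diff, so the sketch below simply assumes it means a uniformly random duration in that range, and the constant values are placeholders:

```go
package main

import (
	"fmt"
	"math/rand"
	"time"
)

// Placeholder values; the real constants live elsewhere in the agent.
const (
	runnerRestartMinWaitSeconds = 5
	runnerRestartMaxWaitSeconds = 10
)

// randomDelay picks a uniformly random duration in [min, max] seconds,
// mirroring what a TimeRange.Random() helper is assumed to do.
func randomDelay(minSeconds, maxSeconds int) time.Duration {
	span := maxSeconds - minSeconds + 1
	return time.Duration(minSeconds+rand.Intn(span)) * time.Second
}

func main() {
	wait := randomDelay(runnerRestartMinWaitSeconds, runnerRestartMaxWaitSeconds)
	fmt.Println("restarting runner after", wait)
}
```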
+ pod.vmInfoUpdated = txVMUpdate + pod.runner = runner - pod.status.previousEndStates = append(pod.status.previousEndStates, *pod.status.endState) - pod.status.endState = nil - pod.status.startTime = time.Now() + status.previousEndStates = append(status.previousEndStates, *status.endState) + status.endState = nil + status.startTime = time.Now() - runnerLogger := s.loggerForRunner(pod.status.vmInfo.NamespacedName(), podName) - runner.Spawn(runnerCtx, runnerLogger, rxVMUpdate) + runnerLogger := s.loggerForRunner(status.vmInfo.NamespacedName(), podName) + runner.Spawn(runnerCtx, runnerLogger, rxVMUpdate) + return status + }) }() } @@ -338,12 +358,10 @@ func (s *agentState) newRunner(vmInfo api.VmInfo, podName util.NamespacedName, p lastMetrics: nil, scheduler: atomic.Pointer[Scheduler]{}, - server: atomic.Pointer[InformantServer]{}, - informant: nil, + monitor: atomic.Pointer[Dispatcher]{}, computeUnit: nil, lastApproved: nil, lastSchedulerError: nil, - lastInformantError: nil, backgroundWorkerCount: atomic.Int64{}, backgroundPanic: make(chan error), @@ -355,7 +373,7 @@ type podState struct { stop context.CancelFunc runner *Runner - status *podStatus + status *lockedPodStatus vmInfoUpdated util.CondChannelSender } @@ -381,16 +399,23 @@ func (p *podState) dump(ctx context.Context) podStateDump { } } -type podStatus struct { +type lockedPodStatus struct { mu sync.Mutex + podStatus +} + +type podStatus struct { startTime time.Time + // if true, the corresponding podState is no longer included in the global pod map + deleted bool + // if non-nil, the runner is finished endState *podStatusEndState previousEndStates []podStatusEndState - lastSuccessfulInformantComm *time.Time + lastSuccessfulMonitorComm *time.Time // vmInfo stores the latest information about the VM, as given by the global VM watcher. // @@ -400,6 +425,12 @@ type podStatus struct { // endpointID, if non-empty, stores the ID of the endpoint associated with the VM endpointID string + + // NB: this value, once non-nil, is never changed. + endpointAssignedAt *time.Time + + state runnerMetricState + stateUpdatedAt time.Time } type podStatusDump struct { @@ -408,11 +439,15 @@ type podStatusDump struct { EndState *podStatusEndState `json:"endState"` PreviousEndStates []podStatusEndState `json:"previousEndStates"` - LastSuccessfulInformantComm *time.Time `json:"lastSuccessfulInformantComm"` + LastSuccessfulMonitorComm *time.Time `json:"lastSuccessfulMonitorComm"` VMInfo api.VmInfo `json:"vmInfo"` - EndpointID string `json:"endpointID"` + EndpointID string `json:"endpointID"` + EndpointAssignedAt *time.Time `json:"endpointAssignedAt"` + + State runnerMetricState `json:"state"` + StateUpdatedAt time.Time `json:"stateUpdatedAt"` } type podStatusEndState struct { @@ -431,21 +466,115 @@ const ( podStatusExitCanceled podStatusExitKind = "canceled" // top-down signal that the Runner should stop. ) -func (s *podStatus) informantIsUnhealthy(config *Config) bool { +func (s *lockedPodStatus) update(global *agentState, with func(podStatus) podStatus) { s.mu.Lock() defer s.mu.Unlock() - startupGracePeriod := time.Second * time.Duration(config.Informant.UnhealthyStartupGracePeriodSeconds) - unhealthySilencePeriod := time.Second * time.Duration(config.Informant.UnhealthyAfterSilenceDurationSeconds) + newStatus := with(s.podStatus) + now := time.Now() + + // Calculate the new state: + var newState runnerMetricState + if s.deleted { + // If deleted, don't change anything. 
+ } else if s.endState != nil { + switch s.endState.ExitKind { + case podStatusExitCanceled: + // If canceled, don't change the state. + newState = s.state + case podStatusExitErrored: + newState = runnerMetricStateErrored + case podStatusExitPanicked: + newState = runnerMetricStatePanicked + } + } else if newStatus.monitorStuckAt(global.config).Before(now) { + newState = runnerMetricStateStuck + } else { + newState = runnerMetricStateOk + } + + if !newStatus.deleted { + newStatus.state = newState + newStatus.stateUpdatedAt = now + } - if s.lastSuccessfulInformantComm == nil { - return time.Since(s.startTime) >= startupGracePeriod + // Update the metrics: + // Note: s.state is initialized to the empty string to signify that it's not yet represented in + // the metrics. + if !s.deleted && s.state != "" { + oldIsEndpoint := strconv.FormatBool(s.endpointID != "") + global.metrics.runnersCount.WithLabelValues(oldIsEndpoint, string(s.state)).Dec() + } + + if !newStatus.deleted && newStatus.state != "" { + newIsEndpoint := strconv.FormatBool(newStatus.endpointID != "") + global.metrics.runnersCount.WithLabelValues(newIsEndpoint, string(newStatus.state)).Inc() + } + + s.podStatus = newStatus +} + +// monitorStuckAt returns the time at which the Runner will be marked "stuck" +func (s podStatus) monitorStuckAt(config *Config) time.Time { + startupGracePeriod := time.Second * time.Duration(config.Monitor.UnhealthyStartupGracePeriodSeconds) + unhealthySilencePeriod := time.Second * time.Duration(config.Monitor.UnhealthyAfterSilenceDurationSeconds) + + if s.lastSuccessfulMonitorComm == nil { + start := s.startTime + + // For endpoints, we should start the grace period from when the VM was *assigned* the + // endpoint, rather than when the VM was created. + if s.endpointID != "" { + start = *s.endpointAssignedAt + } + + return start.Add(startupGracePeriod) } else { - return time.Since(*s.lastSuccessfulInformantComm) >= unhealthySilencePeriod + return s.lastSuccessfulMonitorComm.Add(unhealthySilencePeriod) + } +} + +func (s *lockedPodStatus) periodicallyRefreshState(ctx context.Context, logger *zap.Logger, global *agentState) { + maxUpdateSeconds := util.Min( + global.config.Monitor.UnhealthyStartupGracePeriodSeconds, + global.config.Monitor.UnhealthyAfterSilenceDurationSeconds, + ) + // make maxTick a bit less than maxUpdateSeconds for the benefit of consistency and having + // relatively frequent log messages if things are stuck. + maxTick := time.Second * time.Duration(maxUpdateSeconds/2) + + timer := time.NewTimer(0) + defer timer.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-timer.C: + } + + // use s.update to trigger re-evaluating the metrics, and simultaneously reset the timer to + // the next point in time at which the state might have changed, so that we minimize the + // time between the VM meeting the conditions for being "stuck" and us recognizing it. 
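`update()` keeps the `runnersCount` gauge consistent by decrementing the old `(is_endpoint, state)` label pair and incrementing the new one inside the same critical section, with the empty-string state meaning "not yet counted". A small sketch of that bookkeeping with client_golang; the gauge name, labels, and state values here are illustrative:

```go
package main

import (
	"fmt"
	"strconv"
	"sync"

	"github.com/prometheus/client_golang/prometheus"
)

type status struct {
	state      string // "" means "not represented in the gauge yet"
	isEndpoint bool
}

type lockedStatus struct {
	mu sync.Mutex
	status
}

var runnersCount = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{Name: "example_runners_count"},
	[]string{"is_endpoint", "state"},
)

// update applies `with` under the lock and moves the gauge from the old
// (is_endpoint, state) label pair to the new one, so the totals stay consistent.
func (s *lockedStatus) update(with func(status) status) {
	s.mu.Lock()
	defer s.mu.Unlock()

	newStatus := with(s.status)

	if s.state != "" {
		runnersCount.WithLabelValues(strconv.FormatBool(s.isEndpoint), s.state).Dec()
	}
	if newStatus.state != "" {
		runnersCount.WithLabelValues(strconv.FormatBool(newStatus.isEndpoint), newStatus.state).Inc()
	}

	s.status = newStatus
}

func main() {
	s := &lockedStatus{}
	s.update(func(st status) status { st.state = "ok"; return st })
	s.update(func(st status) status { st.state = "stuck"; return st })
	fmt.Println("final state:", s.state)
}
```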
+ s.update(global, func(stat podStatus) podStatus { + stuckAt := stat.monitorStuckAt(global.config) + now := time.Now() + if stuckAt.Before(now) && stat.state != runnerMetricStateErrored && stat.state != runnerMetricStatePanicked { + if stat.endpointID != "" { + logger.Warn("Runner with endpoint is currently stuck", zap.String("endpointID", stat.endpointID)) + } else { + logger.Warn("Runner without endpoint is currently stuck") + } + timer.Reset(maxTick) + } else { + timer.Reset(util.Min(maxTick, stuckAt.Sub(now))) + } + return stat + }) } } -func (s *podStatus) dump() podStatusDump { +func (s *lockedPodStatus) dump() podStatusDump { s.mu.Lock() defer s.mu.Unlock() @@ -463,10 +592,14 @@ func (s *podStatus) dump() podStatusDump { PreviousEndStates: previousEndStates, // FIXME: api.VmInfo contains a resource.Quantity - is that safe to copy by value? - VMInfo: s.vmInfo, - EndpointID: s.endpointID, - StartTime: s.startTime, + VMInfo: s.vmInfo, + EndpointID: s.endpointID, + EndpointAssignedAt: s.endpointAssignedAt, // ok to share the pointer, because it's not updated + StartTime: s.startTime, + + State: s.state, + StateUpdatedAt: s.stateUpdatedAt, - LastSuccessfulInformantComm: s.lastSuccessfulInformantComm, + LastSuccessfulMonitorComm: s.lastSuccessfulMonitorComm, } } diff --git a/pkg/agent/informant.go b/pkg/agent/informant.go deleted file mode 100644 index 9b38895da..000000000 --- a/pkg/agent/informant.go +++ /dev/null @@ -1,1075 +0,0 @@ -package agent - -import ( - "bytes" - "context" - "encoding/json" - "errors" - "fmt" - "io" - "net" - "net/http" - "strconv" - "strings" - "sync/atomic" - "time" - - "github.com/google/uuid" - "github.com/tychoish/fun/srv" - "go.uber.org/zap" - - "github.com/neondatabase/autoscaling/pkg/api" - "github.com/neondatabase/autoscaling/pkg/util" -) - -// The autoscaler-agent currently supports v1.0 to v2.0 of the agent<->informant protocol. -// -// If you update either of these values, make sure to also update VERSIONING.md. -const ( - MinInformantProtocolVersion api.InformantProtoVersion = api.InformantProtoV1_0 - MaxInformantProtocolVersion api.InformantProtoVersion = api.InformantProtoV2_0 -) - -type InformantServer struct { - // runner is the Runner currently responsible for this InformantServer. We must acquire its lock - // before making any updates to other fields of this struct - runner *Runner - - // desc is the AgentDesc describing this VM informant server. This field is immutable. - desc api.AgentDesc - - seqNum uint64 - // receivedIDCheck is true if the server has received at least one successful request at the /id - // endpoint by the expected IP address of the VM - // - // This field is used to check for protocol violations (i.e. responding to /register without - // checking with /id), and *may* help prevent certain IP-spoofing based attacks - although the - // security implications are entirely speculation. - receivedIDCheck bool - - // madeContact is true if any request to the VM informant could have interacted with it. - // - // If madeContact is false, then mode is guaranteed to be InformantServerUnconfirmed, so - // madeContact only needs to be set on /register requests (because all others require a - // successful register first). - // - // This field MUST NOT be updated without holding BOTH runner.lock and requestLock. - // - // This field MAY be read while holding EITHER runner.lock OR requestLock. 
- madeContact bool - - // protoVersion gives the version of the agent<->informant protocol currently in use, if the - // server has been confirmed. - // - // In other words, this field is not nil if and only if mode is not InformantServerUnconfirmed. - protoVersion *api.InformantProtoVersion - - // mode indicates whether the informant has marked the connection as resumed or not - // - // This field MUST NOT be updated without holding BOTH runner.lock AND requestLock. - // - // This field MAY be read while holding EITHER runner.lock OR requestLock. - mode InformantServerMode - - // callbacks provide an abstraction for - callbacks informantStateCallbacks - - // requestLock guards requests to the VM informant to make sure that only one request is being - // made at a time. - // - // If both requestLock and runner.lock are required, then requestLock MUST be acquired before - // runner.lock. - requestLock util.ChanMutex - - // exitStatus holds some information about why the server exited - exitStatus atomic.Pointer[InformantServerExitStatus] - - // exit signals that the server should shut down, and sets exitStatus to status. - // - // This function MUST be called while holding runner.lock. - exit func(status InformantServerExitStatus) -} - -type InformantServerMode string - -const ( - InformantServerUnconfirmed InformantServerMode = "unconfirmed" - InformantServerSuspended InformantServerMode = "suspended" - InformantServerRunning InformantServerMode = "running" -) - -// InformantServerState is the serializable state of the InformantServer, produced by calls to the -// Runner's State() method. -type InformantServerState struct { - Desc api.AgentDesc `json:"desc"` - SeqNum uint64 `json:"seqNum"` - ReceivedIDCheck bool `json:"receivedIDCheck"` - MadeContact bool `json:"madeContact"` - ProtoVersion *api.InformantProtoVersion `json:"protoVersion"` - Mode InformantServerMode `json:"mode"` - ExitStatus *InformantServerExitStatus `json:"exitStatus"` -} - -type InformantServerExitStatus struct { - // Err is the error, if any, that caused the server to exit. This is only non-nil when context - // used to start the server becomes canceled (i.e. the Runner is exiting). - Err error - // RetryShouldFix is true if simply retrying should resolve err. This is true when e.g. the - // informant responds with a 404 to a downscale or upscale request - it might've restarted, so - // we just need to re-register. - RetryShouldFix bool -} - -// NewInformantServer starts an InformantServer, returning it and a signal receiver that will be -// signalled when it exits. 
-func NewInformantServer( - ctx context.Context, - logger *zap.Logger, - runner *Runner, - callbacks informantStateCallbacks, -) (*InformantServer, util.SignalReceiver, error) { - // Manually start the TCP listener so that we can see the port it's assigned - addr := net.TCPAddr{IP: net.IPv4zero, Port: 0 /* 0 means it'll be assigned any(-ish) port */} - listener, err := net.ListenTCP("tcp", &addr) - if err != nil { - return nil, util.SignalReceiver{}, fmt.Errorf("Error listening on TCP: %w", err) - } - - // Get back the assigned port - var serverAddr string - switch addr := listener.Addr().(type) { - case *net.TCPAddr: - serverAddr = fmt.Sprintf("%s:%d", runner.global.podIP, addr.Port) - default: - panic(errors.New("unexpected net.Addr type")) - } - - server := &InformantServer{ - runner: runner, - desc: api.AgentDesc{ - AgentID: uuid.New(), - ServerAddr: serverAddr, - MinProtoVersion: MinInformantProtocolVersion, - MaxProtoVersion: MaxInformantProtocolVersion, - }, - seqNum: 0, - receivedIDCheck: false, - madeContact: false, - protoVersion: nil, - mode: InformantServerUnconfirmed, - callbacks: callbacks, - requestLock: util.NewChanMutex(), - exitStatus: atomic.Pointer[InformantServerExitStatus]{}, - exit: nil, // see below. - } - - logger = logger.With(zap.Object("server", server.desc)) - logger.Info("Starting Informant server") - - mux := http.NewServeMux() - util.AddHandler(logger, mux, "/id", http.MethodGet, "struct{}", server.handleID) - util.AddHandler(logger, mux, "/resume", http.MethodPost, "ResumeAgent", server.handleResume) - util.AddHandler(logger, mux, "/suspend", http.MethodPost, "SuspendAgent", server.handleSuspend) - util.AddHandler(logger, mux, "/try-upscale", http.MethodPost, "MoreResourcesRequest", server.handleTryUpscale) - httpServer := &http.Server{Handler: mux} - - sendFinished, recvFinished := util.NewSingleSignalPair() - backgroundCtx, cancelBackground := context.WithCancel(ctx) - - // note: docs for server.exit guarantee this function is called while holding runner.lock. - server.exit = func(status InformantServerExitStatus) { - sendFinished.Send() - cancelBackground() - - // Set server.exitStatus if isn't already - if swapped := server.exitStatus.CompareAndSwap(nil, &status); swapped { - logFunc := logger.Warn - if status.RetryShouldFix { - logFunc = logger.Info - } - - logFunc("Informant server exiting", zap.Bool("retry", status.RetryShouldFix), zap.Error(status.Err)) - } - - // we need to spawn these in separate threads so the caller doesn't block while holding - // runner.lock - runner.spawnBackgroundWorker(srv.GetBaseContext(ctx), logger, "InformantServer shutdown", func(_ context.Context, logger *zap.Logger) { - // we want shutdown to (potentially) live longer than the request which - // made it, but having a timeout is still good. - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - if err := httpServer.Shutdown(ctx); err != nil { - logger.Warn("Error shutting down InformantServer", zap.Error(err)) - } - }) - if server.madeContact { - // only unregister the server if we could have plausibly contacted the informant - runner.spawnBackgroundWorker(srv.GetBaseContext(ctx), logger, "InformantServer unregister", func(_ context.Context, logger *zap.Logger) { - // we want shutdown to (potentially) live longer than the request which - // made it, but having a timeout is still good. 
- ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - if err := server.unregisterFromInformant(ctx, logger); err != nil { - logger.Warn("Error unregistering", zap.Error(err)) - } - }) - } - } - - // Deadlock checker for server.requestLock - // - // FIXME: make these timeouts/delays separately defined constants, or configurable - deadlockChecker := server.requestLock.DeadlockChecker(5*time.Second, time.Second) - runner.spawnBackgroundWorker(backgroundCtx, logger, "InformantServer deadlock checker", ignoreLogger(deadlockChecker)) - - // Main thread running the server. After httpServer.Serve() completes, we do some error - // handling, but that's about it. - runner.spawnBackgroundWorker(ctx, logger, "InformantServer", func(c context.Context, logger *zap.Logger) { - if err := httpServer.Serve(listener); !errors.Is(err, http.ErrServerClosed) { - logger.Error("InformantServer exited with unexpected error", zap.Error(err)) - } - - // set server.exitStatus if it isn't already -- generally this should only occur if err - // isn't http.ErrServerClosed, because other server exits should be controlled by - server.exitStatus.CompareAndSwap(nil, &InformantServerExitStatus{ - Err: fmt.Errorf("Unexpected exit: %w", err), - RetryShouldFix: false, - }) - }) - - // Thread waiting for the context to be canceled so we can use it to shut down the server - runner.spawnBackgroundWorker(ctx, logger, "InformantServer shutdown waiter", func(context.Context, *zap.Logger) { - // Wait until parent context OR server's context is done. - <-backgroundCtx.Done() - server.exit(InformantServerExitStatus{Err: nil, RetryShouldFix: false}) - }) - - runner.spawnBackgroundWorker(backgroundCtx, logger, "InformantServer health-checker", func(c context.Context, logger *zap.Logger) { - // FIXME: make this duration configurable - ticker := time.NewTicker(5 * time.Second) - defer ticker.Stop() - for { - select { - case <-c.Done(): - return - case <-ticker.C: - } - - var done bool - func() { - server.requestLock.Lock() - defer server.requestLock.Unlock() - - // If we've already registered with the informant, and it doesn't support health - // checks, exit. - if server.protoVersion != nil && !server.protoVersion.AllowsHealthCheck() { - logger.Info("Aborting future informant health checks because it does not support them") - done = true - return - } - - if _, err := server.HealthCheck(c, logger); err != nil { - logger.Warn("Informant health check failed", zap.Error(err)) - } - }() - if done { - return - } - } - }) - - return server, recvFinished, nil -} - -var ( - InformantServerAlreadyExitedError error = errors.New("Informant server has already exited") - InformantServerSuspendedError error = errors.New("Informant server is currently suspended") - InformantServerUnconfirmedError error = errors.New("Informant server has not yet been confirmed") - InformantServerNotCurrentError error = errors.New("Informant server has been replaced") -) - -// IsNormalInformantError returns true if the error is one of the "expected" errors that can occur -// in valid exchanges - due to unavoidable raciness or otherwise. 
-func IsNormalInformantError(err error) bool { - return errors.Is(err, InformantServerAlreadyExitedError) || - errors.Is(err, InformantServerSuspendedError) || - errors.Is(err, InformantServerUnconfirmedError) || - errors.Is(err, InformantServerNotCurrentError) -} - -// valid checks if the InformantServer is good to use for communication, returning an error if not -// -// This method can return errors for a number of unavoidably-racy protocol states - errors from this -// method should be handled as unusual, but not unexpected. Any error returned will be one of -// InformantServer{AlreadyExited,Suspended,Confirmed}Error. -// -// This method MUST be called while holding s.runner.lock. -func (s *InformantServer) valid() error { - if s.exitStatus.Load() != nil { - return InformantServerAlreadyExitedError - } - - switch s.mode { - case InformantServerRunning: - // all good; one more check - case InformantServerUnconfirmed: - return InformantServerUnconfirmedError - case InformantServerSuspended: - return InformantServerSuspendedError - default: - panic(fmt.Errorf("Unexpected InformantServerMode %q", s.mode)) - } - - if s.runner.server.Load() != s { - return InformantServerNotCurrentError - } - return nil -} - -// ExitStatus returns the InformantServerExitStatus associated with the server, if it has been -// instructed to exit -func (s *InformantServer) ExitStatus() *InformantServerExitStatus { - return s.exitStatus.Load() -} - -// setLastInformantError is a helper method to abbreviate setting the Runner's lastInformantError -// field. If runnerLocked is true, s.runner.lock will be acquired. -// -// This method MUST be called while holding s.requestLock AND EITHER holding s.runner.lock OR -// runnerLocked MUST be true. -func (s *InformantServer) setLastInformantError(err error, runnerLocked bool) { - if !runnerLocked { - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - } - - if s.runner.server.Load() == s { - s.runner.lastInformantError = err - } -} - -// RegisterWithInformant sends a /register request to the VM Informant -// -// If called after a prior success, this method will panic. If the server has already exited, this -// method will return InformantServerAlreadyExitedError. -// -// On certain errors, this method will force the server to exit. This can be checked by calling -// s.ExitStatus() and checking for a non-nil result. -// -// This method MUST NOT be called while holding s.requestLock OR s.runner.lock. 
-func (s *InformantServer) RegisterWithInformant(ctx context.Context, logger *zap.Logger) error { - logger = logger.With(zap.Object("server", s.desc)) - - s.requestLock.Lock() - defer s.requestLock.Unlock() - - // Check the current state: - err := func() error { - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - switch s.mode { - case InformantServerUnconfirmed: - // good; this is what we're expecting - case InformantServerRunning, InformantServerSuspended: - panic(fmt.Errorf("Register called while InformantServer is already registered (mode = %q)", s.mode)) - default: - panic(fmt.Errorf("Unexpected InformantServerMode %q", s.mode)) - } - - if s.ExitStatus() != nil { - err := InformantServerAlreadyExitedError - s.setLastInformantError(err, true) - return err - } - - return nil - }() - if err != nil { - return err - } - - // Make the request: - timeout := time.Second * time.Duration(s.runner.global.config.Informant.RegisterTimeoutSeconds) - resp, statusCode, err := doInformantRequest[api.AgentDesc, api.InformantDesc]( - ctx, logger, s, timeout, http.MethodPost, "/register", &s.desc, - ) - // Do some stuff with the lock acquired: - func() { - maybeMadeContact := statusCode != 0 || ctx.Err() != nil - - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - // Record whether we might've contacted the informant: - s.madeContact = maybeMadeContact - - if err != nil { - s.setLastInformantError(fmt.Errorf("Register request failed: %w", err), true) - - // If the informant responds that it's our fault, or it had an internal failure, we know - // that: - // 1. Neither should happen under normal operation, and - // 2. Restarting the server is *more likely* to fix it than continuing - // We shouldn't *assume* that restarting will actually fix it though, so we'll still set - // RetryShouldFix = false. - if 400 <= statusCode && statusCode <= 599 { - s.exit(InformantServerExitStatus{ - Err: err, - RetryShouldFix: false, - }) - } - } - }() - - if err != nil { - return err // the errors returned by doInformantRequest are descriptive enough. - } - - if err := validateInformantDesc(&s.desc, resp); err != nil { - err = fmt.Errorf("Received bad InformantDesc: %w", err) - s.setLastInformantError(err, false) - return err - } - - // Now that we know it's valid, set s.runner.informant ... - err = func() error { - // ... but only if the server is still current. We're ok setting it if the server isn't - // running, because it's good to have the information there. - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - logger.Info( - "Informant server mode updated", - zap.String("action", "register"), - zap.String("oldMode", string(s.mode)), - zap.String("newMode", string(InformantServerSuspended)), - ) - - s.mode = InformantServerSuspended - s.protoVersion = &resp.ProtoVersion - - if s.runner.server.Load() == s { - // signal we've changed the informant, and do the logging while we're at it, so there's - // a synchronous record of what happened. 
- s.callbacks.registered(false, func() { - oldInformant := s.runner.informant - s.runner.informant = resp - - if oldInformant == nil { - logger.Info("Registered with informant", zap.Any("informant", *resp)) - } else if *oldInformant != *resp { - logger.Info( - "Re-registered with informant, InformantDesc changed", - zap.Any("oldInformant", *oldInformant), - zap.Any("informant", *resp), - ) - } else { - logger.Info("Re-registered with informant; InformantDesc unchanged", zap.Any("informant", *oldInformant)) - } - }) - } else { - logger.Warn("Registering with informant completed but the server has already been replaced") - } - - // we also want to do a quick protocol check here as well - if !s.receivedIDCheck { - // protocol violation - err := errors.New("Informant responded to /register with 200 without requesting /id") - s.setLastInformantError(fmt.Errorf("Protocol violation: %w", err), true) - logger.Error("Protocol violation", zap.Error(err)) - s.exit(InformantServerExitStatus{ - Err: err, - RetryShouldFix: false, - }) - return errors.New("Protocol violation") // we already logged it; don't double-log a long message - } - - return nil - }() - - if err != nil { - return err - } - - // Record that this request was handled without error - s.setLastInformantError(nil, false) - return nil -} - -// validateInformantDesc checks that the provided api.InformantDesc is valid and matches with an -// InformantServer's api.AgentDesc -func validateInformantDesc(server *api.AgentDesc, informant *api.InformantDesc) error { - // To quote the docs for api.InformantDesc.ProtoVersion: - // - // > If the VM informant does not use a protocol version within [the agent's] bounds, then it - // > MUST respond with an error status code. - // - // So if we're asked to validate the response, mismatch *should* have already been handled. - goodProtoVersion := server.MinProtoVersion <= informant.ProtoVersion && - informant.ProtoVersion <= server.MaxProtoVersion - - if !goodProtoVersion { - return fmt.Errorf( - "Unexpected protocol version: should be between %d and %d, but got %d", - server.MinProtoVersion, server.MaxProtoVersion, informant.ProtoVersion, - ) - } - - // To quote the docs for api.InformantMetricsMethod: - // - // > At least one method *must* be provided in an InformantDesc, and more than one method gives - // > the autoscaler-agent freedom to choose. - // - // We just need to check that there aren't none. - hasMetricsMethod := informant.MetricsMethod.Prometheus != nil - if !hasMetricsMethod { - return errors.New("No known metrics method given") - } - - return nil -} - -// unregisterFromInformant is an internal-ish function that sends an /unregister request to the VM -// informant -// -// Because sending an /unregister request is generally out of courtesy on exit, this method is more -// permissive about server state, and is typically called with a different Context from what would -// normally be expected. -// -// This method is only expected to be called by s.exit; calling this method before s.exitStatus has -// been set will likely cause the server to restart. -// -// This method MUST NOT be called while holding s.requestLock OR s.runner.lock. -func (s *InformantServer) unregisterFromInformant(ctx context.Context, logger *zap.Logger) error { - // note: Because this method is typically called during shutdown, we don't set - // s.runner.lastInformantError or call s.exit, even though other request helpers do. 
- - logger = logger.With(zap.Object("server", s.desc)) - - s.requestLock.Lock() - defer s.requestLock.Unlock() - - logger.Info("Sending unregister request to informant") - - // Make the request: - timeout := time.Second * time.Duration(s.runner.global.config.Informant.RegisterTimeoutSeconds) - resp, _, err := doInformantRequest[api.AgentDesc, api.UnregisterAgent]( - ctx, logger, s, timeout, http.MethodDelete, "/unregister", &s.desc, - ) - if err != nil { - return err // the errors returned by doInformantRequest are descriptive enough. - } - - logger.Info("Unregister request successful", zap.Any("response", *resp)) - return nil -} - -// doInformantRequest makes a single HTTP request to the VM informant, doing only the validation -// required to JSON decode the response -// -// The returned int gives the status code of the response. It is possible for a response with status -// 200 to still yield an error - either because of a later IO failure or bad JSON. -// -// If an error occurs before we get a response, the status code will be 0. -// -// This method MUST be called while holding s.requestLock. If not, the program will silently violate -// the protocol guarantees. -func doInformantRequest[Q any, R any]( - ctx context.Context, - logger *zap.Logger, - s *InformantServer, - timeout time.Duration, - method string, - path string, - reqData *Q, -) (_ *R, statusCode int, _ error) { - result := "" - defer func() { - s.runner.global.metrics.informantRequestsOutbound.WithLabelValues(result).Inc() - }() - - reqBody, err := json.Marshal(reqData) - if err != nil { - return nil, statusCode, fmt.Errorf("Error encoding request JSON: %w", err) - } - - reqCtx, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - - url := s.informantURL(path) - request, err := http.NewRequestWithContext(reqCtx, method, url, bytes.NewReader(reqBody)) - if err != nil { - return nil, statusCode, fmt.Errorf("Error building request to %q: %w", url, err) - } - request.Header.Set("content-type", "application/json") - - logger.Info("Sending informant request", zap.String("url", url), zap.Any("request", reqData)) - - response, err := http.DefaultClient.Do(request) - if err != nil { - result = fmt.Sprintf("[error doing request: %s]", util.RootError(err)) - return nil, statusCode, fmt.Errorf("Error doing request: %w", err) - } - defer response.Body.Close() - - statusCode = response.StatusCode - result = strconv.Itoa(statusCode) - - respBody, err := io.ReadAll(response.Body) - if err != nil { - return nil, statusCode, fmt.Errorf("Error reading body for response: %w", err) - } - - if statusCode != 200 { - return nil, statusCode, fmt.Errorf( - "Received response status %d body %q", statusCode, string(respBody), - ) - } - - var respData R - if err := json.Unmarshal(respBody, &respData); err != nil { - return nil, statusCode, fmt.Errorf("Bad JSON response: %w", err) - } - - logger.Info("Got informant response", zap.String("url", url), zap.Any("response", respData)) - - return &respData, statusCode, nil -} - -// fetchAndIncrementSequenceNumber increments the sequence number and returns it -// -// This method MUST be called while holding s.runner.lock. 
-func (s *InformantServer) incrementSequenceNumber() uint64 { - s.seqNum += 1 - return s.seqNum -} - -// informantURL creates a string representing the URL for a request to the VM informant, given the -// path to use -func (s *InformantServer) informantURL(path string) string { - if !strings.HasPrefix(path, "/") { - panic(errors.New("informant URL path must start with '/'")) - } - - ip := s.runner.podIP - port := s.runner.global.config.Informant.ServerPort - return fmt.Sprintf("http://%s:%d/%s", ip, port, path[1:]) -} - -// handleID handles a request on the server's /id endpoint. This method should not be called outside -// of that context. -// -// Returns: response body (if successful), status code, error (if unsuccessful) -func (s *InformantServer) handleID(ctx context.Context, _ *zap.Logger, body *struct{}) (_ *api.AgentIdentificationMessage, code int, _ error) { - defer func() { - s.runner.global.metrics.informantRequestsInbound.WithLabelValues("/id", strconv.Itoa(code)).Inc() - }() - - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - s.receivedIDCheck = true - - if s.ExitStatus() != nil { - return nil, 404, errors.New("Server has already exited") - } - - // Update our record of the last successful time we heard from the informant, if the server is - // currently enabled. This allows us to detect cases where the informant is not currently - // communicating back to the agent - OR when the informant never /resume'd the agent. - if s.mode == InformantServerRunning { - s.runner.setStatus(func(s *podStatus) { - now := time.Now() - s.lastSuccessfulInformantComm = &now - }) - } - - return &api.AgentIdentificationMessage{ - Data: api.AgentIdentification{AgentID: s.desc.AgentID}, - SequenceNumber: s.incrementSequenceNumber(), - }, 200, nil -} - -// handleResume handles a request on the server's /resume endpoint. This method should not be called -// outside of that context. -// -// Returns: response body (if successful), status code, error (if unsuccessful) -func (s *InformantServer) handleResume( - ctx context.Context, logger *zap.Logger, body *api.ResumeAgent, -) (_ *api.AgentIdentificationMessage, code int, _ error) { - defer func() { - s.runner.global.metrics.informantRequestsInbound.WithLabelValues("/resume", strconv.Itoa(code)).Inc() - }() - - if body.ExpectedID != s.desc.AgentID { - logger.Warn("Request AgentID not found, server has a different one") - return nil, 404, fmt.Errorf("AgentID %q not found", body.ExpectedID) - } - - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - if s.ExitStatus() != nil { - return nil, 404, errors.New("Server has already exited") - } - - // FIXME: Our handling of the protocol here is racy (because we might receive a /resume request - // before we've processed the response from our /register request). However, that's *probably* - // actually an issue with the protocol itself, rather than our handling. - - switch s.mode { - case InformantServerSuspended: - s.mode = InformantServerRunning - s.callbacks.setActive(true, func() { - logger.Info( - "Informant server mode updated", - zap.String("action", "resume"), - zap.String("oldMode", string(InformantServerSuspended)), - zap.String("newMode", string(InformantServerRunning)), - ) - }) - case InformantServerRunning: - internalErr := errors.New("Got /resume request for server, but it is already running") - logger.Warn("Protocol violation", zap.Error(internalErr)) - - // To be nice, we'll restart the server. We don't want to make a temporary error permanent. 
- s.exit(InformantServerExitStatus{ - Err: internalErr, - RetryShouldFix: true, - }) - - return nil, 400, errors.New("Cannot resume agent that is already running") - case InformantServerUnconfirmed: - internalErr := errors.New("Got /resume request for server, but it is unconfirmed") - logger.Warn("Protocol violation", zap.Error(internalErr)) - - // To be nice, we'll restart the server. We don't want to make a temporary error permanent. - s.exit(InformantServerExitStatus{ - Err: internalErr, - RetryShouldFix: true, - }) - - return nil, 400, errors.New("Cannot resume agent that is not yet registered") - default: - panic(fmt.Errorf("Unexpected InformantServerMode %q", s.mode)) - } - - return &api.AgentIdentificationMessage{ - Data: api.AgentIdentification{AgentID: s.desc.AgentID}, - SequenceNumber: s.incrementSequenceNumber(), - }, 200, nil -} - -// handleSuspend handles a request on the server's /suspend endpoint. This method should not be -// called outside of that context. -// -// Returns: response body (if successful), status code, error (if unsuccessful) -func (s *InformantServer) handleSuspend( - ctx context.Context, logger *zap.Logger, body *api.SuspendAgent, -) (_ *api.AgentIdentificationMessage, code int, _ error) { - defer func() { - s.runner.global.metrics.informantRequestsInbound.WithLabelValues("/suspend", strconv.Itoa(code)).Inc() - }() - - if body.ExpectedID != s.desc.AgentID { - logger.Warn("Request AgentID not found, server has a different one") - return nil, 404, fmt.Errorf("AgentID %q not found", body.ExpectedID) - } - - s.runner.lock.Lock() - locked := true - defer func() { - if locked { - s.runner.lock.Unlock() - } - }() - - if s.ExitStatus() != nil { - return nil, 404, errors.New("Server has already exited") - } - - switch s.mode { - case InformantServerRunning: - s.mode = InformantServerSuspended - s.callbacks.setActive(false, func() { - logger.Info( - "Informant server mode updated", - zap.String("action", "suspend"), - zap.String("oldMode", string(InformantServerRunning)), - zap.String("newMode", string(InformantServerSuspended)), - ) - }) - case InformantServerSuspended: - internalErr := errors.New("Got /suspend request for server, but it is already suspended") - logger.Warn("Protocol violation", zap.Error(internalErr)) - - // To be nice, we'll restart the server. We don't want to make a temporary error permanent. - s.exit(InformantServerExitStatus{ - Err: internalErr, - RetryShouldFix: true, - }) - - return nil, 400, errors.New("Cannot suspend agent that is already suspended") - case InformantServerUnconfirmed: - internalErr := errors.New("Got /suspend request for server, but it is unconfirmed") - logger.Warn("Protocol violation", zap.Error(internalErr)) - - // To be nice, we'll restart the server. We don't want to make a temporary error permanent. - s.exit(InformantServerExitStatus{ - Err: internalErr, - RetryShouldFix: true, - }) - - return nil, 400, errors.New("Cannot suspend agent that is not yet registered") - } - - locked = false - s.runner.lock.Unlock() - - // Acquire s.runner.requestLock so that when we return, we can guarantee that any future - // requests to NeonVM or the scheduler will first observe that the informant is suspended and - // exit early, before actually making the request. 
- if err := s.runner.requestLock.TryLock(ctx); err != nil { - err = fmt.Errorf("Context expired while trying to acquire requestLock: %w", err) - logger.Error("Failed to synchronize on requestLock", zap.Error(err)) - return nil, 500, err - } - s.runner.requestLock.Unlock() // don't actually hold the lock, we're just using it as a barrier. - - return &api.AgentIdentificationMessage{ - Data: api.AgentIdentification{AgentID: s.desc.AgentID}, - SequenceNumber: s.incrementSequenceNumber(), - }, 200, nil -} - -// handleTryUpscale handles a request on the server's /try-upscale endpoint. This method should not -// be called outside of that context. -// -// Returns: response body (if successful), status code, error (if unsuccessful) -func (s *InformantServer) handleTryUpscale( - ctx context.Context, - logger *zap.Logger, - body *api.MoreResourcesRequest, -) (_ *api.AgentIdentificationMessage, code int, _ error) { - defer func() { - s.runner.global.metrics.informantRequestsInbound.WithLabelValues("/upscale", strconv.Itoa(code)).Inc() - }() - - if body.ExpectedID != s.desc.AgentID { - logger.Warn("Request AgentID not found, server has a different one") - return nil, 404, fmt.Errorf("AgentID %q not found", body.ExpectedID) - } - - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - if s.ExitStatus() != nil { - return nil, 404, errors.New("Server has already exited") - } - - switch s.mode { - case InformantServerRunning: - if !s.protoVersion.HasTryUpscale() { - err := fmt.Errorf("/try-upscale not supported for protocol version %v", *s.protoVersion) - return nil, 400, err - } - - s.callbacks.upscaleRequested(body.MoreResources, func() { - if !body.MoreResources.Cpu && !body.MoreResources.Memory { - logger.Warn("Received try-upscale request that has no resources selected") - } - - logger.Info( - "Updating requested upscale", - zap.Any("requested", body.MoreResources), - ) - }) - - return &api.AgentIdentificationMessage{ - Data: api.AgentIdentification{AgentID: s.desc.AgentID}, - SequenceNumber: s.incrementSequenceNumber(), - }, 200, nil - case InformantServerSuspended: - internalErr := errors.New("Got /try-upscale request for server, but server is suspended") - logger.Warn("Protocol violation", zap.Error(internalErr)) - - // To be nice, we'll restart the server. We don't want to make a temporary error permanent. - s.exit(InformantServerExitStatus{ - Err: internalErr, - RetryShouldFix: true, - }) - - return nil, 400, errors.New("Cannot process upscale while suspended") - case InformantServerUnconfirmed: - internalErr := errors.New("Got /try-upscale request for server, but server is suspended") - logger.Warn("Protocol violation", zap.Error(internalErr)) - - // To be nice, we'll restart the server. We don't want to make a temporary error permanent. - s.exit(InformantServerExitStatus{ - Err: internalErr, - RetryShouldFix: true, - }) - - return nil, 400, errors.New("Cannot process upscale while unconfirmed") - default: - panic(fmt.Errorf("unexpected server mode: %q", s.mode)) - } -} - -// HealthCheck makes a request to the informant's /health-check endpoint, using the server's ID. -// -// This method MUST be called while holding i.server.requestLock AND NOT i.server.runner.lock. 
-func (s *InformantServer) HealthCheck(ctx context.Context, logger *zap.Logger) (*api.InformantHealthCheckResp, error) { - err := func() error { - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - return s.valid() - }() - // NB: we want to continue to perform health checks even if the informant server is not properly - // available for *normal* use. - // - // We only need to check for InformantServerSuspendedError because - // InformantServerUnconfirmedError will be handled by the retryRegister loop in - // serveInformantLoop. - if err != nil && !errors.Is(err, InformantServerSuspendedError) { - return nil, err - } - - logger = logger.With(zap.Object("server", s.desc)) - - timeout := time.Second * time.Duration(s.runner.global.config.Informant.RequestTimeoutSeconds) - id := api.AgentIdentification{AgentID: s.desc.AgentID} - - logger.Info("Sending health-check", zap.Any("id", id)) - resp, statusCode, err := doInformantRequest[api.AgentIdentification, api.InformantHealthCheckResp]( - ctx, logger, s, timeout, http.MethodPut, "/health-check", &id, - ) - if err != nil { - func() { - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - s.setLastInformantError(fmt.Errorf("Health-check request failed: %w", err), true) - - if 400 <= statusCode && statusCode <= 599 { - s.exit(InformantServerExitStatus{ - Err: err, - RetryShouldFix: statusCode == 404, - }) - } - }() - return nil, err - } - - logger.Info("Received OK health-check result") - return resp, nil -} - -// Downscale makes a request to the informant's /downscale endpoint with the api.Resources -// -// This method MUST NOT be called while holding i.server.runner.lock. -func (s *InformantServer) Downscale(ctx context.Context, logger *zap.Logger, to api.Resources) (*api.DownscaleResult, error) { - err := func() error { - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - return s.valid() - }() - if err != nil { - return nil, err - } - - logger = logger.With(zap.Object("server", s.desc)) - - logger.Info("Sending downscale", zap.Object("target", to)) - - timeout := time.Second * time.Duration(s.runner.global.config.Informant.DownscaleTimeoutSeconds) - id := api.AgentIdentification{AgentID: s.desc.AgentID} - rawResources := to.ConvertToRaw(s.runner.vm.Mem.SlotSize) - - var statusCode int - var resp *api.DownscaleResult - if s.protoVersion.SignsResourceUpdates() { - signedRawResources := api.ResourceMessage{RawResources: rawResources, Id: id} - reqData := api.AgentResourceMessage{Data: signedRawResources, SequenceNumber: s.incrementSequenceNumber()} - resp, statusCode, err = doInformantRequest[api.AgentResourceMessage, api.DownscaleResult]( - ctx, logger, s, timeout, http.MethodPut, "/downscale", &reqData, - ) - } else { - resp, statusCode, err = doInformantRequest[api.RawResources, api.DownscaleResult]( - ctx, logger, s, timeout, http.MethodPut, "/downscale", &rawResources, - ) - } - if err != nil { - func() { - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - s.setLastInformantError(fmt.Errorf("Downscale request failed: %w", err), true) - - if 400 <= statusCode && statusCode <= 599 { - s.exit(InformantServerExitStatus{ - Err: err, - RetryShouldFix: statusCode == 404, - }) - } - }() - return nil, err - } - - logger.Info("Received downscale result") // already logged by doInformantRequest - return resp, nil -} - -func (s *InformantServer) Upscale(ctx context.Context, logger *zap.Logger, to api.Resources) error { - err := func() error { - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - return s.valid() - }() - if err != nil { - 
return err - } - - logger = logger.With(zap.Object("server", s.desc)) - - logger.Info("Sending upscale", zap.Object("target", to)) - - timeout := time.Second * time.Duration(s.runner.global.config.Informant.DownscaleTimeoutSeconds) - id := api.AgentIdentification{AgentID: s.desc.AgentID} - rawResources := to.ConvertToRaw(s.runner.vm.Mem.SlotSize) - - var statusCode int - if s.protoVersion.SignsResourceUpdates() { - signedRawResources := api.ResourceMessage{RawResources: rawResources, Id: id} - reqData := api.AgentResourceMessage{Data: signedRawResources, SequenceNumber: s.incrementSequenceNumber()} - _, statusCode, err = doInformantRequest[api.AgentResourceMessage, struct{}]( - ctx, logger, s, timeout, http.MethodPut, "/upscale", &reqData, - ) - } else { - _, statusCode, err = doInformantRequest[api.RawResources, struct{}]( - ctx, logger, s, timeout, http.MethodPut, "/upscale", &rawResources, - ) - } - if err != nil { - func() { - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - s.setLastInformantError(fmt.Errorf("Downscale request failed: %w", err), true) - - if 400 <= statusCode && statusCode <= 599 { - s.exit(InformantServerExitStatus{ - Err: err, - RetryShouldFix: statusCode == 404, - }) - } - }() - return err - } - - logger.Info("Received successful upscale result") - return nil -} diff --git a/pkg/agent/prommetrics.go b/pkg/agent/prommetrics.go index b68770eeb..cf9fdd09c 100644 --- a/pkg/agent/prommetrics.go +++ b/pkg/agent/prommetrics.go @@ -12,14 +12,15 @@ type PromMetrics struct { schedulerRequestedChange resourceChangePair schedulerApprovedChange resourceChangePair - informantRequestsOutbound *prometheus.CounterVec - informantRequestsInbound *prometheus.CounterVec - informantRequestedChange resourceChangePair - informantApprovedChange resourceChangePair + monitorRequestsOutbound *prometheus.CounterVec + monitorRequestsInbound *prometheus.CounterVec + monitorRequestedChange resourceChangePair + monitorApprovedChange resourceChangePair neonvmRequestsOutbound *prometheus.CounterVec neonvmRequestedChange resourceChangePair + runnersCount *prometheus.GaugeVec runnerFatalErrors prometheus.Counter runnerThreadPanics prometheus.Counter runnerStarts prometheus.Counter @@ -37,6 +38,15 @@ const ( directionValueDec = "dec" ) +type runnerMetricState string + +const ( + runnerMetricStateOk runnerMetricState = "ok" + runnerMetricStateStuck runnerMetricState = "stuck" + runnerMetricStateErrored runnerMetricState = "errored" + runnerMetricStatePanicked runnerMetricState = "panicked" +) + func makePrometheusParts(globalstate *agentState) (PromMetrics, *prometheus.Registry) { reg := prometheus.NewRegistry() @@ -92,49 +102,49 @@ func makePrometheusParts(globalstate *agentState) (PromMetrics, *prometheus.Regi )), }, - // ---- INFORMANT ---- - informantRequestsOutbound: util.RegisterMetric(reg, prometheus.NewCounterVec( + // ---- MONITOR ---- + monitorRequestsOutbound: util.RegisterMetric(reg, prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: "autoscaling_agent_informant_outbound_requests_total", - Help: "Number of attempted HTTP requests to vm-informants by autoscaler-agents", + Name: "autoscaling_agent_monitor_outbound_requests_total", + Help: "Number of attempted HTTP requests to vm-monitors by autoscaler-agents", }, - []string{"code"}, + []string{"endpoint", "code"}, )), - informantRequestsInbound: util.RegisterMetric(reg, prometheus.NewCounterVec( + monitorRequestsInbound: util.RegisterMetric(reg, prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: 
"autoscaling_agent_informant_inbound_requests_total", - Help: "Number of HTTP requests from vm-informants received by autoscaler-agents", + Name: "autoscaling_agent_monitor_inbound_requests_total", + Help: "Number of HTTP requests from vm-monitors received by autoscaler-agents", }, []string{"endpoint", "code"}, )), - informantRequestedChange: resourceChangePair{ + monitorRequestedChange: resourceChangePair{ cpu: util.RegisterMetric(reg, prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: "autoscaling_agent_informant_requested_cpu_change_total", - Help: "Total change in CPU requested from the informant(s)", + Name: "autoscaling_agent_monitor_requested_cpu_change_total", + Help: "Total change in CPU requested from the vm-monitor(s)", }, []string{directionLabel}, )), mem: util.RegisterMetric(reg, prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: "autoscaling_agent_informant_requested_mem_change_total", - Help: "Total change in memory (in MiB) requested from the informant(s)", + Name: "autoscaling_agent_monitor_requested_mem_change_total", + Help: "Total change in memory (in MiB) requested from the vm-monitor(s)", }, []string{directionLabel}, )), }, - informantApprovedChange: resourceChangePair{ + monitorApprovedChange: resourceChangePair{ cpu: util.RegisterMetric(reg, prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: "autoscaling_agent_informant_approved_cpu_change_total", - Help: "Total change in CPU approved by the informant(s)", + Name: "autoscaling_agent_monitor_approved_cpu_change_total", + Help: "Total change in CPU approved by the vm-monitor(s)", }, []string{directionLabel}, )), mem: util.RegisterMetric(reg, prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: "autoscaling_agent_informant_approved_mem_change_total", - Help: "Total change in memory (in MiB) approved by the informant(s)", + Name: "autoscaling_agent_monitor_approved_mem_change_total", + Help: "Total change in memory (in MiB) approved by the vm-monitor(s)", }, []string{directionLabel}, )), @@ -168,6 +178,14 @@ func makePrometheusParts(globalstate *agentState) (PromMetrics, *prometheus.Regi }, // ---- RUNNER LIFECYCLE ---- + runnersCount: util.RegisterMetric(reg, prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "autoscaling_agent_runners_current", + Help: "Number of per-VM runners, with associated metadata", + }, + // NB: is_endpoint ∈ ("true", "false"), state ∈ runnerMetricState = ("ok", "stuck", "errored", "panicked") + []string{"is_endpoint", "state"}, + )), runnerFatalErrors: util.RegisterMetric(reg, prometheus.NewCounter( prometheus.CounterOpts{ Name: "autoscaling_agent_runner_fatal_errors_total", @@ -201,9 +219,9 @@ func makePrometheusParts(globalstate *agentState) (PromMetrics, *prometheus.Regi // scheduler: metrics.schedulerRequestedChange, metrics.schedulerApprovedChange, - // informant: - metrics.informantRequestedChange, - metrics.informantApprovedChange, + // monitor: + metrics.monitorRequestedChange, + metrics.monitorApprovedChange, // neonvm: metrics.neonvmRequestedChange, } @@ -214,114 +232,16 @@ func makePrometheusParts(globalstate *agentState) (PromMetrics, *prometheus.Regi } } - // the remaining metrics are computed at scrape time by prom: - // register them directly. 
- reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Name: "autoscaling_errored_vm_runners_current", - Help: "Number of VMs whose per-VM runner has panicked (and not restarted)", - }, - func() float64 { - globalstate.lock.Lock() - defer globalstate.lock.Unlock() - - count := 0 - - for _, p := range globalstate.pods { - func() { - p.status.mu.Lock() - defer p.status.mu.Unlock() - - if p.status.endState != nil && p.status.endState.ExitKind == podStatusExitErrored { - count += 1 - } - }() - } - - return float64(count) - }, - )) - - reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Name: "autoscaling_panicked_vm_runners_current", - Help: "Number of VMs whose per-VM runner has panicked (and not restarted)", - }, - func() float64 { - globalstate.lock.Lock() - defer globalstate.lock.Unlock() - - count := 0 - - for _, p := range globalstate.pods { - func() { - p.status.mu.Lock() - defer p.status.mu.Unlock() - - if p.status.endState != nil && p.status.endState.ExitKind == podStatusExitPanicked { - count += 1 - } - }() - } - - return float64(count) - }, - )) - - reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Name: "autoscaling_agent_tracked_vms_current", - Help: "Number of autoscaling-enabled non-migrating VMs on the autoscaler-agent's node", - }, - func() float64 { - globalstate.lock.Lock() - defer globalstate.lock.Unlock() - - return float64(len(globalstate.pods)) - }, - )) - - reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Name: "autoscaling_vms_unsuccessful_communication_with_informant_current", - Help: "Number of VMs whose vm-informants aren't successfully communicating with the autoscaler-agent", - }, - func() float64 { - globalstate.lock.Lock() - defer globalstate.lock.Unlock() - - count := 0 - - for _, p := range globalstate.pods { - if p.status.informantIsUnhealthy(globalstate.config) { - count++ - } - } - - return float64(count) - }, - )) - - reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Name: "autoscaling_billed_vms_unsuccessful_communication_with_informant_current", - Help: "Number of VMs *getting billed* whose vm-informants aren't successfully communicating with the autoscaler-agent", - }, - func() float64 { - globalstate.lock.Lock() - defer globalstate.lock.Unlock() - - count := 0 - - for _, p := range globalstate.pods { - if p.status.endpointID != "" && p.status.informantIsUnhealthy(globalstate.config) { - count++ - } - } - - return float64(count) - }, - )) + runnerStates := []runnerMetricState{ + runnerMetricStateOk, + runnerMetricStateStuck, + runnerMetricStateErrored, + runnerMetricStatePanicked, + } + for _, s := range runnerStates { + metrics.runnersCount.WithLabelValues("true", string(s)).Set(0.0) + metrics.runnersCount.WithLabelValues("false", string(s)).Set(0.0) + } return metrics, reg } diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index f14a8ec81..f463f92bb 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -12,7 +12,7 @@ package agent // 1. It should be OK to panic, if an error is truly unrecoverable // 2. A single Runner's panic shouldn't bring down the entire autoscaler-agent¹ // 3. We want to expose a State() method to view (almost) all internal state -// 4. Some high-level actions (e.g., HTTP request to Informant; update VM to desired state) require +// 4. Some high-level actions (e.g., call to vm-monitor; update VM to desired state) require // that we have *at most* one such action running at a time. 
// // There are a number of possible solutions to this set of goals. All reasonable solutions require @@ -24,9 +24,7 @@ package agent // * "track scheduler" // * "get metrics" // * "handle VM resources" - using metrics, calculates target resources level and contacts -// scheduler, informant, and NeonVM -- the "scaling" part of "autoscaling". -// * "informant server loop" - keeps Runner.informant and Runner.server up-to-date. -// * ... and a few more. +// scheduler, vm-monitor, and NeonVM -- the "scaling" part of "autoscaling". // * Each thread makes *synchronous* HTTP requests while holding the necessary lock to prevent any other // thread from making HTTP requests to the same entity. For example: // * All requests to NeonVM and the scheduler plugin are guarded by Runner.requestLock, which @@ -84,7 +82,7 @@ type Runner struct { global *agentState // status provides the high-level status of the Runner. Reading or updating the status requires // holding podStatus.lock. Updates are typically done handled by the setStatus method. - status *podStatus + status *lockedPodStatus // shutdown provides a clean way to trigger all background Runner threads to shut down. shutdown // is set exactly once, by (*Runner).Run @@ -134,15 +132,9 @@ type Runner struct { // Each scheduler's info field is immutable. When a scheduler is replaced, only the pointer // value here is updated; the original Scheduler remains unchanged. scheduler atomic.Pointer[Scheduler] - server atomic.Pointer[InformantServer] - // informant holds the most recent InformantDesc that an InformantServer has received in its - // normal operation. If there has been at least one InformantDesc received, this field will not - // be nil. - // - // This field really should not be used except for providing RunnerState. The correct interface - // is through server.Informant(), which does all the appropriate error handling if the - // connection to the informant is not in a suitable state. - informant *api.InformantDesc + // monitor, if non nil, stores the current Dispatcher in use for communicating with the + // vm-monitor + monitor atomic.Pointer[Dispatcher] // computeUnit is the latest Compute Unit reported by a scheduler. It may be nil, if we haven't // been able to contact one yet. // @@ -157,12 +149,6 @@ type Runner struct { // to the current scheduler. This field is not nil only when scheduler is not nil. lastSchedulerError error - // lastInformantError provides the error that occurred - if any - during the most recent request - // to the VM informant. - // - // This field MUST NOT be updated without holding BOTH lock AND server.requestLock. - lastInformantError error - // backgroundWorkerCount tracks the current number of background workers. 
It is exclusively // updated by r.spawnBackgroundWorker backgroundWorkerCount atomic.Int64 @@ -205,22 +191,19 @@ type Scheduler struct { // fatal is used for signalling that fatalError has been set (and so we should look for a new // scheduler) - fatal util.SignalSender + fatal util.SignalSender[struct{}] } // RunnerState is the serializable state of the Runner, extracted by its State method type RunnerState struct { - PodIP string `json:"podIP"` - VM api.VmInfo `json:"vm"` - LastMetrics *api.Metrics `json:"lastMetrics"` - Scheduler *SchedulerState `json:"scheduler"` - Server *InformantServerState `json:"server"` - Informant *api.InformantDesc `json:"informant"` - ComputeUnit *api.Resources `json:"computeUnit"` - LastApproved *api.Resources `json:"lastApproved"` - LastSchedulerError error `json:"lastSchedulerError"` - LastInformantError error `json:"lastInformantError"` - BackgroundWorkerCount int64 `json:"backgroundWorkerCount"` + PodIP string `json:"podIP"` + VM api.VmInfo `json:"vm"` + LastMetrics *api.Metrics `json:"lastMetrics"` + Scheduler *SchedulerState `json:"scheduler"` + ComputeUnit *api.Resources `json:"computeUnit"` + LastApproved *api.Resources `json:"lastApproved"` + LastSchedulerError error `json:"lastSchedulerError"` + BackgroundWorkerCount int64 `json:"backgroundWorkerCount"` SchedulerRespondedWithMigration bool `json:"migrationStarted"` } @@ -247,28 +230,12 @@ func (r *Runner) State(ctx context.Context) (*RunnerState, error) { } } - var serverState *InformantServerState - if server := r.server.Load(); server != nil { - serverState = &InformantServerState{ - Desc: server.desc, - SeqNum: server.seqNum, - ReceivedIDCheck: server.receivedIDCheck, - MadeContact: server.madeContact, - ProtoVersion: server.protoVersion, - Mode: server.mode, - ExitStatus: server.exitStatus.Load(), - } - } - return &RunnerState{ LastMetrics: r.lastMetrics, Scheduler: scheduler, - Server: serverState, - Informant: r.informant, ComputeUnit: r.computeUnit, LastApproved: r.lastApproved, LastSchedulerError: r.lastSchedulerError, - LastInformantError: r.lastInformantError, VM: r.vm, PodIP: r.podIP, BackgroundWorkerCount: r.backgroundWorkerCount.Load(), @@ -283,12 +250,13 @@ func (r *Runner) Spawn(ctx context.Context, logger *zap.Logger, vmInfoUpdated ut defer func() { if err := recover(); err != nil { now := time.Now() - r.setStatus(func(stat *podStatus) { + r.status.update(r.global, func(stat podStatus) podStatus { stat.endState = &podStatusEndState{ ExitKind: podStatusExitPanicked, Error: fmt.Errorf("Runner %v panicked: %v", r.vm.NamespacedName(), err), Time: now, } + return stat }) } @@ -303,12 +271,13 @@ func (r *Runner) Spawn(ctx context.Context, logger *zap.Logger, vmInfoUpdated ut exitKind = podStatusExitErrored r.global.metrics.runnerFatalErrors.Inc() } - r.setStatus(func(stat *podStatus) { + r.status.update(r.global, func(stat podStatus) podStatus { stat.endState = &podStatusEndState{ ExitKind: exitKind, Error: err, Time: endTime, } + return stat }) if err != nil { @@ -319,12 +288,6 @@ func (r *Runner) Spawn(ctx context.Context, logger *zap.Logger, vmInfoUpdated ut }() } -func (r *Runner) setStatus(with func(*podStatus)) { - r.status.mu.Lock() - defer r.status.mu.Unlock() - with(r.status) -} - // Run is the main entrypoint to the long-running per-VM pod tasks func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util.CondChannelReceiver) error { ctx, r.shutdown = context.WithCancel(ctx) @@ -349,10 +312,10 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, 
vmInfoUpdated util coreExecLogger := execLogger.Named("core") executorCore := executor.NewExecutorCore(coreExecLogger.Named("state"), r.vm, executor.Config{ - DefaultScalingConfig: r.global.config.Scaling.DefaultConfig, - PluginRequestTick: time.Second * time.Duration(r.global.config.Scheduler.RequestAtLeastEverySeconds), - InformantDeniedDownscaleCooldown: time.Second * time.Duration(r.global.config.Informant.RetryDeniedDownscaleSeconds), - InformantRetryWait: time.Second * time.Duration(r.global.config.Informant.RetryFailedRequestSeconds), + DefaultScalingConfig: r.global.config.Scaling.DefaultConfig, + PluginRequestTick: time.Second * time.Duration(r.global.config.Scheduler.RequestAtLeastEverySeconds), + MonitorDeniedDownscaleCooldown: time.Second * time.Duration(r.global.config.Monitor.RetryDeniedDownscaleSeconds), + MonitorRetryWait: time.Second * time.Duration(r.global.config.Monitor.RetryFailedRequestSeconds), Warn: func(msg string, args ...any) { coreExecLogger.Warn(fmt.Sprintf(msg, args...)) }, @@ -360,13 +323,13 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util pluginIface := makePluginInterface(r, executorCore) neonvmIface := makeNeonVMInterface(r) - informantIface := makeInformantInterface(r, executorCore) + monitorIface := makeMonitorInterface(r, executorCore) // "ecwc" stands for "ExecutorCoreWithClients" ecwc := executorCore.WithClients(executor.ClientSet{ - Plugin: pluginIface, - NeonVM: neonvmIface, - Informant: informantIface, + Plugin: pluginIface, + NeonVM: neonvmIface, + Monitor: monitorIface, }) logger.Info("Starting background workers") @@ -375,6 +338,9 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util mainDeadlockChecker := r.lock.DeadlockChecker(250*time.Millisecond, time.Second) reqDeadlockChecker := r.requestLock.DeadlockChecker(5*time.Second, time.Second) + r.spawnBackgroundWorker(ctx, logger, "podStatus updater", func(c context.Context, l *zap.Logger) { + r.status.periodicallyRefreshState(c, l, r.global) + }) r.spawnBackgroundWorker(ctx, logger, "deadlock checker (main)", ignoreLogger(mainDeadlockChecker)) r.spawnBackgroundWorker(ctx, logger, "deadlock checker (request lock)", ignoreLogger(reqDeadlockChecker)) r.spawnBackgroundWorker(ctx, logger, "track scheduler", func(c context.Context, l *zap.Logger) { @@ -382,40 +348,29 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util ecwc.Updater().NewScheduler(withLock) }) }) - sendInformantUpd, recvInformantUpd := util.NewCondChannelPair() r.spawnBackgroundWorker(ctx, logger, "get metrics", func(c context.Context, l *zap.Logger) { - r.getMetricsLoop(c, l, recvInformantUpd, func(metrics api.Metrics, withLock func()) { + r.getMetricsLoop(c, l, func(metrics api.Metrics, withLock func()) { ecwc.Updater().UpdateMetrics(metrics, withLock) }) }) - r.spawnBackgroundWorker(ctx, logger, "informant server loop", func(c context.Context, l *zap.Logger) { - r.serveInformantLoop( - c, - l, - informantStateCallbacks{ - resetInformant: func(withLock func()) { - ecwc.Updater().ResetInformant(withLock) - }, - upscaleRequested: func(request api.MoreResources, withLock func()) { - ecwc.Updater().UpscaleRequested(request, withLock) - }, - registered: func(active bool, withLock func()) { - ecwc.Updater().InformantRegistered(active, func() { - sendInformantUpd.Send() - withLock() - }) - }, - setActive: func(active bool, withLock func()) { - ecwc.Updater().InformantActive(active, withLock) - }, + r.spawnBackgroundWorker(ctx, 
logger.Named("vm-monitor"), "vm-monitor reconnection loop", func(c context.Context, l *zap.Logger) { + r.connectToMonitorLoop(c, l, monitorStateCallbacks{ + reset: func(withLock func()) { + ecwc.Updater().ResetMonitor(withLock) }, - ) + upscaleRequested: func(request api.MoreResources, withLock func()) { + ecwc.Updater().UpscaleRequested(request, withLock) + }, + setActive: func(active bool, withLock func()) { + ecwc.Updater().MonitorActive(active, withLock) + }, + }) }) r.spawnBackgroundWorker(ctx, execLogger.Named("sleeper"), "executor: sleeper", ecwc.DoSleeper) r.spawnBackgroundWorker(ctx, execLogger.Named("plugin"), "executor: plugin", ecwc.DoPluginRequests) r.spawnBackgroundWorker(ctx, execLogger.Named("neonvm"), "executor: neonvm", ecwc.DoNeonVMRequests) - r.spawnBackgroundWorker(ctx, execLogger.Named("informant-downscale"), "executor: informant downscale", ecwc.DoInformantDownscales) - r.spawnBackgroundWorker(ctx, execLogger.Named("informant-upscale"), "executor: informant upscale", ecwc.DoInformantUpscales) + r.spawnBackgroundWorker(ctx, execLogger.Named("vm-monitor-downscale"), "executor: vm-monitor downscale", ecwc.DoMonitorDownscales) + r.spawnBackgroundWorker(ctx, execLogger.Named("vm-monitor-upscale"), "executor: vm-monitor upscale", ecwc.DoMonitorUpscales) // Note: Run doesn't terminate unless the parent context is cancelled - either because the VM // pod was deleted, or the autoscaler-agent is exiting. @@ -494,7 +449,6 @@ func (r *Runner) spawnBackgroundWorker(ctx context.Context, logger *zap.Logger, func (r *Runner) getMetricsLoop( ctx context.Context, logger *zap.Logger, - updatedInformant util.CondChannelReceiver, newMetrics func(metrics api.Metrics, withLock func()), ) { timeout := time.Second * time.Duration(r.global.config.Metrics.RequestTimeoutSeconds) @@ -504,7 +458,7 @@ func (r *Runner) getMetricsLoop( minWaitDuration := time.Second for { - metrics, err := r.doMetricsRequestIfEnabled(ctx, logger, timeout, updatedInformant.Consume) + metrics, err := r.doMetricsRequest(ctx, logger, timeout) if err != nil { logger.Error("Error making metrics request", zap.Error(err)) goto next @@ -526,158 +480,121 @@ func (r *Runner) getMetricsLoop( case <-minWait: } - // After waiting for the required minimum, allow shortcutting the normal wait if the - // informant was updated select { case <-ctx.Done(): return - case <-updatedInformant.Recv(): - logger.Info("Shortcutting normal metrics wait because informant was updated") case <-waitBetween: } - } } -type informantStateCallbacks struct { - resetInformant func(withLock func()) +type monitorStateCallbacks struct { + reset func(withLock func()) upscaleRequested func(request api.MoreResources, withLock func()) - registered func(active bool, withLock func()) setActive func(active bool, withLock func()) } -// serveInformantLoop repeatedly creates an InformantServer to handle communications with the VM -// informant -// -// This function directly sets the value of r.server and indirectly sets r.informant. 
-func (r *Runner) serveInformantLoop( +// connectToMonitorLoop does lifecycle management of the (re)connection to the vm-monitor +func (r *Runner) connectToMonitorLoop( ctx context.Context, logger *zap.Logger, - callbacks informantStateCallbacks, + callbacks monitorStateCallbacks, ) { - // variables set & accessed across loop iterations - var ( - normalRetryWait <-chan time.Time - minRetryWait <-chan time.Time - lastStart time.Time - ) + addr := fmt.Sprintf("ws://%s:%d/monitor", r.podIP, r.global.config.Monitor.ServerPort) - // Loop-invariant duration constants - minWait := time.Second * time.Duration(r.global.config.Informant.RetryServerMinWaitSeconds) - normalWait := time.Second * time.Duration(r.global.config.Informant.RetryServerNormalWaitSeconds) - retryRegister := time.Second * time.Duration(r.global.config.Informant.RegisterRetrySeconds) + minWait := time.Second * time.Duration(r.global.config.Monitor.ConnectionRetryMinWaitSeconds) + var lastStart time.Time -retryServer: - for { - if normalRetryWait != nil { - logger.Info("Retrying informant server after delay", zap.Duration("delay", normalWait)) - select { - case <-ctx.Done(): - return - case <-normalRetryWait: + for i := 0; ; i += 1 { + // Remove any prior Dispatcher from the Runner + if i != 0 { + func() { + r.lock.Lock() + defer r.lock.Unlock() + callbacks.reset(func() { + r.monitor.Store(nil) + logger.Info("Reset previous vm-monitor connection") + }) + }() + } + + // If the context was canceled, don't restart + if err := ctx.Err(); err != nil { + action := "attempt" + if i != 0 { + action = "retry " } + logger.Info( + fmt.Sprintf("Aborting vm-monitor connection %s because context is already canceled", action), + zap.Error(err), + ) + return } - if minRetryWait != nil { - select { - case <-minRetryWait: - logger.Info("Retrying informant server") - default: + // Delayed restart management, long because of friendly logging: + if i != 0 { + endTime := time.Now() + runtime := endTime.Sub(lastStart) + + if runtime > minWait { + logger.Info( + "Immediately retrying connection to vm-monitor", + zap.String("addr", addr), + zap.Duration("totalRuntime", runtime), + ) + } else { + delay := minWait - runtime logger.Info( - "Informant server ended quickly. 
Respecting minimum delay before restart", - zap.Duration("activeTime", time.Since(lastStart)), zap.Duration("delay", minWait), + "Connection to vm-monitor was not live for long, retrying after delay", + zap.Duration("delay", delay), + zap.Duration("totalRuntime", runtime), ) + select { + case <-time.After(delay): + logger.Info( + "Retrying connection to vm-monitor", + zap.Duration("delay", delay), + zap.Duration("waitTime", time.Since(endTime)), + zap.String("addr", addr), + ) case <-ctx.Done(): + logger.Info( + "Canceling retrying connection to vm-monitor", + zap.Duration("delay", delay), + zap.Duration("waitTime", time.Since(endTime)), + zap.Error(ctx.Err()), + ) return - case <-minRetryWait: } } + } else { + logger.Info("Connecting to vm-monitor", zap.String("addr", addr)) } - normalRetryWait = nil // only "long wait" if an error occurred - minRetryWait = time.After(minWait) - lastStart = time.Now() - - server, exited, err := NewInformantServer(ctx, logger, r, callbacks) - if ctx.Err() != nil { - if err != nil { - logger.Warn("Error starting informant server (but context canceled)", zap.Error(err)) - } - return - } else if err != nil { - normalRetryWait = time.After(normalWait) - logger.Error("Error starting informant server", zap.Error(err)) - continue retryServer + dispatcher, err := NewDispatcher(ctx, logger, addr, r, callbacks.upscaleRequested) + if err != nil { + logger.Error("Failed to connect to vm-monitor", zap.String("addr", addr), zap.Error(err)) + continue } - // Update r.server: + // Update runner to the new dispatcher func() { r.lock.Lock() defer r.lock.Unlock() - - var kind string - if r.server.Load() == nil { - kind = "Setting" - } else { - kind = "Updating" - } - - logger.Info(fmt.Sprintf("%s initial informant server", kind), zap.Object("server", server.desc)) - r.server.Store(server) + callbacks.setActive(true, func() { + r.monitor.Store(dispatcher) + logger.Info("Connected to vm-monitor") + }) }() - logger.Info("Registering with informant") - - // Try to register with the informant: - retryRegister: - for { - err := server.RegisterWithInformant(ctx, logger) - if err == nil { - break // all good; wait for the server to finish. - } else if ctx.Err() != nil { - if err != nil { - logger.Warn("Error registering with informant (but context cancelled)", zap.Error(err)) - } - return - } - - logger.Warn("Error registering with informant", zap.Error(err)) - - // Server exited; can't just retry registering. - if server.ExitStatus() != nil { - normalRetryWait = time.After(normalWait) - continue retryServer - } - - // Wait before retrying registering - logger.Info("Retrying registering with informant after delay", zap.Duration("delay", retryRegister)) - select { - case <-time.After(retryRegister): - continue retryRegister - case <-ctx.Done(): - return - } - } - - // Wait for the server to finish - select { - case <-ctx.Done(): - return - case <-exited.Recv(): - } + // Wait until the dispatcher is no longer running, either due to error or because the + // root-level Runner context was canceled. 
+ <-dispatcher.ExitSignal() - // Server finished - exitStatus := server.ExitStatus() - if exitStatus == nil { - panic(errors.New("Informant server signalled end but ExitStatus() == nil")) + if err := dispatcher.ExitError(); err != nil { + logger.Error("Dispatcher for vm-monitor connection exited due to error", zap.Error(err)) } - - if !exitStatus.RetryShouldFix { - normalRetryWait = time.After(normalWait) - } - - continue retryServer } } @@ -696,7 +613,7 @@ func (r *Runner) trackSchedulerLoop( minWait time.Duration = 5 * time.Second // minimum time we have to wait between scheduler starts okForNew <-chan time.Time // channel that sends when we've waited long enough for a new scheduler currentInfo schedwatch.SchedulerInfo - fatal util.SignalReceiver + fatal util.SignalReceiver[struct{}] failed bool ) @@ -714,7 +631,7 @@ startScheduler: failed = false // Set the current scheduler - fatal = func() util.SignalReceiver { + fatal = func() util.SignalReceiver[struct{}] { logger := logger.With(zap.Object("scheduler", currentInfo)) verb := "Setting" @@ -722,7 +639,7 @@ startScheduler: verb = "Updating" } - sendFatal, recvFatal := util.NewSingleSignalPair() + sendFatal, recvFatal := util.NewSingleSignalPair[struct{}]() sched := &Scheduler{ runner: r, @@ -830,65 +747,13 @@ waitForNewScheduler: // Lower-level implementation functions // ////////////////////////////////////////// -// doMetricsRequestIfEnabled makes a single metrics request to the VM informant, returning it -// -// This method expects that the Runner is not locked. -func (r *Runner) doMetricsRequestIfEnabled( +// doMetricsRequest makes a single metrics request to the VM +func (r *Runner) doMetricsRequest( ctx context.Context, logger *zap.Logger, timeout time.Duration, - clearNewInformantSignal func(), ) (*api.Metrics, error) { - logger.Info("Attempting metrics request") - - // FIXME: the region where the lock is held should be extracted into a separate method, called - // something like buildMetricsRequest(). - - r.lock.Lock() - locked := true - defer func() { - if locked { - r.lock.Unlock() - } - }() - - // Only clear the signal once we've locked, so that we're not racing. - // - // We don't *need* to do this, but its only cost is a small amount of code complexity, and it's - // nice to have have the guarantees around not racing. 
- clearNewInformantSignal() - - if server := r.server.Load(); server == nil || server.mode != InformantServerRunning { - var state = "unset" - if server != nil { - state = string(server.mode) - } - - logger.Info(fmt.Sprintf("Cannot make metrics request because informant server is %s", state)) - return nil, nil - } - - if r.informant == nil { - panic(errors.New("r.informant == nil but r.server.mode == InformantServerRunning")) - } - - var url string - var handle func(body []byte) (*api.Metrics, error) - - switch { - case r.informant.MetricsMethod.Prometheus != nil: - url = fmt.Sprintf("http://%s:%d/metrics", r.podIP, r.informant.MetricsMethod.Prometheus.Port) - handle = func(body []byte) (*api.Metrics, error) { - m, err := api.ReadMetrics(body, r.global.config.Metrics.LoadMetricPrefix) - if err != nil { - err = fmt.Errorf("Error reading metrics from prometheus output: %w", err) - } - return &m, err - } - default: - // Ok to panic here because this should be handled by the informant server - panic(errors.New("server's InformantDesc has unknown metrics method")) - } + url := fmt.Sprintf("http://%s:%d/metrics", r.podIP, r.global.config.Metrics.Port) reqCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -898,10 +763,6 @@ func (r *Runner) doMetricsRequestIfEnabled( panic(fmt.Errorf("Error constructing metrics request to %q: %w", url, err)) } - // Unlock while we perform the request: - locked = false - r.lock.Unlock() - logger.Info("Making metrics request to VM", zap.String("url", url)) resp, err := http.DefaultClient.Do(req) @@ -921,7 +782,12 @@ func (r *Runner) doMetricsRequestIfEnabled( return nil, fmt.Errorf("Unsuccessful response status %d: %s", resp.StatusCode, string(body)) } - return handle(body) + m, err := api.ReadMetrics(body, r.global.config.Metrics.LoadMetricPrefix) + if err != nil { + return nil, fmt.Errorf("Error reading metrics from prometheus output: %w", err) + } + + return &m, nil } func (r *Runner) doNeonVMRequest(ctx context.Context, target api.Resources) error { @@ -990,6 +856,44 @@ func (r *Runner) recordResourceChange(current, target api.Resources, metrics res } } +func doMonitorDownscale( + ctx context.Context, + logger *zap.Logger, + dispatcher *Dispatcher, + target api.Resources, +) (*api.DownscaleResult, error) { + r := dispatcher.runner + rawResources := target.ConvertToAllocation(r.vm.Mem.SlotSize) + + timeout := time.Second * time.Duration(r.global.config.Monitor.ResponseTimeoutSeconds) + + res, err := dispatcher.Call(ctx, logger, timeout, "DownscaleRequest", api.DownscaleRequest{ + Target: rawResources, + }) + if err != nil { + return nil, err + } + + return res.Result, nil +} + +func doMonitorUpscale( + ctx context.Context, + logger *zap.Logger, + dispatcher *Dispatcher, + target api.Resources, +) error { + r := dispatcher.runner + rawResources := target.ConvertToAllocation(r.vm.Mem.SlotSize) + + timeout := time.Second * time.Duration(r.global.config.Monitor.ResponseTimeoutSeconds) + + _, err := dispatcher.Call(ctx, logger, timeout, "UpscaleNotification", api.UpscaleNotification{ + Granted: rawResources, + }) + return err +} + // DoRequest sends a request to the scheduler and does not validate the response. 
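As a point of reference for the two vm-monitor helpers added above, here is a minimal sketch of how they might be driven from elsewhere in the agent. It is not part of this patch: the function name is invented, it assumes the surrounding pkg/agent package context (the Runner, Dispatcher, and api types introduced in this diff, plus that package's existing imports), and the Ok/Status fields on api.DownscaleResult are an assumption about that type's shape rather than something shown here.

```go
// tryMonitorDownscale is a hypothetical caller of doMonitorDownscale (not part
// of this patch). It fetches whichever Dispatcher connectToMonitorLoop most
// recently stored on the Runner, asks the vm-monitor to downscale, and honors
// its verdict.
func tryMonitorDownscale(ctx context.Context, logger *zap.Logger, r *Runner, target api.Resources) error {
	dispatcher := r.monitor.Load()
	if dispatcher == nil {
		return errors.New("no live vm-monitor connection")
	}

	result, err := doMonitorDownscale(ctx, logger, dispatcher, target)
	if err != nil {
		return fmt.Errorf("downscale request failed: %w", err)
	}

	// Assumption: api.DownscaleResult exposes an Ok flag and a human-readable
	// Status; the vm-monitor is allowed to deny the request.
	if !result.Ok {
		return fmt.Errorf("vm-monitor denied downscale: %s", result.Status)
	}
	return nil
}
```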
func (s *Scheduler) DoRequest( ctx context.Context, diff --git a/pkg/agent/schedwatch/trackcurrent.go b/pkg/agent/schedwatch/trackcurrent.go index 2e42e30a3..ace82c25f 100644 --- a/pkg/agent/schedwatch/trackcurrent.go +++ b/pkg/agent/schedwatch/trackcurrent.go @@ -24,7 +24,7 @@ type SchedulerWatch struct { cmd chan<- watchCmd using chan<- SchedulerInfo - stop util.SignalSender + stop util.SignalSender[struct{}] stopEventStream func() } @@ -42,7 +42,7 @@ func (w SchedulerWatch) Using(sched SchedulerInfo) { func (w SchedulerWatch) Stop() { w.stopEventStream() - w.stop.Send() + w.stop.Send(struct{}{}) } const schedulerNamespace string = "kube-system" @@ -69,7 +69,7 @@ func WatchSchedulerUpdates( deleted := make(chan SchedulerInfo) cmd := make(chan watchCmd) using := make(chan SchedulerInfo) - stopSender, stopListener := util.NewSingleSignalPair() + stopSender, stopListener := util.NewSingleSignalPair[struct{}]() state := schedulerWatchState{ queue: make([]WatchEvent, 0, 1), @@ -132,7 +132,7 @@ type schedulerWatchState struct { cmd <-chan watchCmd using <-chan SchedulerInfo - stop util.SignalReceiver + stop util.SignalReceiver[struct{}] logger *zap.Logger } diff --git a/pkg/api/VERSIONING.md b/pkg/api/VERSIONING.md index 899410d13..bbbd45f24 100644 --- a/pkg/api/VERSIONING.md +++ b/pkg/api/VERSIONING.md @@ -9,40 +9,14 @@ The table below should provide the necessary information. For each release, it g supported protocol versions by each component. The topmost line - "Current" - refers to the latest commit in this repository, possibly unreleased. -## agent<->informant protocol +## agent<->monitor protocol -| Release | autoscaler-agent | VM informant | -|---------|------------------|--------------| -| _Current_ | v1.0 - v2.0 | v2.0 - v2.0 | -| v0.11.0 | v1.0 - v2.0 | v2.0 - v2.0 | -| v0.10.0 | **v1.0 - v2.0** | **v2.0 - v2.0** | -| v0.9.0 | v1.0 - v1.2 | v1.1 - v1.2 | -| v0.8.0 | v1.0 - v1.2 | v1.1 - v1.2 | -| v0.7.2 | v1.0 - v1.2 | v1.1 - v1.2 | -| v0.7.1 | v1.0 - v1.2 | v1.1 - v1.2 | -| v0.7.0 | **v1.0 - v1.2** | **v1.1 - v1.2** | -| v0.6.0 | v1.0 - v1.1 | v1.1 only | -| v0.5.2 | v1.0 - v1.1 | v1.1 only | -| v0.5.1 | v1.0 - v1.1 | v1.1 only | -| v0.5.0 | v1.0 - v1.1 | v1.1 only | -| v0.1.17 | v1.0 - v1.1 | v1.1 only | -| v0.1.16 | v1.0 - v1.1 | v1.1 only | -| v0.1.15 | v1.0 - v1.1 | v1.1 only | -| v0.1.14 | v1.0 - v1.1 | v1.1 only | -| v0.1.13 | v1.0 - v1.1 | v1.1 only | -| v0.1.12 | v1.0 - v1.1 | v1.1 only | -| v0.1.11 | v1.0 - v1.1 | v1.1 only | -| v0.1.10 | v1.0 - v1.1 | v1.1 only | -| v0.1.9 | v1.0 - v1.1 | v1.1 only | -| v0.1.8 | v1.0 - v1.1 | v1.1 only | -| v0.1.7 | v1.0 - v1.1 | v1.1 only | -| v0.1.6 | v1.0 - v1.1 | v1.1 only | -| v0.1.5 | v1.0 - v1.1 | v1.1 only | -| v0.1.4 | **v1.0 - v1.1** | **v1.1** only | -| v0.1.3 | v1.0 only | v1.0 only | -| 0.1.2 | v1.0 only | v1.0 only | -| 0.1.1 | v1.0 only | v1.0 only | -| 0.1.0 | **v1.0** only | **v1.0** only | +| Release | autoscaler-agent | VM monitor | +|---------|------------------|------------| +| _Current_ | v1.0 only | v1.0 only | +| v0.17.0 | v1.0 only | v1.0 only | +| v0.16.0 | v1.0 only | v1.0 only | +| v0.15.0 | **v1.0** only | **v1.0** only | ## agent<->scheduler plugin protocol @@ -54,6 +28,19 @@ number. 
| Release | autoscaler-agent | Scheduler plugin | |---------|------------------|------------------| | _Current_ | v2.0 only | v1.0-v2.0 | +| v0.17.0 | v2.0 only | v1.0-v2.0 | +| v0.16.0 | v2.0 only | v1.0-v2.0 | +| v0.15.0 | v2.0 only | v1.0-v2.0 | +| v0.14.2 | v2.0 only | v1.0-v2.0 | +| v0.14.1 | v2.0 only | v1.0-v2.0 | +| v0.14.0 | v2.0 only | v1.0-v2.0 | +| v0.13.3 | v2.0 only | v1.0-v2.0 | +| v0.13.2 | v2.0 only | v1.0-v2.0 | +| v0.13.1 | v2.0 only | v1.0-v2.0 | +| v0.13.0 | v2.0 only | v1.0-v2.0 | +| v0.12.2 | v2.0 only | v1.0-v2.0 | +| v0.12.1 | v2.0 only | v1.0-v2.0 | +| v0.12.0 | v2.0 only | v1.0-v2.0 | | v0.11.0 | v2.0 only | v1.0-v2.0 | | v0.10.0 | v2.0 only | v1.0-v2.0 | | v0.9.0 | v2.0 only | v1.0-v2.0 | @@ -90,6 +77,19 @@ Note: Components v0.6.0 and below did not have a versioned protocol between the | Release | controller | runner | |---------|------------|--------| | _Current_ | 0 - 1 | 1 | +| v0.17.0 | 0 - 1 | 1 | +| v0.16.0 | 0 - 1 | 1 | +| v0.15.0 | 0 - 1 | 1 | +| v0.14.2 | 0 - 1 | 1 | +| v0.14.1 | 0 - 1 | 1 | +| v0.14.0 | 0 - 1 | 1 | +| v0.13.3 | 0 - 1 | 1 | +| v0.13.2 | 0 - 1 | 1 | +| v0.13.1 | 0 - 1 | 1 | +| v0.13.0 | 0 - 1 | 1 | +| v0.12.2 | 0 - 1 | 1 | +| v0.12.1 | 0 - 1 | 1 | +| v0.12.0 | 0 - 1 | 1 | | v0.11.0 | 0 - 1 | 1 | | v0.10.0 | 0 - 1 | 1 | | v0.9.0 | 0 - 1 | 1 | diff --git a/pkg/api/metrics.go b/pkg/api/metrics.go index 946799236..458be045b 100644 --- a/pkg/api/metrics.go +++ b/pkg/api/metrics.go @@ -11,8 +11,9 @@ import ( // Metrics gives the information pulled from node_exporter that the scheduler may use to prioritize // which pods it should migrate. type Metrics struct { - LoadAverage1Min float32 `json:"loadAvg1M"` - LoadAverage5Min float32 `json:"loadAvg5M"` + LoadAverage1Min float32 `json:"loadAvg1M"` + LoadAverage5Min float32 `json:"loadAvg5M"` + MemoryUsageBytes float32 `json:"memoryUsageBytes"` } // ReadMetrics generates Metrics from node_exporter output, or returns error on failure @@ -61,5 +62,17 @@ func ReadMetrics(nodeExporterOutput []byte, loadPrefix string) (m Metrics, err e return } + availableMem, err := getField(loadPrefix+"memory_available_bytes", "") + if err != nil { + return + } + totalMem, err := getField(loadPrefix+"memory_total_bytes", "") + if err != nil { + return + } + + // Add an extra 100 MiB to account for kernel memory usage + m.MemoryUsageBytes = totalMem - availableMem + 100*(1<<20) + return } diff --git a/pkg/api/types.go b/pkg/api/types.go index 77b3aac0b..c629405bf 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -1,10 +1,11 @@ package api import ( + "encoding/json" "errors" "fmt" + "reflect" - "github.com/google/uuid" "go.uber.org/zap/zapcore" "k8s.io/apimachinery/pkg/api/resource" @@ -232,11 +233,11 @@ func (r Resources) IncreaseFrom(old Resources) MoreResources { } } -// ConvertToRaw produces the RawResources equivalent to these Resources with the given slot size -func (r Resources) ConvertToRaw(memSlotSize *resource.Quantity) RawResources { - return RawResources{ - Cpu: r.VCPU.ToResourceQuantity(), - Memory: resource.NewQuantity(int64(r.Mem)*memSlotSize.Value(), resource.BinarySI), +// ConvertToRaw produces the Allocation equivalent to these Resources with the given slot size +func (r Resources) ConvertToAllocation(memSlotSize *resource.Quantity) Allocation { + return Allocation{ + Cpu: r.VCPU.ToResourceQuantity().AsApproximateFloat64(), + Mem: uint64(int64(r.Mem) * memSlotSize.Value()), } } @@ -273,251 +274,11 @@ type PluginResponse struct { // TODO: fill this with more information as required type 
MigrateResponse struct{} -/////////////////////////// -// VM Informant Messages // -/////////////////////////// - -// InformantProtoVersion represents a single version of the agent<->informant protocol -// -// Each version of the agent<->informant protocol is named independently from releases of the -// repository containing this code. Names follow semver, although this does not necessarily -// guarantee support - for example, the VM informant may only support versions above v1.1. -// -// Version compatibility is documented in the neighboring file VERSIONING.md. -type InformantProtoVersion uint32 - -const ( - // InformantProtoV1_0 represents v1.0 of the agent<->informant protocol - the initial version. - // - // Last used in release version 0.1.2. - InformantProtoV1_0 InformantProtoVersion = iota + 1 // +1 so we start from 1 - - // InformantProtoV1_1 represents v1.1 of the agent<->informant protocol. - // - // Changes from v1.0: - // - // * Adds /try-upscale endpoint to the autoscaler-agent. - // - // Last used in release version v0.6.0. - InformantProtoV1_1 - - // InformantProtoV1_2 represents v1.2 of the agent<->informant protocol. - // - // Changes from v1.1: - // - // * Adds /health-check endpoint to the vm-informant. - // - // Last used in release version v0.9.0 - InformantProtoV1_2 - - // InformantProtoV2_0 represents v2.0 of the agent<->informant protocol. - // - // Changes from v1.2: - // - // * Agents now return a AgentResourceMessage when notifying VM's of changes - // in resources on their /upscale and /downscale endpoints. Since - // RawResources (the response type in previous protocols) is not - // deserializable out of an AgentResourceMessage, this is a breaking - // change. - // - // Currently the latest version. - InformantProtoV2_0 - - // latestInformantProtoVersion represents the latest version of the agent<->informant protocol - // - // This value is kept private because it should not be used externally; any desired - // functionality that could be implemented with it should instead be a method on - // InformantProtoVersion. - latestInformantProtoVersion InformantProtoVersion = iota // excluding +1 makes it equal to previous -) - -func (v InformantProtoVersion) String() string { - var zero InformantProtoVersion - - switch v { - case zero: - return "" - case InformantProtoV1_0: - return "v1.0" - case InformantProtoV1_1: - return "v1.1" - case InformantProtoV1_2: - return "v1.2" - case InformantProtoV2_0: - return "v2.0" - default: - diff := v - latestInformantProtoVersion - return fmt.Sprintf("", latestInformantProtoVersion, diff) - } -} - -// IsValid returns whether the protocol version is valid. The zero value is not valid. -func (v InformantProtoVersion) IsValid() bool { - return uint(v) != 0 -} - -// HasTryUpscale returns whether this version of the protocol has the /try-upscale endpoint -// -// This is true for version v1.1 and greater. -func (v InformantProtoVersion) HasTryUpscale() bool { - return v >= InformantProtoV1_1 -} - -// AllowsHealthCheck returns whether this version of the protocol has the informant's /health-check -// endpoint -// -// This is true for version v1.2 and greater. 
-func (v InformantProtoVersion) AllowsHealthCheck() bool { - return v >= InformantProtoV1_2 -} - -// SignsResourceUpdates returns whether agents inform VMs of resource updates with an -// AgentResourceMessage in this version of the protocol -// -// This is true for version v2.0 and greater -func (v InformantProtoVersion) SignsResourceUpdates() bool { - return v >= InformantProtoV2_0 -} - -// AgentMessage is used for (almost) every message sent from the autoscaler-agent to the VM -// informant, and serves to wrap the type T with a SequenceNumber -// -// The SequenceNumber provides a total ordering of states, even if the ordering of HTTP requests and -// responses are out of order. Fundamentally this is required because we have bidirectional -// communication between the autoscaler-agent and VM informant — without it, we run the risk of racy -// behavior, which could *actually* result in data corruption. -type AgentMessage[T any] struct { - // Data is the content of the request or response - Data T `json:"data"` - - // SequenceNumber is a unique-per-instance monotonically increasing number passed in each - // non-initial message from the autoscaler-agent to the VM informant, both requests and - // responses. - SequenceNumber uint64 `json:"sequenceNumber"` -} - -// AgentDesc is the first message sent from an autoscaler-agent to a VM informant, describing some -// information about the autoscaler-agent -// -// Each time an autoscaler-agent (re)connects to a VM informant, it sends an AgentDesc to the -// "/register" endpoint. -// -// For more information on the agent<->informant protocol, refer to the top-level ARCHITECTURE.md -type AgentDesc struct { - // AgentID is a unique UUID for the current instance of the autoscaler-agent - // - // This is helpful so that we can distinguish between (incorrect) duplicate calls to /register - // and (correct) re-registering of an agent. - AgentID uuid.UUID `json:"agentID"` - - // ServeAddr gives the unique (per instance) - ServerAddr string `json:"agentServeAddr"` - - // MinProtoVersion is the minimum version of the agent<->informant protocol that the - // autoscaler-agent supports - // - // Protocol versions are always non-zero. - // - // AgentDesc must always have MinProtoVersion <= MaxProtoVersion. - MinProtoVersion InformantProtoVersion `json:"minProtoVersion"` - // MaxProtoVersion is the maximum version of the agent<->informant protocol that the - // autoscaler-agent supports, inclusive. - // - // Protocol versions are always non-zero. - // - // AgentDesc must always have MinProtoVersion <= MaxProtoVersion. - MaxProtoVersion InformantProtoVersion `json:"maxProtoVersion"` -} - -// MarshalLogObject implements zapcore.ObjectMarshaler, so that Resources can be used with zap.Object -func (d AgentDesc) MarshalLogObject(enc zapcore.ObjectEncoder) error { - enc.AddString("agentID", d.AgentID.String()) - enc.AddString("agentServeAddr", string(d.ServerAddr)) - enc.AddString("minProtoVersion", d.MinProtoVersion.String()) - enc.AddString("maxProtoVersion", d.MaxProtoVersion.String()) - return nil -} - -// ProtocolRange returns a VersionRange from d.MinProtoVersion to d.MaxProtoVersion. 
-func (d AgentDesc) ProtocolRange() VersionRange[InformantProtoVersion] { - return VersionRange[InformantProtoVersion]{ - Min: d.MinProtoVersion, - Max: d.MaxProtoVersion, - } -} - -type AgentIdentificationMessage = AgentMessage[AgentIdentification] - -// AgentIdentification affirms the AgentID of the autoscaler-agent in its initial response to a VM -// informant, on the /id endpoint. This response is always wrapped in an AgentMessage. A type alias -// for this is provided as AgentIdentificationMessage, for convenience. -type AgentIdentification struct { - // AgentID is the same AgentID as given in the AgentDesc initially provided to the VM informant - AgentID uuid.UUID `json:"agentID"` -} - -// InformantDesc describes the capabilities of a VM informant, in response to an autoscaler-agent's -// request on the /register endpoint -// -// For more information on the agent<->informant protocol, refer to the top-level ARCHITECTURE.md -type InformantDesc struct { - // ProtoVersion is the version of the agent<->informant protocol that the VM informant has - // selected - // - // If an autoscaler-agent is successfully registered, a well-behaved VM informant MUST respond - // with a ProtoVersion within the bounds of the agent's declared minimum and maximum protocol - // versions. If the VM informant does not use a protocol version within those bounds, then it - // MUST respond with an error status code. - ProtoVersion InformantProtoVersion `json:"protoVersion"` - - // MetricsMethod tells the autoscaler-agent how to fetch metrics from the VM - MetricsMethod InformantMetricsMethod `json:"metricsMethod"` -} - -// InformantMetricsMethod collects the options for ways the VM informant can report metrics -// -// At least one method *must* be provided in an InformantDesc, and more than one method gives the -// autoscaler-agent freedom to choose. -// -// We use this type so it's easier to ensure backwards compatibility with previous versions of the -// VM informant — at least during the rollout of new autoscaler-agent or VM informant versions. -type InformantMetricsMethod struct { - // Prometheus describes prometheus-format metrics, typically not through the informant itself - Prometheus *MetricsMethodPrometheus `json:"prometheus,omitempty"` -} - -// MetricsMethodPrometheus describes VM informant's metrics in the prometheus format, made available -// on a particular port -type MetricsMethodPrometheus struct { - Port uint16 `json:"port"` -} - -// InformantHealthCheckResp is the result of a successful request to a VM informant's /health-check -// endpoint. -type InformantHealthCheckResp struct{} - -// UnregisterAgent is the result of a successful request to a VM informant's /unregister endpoint -type UnregisterAgent struct { - // WasActive indicates whether the unregistered autoscaler-agent was the one in-use by the VM - // informant - WasActive bool `json:"wasActive"` -} - -// MoreResourcesRequest is the request type wrapping MoreResources that's sent by the VM informant -// to the autoscaler-agent's /try-upscale endpoint when the VM is urgently in need of more -// resources. 
-type MoreResourcesRequest struct { - MoreResources - - // ExpectedID is the expected AgentID of the autoscaler-agent - ExpectedID uuid.UUID `json:"expectedID"` -} - // MoreResources holds the data associated with a MoreResourcesRequest type MoreResources struct { - // Cpu is true if the VM informant is requesting more CPU + // Cpu is true if the vm-monitor is requesting more CPU Cpu bool `json:"cpu"` - // Memory is true if the VM informant is requesting more memory + // Memory is true if the vm-monitor is requesting more memory Memory bool `json:"memory"` } @@ -537,48 +298,6 @@ func (m MoreResources) And(cmp MoreResources) MoreResources { } } -// RawResources signals raw resource amounts, and is primarily used in communications with the VM -// informant because it doesn't know about things like memory slots. -// -// This is used in protocol versions <2. In later versions, AgentResourceMessage is used. -type RawResources struct { - Cpu *resource.Quantity `json:"cpu"` - Memory *resource.Quantity `json:"memory"` -} - -type AgentResourceMessage = AgentMessage[ResourceMessage] - -// Similar to RawResources, stores raw resource amounts. However, it also stores the ID of the agent -// notifying the VM of a change in resources. In protocol versions 2 and on, agents notify VM's of -// changes to their available resources with an AgentResourceMessage. This allows VM informants to verify -// the authenticity of the agent responding. -type ResourceMessage struct { - RawResources - Id AgentIdentification `json:"id"` -} - -// DownscaleResult is used by the VM informant to return whether it downscaled successfully, and -// some indication of its status when doing so -type DownscaleResult struct { - Ok bool - Status string -} - -// SuspendAgent is sent from the VM informant to the autoscaler-agent when it has been contacted by -// a new autoscaler-agent and wishes to switch to that instead -// -// Instead of just cutting off any connection(s) to the agent, the informant keeps it around in case -// the new one fails and it needs to fall back to the old one. -type SuspendAgent struct { - ExpectedID uuid.UUID `json:"expectedID"` -} - -// ResumeAgent is sent from the VM informant to the autoscaler-agent to resume contact when it was -// previously suspended. -type ResumeAgent struct { - ExpectedID uuid.UUID `json:"expectedID"` -} - //////////////////////////////////// // Controller <-> Runner Messages // //////////////////////////////////// @@ -606,3 +325,158 @@ const ( func (v RunnerProtoVersion) SupportsCgroupFractionalCPU() bool { return v >= RunnerProtoV1 } + +//////////////////////////////////// +// Agent <-> Monitor Messages // +//////////////////////////////////// + +// Represents the resources that a VM has been granted +type Allocation struct { + // Number of vCPUs + Cpu float64 `json:"cpu"` + + // Number of bytes + Mem uint64 `json:"mem"` +} + +// ** Types sent by monitor ** + +// This type is sent to the agent as a way to request immediate upscale. +// Since the agent cannot control if the agent will choose to upscale the VM, +// it does not return anything. If an upscale is granted, the agent will notify +// the monitor via an UpscaleConfirmation +type UpscaleRequest struct{} + +// This type is sent to the agent to confirm it successfully upscaled, meaning +// it increased its filecache and/or cgroup memory limits. The agent does not +// need to respond. +type UpscaleConfirmation struct{} + +// This type is sent to the agent to indicate if downscaling was successful. 
The
+// agent does not need to respond.
+type DownscaleResult struct {
+	Ok     bool
+	Status string
+}
+
+// ** Types sent by agent **
+
+// This type is sent to the monitor to inform it that it has been granted a greater
+// allocation. Once the monitor is done applying this new allocation (i.e., increasing
+// file cache size, cgroup memory limits) it should reply with an UpscaleConfirmation.
+type UpscaleNotification struct {
+	Granted Allocation `json:"granted"`
+}
+
+// This type is sent to the monitor as a request to downscale its resource usage.
+// Once the monitor has downscaled or failed to do so, it should respond with a
+// DownscaleResult.
+type DownscaleRequest struct {
+	Target Allocation `json:"target"`
+}
+
+// ** Types shared by agent and monitor **
+
+// This type can be sent by either party whenever they receive a message they
+// cannot deserialize properly.
+type InvalidMessage struct {
+	Error string `json:"error"`
+}
+
+// This type can be sent by either party to signal that an error occurred while carrying
+// out the other party's request, for example, the monitor erroring while trying
+// to downscale. The receiving party can then log the error or propagate it as they
+// see fit.
+type InternalError struct {
+	Error string `json:"error"`
+}
+
+// This type is sent as part of a bidirectional heartbeat between the monitor and
+// agent. The check is initiated by the agent.
+type HealthCheck struct{}
+
+// This function is used to prepare a message for serialization. Any data passed
+// to the monitor should be serialized with this function. As of protocol v1.0,
+// the following types may be sent to the monitor, and thus passed in:
+// - DownscaleRequest
+// - UpscaleNotification
+// - InvalidMessage
+// - InternalError
+// - HealthCheck
+func SerializeMonitorMessage(content any, id uint64) ([]byte, error) {
+	// The final type that gets sent over the wire
+	type Bundle struct {
+		Content any    `json:"content"`
+		Type    string `json:"type"`
+		Id      uint64 `json:"id"`
+	}
+
+	var typeStr string
+	switch content.(type) {
+	case DownscaleRequest:
+		typeStr = "DownscaleRequest"
+	case UpscaleNotification:
+		typeStr = "UpscaleNotification"
+	case InvalidMessage:
+		typeStr = "InvalidMessage"
+	case InternalError:
+		typeStr = "InternalError"
+	case HealthCheck:
+		typeStr = "HealthCheck"
+	default:
+		return nil, fmt.Errorf("unknown message type \"%s\"", reflect.TypeOf(content))
+	}
+
+	return json.Marshal(Bundle{
+		Content: content,
+		Type:    typeStr,
+		Id:      id,
+	})
+}
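// Editorial note, not part of this patch: a minimal sketch of how an agent-side caller
// might use SerializeMonitorMessage, and the wire format it produces. The message id (42)
// and the resource values are arbitrary; error handling is elided to a panic for brevity.
func ExampleSerializeMonitorMessage() {
	b, err := SerializeMonitorMessage(DownscaleRequest{
		Target: Allocation{Cpu: 0.25, Mem: 1 << 30},
	}, 42)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b))
	// Output: {"content":{"target":{"cpu":0.25,"mem":1073741824}},"type":"DownscaleRequest","id":42}
}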
+
+// MonitorProtoVersion represents a single version of the agent<->monitor protocol
+//
+// Each version of the agent<->monitor protocol is named independently from releases of the
+// repository containing this code. Names follow semver, although this does not necessarily
+// guarantee support - for example, the monitor may only support versions above v1.1.
+//
+// Version compatibility is documented in the neighboring file VERSIONING.md.
+type MonitorProtoVersion uint32
+
+const (
+	// MonitorProtoV1_0 represents v1.0 of the agent<->monitor protocol - the initial version.
+	//
+	// Currently the latest version.
+	MonitorProtoV1_0 = iota + 1
+
+	// latestMonitorProtoVersion represents the latest version of the agent<->monitor protocol
+	//
+	// This value is kept private because it should not be used externally; any desired
+	// functionality that could be implemented with it should instead be a method on
+	// MonitorProtoVersion.
+	latestMonitorProtoVersion MonitorProtoVersion = iota // excluding +1 makes it equal to previous
+)
+
+func (v MonitorProtoVersion) String() string {
+	var zero MonitorProtoVersion
+
+	switch v {
+	case zero:
+		return "<invalid: zero>"
+	case MonitorProtoV1_0:
+		return "v1.0"
+	default:
+		diff := v - latestMonitorProtoVersion
+		return fmt.Sprintf("<unknown = %v + %d>", latestMonitorProtoVersion, diff)
+	}
+}
+
+// Sent back by the monitor after figuring out what protocol version we should use
+type MonitorProtocolResponse struct {
+	// If `Error` is nil, contains the value of the settled-on protocol version.
+	// Otherwise, will be set to 0 (MonitorProtoVersion's zero value).
+	Version MonitorProtoVersion `json:"version,omitempty"`
+
+	// Will be nil if no error occurred.
+	Error *string `json:"error,omitempty"`
+}
diff --git a/pkg/api/versionutils.go b/pkg/api/versionutils.go
index 1a79d8035..217cd5cb4 100644
--- a/pkg/api/versionutils.go
+++ b/pkg/api/versionutils.go
@@ -13,9 +13,12 @@ import (
 // VersionRange is a helper type to represent a range of versions.
 //
 // The bounds are inclusive, representing all versions v with Min <= v <= Max.
+//
+// This type is sent directly to the monitor during the creation of a new
+// Dispatcher as part of figuring out which protocol to use.
 type VersionRange[V constraints.Ordered] struct {
-	Min V
-	Max V
+	Min V `json:"min"`
+	Max V `json:"max"`
 }
 
 func (r VersionRange[V]) String() string {
diff --git a/pkg/api/vminfo.go b/pkg/api/vminfo.go
index ad5377a7a..8e2f37af7 100644
--- a/pkg/api/vminfo.go
+++ b/pkg/api/vminfo.go
@@ -213,7 +213,7 @@ func ExtractVmInfo(logger *zap.Logger, vm *vmapi.VirtualMachine) (*VmInfo, error
 }
 
 func (vm VmInfo) EqualScalingBounds(cmp VmInfo) bool {
-	return vm.Min() != cmp.Min() || vm.Max() != cmp.Max()
+	return vm.Min() == cmp.Min() && vm.Max() == cmp.Max()
 }
 
 func (vm *VmInfo) applyBounds(b ScalingBounds) {
@@ -277,6 +277,11 @@ type ScalingConfig struct {
 	// CPU,
 	// scaling CPU to make this happen.
 	LoadAverageFractionTarget float64 `json:"loadAverageFractionTarget"`
+
+	// MemoryUsageFractionTarget sets the desired fraction of current memory that
+	// we would like to be using. For example, with a value of 0.7, on a 4GB VM
+	// we'd like to be using 2.8GB of memory.
+	MemoryUsageFractionTarget float64 `json:"memoryUsageFractionTarget"`
 }
 
 func (c *ScalingConfig) Validate() error {
@@ -288,6 +293,10 @@ func (c *ScalingConfig) Validate() error {
 	erc.Whenf(ec, c.LoadAverageFractionTarget < 0.0, "%s must be set to value >= 0", ".loadAverageFractionTarget")
 	erc.Whenf(ec, c.LoadAverageFractionTarget >= 2.0, "%s must be set to value < 2 ", ".loadAverageFractionTarget")
+
+	// Make sure c.MemoryUsageFractionTarget is between 0 and 1
+	erc.Whenf(ec, c.MemoryUsageFractionTarget < 0.0, "%s must be set to value >= 0", ".memoryUsageFractionTarget")
+	erc.Whenf(ec, c.MemoryUsageFractionTarget >= 1.0, "%s must be set to value < 1 ", ".memoryUsageFractionTarget")
+
 	// heads-up! some functions elsewhere depend on the concrete return type of this function.
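	// Editorial note, not part of this patch: with the checks above, a ScalingConfig like
	//
	//	{"loadAverageFractionTarget": 0.9, "memoryUsageFractionTarget": 0.75}
	//
	// passes validation, while a memoryUsageFractionTarget of 1.0 or more (or any negative
	// value) is rejected.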
return ec.Resolve() } diff --git a/pkg/api/vminfo_test.go b/pkg/api/vminfo_test.go index a78d6d414..5404acacc 100644 --- a/pkg/api/vminfo_test.go +++ b/pkg/api/vminfo_test.go @@ -29,12 +29,13 @@ func TestFormatting(t *testing.T) { }, ScalingConfig: &api.ScalingConfig{ LoadAverageFractionTarget: 0.7, + MemoryUsageFractionTarget: 0.7, }, AlwaysMigrate: false, ScalingEnabled: true, }) - defaultFormat := "{Name:foo Namespace:bar Cpu:{Min:1 Max:5 Use:3.75} Mem:{Min:2 Max:6 Use:4 SlotSize:1Gi} ScalingConfig:&{LoadAverageFractionTarget:0.7} AlwaysMigrate:false ScalingEnabled:true}" - goSyntaxRepr := `api.VmInfo{Name:"foo", Namespace:"bar", Cpu:api.VmCpuInfo{Min:api.MilliCPU(1000), Max:api.MilliCPU(5000), Use:api.MilliCPU(3750)}, Mem:api.VmMemInfo{Min:2, Max:6, Use:4, SlotSize:&resource.Quantity{i:resource.int64Amount{value:1073741824, scale:0}, d:resource.infDecAmount{Dec:(*inf.Dec)(nil)}, s:"1Gi", Format:"BinarySI"}}, ScalingConfig:&api.ScalingConfig{LoadAverageFractionTarget:0.7}, AlwaysMigrate:false, ScalingEnabled:true}` + defaultFormat := "{Name:foo Namespace:bar Cpu:{Min:1 Max:5 Use:3.75} Mem:{Min:2 Max:6 Use:4 SlotSize:1Gi} ScalingConfig:&{LoadAverageFractionTarget:0.7 MemoryUsageFractionTarget:0.7} AlwaysMigrate:false ScalingEnabled:true}" + goSyntaxRepr := `api.VmInfo{Name:"foo", Namespace:"bar", Cpu:api.VmCpuInfo{Min:api.MilliCPU(1000), Max:api.MilliCPU(5000), Use:api.MilliCPU(3750)}, Mem:api.VmMemInfo{Min:2, Max:6, Use:4, SlotSize:&resource.Quantity{i:resource.int64Amount{value:1073741824, scale:0}, d:resource.infDecAmount{Dec:(*inf.Dec)(nil)}, s:"1Gi", Format:"BinarySI"}}, ScalingConfig:&api.ScalingConfig{LoadAverageFractionTarget:0.7, MemoryUsageFractionTarget:0.7}, AlwaysMigrate:false, ScalingEnabled:true}` cases := []struct { name string expected string diff --git a/pkg/billing/client.go b/pkg/billing/client.go index 2514744f2..6dfcf0334 100644 --- a/pkg/billing/client.go +++ b/pkg/billing/client.go @@ -11,6 +11,7 @@ import ( "time" "github.com/google/uuid" + "github.com/lithammer/shortuuid" ) type Client struct { @@ -27,58 +28,53 @@ func NewClient(url string, c *http.Client) Client { return Client{BaseURL: url, httpc: c, hostname: hostname} } -func (c Client) NewBatch() *Batch { return &Batch{c: c, events: nil} } - -type Batch struct { - // Q: does this need a mutex? 
- c Client - events []any -} - -// Count returns the number of events in the batch -func (b *Batch) Count() int { - return len(b.events) +func (c Client) Hostname() string { + return c.hostname } -func (b *Batch) idempotenize(key string) string { - if key != "" { - return key - } +type TraceID string - return fmt.Sprintf("Host<%s>:ID<%s>:T<%s>", b.c.hostname, uuid.NewString(), time.Now().Format(time.RFC3339)) +func (c Client) GenerateTraceID() TraceID { + return TraceID(shortuuid.New()) } -func (b *Batch) AddAbsoluteEvent(e AbsoluteEvent) { - e.Type = "absolute" - e.IdempotencyKey = b.idempotenize(e.IdempotencyKey) - b.events = append(b.events, &e) -} +// Enrich sets the event's Type and IdempotencyKey fields, so that users of this API don't need to +// manually set them +func Enrich[E Event](hostname string, event E) E { + event.setType() + + key := event.getIdempotencyKey() + if *key == "" { + *key = fmt.Sprintf("Host<%s>:ID<%s>:T<%s>", hostname, uuid.NewString(), time.Now().Format(time.RFC3339)) + } -func (b *Batch) AddIncrementalEvent(e IncrementalEvent) { - e.Type = "incremental" - e.IdempotencyKey = b.idempotenize(e.IdempotencyKey) - b.events = append(b.events, &e) + return event } -func (b *Batch) Send(ctx context.Context) error { - if len(b.events) == 0 { +// Send attempts to push the events to the remote endpoint. +// +// On failure, the error is guaranteed to be one of: JSONError, RequestError, or +// UnexpectedStatusCodeError. +func Send[E Event](ctx context.Context, client Client, traceID TraceID, events []E) error { + if len(events) == 0 { return nil } payload, err := json.Marshal(struct { - Events []any `json:"events"` - }{Events: b.events}) + Events []E `json:"events"` + }{Events: events}) if err != nil { return err } - r, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/usage_events", b.c.BaseURL), bytes.NewReader(payload)) + r, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("%s/usage_events", client.BaseURL), bytes.NewReader(payload)) if err != nil { return err } r.Header.Set("content-type", "application/json") + r.Header.Set("x-trace-id", string(traceID)) - resp, err := b.c.httpc.Do(r) + resp, err := client.httpc.Do(r) if err != nil { return err } @@ -87,9 +83,8 @@ func (b *Batch) Send(ctx context.Context) error { // theoretically if wanted/needed, we should use an http handler that // does the retrying, to avoid writing that logic here. if resp.StatusCode != http.StatusOK { - return fmt.Errorf("got code %d, posting %d events", resp.StatusCode, len(b.events)) + return fmt.Errorf("got code %d, posting %d events", resp.StatusCode, len(events)) } - b.events = nil return nil } diff --git a/pkg/billing/model.go b/pkg/billing/model.go index c3fcedb53..f66072bfb 100644 --- a/pkg/billing/model.go +++ b/pkg/billing/model.go @@ -4,6 +4,32 @@ import ( "time" ) +type Event interface { + *AbsoluteEvent | *IncrementalEvent + + // eventMethods must be separate from Event so that we can assert that *AbsoluteEvent and + // *IncrementalEvent both implement it - Go does not allow converting to a value of type Event + // because it contains "*AbsoluteEvent | *IncrementalEvent", and such constraints can only be + // used inside of generics. + eventMethods +} + +// eventMethods is a requirement for Event, but exists separately so that we can assert that the +// event types implement it. 
+// +// The reason this interface even exists in the first place is because we're not allowed to assume +// that a type E implementing Event actually has the common fields from AbsoluteEvent and +// IncrementalEvent, even though it's constrained to either of those types. +type eventMethods interface { + setType() + getIdempotencyKey() *string +} + +var ( + _ eventMethods = (*AbsoluteEvent)(nil) + _ eventMethods = (*IncrementalEvent)(nil) +) + type AbsoluteEvent struct { IdempotencyKey string `json:"idempotency_key"` MetricName string `json:"metric"` @@ -14,6 +40,16 @@ type AbsoluteEvent struct { Value int `json:"value"` } +// setType implements eventMethods +func (e *AbsoluteEvent) setType() { + e.Type = "absolute" +} + +// getIdempotencyKey implements eventMethods +func (e *AbsoluteEvent) getIdempotencyKey() *string { + return &e.IdempotencyKey +} + type IncrementalEvent struct { IdempotencyKey string `json:"idempotency_key"` MetricName string `json:"metric"` @@ -23,3 +59,13 @@ type IncrementalEvent struct { StopTime time.Time `json:"stop_time"` Value int `json:"value"` } + +// setType implements eventMethods +func (e *IncrementalEvent) setType() { + e.Type = "incremental" +} + +// getIdempotencyKey implements eventMethods +func (e *IncrementalEvent) getIdempotencyKey() *string { + return &e.IdempotencyKey +} diff --git a/pkg/informant/agent.go b/pkg/informant/agent.go deleted file mode 100644 index 87ba98f6f..000000000 --- a/pkg/informant/agent.go +++ /dev/null @@ -1,875 +0,0 @@ -package informant - -// This file contains the "client" methods for communicating with an autoscaler-agent - -import ( - "bytes" - "context" - "encoding/json" - "errors" - "fmt" - "io" - "net/http" - "sync" - "time" - - "github.com/google/uuid" - "go.uber.org/zap" - "go.uber.org/zap/zapcore" - "golang.org/x/exp/slices" - - "github.com/neondatabase/autoscaling/pkg/api" - "github.com/neondatabase/autoscaling/pkg/util" -) - -// The VM informant currently supports v1.1 and v1.2 of the agent<->informant protocol. -// -// If you update either of these values, make sure to also update VERSIONING.md. -const ( - MinProtocolVersion api.InformantProtoVersion = api.InformantProtoV2_0 - MaxProtocolVersion api.InformantProtoVersion = api.InformantProtoV2_0 -) - -// AgentSet is the global state handling various autoscaler-agents that we could connect to -type AgentSet struct { - lock util.ChanMutex - - baseLogger *zap.Logger - - // current is the agent we're currently communciating with. If there is none, then this value is - // nil - // - // This value may (temporarily) be nil even when there are other agents waiting in byIDs/byTime, - // because we rely on tryNewAgents to handle setting the value here. - current *Agent - - // wantsMemoryUpscale is true if the most recent (internal) request for immediate upscaling has - // not yet been answered (externally) by notification of an upscale from the autoscaler-agent. - wantsMemoryUpscale bool - - // byIDs stores all of the agents, indexed by their unique IDs - byIDs map[uuid.UUID]*Agent - // byTime stores all of the *successfully registered* agents, sorted in increasing order of - // their initial /register request. Agents that we're currently in the process of handling will - // be present in byIDs, but not here. - byTime []*Agent - - tryNewAgent chan<- struct{} -} - -type Agent struct { - // lock is required for accessing the mutable fields of this struct: parent and lastSeqNumber. - lock sync.Mutex - - baseLogger *zap.Logger - - // parent is the AgentSet containing this Agent. 
It is always non-nil, up until this Agent is - // unregistered with EnsureUnregistered() - parent *AgentSet - - // suspended is true if this Agent was last sent a request on /suspend. This is only ever set by - suspended bool - - // unregistered signalled when the agent is unregistered (due to an error or an /unregister - // request) - unregistered util.SignalReceiver - // Sending half of unregistered — only used by EnsureUnregistered() - signalUnregistered util.SignalSender - - id uuid.UUID - serverAddr string - - protoVersion api.InformantProtoVersion - - // all sends on requestQueue are made through the doRequest method; all receives are made from - // the runHandler background task. - requestQueue chan agentRequest - lastSeqNumber uint64 -} - -type agentRequest struct { - ctx context.Context - done util.SignalSender - doRequest func(context.Context, *http.Client) -} - -// NewAgentSet creates a new AgentSet and starts the necessary background tasks -// -// On completion, the background tasks should be ended with the Stop method. -func NewAgentSet(logger *zap.Logger) *AgentSet { - tryNewAgent := make(chan struct{}) - - agents := &AgentSet{ - lock: util.NewChanMutex(), - baseLogger: logger.Named("agent-set"), - current: nil, - wantsMemoryUpscale: false, - byIDs: make(map[uuid.UUID]*Agent), - byTime: []*Agent{}, - tryNewAgent: tryNewAgent, - } - - go agents.lock.DeadlockChecker(CheckDeadlockTimeout, CheckDeadlockDelay)(context.TODO()) - go agents.tryNewAgents(agents.baseLogger.Named("try-new-agents"), tryNewAgent) - return agents -} - -// Helper function to construct a zap.Field giving the necessary context for a particular -// autoscaler-agent -func agentZapField(id uuid.UUID, addr string) zap.Field { - return zap.Object("agent", zapcore.ObjectMarshalerFunc(func(enc zapcore.ObjectEncoder) error { - enc.AddString("id", id.String()) - enc.AddString("addr", addr) - return nil - })) -} - -// abbreviation for agentZapField(a.id, a.serveAddr) for when you're working with an Agent object directly -func (a *Agent) zapField() zap.Field { - return agentZapField(a.id, a.serverAddr) -} - -func (s *AgentSet) tryNewAgents(logger *zap.Logger, signal <-chan struct{}) { - // note: we don't close this. Sending stops when the context is done, and every read from this - // channel also handles the context being cancelled. - aggregate := make(chan struct{}) - - // Helper function to coalesce repeated incoming signals into a single output, so that we don't - // block anything from sending on signal - go func() { - noSignal: - <-signal - - yesSignal: - select { - case <-signal: - goto yesSignal - case aggregate <- struct{}{}: - goto noSignal - } - }() - - for { - <-aggregate - - // Loop through applicable Agents - loopThroughAgents: - for { - // Remove any duplicate signals from aggregate if there are any - select { - case <-aggregate: - default: - } - - candidate := func() *Agent { - s.lock.Lock() - defer s.lock.Unlock() - - if len(s.byTime) == 0 || s.current == s.byTime[len(s.byTime)-1] { - return nil - } - - return s.byTime[len(s.byTime)-1] - }() - - // If there's no remaining candidates, stop trying. - if candidate == nil { - break loopThroughAgents - } - - // Do we need to resume the agent? We will use this later - shouldResume := func() bool { - candidate.lock.Lock() - defer candidate.lock.Unlock() - - wasSuspended := candidate.suspended - candidate.suspended = false - return !wasSuspended - }() - - // Get the current agent, which we would like to replace with the candidate. 
- // We should suspend the old agent. - oldCurrent := func() (old *Agent) { - s.lock.Lock() - defer s.lock.Unlock() - - if s.current != nil { - s.current.suspended = true - } - - return s.current - }() - - if oldCurrent != nil { - handleError := func(err error) { - if errors.Is(err, context.Canceled) { - return - } - - logger.Warn("Error suspending previous Agent", oldCurrent.zapField(), zap.Error(err)) - } - - // Suspend the old agent - oldCurrent.Suspend(logger, AgentSuspendTimeout, handleError) - } - - if shouldResume { - if err := candidate.Resume(logger, AgentResumeTimeout); err != nil { - // From Resume(): - // - // > If the Agent becomes unregistered [ ... ] this method will return - // > context.Canceled - if err == context.Canceled { //nolint:errorlint // explicit error value guarantee from Resume() - continue loopThroughAgents - } - - // From Resume(): - // - // > If the request fails, the Agent will be unregistered - // - // We don't have to worry about anything extra here; just keep trying. - if err != nil { - logger.Warn("Error on Agent resume", candidate.zapField(), zap.Error(err)) - continue loopThroughAgents - } - } - } - - // Set the new agent, and do an upscale if it was requested. - func() { - s.lock.Lock() - defer s.lock.Unlock() - - s.current = candidate - - if s.wantsMemoryUpscale { - s.current.SpawnRequestUpscale(logger, AgentUpscaleTimeout, func(err error) { - if errors.Is(err, context.Canceled) { - return - } - - // note: explicitly refer to candidate here instead of s.current, because - // the value of s.current could have changed by the time this function is - // called. - logger.Error("Error requesting upscale from Agent", candidate.zapField(), zap.Error(err)) - }) - } - }() - } - } -} - -// RegisterNewAgent instantiates our local information about the autsocaler-agent -// -// Returns: protocol version, status code, error (if unsuccessful) -func (s *AgentSet) RegisterNewAgent(logger *zap.Logger, info *api.AgentDesc) (api.InformantProtoVersion, int, error) { - expectedRange := api.VersionRange[api.InformantProtoVersion]{ - Min: MinProtocolVersion, - Max: MaxProtocolVersion, - } - - descProtoRange := info.ProtocolRange() - - protoVersion, matches := expectedRange.LatestSharedVersion(descProtoRange) - if !matches { - return 0, 400, fmt.Errorf( - "Protocol version mismatch: Need %v but got %v", expectedRange, descProtoRange, - ) - } - - unregisterSend, unregisterRecv := util.NewSingleSignalPair() - - agent := &Agent{ - lock: sync.Mutex{}, - - baseLogger: s.baseLogger.Named("agent").With(agentZapField(info.AgentID, info.ServerAddr)), - parent: s, - - suspended: false, - unregistered: unregisterRecv, - signalUnregistered: unregisterSend, - - id: info.AgentID, - serverAddr: info.ServerAddr, - - protoVersion: protoVersion, - - lastSeqNumber: 0, - requestQueue: make(chan agentRequest), - } - - // Try to add the agent, if we can. 
- isDuplicate := func() bool { - s.lock.Lock() - defer s.lock.Unlock() - - if _, ok := s.byIDs[info.AgentID]; ok { - return true - } - - s.byIDs[info.AgentID] = agent - return false - }() - - if isDuplicate { - return 0, 409, fmt.Errorf("Agent with ID %s is already registered", info.AgentID) - } - - go agent.runHandler() - go agent.runBackgroundChecker() - - if err := agent.CheckID(logger, AgentBackgroundCheckTimeout); err != nil { - return 0, 400, fmt.Errorf( - "Error checking ID for agent %s/%s: %w", agent.serverAddr, agent.id, err, - ) - } - - // note: At this point, the agent has been appropriately established, but we haven't added it to - // the AgentSet's list of successfully registered Agents - func() { - // We have to acquire a lock on the Agent state here so that we don't have a race from a - // concurrent call to EnsureUnregistered(). - agent.lock.Lock() - defer agent.lock.Unlock() - - if agent.parent == nil { - // Something caused the Agent to be unregistered. We don't know what, but it wasn't the - // fault of this request. Because there's no strict happens-before relation here, we can - // pretend like the error happened after the request was fully handled, and return a - // success. - logger.Warn("Agent was unregistered before register completed", agent.zapField()) - return - } - - s.lock.Lock() - defer s.lock.Unlock() - - s.byTime = append(s.byTime, agent) - s.tryNewAgent <- struct{}{} - }() - - return protoVersion, 200, nil -} - -// RequestUpscale requests an immediate upscale for more memory, if there's an agent currently -// enabled -// -// If there's no current agent, then RequestUpscale marks the upscale as desired, and will request -// upscaling from the next agent we connect to. -func (s *AgentSet) RequestUpscale(logger *zap.Logger) { - // FIXME: we should assign a timeout to these upscale requests, so that we don't continue trying - // to upscale after the demand has gone away. - - agent := func() *Agent { - s.lock.Lock() - defer s.lock.Unlock() - - // If we already have an ongoing request, don't create a new one. - if s.wantsMemoryUpscale { - return nil - } - - s.wantsMemoryUpscale = true - return s.current - }() - - if agent == nil { - return - } - - // FIXME: it's possible to block for an unbounded amount of time waiting for the request to get - // picked up by the message queue. We *do* want backpressure here, but we should ideally have a - // way to cancel an attempted request if it's taking too long. - agent.SpawnRequestUpscale(logger, AgentUpscaleTimeout, func(err error) { - if errors.Is(err, context.Canceled) { - return - } - - s.baseLogger.Error("Error requesting upscale from current Agent", agent.zapField(), zap.Error(err)) - }) -} - -// ReceivedUpscale marks any desired upscaling from a prior s.RequestUpscale() as resolved -// -// Typically, (*CgroupState).ReceivedUpscale() is also called alongside this method. -func (s *AgentSet) ReceivedUpscale() { - s.lock.Lock() - defer s.lock.Unlock() - - s.wantsMemoryUpscale = false -} - -// Returns the current agent, which can be nil -func (s *AgentSet) Current() *Agent { - s.lock.Lock() - defer s.lock.Unlock() - return s.current -} - -// Returns the id of the AgentSet's current agent as a string. 
If the current agent is nil, -// returns "" -func (s *AgentSet) CurrentIdStr() string { - if current := s.Current(); current == nil { - return "" - } else { - return current.id.String() - } -} - -// Get returns the requested Agent, if it exists -func (s *AgentSet) Get(id uuid.UUID) (_ *Agent, ok bool) { - s.lock.Lock() - defer s.lock.Unlock() - - agent, ok := s.byIDs[id] - return agent, ok -} - -// runHandler receives inputs from the requestSet and dispatches them -func (a *Agent) runHandler() { - logger := a.baseLogger.Named("request-dispatcher") - - client := http.Client{ - CheckRedirect: func(req *http.Request, via []*http.Request) error { - err := fmt.Errorf("Unexpected redirect getting %s", req.URL) - logger.Warn(err.Error()) - return err - }, - } - - defer client.CloseIdleConnections() - - for { - // Ignore items in the requestQueue if the Agent's been unregistered. - select { - case <-a.unregistered.Recv(): - return - default: - } - - select { - case <-a.unregistered.Recv(): - return - case req := <-a.requestQueue: - func() { - reqCtx, cancel := context.WithCancel(req.ctx) - defer cancel() - - done := make(chan struct{}) - go func() { - defer req.done.Send() - defer close(done) - req.doRequest(reqCtx, &client) - }() - - select { - case <-a.unregistered.Recv(): - cancel() - // Even if we've just cancelled it, we have to wait on done so that we know the - // http.Client won't be used by other goroutines - <-done - case <-done: - } - }() - } - } -} - -// runBackgroundChecker performs periodic checks that the Agent is still available -func (a *Agent) runBackgroundChecker() { - logger := a.baseLogger.Named("background-checker") - - for { - select { - case <-a.unregistered.Recv(): - return - case <-time.After(AgentBackgroundCheckDelay): - // all good - } - - done := func() bool { - if err := a.CheckID(logger, AgentBackgroundCheckTimeout); err != nil { - // If this request was cancelled (because the agent was unregistered), we're done. - // We can't check a.unregistered because CheckID will already unregister on failure - // anyways. - if errors.Is(err, context.Canceled) { - return true - } - - logger.Warn("Agent background check failed", zap.Error(err)) - return true - } - - return false - }() - - if done { - return - } - } -} - -// doRequest is the generic wrapper around requests to the autoscaler-agent to ensure that we're -// only sending one at a time AND we appropriately keep track of sequence numbers. -// -// We can only send one at a time because http.Client isn't thread-safe, and we want to re-use it -// between requests so that we can keep the TCP connections alive. -// -// There are no guarantees made about the equality or content of errors returned from this function. 
-func doRequest[B any, R any]( - agent *Agent, - timeout time.Duration, - method string, - path string, - body *B, -) (_ *R, old bool, _ error) { - return doRequestWithStartSignal[B, R]( - agent, timeout, nil, method, path, body, - ) -} - -func doRequestWithStartSignal[B any, R any]( - agent *Agent, - timeout time.Duration, - start *util.SignalSender, - method string, - path string, - body *B, -) (_ *R, old bool, _ error) { - logger := agent.baseLogger.Named("http") - - outerContext, cancel := context.WithTimeout(context.TODO(), timeout) - defer cancel() - - var ( - responseBody api.AgentMessage[R] - oldSeqNum bool - requestErr error - ) - - sendDone, recvDone := util.NewSingleSignalPair() - - url := fmt.Sprintf("http://%s%s", agent.serverAddr, path) - - req := agentRequest{ - ctx: outerContext, - done: sendDone, - doRequest: func(ctx context.Context, client *http.Client) { - bodyBytes, err := json.Marshal(body) - if err != nil { - requestErr = fmt.Errorf("Error encoding JSON body: %w", err) - return - } - - req, err := http.NewRequestWithContext(ctx, method, url, bytes.NewReader(bodyBytes)) - if err != nil { - requestErr = fmt.Errorf("Error creating request: %w", err) - return - } - - logger.Info("Sending request to agent", zap.String("path", path), zap.Any("request", body)) - - resp, err := client.Do(req) - if err != nil { - requestErr = err - return - } - - defer resp.Body.Close() - - respBodyBytes, err := io.ReadAll(resp.Body) - if err != nil { - requestErr = fmt.Errorf("Error reading response body: %w", err) - return - } - if resp.StatusCode != 200 { - requestErr = fmt.Errorf( - "Unsuccessful response status %d: %s", - resp.StatusCode, string(respBodyBytes), - ) - return - } - if err := json.Unmarshal(respBodyBytes, &responseBody); err != nil { - requestErr = fmt.Errorf("Error reading response as JSON: %w", err) - return - } - - logger.Info("Received response from agent", zap.String("path", path), zap.Any("response", responseBody)) - - if responseBody.SequenceNumber == 0 { - requestErr = errors.New("Got invalid sequence number 0") - return - } - - // Acquire the Agent's lock so we can check the sequence number - agent.lock.Lock() - defer agent.lock.Unlock() - - if agent.lastSeqNumber < responseBody.SequenceNumber { - agent.lastSeqNumber = responseBody.SequenceNumber - } else { - oldSeqNum = true - } - }, - } - - // Try to queue the request - select { - case <-outerContext.Done(): - // Timeout reached - return nil, false, outerContext.Err() - case <-agent.unregistered.Recv(): - return nil, false, context.Canceled - case agent.requestQueue <- req: - // Continue as normal - } - - if start != nil { - start.Send() - } - - // At this point, runHandler is appropriately handling the request, and will call - // sendDone.Send() the attempt at the request is finished. We don't need to worry about handling - // timeouts & unregistered Agents ourselves. - <-recvDone.Recv() - - if requestErr != nil { - return nil, oldSeqNum, requestErr - } else { - return &responseBody.Data, oldSeqNum, nil - } -} - -// EnsureUnregistered unregisters the Agent if it is currently registered, signalling the AgentSet -// to use a new Agent if it isn't already -// -// Returns whether the agent was the current Agent in use. 
-func (a *Agent) EnsureUnregistered(logger *zap.Logger) (wasCurrent bool) { - logger = logger.With(a.zapField()) - - a.lock.Lock() - defer a.lock.Unlock() - - if a.parent == nil { - return - } - - logger.Info("Unregistering agent") - - a.signalUnregistered.Send() - - a.parent.lock.Lock() - defer a.parent.lock.Unlock() - - if _, ok := a.parent.byIDs[a.id]; ok { - delete(a.parent.byIDs, a.id) - } else { - logger.DPanic("Invalid state. Ignoring and continuing.", zap.String("error", "agent is registered but not in parent's agents map")) - } - - if idx := slices.Index(a.parent.byTime, a); idx >= 0 { - a.parent.byTime = slices.Delete(a.parent.byTime, idx, idx+1) - } - - if a.parent.current == a { - wasCurrent = true - a.parent.current = nil - a.parent.tryNewAgent <- struct{}{} - } - - a.parent = nil - - return -} - -// CheckID checks that the Agent's ID matches what's expected -// -// If the agent has already been registered, then a failure in this method will unregister the -// agent. -// -// If the Agent is unregistered before the call to CheckID() completes, the request will be cancelled -// and this method will return context.Canceled. -func (a *Agent) CheckID(logger *zap.Logger, timeout time.Duration) error { - // Quick unregistered check: - select { - case <-a.unregistered.Recv(): - logger.Warn("CheckID called for Agent that is already unregistered (probably *not* a race?)", a.zapField()) - return context.Canceled - default: - } - - body := struct{}{} - id, _, err := doRequest[struct{}, api.AgentIdentification](a, timeout, http.MethodGet, "/id", &body) - - select { - case <-a.unregistered.Recv(): - return context.Canceled - default: - } - - if err != nil { - a.EnsureUnregistered(logger) - return err - } - - if id.AgentID != a.id { - a.EnsureUnregistered(logger) - return fmt.Errorf("Bad agent identification: expected %q but got %q", a.id, id.AgentID) - } - - return nil -} - -// Suspend signals to the Agent that it is not *currently* in use, sending a request to its /suspend -// endpoint -// -// If the Agent is unregistered before the call to Suspend() completes, the request will be -// cancelled and this method will return context.Canceled. -// -// If the request fails, the Agent will be unregistered. -func (a *Agent) Suspend(logger *zap.Logger, timeout time.Duration, handleError func(error)) { - // Quick unregistered check: - select { - case <-a.unregistered.Recv(): - logger.Warn("Suspend called for Agent that is already unregistered (probably *not* a race?)", a.zapField()) - handleError(context.Canceled) - return - default: - } - - body := api.SuspendAgent{ExpectedID: a.id} - id, _, err := doRequest[api.SuspendAgent, api.AgentIdentification]( - a, timeout, http.MethodPost, "/suspend", &body, - ) - - select { - case <-a.unregistered.Recv(): - handleError(context.Canceled) - return - default: - } - - if err != nil { - a.EnsureUnregistered(logger) - handleError(err) - return - } - - if id.AgentID != a.id { - a.EnsureUnregistered(logger) - handleError(fmt.Errorf("Bad agent identification: expected %q but got %q", a.id, id.AgentID)) - return - } - - a.suspended = false -} - -// Resume attempts to restore the Agent as the current one in use, sending a request to its /resume -// endpoint -// -// If the Agent is unregistered before the call to Resume() completes, the request will be cancelled -// and this method will return context.Canceled. -// -// If the request fails, the Agent will be unregistered. 
-func (a *Agent) Resume(logger *zap.Logger, timeout time.Duration) error { - // Quick unregistered check: - select { - case <-a.unregistered.Recv(): - logger.Warn("Resume called for Agent that is already unregistered (probably *not* a race?)", a.zapField()) - return context.Canceled - default: - } - - body := api.ResumeAgent{ExpectedID: a.id} - id, _, err := doRequest[api.ResumeAgent, api.AgentIdentification]( - a, timeout, http.MethodPost, "/resume", &body, - ) - - select { - case <-a.unregistered.Recv(): - return context.Canceled - default: - } - - if err != nil { - a.EnsureUnregistered(logger) - return err - } - - if id.AgentID != a.id { - a.EnsureUnregistered(logger) - return fmt.Errorf("Bad agent identification: expected %q but got %q", a.id, id.AgentID) - } - - return nil -} - -// SpawnRequestUpscale requests that the Agent increase the resource allocation to this VM -// -// This method blocks until the request is picked up by the message queue, and returns without -// waiting for the request to complete (it'll do that on its own). -// -// The timeout applies only once the request is in-flight. -// -// This method MUST NOT be called while holding a.parent.lock; if that happens, it may deadlock. -func (a *Agent) SpawnRequestUpscale(logger *zap.Logger, timeout time.Duration, handleError func(error)) { - // Quick unregistered check - select { - case <-a.unregistered.Recv(): - logger.Warn("RequestUpscale called for Agent that is already unregistered (probably *not* a race?)", a.zapField()) - handleError(context.Canceled) - return - default: - } - - sendDone, recvDone := util.NewSingleSignalPair() - - go func() { - // If we exit early, signal that we're done. - defer sendDone.Send() - - unsetWantsUpscale := func() { - // Unset s.wantsMemoryUpscale if the agent is still current. We want to allow further - // requests to try again. - a.parent.lock.Lock() - defer a.parent.lock.Unlock() - - if a.parent.current == a { - a.parent.wantsMemoryUpscale = false - } - } - - body := api.MoreResourcesRequest{ - MoreResources: api.MoreResources{Cpu: false, Memory: true}, - ExpectedID: a.id, - } - // Pass the signal sender into doRequestWithStartSignal so that the signalling on - // start-of-handling is done for us. - id, _, err := doRequestWithStartSignal[api.MoreResourcesRequest, api.AgentIdentification]( - a, timeout, &sendDone, http.MethodPost, "/try-upscale", &body, - ) - - select { - case <-a.unregistered.Recv(): - handleError(context.Canceled) - return - default: - } - - if err != nil { - unsetWantsUpscale() - a.EnsureUnregistered(logger) - handleError(err) - return - } - - if id.AgentID != a.id { - unsetWantsUpscale() - a.EnsureUnregistered(logger) - handleError(fmt.Errorf("Bad agent identification: expected %q but got %q", a.id, id.AgentID)) - return - } - }() - - <-recvDone.Recv() -} diff --git a/pkg/informant/cgroup.go b/pkg/informant/cgroup.go deleted file mode 100644 index 9b1d8fcde..000000000 --- a/pkg/informant/cgroup.go +++ /dev/null @@ -1,312 +0,0 @@ -package informant - -// Informant-specific usage and logic around cgroups, using CgroupManager. - -import ( - "fmt" - "sync" - "time" - - sysinfo "github.com/elastic/go-sysinfo" - sysinfotypes "github.com/elastic/go-sysinfo/types" - "go.uber.org/zap" - - "github.com/neondatabase/autoscaling/pkg/util" -) - -// CgroupState provides the high-level cgroup handling logic, building upon the low-level plumbing -// provided by CgroupManager. 
-type CgroupState struct { - // updateMemLimitsLock guards access to setting the cgroup's memory.high and memory.max - updateMemLimitsLock sync.Mutex - - mgr *CgroupManager - config CgroupConfig - - upscaleEventsSendr util.CondChannelSender - upscaleEventsRecvr util.CondChannelReceiver - - requestUpscale func(*zap.Logger) -} - -// CgroupConfig provides some configuration options for State cgroup handling -type CgroupConfig struct { - // OOMBufferBytes gives the target difference between the total memory reserved for the cgroup - // and the value of the cgroup's memory.high. - // - // In other words, memory.high + OOMBufferBytes will equal the total memory that the cgroup may - // use (equal to system memory, minus whatever's taken out for the file cache). - OOMBufferBytes uint64 - - // MemoryHighBufferBytes gives the amount of memory, in bytes, below a proposed new value for - // memory.high that the cgroup's memory usage must be for us to downscale - // - // In other words, we can downscale only when: - // - // memory.current + MemoryHighBufferBytes < (proposed) memory.high - // - // TODO: there's some minor issues with this approach -- in particular, that we might have - // memory in use by the kernel's page cache that we're actually ok with getting rid of. - MemoryHighBufferBytes uint64 - - // MaxUpscaleWaitMillis gives the maximum duration, in milliseconds, that we're allowed to pause - // the cgroup for while waiting for the autoscaler-agent to upscale us - MaxUpscaleWaitMillis uint - - // DoNotFreezeMoreOftenThanMillis gives a required minimum time, in milliseconds, that we must - // wait before re-freezing the cgroup while waiting for the autoscaler-agent to upscale us. - DoNotFreezeMoreOftenThanMillis uint - - // MemoryHighIncreaseByBytes gives the amount of memory, in bytes, that we should periodically - // increase memory.high by while waiting for the autoscaler-agent to upscale us. - // - // This exists to avoid the excessive throttling that happens when a cgroup is above its - // memory.high for too long. See more here: - // https://github.com/neondatabase/autoscaling/issues/44#issuecomment-1522487217 - MemoryHighIncreaseByBytes uint64 - - // MemoryHighIncreaseEveryMillis gives the period, in milliseconds, at which we should - // repeatedly increase the value of the cgroup's memory.high while we're waiting on upscaling - // and memory.high is still being hit. - // - // Technically speaking, this actually serves as a rate limit to moderate responding to - // memory.high events, but these are roughly equivalent if the process is still allocating - // memory. - MemoryHighIncreaseEveryMillis uint -} - -// ReceivedUpscale notifies s.upscaleEventsRecvr -// -// Typically, (*AgentSet).ReceivedUpscale() is also called alongside this method. -func (s *CgroupState) ReceivedUpscale() { - s.upscaleEventsSendr.Send() -} - -// mib is a helper function to format a quantity of bytes as a string -func mib(bytes uint64) string { - return fmt.Sprintf("%g MiB", float64(bytes)/float64(1<<20)) -} - -// setMemoryLimits updates the cgroup's value of memory.high and memory.max, according to the memory -// made available to the cgroup. -// -// This method MUST be called while holding s.updateMemLimitsLock. 
-func (s *CgroupState) setMemoryLimits(logger *zap.Logger, availableMemory uint64) error { - newMemHigh := s.config.calculateMemoryHighValue(availableMemory) - - logger.Info("Setting cgroup memory.high", - zap.String("availableMemory", mib(availableMemory)), - zap.String("target", mib(newMemHigh)), - ) - - s.mgr.MemoryHighEvent.Consume() - - memLimits := memoryLimits{ - highBytes: newMemHigh, - maxBytes: availableMemory, - } - if err := s.mgr.SetMemLimits(memLimits); err != nil { - return fmt.Errorf("Error setting cgroup %q memory limits: %w", s.mgr.name, err) - } - - logger.Info("Successfully set cgroup memory limits") - return nil -} - -// handleCgroupSignals is an internal function that handles "memory high" signals from the cgroup -func (s *CgroupState) handleCgroupSignalsLoop(logger *zap.Logger, config CgroupConfig) { - // FIXME: we should have "proper" error handling instead of just panicking. It's hard to - // determine what the correct behavior should be if a cgroup operation fails, though. - - waitingOnUpscale := false - - waitToIncreaseMemoryHigh := time.NewTimer(0) - waitToFreeze := time.NewTimer(0) - - // hey! Before reading this function, have a read through the fields of CgroupConfig - it'll be - // hard to understand the control flow that's going on here without that. - for { - // Wait for a new signal - select { - case err := <-s.mgr.ErrCh: - panic(fmt.Errorf("Error listening for cgroup signals: %w", err)) - case <-s.upscaleEventsRecvr.Recv(): - logger.Info("Received upscale event") - s.mgr.MemoryHighEvent.Consume() - - // note: Don't reset the timers. We still want to be precise about our rate limit, if - // upscale events are happening very frequently. - - case <-s.mgr.MemoryHighEvent.Recv(): - select { - case <-waitToFreeze.C: - var err error - - // Freeze the cgroup and request more memory (maybe duplicate - that'll be handled - // internally so we're not spamming the agent) - waitingOnUpscale, err = s.handleMemoryHighEvent(logger, config) - if err != nil { - panic(fmt.Errorf("Error handling memory high event: %w", err)) - } - waitToFreeze.Reset(time.Duration(config.DoNotFreezeMoreOftenThanMillis) * time.Millisecond) - default: - if !waitingOnUpscale { - logger.Info("Received memory.high event, but too soon to re-freeze. Requesting upscaling") - - // Too soon after the last freeze, but there's currently no unsatisfied - // upscaling requests. 
We should send a new one: - func() { - s.updateMemLimitsLock.Lock() - defer s.updateMemLimitsLock.Unlock() - - // Double-check we haven't already been upscaled (can happen if the agent - // independently decides to upscale us again) - select { - case <-s.upscaleEventsRecvr.Recv(): - logger.Info("No need to request upscaling because we were already upscaled") - return - default: - s.requestUpscale(logger) - } - }() - } else { - // Maybe increase memory.high to reduce throttling: - select { - case <-waitToIncreaseMemoryHigh.C: - logger.Info("Received memory.high event, too soon to re-freeze, but increasing memory.high") - - func() { - s.updateMemLimitsLock.Lock() - defer s.updateMemLimitsLock.Unlock() - - // Double-check we haven't already been upscaled (can happen if the - // agent independently decides to upscale us again) - select { - case <-s.upscaleEventsRecvr.Recv(): - logger.Info("No need to update memory.high because we were already upscaled") - return - default: - s.requestUpscale(logger) - } - - memHigh, err := s.mgr.FetchMemoryHighBytes() - if err != nil { - panic(fmt.Errorf("Error fetching memory.high: %w", err)) - } else if memHigh == nil { - panic(fmt.Errorf("memory.high is unset (equal to 'max') but should have been set to a value already")) - } - - newMemHigh := *memHigh + config.MemoryHighIncreaseByBytes - logger.Info( - "Updating memory.high", - zap.String("current", mib(*memHigh)), - zap.String("target", mib(newMemHigh)), - ) - - if err := s.mgr.SetMemHighBytes(newMemHigh); err != nil { - panic(fmt.Errorf("Error setting memory limits: %w", err)) - } - }() - - waitToIncreaseMemoryHigh.Reset(time.Duration(config.MemoryHighIncreaseEveryMillis) * time.Millisecond) - default: - // Can't do anything. - } - } - } - } - } -} - -// handleMemoryHighEvent performs the "freeze cgroup, request upscale, thaw cgroup" operation, in -// response to a "memory high" event for the cgroup -// -// This method waits on s.agents.UpscaleEvents(), so incorrect behavior will occur if it's called at -// the same time as anything else that waits on the upscale events. For that reason, both this -// function and s.setMemoryHigh() are dispatched from within s.handleCgroupSignalsLoop(). -func (s *CgroupState) handleMemoryHighEvent(logger *zap.Logger, config CgroupConfig) (waitingOnUpscale bool, _ error) { - locked := true - s.updateMemLimitsLock.Lock() - defer func() { - if locked { - s.updateMemLimitsLock.TryLock() - } - }() - - // If we've actually already received an upscale event, then we should ignore this memory.high - // event for the time being: - select { - case <-s.upscaleEventsRecvr.Recv(): - logger.Info("Skipping memory.high event because there was an upscale event") - return false, nil - default: - } - - logger.Info("Received memory high event. Freezing cgroup") - - // Immediately freeze the cgroup before doing anything else. 
- if err := s.mgr.Freeze(); err != nil { - return false, fmt.Errorf("Error freezing cgroup: %w", err) - } - - startTime := time.Now() - - // Start a timer for the maximum time we'll leave the cgroup frozen for: - maxWaitBeforeThaw := time.Millisecond * time.Duration(config.MaxUpscaleWaitMillis) - mustThaw := time.After(maxWaitBeforeThaw) - - logger.Info(fmt.Sprintf("Sending request for immediate upscaling, waiting for at most %s", maxWaitBeforeThaw)) - - s.requestUpscale(logger) - - // Unlock before waiting: - locked = false - s.updateMemLimitsLock.Unlock() - - var upscaled bool - - select { - case <-s.upscaleEventsRecvr.Recv(): - totalWait := time.Since(startTime) - logger.Info("Received notification that upscale occurred", zap.Duration("totalWait", totalWait)) - upscaled = true - case <-mustThaw: - totalWait := time.Since(startTime) - logger.Info("Timed out waiting for upscale", zap.Duration("totalWait", totalWait)) - } - - logger.Info("Thawing cgroup") - if err := s.mgr.Thaw(); err != nil { - return false, fmt.Errorf("Error thawing cgroup: %w", err) - } - - s.mgr.MemoryHighEvent.Consume() - - return !upscaled, nil -} - -// calculateMemoryHighValue calculates the new value for the cgroup's memory.high based on the total -// system memory. -func (c *CgroupConfig) calculateMemoryHighValue(totalSystemMem uint64) uint64 { - return util.SaturatingSub(totalSystemMem, c.OOMBufferBytes) -} - -// getCgroupCurrentMemory fetches the current total memory usgae of the cgroup, in bytes -func (s *CgroupState) getCurrentMemory() (uint64, error) { - return s.mgr.CurrentMemoryUsage() -} - -// getTotalSystemMemory fetches the system's total memory, in bytes -func getTotalSystemMemory() (*sysinfotypes.HostMemoryInfo, error) { - host, err := sysinfo.Host() - if err != nil { - return nil, fmt.Errorf("Error getting host info: %w", err) - } - - mem, err := host.Memory() - if err != nil { - return nil, fmt.Errorf("Error getting host memory info: %w", err) - } - - return mem, nil -} diff --git a/pkg/informant/cgroupmanager.go b/pkg/informant/cgroupmanager.go deleted file mode 100644 index 375d1fd80..000000000 --- a/pkg/informant/cgroupmanager.go +++ /dev/null @@ -1,331 +0,0 @@ -package informant - -// A lightweight wrapper around cgroup2.Manager, with a mix of convenience and extra functionality. - -import ( - "errors" - "fmt" - "os" - "path/filepath" - "strconv" - "strings" - "sync/atomic" - "time" - - cgroups "github.com/containerd/cgroups/v3" - "github.com/containerd/cgroups/v3/cgroup2" - "go.uber.org/zap" - "go.uber.org/zap/zapcore" - - "github.com/neondatabase/autoscaling/pkg/util" -) - -type CgroupManager struct { - MemoryHighEvent util.CondChannelReceiver - ErrCh <-chan error - - name string - manager *cgroup2.Manager -} - -func NewCgroupManager(logger *zap.Logger, groupName string) (*CgroupManager, error) { - mode := cgroups.Mode() - if mode != cgroups.Unified && mode != cgroups.Hybrid { - var modeString string - switch mode { - case cgroups.Unavailable: - modeString = "Unavailable" - case cgroups.Legacy: - modeString = "cgroups v1 ONLY" - default: - panic(fmt.Errorf("unexpected cgroups mode value %d", mode)) - } - - return nil, fmt.Errorf("cgroups v2 are not enabled, mode = %q", modeString) - } - - // note: cgroup2.Load expects the cgroup "path" to start with '/', rooted at "/sys/fs/cgroup" - // - // The final path of the cgroup will be "/sys/fs/cgroup" + , where is what we give - // cgroup2.Load(). 
- manager, err := cgroup2.Load(fmt.Sprint("/", groupName)) - if err != nil { - return nil, fmt.Errorf("Error loading cgroup: %w", err) - } - sendEvent, recvEvent := util.NewCondChannelPair() - - highEventCount := &atomic.Uint64{} - errCh := make(chan error, 1) - - cgm := &CgroupManager{ - MemoryHighEvent: recvEvent, - ErrCh: errCh, - name: groupName, - manager: manager, - } - - // Long-running handler task for memory events - go func() { - // FIXME: make this configurable - minWaitDuration := time.Second - var minWait <-chan time.Time - - // Restart the event loop whenever it gets closed. - // - // This can happen, for instance, when the last task in the cgroup ends. - for { - if minWait != nil { - select { - case <-minWait: - default: - logger.Warn( - "Respecting minimum wait delay before restarting memory.events listener", - zap.Duration("delay", minWaitDuration), - ) - <-minWait - } - logger.Info("Restarting memory.events listener") - } - - minWait = time.After(minWaitDuration) - - // FIXME: There's currently no way to stop the goroutine spawned by EventChan, so it - // doesn't yet make sense to provide a way to cancel the goroutine to handle its events. - // Eventually, we should either patch containerd/cgroups or write our own implementation - // here. - memEvents, eventErrCh := manager.EventChan() - - select { - case event := <-memEvents: - // This is *kind of* on the hot path — we actually do want this to be pretty quick. - // So it makes reasonable sense to use zap.Object instead of zap.Any, event though - // there's some boilerplate required for it. - logger.Info("New memory.events", zap.Object("events", marshalMemoryEvents(event))) - highCount := event.High - oldHighCount := util.AtomicMax(highEventCount, highCount) - - if highCount > oldHighCount { - sendEvent.Send() - } - case err, ok := <-eventErrCh: - if err == nil && !ok { - errCh <- errors.New("Memory event channel closed without error") - } else { - errCh <- fmt.Errorf("Error while waiting for memory events: %w", err) - } - return - } - } - }() - - // Fetch the current "memory high" count - current, err := parseMemoryEvents(logger, groupName) - if err != nil { - return nil, fmt.Errorf("Error getting current memory events: %w", err) - } - - logger.Info("Initial memory.events", zap.Object("events", marshalMemoryEvents(*current))) - - util.AtomicMax(highEventCount, current.High) - recvEvent.Consume() // Clear events - - return cgm, nil -} - -func marshalMemoryEvents(events cgroup2.Event) zapcore.ObjectMarshalerFunc { - return zapcore.ObjectMarshalerFunc(func(enc zapcore.ObjectEncoder) error { - // NB: we're using lower snake-case names that are present in the actual - // memory.events file, instead of the field names from cgroup2.Event - enc.AddUint64("low", events.Low) - enc.AddUint64("high", events.High) - enc.AddUint64("max", events.Max) - enc.AddUint64("oom", events.OOM) - enc.AddUint64("oom_kill", events.OOMKill) - return nil - }) -} - -// TODO: no way to do this with github.com/containerd/cgroups ? Seems like that should be -// exposed to the user... We *can* just parse it directly, but it's a bit annoying. 
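The event loop above only signals the handler when the `high` counter in `memory.events` actually increases, using `util.AtomicMax` to keep a monotonic record of the largest count seen. The helper below shows one plausible way such an atomic-max can be written (a compare-and-swap loop); it is a sketch, not the repository's `util.AtomicMax`.

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// atomicMax stores max(current, candidate) and returns the previous value, so the
// caller can tell whether the counter actually increased.
func atomicMax(v *atomic.Uint64, candidate uint64) (old uint64) {
	for {
		old = v.Load()
		if candidate <= old {
			return old
		}
		if v.CompareAndSwap(old, candidate) {
			return old
		}
	}
}

func main() {
	var highEventCount atomic.Uint64

	for _, observed := range []uint64{3, 3, 5} {
		old := atomicMax(&highEventCount, observed)
		if observed > old {
			fmt.Println("memory.high count rose to", observed, "- signal the handler")
		} else {
			fmt.Println("no new memory.high events (count still", old, ")")
		}
	}
}
```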
-func parseMemoryEvents(logger *zap.Logger, groupName string) (*cgroup2.Event, error) { - path := cgroupPath(groupName, "memory.events") - content, err := os.ReadFile(path) - if err != nil { - return nil, fmt.Errorf("Error reading file at %q: %w", path, err) - } - - // note: When we read the memory.events file, it tends to look something like: - // - // low 1 - // high 5 - // max 3 - // oom 1 - // oom_kill 0 - // - // (numbers are made up) - // - // This map represents the field names we know about. Newer versions of the Linux kernel *might* - // add new fields, but that'll probably happen slowly, so we emit warnings only when the field - // name isn't recognized. For each entry in the map: v is the value of the field, set is true if - // we've already parsed the value, and required is true if we need the value in order to build a - // cgroup2.Event. - valueMap := map[string]struct { - v uint64 - set bool - required bool - }{ - "low": {0, false, true}, - "high": {0, false, true}, - "max": {0, false, true}, - "oom": {0, false, true}, - "oom_kill": {0, false, true}, - "oom_group_kill": {0, false, false}, // Added in 5.17 - } - - lines := strings.Split(strings.TrimSpace(string(content)), "\n") - for i, line := range lines { - fields := strings.Fields(line) - if len(fields) != 2 { - return nil, fmt.Errorf( - "Line %d of %q is not expected format: has %d fields", i, path, len(fields), - ) - } - - name := fields[0] - value, err := strconv.ParseUint(fields[1], 10, 64) - if err != nil { - return nil, fmt.Errorf( - "Error parsing field on line %d of %q as integer: %w", i, path, err, - ) - } - - pair, ok := valueMap[name] - if !ok { - logger.Warn("Unrecognized memory.events field (is the kernel new?)", zap.String("field", name)) - continue - } else if pair.set { - return nil, fmt.Errorf("Duplicate field %q", name) - } - pair.v = value - pair.set = true - valueMap[name] = pair - } - - var unset []string - - // Check if there's any unset fields - for name, pair := range valueMap { - if !pair.set && pair.required { - unset = append(unset, name) - } - } - - if len(unset) != 0 { - return nil, fmt.Errorf("Some required fields not provided: %+v", unset) - } - - return &cgroup2.Event{ - Low: valueMap["low"].v, - High: valueMap["high"].v, - Max: valueMap["max"].v, - OOM: valueMap["oom"].v, - OOMKill: valueMap["oom_kill"].v, - }, nil -} - -// TODO: Open a PR in github.com/containerd/cgroups to expose this publicly. This function is -// *basically* just copied from there. -func fetchState(groupName string) (cgroup2.State, error) { - path := cgroupPath(groupName, "cgroup.freeze") - content, err := os.ReadFile(path) - if err != nil { - return cgroup2.Unknown, fmt.Errorf("Error reading file at %q: %w", path, err) - } - switch strings.TrimSpace(string(content)) { - case "1": - return cgroup2.Frozen, nil - case "0": - return cgroup2.Thawed, nil - default: - return cgroup2.Unknown, errors.New("Unexpected file content") - } -} - -// TODO: not great that we're implementing this function ourselves. It's required for fetchState and -// parseMemoryEvents, which we'd also like to get rid of. -func cgroupPath(groupName string, file string) string { - // note: it's ok to use slashes, because this can only run on linux anyways. - return filepath.Join("/sys/fs/cgroup", groupName, file) //nolint:gocritic // see comment above. -} - -type memoryLimits struct { - highBytes uint64 - maxBytes uint64 -} - -// SetMemLimits sets the cgroup's memory.high and memory.max to the values provided by the -// memoryLimits. 
-func (c *CgroupManager) SetMemLimits(limits memoryLimits) error { - // convert uint64 -> int64 so we can produce pointers - hb := int64(limits.highBytes) - mb := int64(limits.maxBytes) - return c.manager.Update(&cgroup2.Resources{ - Memory: &cgroup2.Memory{High: &hb, Max: &mb}, - }) -} - -func (c *CgroupManager) SetMemHighBytes(bytes uint64) error { - high := int64(bytes) - return c.manager.Update(&cgroup2.Resources{ - Memory: &cgroup2.Memory{ - High: &high, - }, - }) -} - -func (c *CgroupManager) FetchMemoryHighBytes() (*uint64, error) { - path := cgroupPath(c.name, "memory.high") - content, err := os.ReadFile(path) - if err != nil { - return nil, fmt.Errorf("Error reading file at %q: %w", path, err) - } - - stringContent := strings.TrimSpace(string(content)) - if stringContent == "max" { - return nil, nil - } - - amount, err := strconv.ParseUint(stringContent, 10, 64) - if err != nil { - return nil, fmt.Errorf("Error parsing as uint64: %w", err) - } - return &amount, nil -} - -// FetchState returns a cgroup2.State indicating whether the cgroup is currently frozen -func (c *CgroupManager) FetchState() (cgroup2.State, error) { - return fetchState(c.name) -} - -// CurrentMemoryUsage returns the value at memory.current -- the cgroup's current memory usage. -func (c *CgroupManager) CurrentMemoryUsage() (uint64, error) { - path := cgroupPath(c.name, "memory.current") - content, err := os.ReadFile(path) - if err != nil { - return 0, fmt.Errorf("Error reading file at %q: %w", path, err) - } - - amount, err := strconv.ParseUint(strings.TrimSpace(string(content)), 10, 64) - if err != nil { - return 0, fmt.Errorf("Error parsing as uint64: %w", err) - } - return amount, nil -} - -func (c *CgroupManager) Freeze() error { - return c.manager.Freeze() -} - -func (c *CgroupManager) Thaw() error { - return c.manager.Thaw() -} diff --git a/pkg/informant/consts.go b/pkg/informant/consts.go deleted file mode 100644 index 76045d232..000000000 --- a/pkg/informant/consts.go +++ /dev/null @@ -1,48 +0,0 @@ -package informant - -// Assorted constants that aren't worth having a configuration file for - -import ( - "time" -) - -const ( - PrometheusPort uint16 = 9100 - - CheckDeadlockDelay time.Duration = 1 * time.Second - CheckDeadlockTimeout time.Duration = 250 * time.Millisecond - - AgentBackgroundCheckDelay time.Duration = 10 * time.Second - AgentBackgroundCheckTimeout time.Duration = 250 * time.Millisecond - - AgentResumeTimeout time.Duration = 100 * time.Millisecond - AgentSuspendTimeout time.Duration = 5 * time.Second // may take a while; it /suspend intentionally waits - AgentUpscaleTimeout time.Duration = 400 * time.Millisecond // does not include waiting for /upscale response -) - -var ( - // DefaultStateConfig is the default state passed to NewState - DefaultStateConfig StateConfig = StateConfig{ - SysBufferBytes: 100 * (1 << 20), // 100 MiB - } - - // DefaultCgroupConfig is the default CgroupConfig used for cgroup interaction logic - DefaultCgroupConfig CgroupConfig = CgroupConfig{ - OOMBufferBytes: 100 * (1 << 20), // 100 MiB - MemoryHighBufferBytes: 100 * (1 << 20), // 100 MiB - // while waiting for upscale, don't freeze for more than 20ms every 1s - MaxUpscaleWaitMillis: 20, // 20ms - DoNotFreezeMoreOftenThanMillis: 1000, // 1s - // while waiting for upscale, increase memory.high by 10 MiB every 25ms - MemoryHighIncreaseByBytes: 10 * (1 << 20), // 10 MiB - MemoryHighIncreaseEveryMillis: 25, // 25ms - } - - // DefaultFileCacheConfig is the default FileCacheConfig used for managing the file cache - 
DefaultFileCacheConfig FileCacheConfig = FileCacheConfig{ - InMemory: true, - ResourceMultiplier: 0.75, // 75% - MinRemainingAfterCache: 640 * (1 << 20), // 640 MiB ; 640 = 512 + 128 - SpreadFactor: 0.1, // ensure any increase in file cache size is split 90-10 with 10% to other memory - } -) diff --git a/pkg/informant/endpoints.go b/pkg/informant/endpoints.go deleted file mode 100644 index a0ddd6ffd..000000000 --- a/pkg/informant/endpoints.go +++ /dev/null @@ -1,493 +0,0 @@ -package informant - -// This file contains the high-level handlers for various HTTP endpoints - -import ( - "context" - "errors" - "fmt" - "strings" - "sync" - "time" - - "go.uber.org/zap" - - "github.com/neondatabase/autoscaling/pkg/api" - "github.com/neondatabase/autoscaling/pkg/util" -) - -// State is the global state of the informant -type State struct { - config StateConfig - agents *AgentSet - cgroup *CgroupState - fileCache *FileCacheState - - // memReservedForFileCache stores the amount of memory that's currently reserved for the file - // cache. - // - // This field is mostly used during initialization, where it allows us to pass state from the - // file cache's startup hook to the cgroup's hook. - // - // There's definitely better ways of doing this, but the solution we have will work for now. - memReservedForFileCache uint64 -} - -type StateConfig struct { - // SysBufferBytes gives the estimated amount of memory, in bytes, that the kernel uses before - // handing out the rest to userspace. This value is the estimated difference between the - // *actual* physical memory and the amount reported by `grep MemTotal /proc/meminfo`. - // - // For more information, refer to `man 5 proc`, which defines MemTotal as "Total usable RAM - // (i.e., physical RAM minus a few reserved bits and the kernel binary code)". - // - // We only use SysBufferBytes when calculating the system memory from the *external* memory - // size, rather than the self-reported memory size, according to the kernel. - // - // TODO: this field is only necessary while we still have to trust the autoscaler-agent's - // upscale resource amounts (because we might not *actually* have been upscaled yet). This field - // should be removed once we have a better solution there. - SysBufferBytes uint64 -} - -// NewStateOpts are individual options provided to NewState -type NewStateOpts struct { - kind newStateOptKind - setFields func(*State) - post func(_ *zap.Logger, s *State, memTotal uint64) error -} - -type newStateOptKind int - -const ( - optCgroup newStateOptKind = iota - optFileCache -) - -// NewState instantiates a new State object, starting whatever background processes might be -// required -// -// Optional configuration may be provided by NewStateOpts - see WithCgroup and -// WithPostgresFileCache. -func NewState(logger *zap.Logger, agents *AgentSet, config StateConfig, opts ...NewStateOpts) (*State, error) { - if config.SysBufferBytes == 0 { - panic("invalid StateConfig: SysBufferBytes cannot be zero") - } - - s := &State{ - config: config, - agents: agents, - cgroup: nil, - fileCache: nil, - memReservedForFileCache: 0, - } - for _, opt := range opts { - opt.setFields(s) - } - - memInfo, err := getTotalSystemMemory() - if err != nil { - return nil, fmt.Errorf("Error getting system meminfo: %w", err) - } - - // We need to process file cache initialization before cgroup initialization, so that the memory - // allocated to the file cache is appropriately taken into account when we decide the cgroup's - // memory limits. 
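The ordering matters because the cgroup's limits are computed from whatever memory remains after the file cache takes its share. Here is a worked example using the default constants quoted just above; the 4 GiB total is a made-up figure for illustration only.

```go
package main

import "fmt"

const mib = 1 << 20

// Defaults from the deleted informant package, reproduced for a worked example.
const (
	oomBufferBytes         = 100 * mib // DefaultCgroupConfig.OOMBufferBytes
	resourceMultiplier     = 0.75      // DefaultFileCacheConfig.ResourceMultiplier
	minRemainingAfterCache = 640 * mib // DefaultFileCacheConfig.MinRemainingAfterCache
	spreadFactor           = 0.1       // DefaultFileCacheConfig.SpreadFactor
)

// calculateCacheSize mirrors FileCacheConfig.CalculateCacheSize as quoted later in
// this diff: take the smaller of the "spread" and "multiplier" lines, rounded down
// to a whole mebibyte.
func calculateCacheSize(total uint64) uint64 {
	if total <= minRemainingAfterCache {
		return 0
	}
	available := total - minRemainingAfterCache
	sizeFromSpread := uint64(float64(available) / (1.0 + spreadFactor))
	sizeFromNormal := uint64(float64(total) * resourceMultiplier)
	size := sizeFromSpread
	if sizeFromNormal < size {
		size = sizeFromNormal
	}
	return size / mib * mib
}

func main() {
	var total uint64 = 4096 * mib // illustrative 4 GiB of system memory

	fileCache := calculateCacheSize(total) // 3072 MiB with the defaults above
	cgroupAvailable := total - fileCache   // 1024 MiB left for the cgroup
	// The real code subtracts the OOM buffer with a saturating subtraction.
	memoryHigh := cgroupAvailable - oomBufferBytes // 924 MiB

	fmt.Printf("file cache: %d MiB, cgroup memory.high: %d MiB\n",
		fileCache/mib, memoryHigh/mib)
}
```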
- // - // TODO: this should be made cleaner, but it's mostly ok when there's only two options. - for _, kind := range []newStateOptKind{optFileCache, optCgroup} { - for _, opt := range opts { - if opt.kind == kind { - if err := opt.post(logger, s, memInfo.Total); err != nil { - return nil, err - } - } - } - } - - return s, nil -} - -// WithCgroup creates a NewStateOpts that sets its CgroupHandler -// -// This function will panic if the provided CgroupConfig is invalid. -func WithCgroup(cgm *CgroupManager, config CgroupConfig) NewStateOpts { - if config.OOMBufferBytes == 0 { - panic("invalid CgroupConfig: OOMBufferBytes == 0") - } else if config.MaxUpscaleWaitMillis == 0 { - panic("invalid CgroupConfig: MaxUpscaleWaitMillis == 0") - } - - return NewStateOpts{ - kind: optCgroup, - setFields: func(s *State) { - if s.cgroup != nil { - panic("WithCgroupHandler option provided more than once") - } - - upscaleEventsSendr, upscaleEventsRecvr := util.NewCondChannelPair() - s.cgroup = &CgroupState{ - updateMemLimitsLock: sync.Mutex{}, - mgr: cgm, - config: config, - upscaleEventsSendr: upscaleEventsSendr, - upscaleEventsRecvr: upscaleEventsRecvr, - requestUpscale: func(l *zap.Logger) { s.agents.RequestUpscale(l) }, - } - }, - post: func(logger *zap.Logger, s *State, memTotal uint64) error { - logger = logger.With(zap.String("cgroup", s.cgroup.mgr.name)) - - available := memTotal - s.memReservedForFileCache - - // FIXME: This is technically racy across restarts. The sequence would be: - // 1. Respond "ok" to a downscale request - // 2. Restart - // 3. Read system memory - // 4. Get downscaled (as approved earlier) - // A potential way to fix this would be writing to a file to record approved downscale - // operations. - if err := s.cgroup.setMemoryLimits(logger, available); err != nil { - return fmt.Errorf("Error setting initial cgroup memory limits: %w", err) - } - go s.cgroup.handleCgroupSignalsLoop(logger.Named("signal-handler"), config) - return nil - }, - } -} - -// WithPostgresFileCache creates a NewStateOpts that enables connections to the postgres file cache -func WithPostgresFileCache(connStr string, config FileCacheConfig) NewStateOpts { - if err := config.Validate(); err != nil { - panic(fmt.Errorf("invalid FileCacheConfig: %w", err)) - } - - return NewStateOpts{ - kind: optFileCache, - setFields: func(s *State) { - if s.fileCache != nil { - panic("WithPostgresFileCache option provided more than once") - } - - s.fileCache = &FileCacheState{ - connStr: connStr, - config: config, - } - }, - post: func(logger *zap.Logger, s *State, memTotal uint64) error { - if !config.InMemory { - panic("file cache not in-memory unimplemented") - } - - // FIXME: make the timeout configurable - ctx, cancel := context.WithTimeout(context.TODO(), time.Second) - defer cancel() - - // Check that we have permissions to set the file cache's size. - size, err := s.fileCache.GetFileCacheSize(ctx) - if err != nil { - return fmt.Errorf("Error getting file cache size: %w", err) - } - - newSize := s.fileCache.config.CalculateCacheSize(memTotal) - logger.Info("Setting initial file cache size", zap.String("current", mib(size)), zap.String("target", mib(newSize))) - - // note: Even if newSize == size, we want to explicitly set it *anwyays*, just to verify - // that we have the necessary permissions to do so. 
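For reference, the file-cache round-trip that this startup hook relies on boils down to three statements against Postgres: read the limit via `pg_size_bytes(current_setting(...))`, write it back in whole megabytes with `ALTER SYSTEM`, and reload the configuration. A condensed sketch follows, with a placeholder connection string.

```go
package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"
	"time"

	_ "github.com/lib/pq"
)

func main() {
	// Placeholder connection string, not something defined by this repository.
	db, err := sql.Open("postgres", "host=localhost dbname=postgres sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	// Read the current limit, converted to bytes by pg_size_bytes.
	var sizeInBytes uint64
	err = db.QueryRowContext(ctx,
		`SELECT pg_size_bytes(current_setting('neon.file_cache_size_limit'));`,
	).Scan(&sizeInBytes)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("current file cache limit (bytes):", sizeInBytes)

	// Setting the same value back still verifies we have ALTER SYSTEM permissions.
	newSizeMB := sizeInBytes / (1 << 20)
	if _, err := db.ExecContext(ctx,
		fmt.Sprintf(`ALTER SYSTEM SET neon.file_cache_size_limit = %d;`, newSizeMB),
	); err != nil {
		log.Fatal(err)
	}
	// pg_reload_conf is required for the setting change to take effect.
	if _, err := db.ExecContext(ctx, `SELECT pg_reload_conf();`); err != nil {
		log.Fatal(err)
	}
}
```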
- - actualSize, err := s.fileCache.SetFileCacheSize(ctx, logger, newSize) - if err != nil { - return fmt.Errorf("Error setting file cache size: %w", err) - } - s.memReservedForFileCache = actualSize - - return nil - }, - } -} - -// RegisterAgent registers a new or updated autoscaler-agent -// -// Returns: body (if successful), status code, error (if unsuccessful) -func (s *State) RegisterAgent(ctx context.Context, logger *zap.Logger, info *api.AgentDesc) (*api.InformantDesc, int, error) { - logger = logger.With(agentZapField(info.AgentID, info.ServerAddr)) - - protoVersion, status, err := s.agents.RegisterNewAgent(logger, info) - if err != nil { - return nil, status, err - } - - desc := api.InformantDesc{ - ProtoVersion: protoVersion, - MetricsMethod: api.InformantMetricsMethod{ - Prometheus: &api.MetricsMethodPrometheus{Port: PrometheusPort}, - }, - } - - return &desc, 200, nil -} - -// HealthCheck is a dummy endpoint that allows the autoscaler-agent to check that (a) the informant -// is up and running, and (b) the agent is still registered. -// -// Returns: body (if successful), status code, error (if unsuccessful) -func (s *State) HealthCheck(ctx context.Context, logger *zap.Logger, info *api.AgentIdentification) (*api.InformantHealthCheckResp, int, error) { - agent, ok := s.agents.Get(info.AgentID) - if !ok { - return nil, 404, fmt.Errorf("No Agent with ID %s registered", info.AgentID) - } else if !agent.protoVersion.AllowsHealthCheck() { - return nil, 400, fmt.Errorf("health checks are not supported in protocol version %v", agent.protoVersion) - } - - return &api.InformantHealthCheckResp{}, 200, nil -} - -// TryDownscale tries to downscale the VM's current resource usage, returning whether the proposed -// amount is ok -// -// Returns: body (if successful), status code and error (if unsuccessful) -func (s *State) TryDownscale(ctx context.Context, logger *zap.Logger, target *api.AgentResourceMessage) (*api.DownscaleResult, int, error) { - currentId := s.agents.CurrentIdStr() - incomingId := target.Data.Id.AgentID.String() - - // First verify agent's authenticity before doing anything. - // Note: if the current agent is nil, its id string will be "", which - // does not match any valid UUID - if incomingId != currentId { - return nil, 400, fmt.Errorf("Agent ID %s is not the active Agent", incomingId) - } - - // Helper functions for abbreviating returns. - resultFromStatus := func(ok bool, status string) (*api.DownscaleResult, int, error) { - return &api.DownscaleResult{Ok: ok, Status: status}, 200, nil - } - internalError := func(err error) (*api.DownscaleResult, int, error) { - logger.Error("Internal error handling downscale request", zap.Error(err)) - return nil, 500, errors.New("Internal error") - } - - // If we aren't interacting with something that should be adjusted, then we don't need to do anything. - if s.cgroup == nil && s.fileCache == nil { - logger.Info("No action needed for downscale (no cgroup or file cache enabled)") - return resultFromStatus(true, "No action taken (no cgroup or file cache enabled)") - } - - requestedMem := uint64(target.Data.Memory.Value()) - usableSystemMemory := util.SaturatingSub(requestedMem, s.config.SysBufferBytes) - - // Get the file cache's expected contribution to the memory usage - var expectedFileCacheMemUsage uint64 - if s.fileCache != nil && s.fileCache.config.InMemory { - expectedFileCacheMemUsage = s.fileCache.config.CalculateCacheSize(usableSystemMemory) - } - - mib := float64(1 << 20) // 1 MiB = 2^20 bytes. 
We'll use this for pretty-printing. - - // Check whether this downscaling would be ok for the cgroup. - // - // Also, lock changing the cgroup between the initial calculations and later using them. - var newCgroupMemHigh uint64 - if s.cgroup != nil { - s.cgroup.updateMemLimitsLock.Lock() - defer s.cgroup.updateMemLimitsLock.Unlock() - - newCgroupMemHigh = s.cgroup.config.calculateMemoryHighValue(usableSystemMemory - expectedFileCacheMemUsage) - - current, err := s.cgroup.getCurrentMemory() - if err != nil { - return internalError(fmt.Errorf("Error fetching getting cgroup memory: %w", err)) - } - - // For an explanation, refer to the documentation of CgroupConfig.MemoryHighBufferBytes - // - // TODO: this should be a method on (*CgroupConfig). - if newCgroupMemHigh < current+s.cgroup.config.MemoryHighBufferBytes { - verdict := "Calculated memory.high too low" - status := fmt.Sprintf( - "%s: %g MiB (new high) < %g MiB (current usage) + %g MiB (buffer)", - verdict, - float64(newCgroupMemHigh)/mib, float64(current)/mib, - float64(s.cgroup.config.MemoryHighBufferBytes)/mib, - ) - - return resultFromStatus(false, status) - } - } - - var statusParts []string - - var fileCacheMemUsage uint64 - - // The downscaling has been approved. Downscale the file cache, then the cgroup. - if s.fileCache != nil && s.fileCache.config.InMemory { - if !s.fileCache.config.InMemory { - panic("file cache not in-memory unimplemented") - } - - // FIXME: make the timeout configurablek - dbCtx, cancel := context.WithTimeout(ctx, time.Second) // for talking to the DB - defer cancel() - - actualUsage, err := s.fileCache.SetFileCacheSize(dbCtx, logger, expectedFileCacheMemUsage) - if err != nil { - return internalError(fmt.Errorf("Error setting file cache size: %w", err)) - } - - fileCacheMemUsage = actualUsage - status := fmt.Sprintf("Set file cache size to %g MiB", float64(actualUsage)/mib) - statusParts = append(statusParts, status) - } - - if s.cgroup != nil { - availableMemory := usableSystemMemory - fileCacheMemUsage - - if fileCacheMemUsage != expectedFileCacheMemUsage { - newCgroupMemHigh = s.cgroup.config.calculateMemoryHighValue(availableMemory) - } - - memLimits := memoryLimits{ - highBytes: newCgroupMemHigh, - maxBytes: availableMemory, - } - - // TODO: see similar note above. We shouldn't call methods on s.cgroup.mgr from here. - if err := s.cgroup.mgr.SetMemLimits(memLimits); err != nil { - return internalError(fmt.Errorf("Error setting cgroup memory.high: %w", err)) - } - - status := fmt.Sprintf( - "Set cgroup memory.high to %g MiB, of new max %g MiB", - float64(newCgroupMemHigh)/mib, float64(availableMemory)/mib, - ) - statusParts = append(statusParts, status) - } - - return resultFromStatus(true, strings.Join(statusParts, "; ")) -} - -// NotifyUpscale signals that the VM's resource usage has been increased to the new amount -// -// Returns: body (if successful), status code and error (if unsuccessful) -func (s *State) NotifyUpscale( - ctx context.Context, - logger *zap.Logger, - newResources *api.AgentResourceMessage, -) (*struct{}, int, error) { - // FIXME: we shouldn't just trust what the agent says - // - // Because of race conditions like in , - // it's possible for us to receive a notification on /upscale *before* NeonVM actually adds the - // memory. - // - // So until the race condition described in #23 is fixed, we have to just trust that the agent - // is telling the truth, *especially because it might not be*. 
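To make the `TryDownscale` cgroup check above concrete, here is a worked example for a VM with no file cache enabled, using the 100 MiB defaults for `SysBufferBytes`, `OOMBufferBytes`, and `MemoryHighBufferBytes`; the requested size and current usage are invented numbers.

```go
package main

import "fmt"

const mib = 1 << 20

// 100 MiB defaults from the deleted informant package.
const (
	sysBufferBytes        = 100 * mib
	oomBufferBytes        = 100 * mib
	memoryHighBufferBytes = 100 * mib
)

// approveDownscale mirrors the cgroup check in TryDownscale above for a VM with no
// file cache: the proposed memory.high must leave MemoryHighBufferBytes of headroom
// above current usage, otherwise the downscale is refused. (The real code uses
// util.SaturatingSub for the subtractions.)
func approveDownscale(requestedMem, currentUsage uint64) (bool, string) {
	usable := requestedMem - sysBufferBytes
	newMemHigh := usable - oomBufferBytes
	if newMemHigh < currentUsage+memoryHighBufferBytes {
		return false, fmt.Sprintf(
			"Calculated memory.high too low: %d MiB (new high) < %d MiB (current) + %d MiB (buffer)",
			newMemHigh/mib, currentUsage/mib, memoryHighBufferBytes/mib,
		)
	}
	return true, fmt.Sprintf("ok: new memory.high = %d MiB", newMemHigh/mib)
}

func main() {
	requested := uint64(2048 * mib) // downscale target of 2 GiB

	for _, usage := range []uint64{1700 * mib, 1800 * mib} {
		ok, status := approveDownscale(requested, usage)
		fmt.Printf("current usage %d MiB -> approve=%v (%s)\n", usage/mib, ok, status)
	}
}
```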
- - currentId := s.agents.CurrentIdStr() - incomingId := newResources.Data.Id.AgentID.String() - - // First verify agent's authenticity before doing anything. - // Note: if the current agent is nil, its id string will be "", which - // does not match any valid UUID - if incomingId != currentId { - return nil, 400, fmt.Errorf("Agent ID %s is not the active Agent", incomingId) - } - - // Helper function for abbreviating returns. - internalError := func(err error) (*struct{}, int, error) { - logger.Error("Error handling upscale request", zap.Error(err)) - return nil, 500, errors.New("Internal error") - } - - if s.cgroup == nil && s.fileCache == nil { - logger.Info("No action needed for upscale (no cgroup or file cache enabled)") - return &struct{}{}, 200, nil - } - - newMem := uint64(newResources.Data.Memory.Value()) - usableSystemMemory := util.SaturatingSub(newMem, s.config.SysBufferBytes) - - if s.cgroup != nil { - s.cgroup.updateMemLimitsLock.Lock() - defer s.cgroup.updateMemLimitsLock.Unlock() - } - - s.agents.ReceivedUpscale() - - // Get the file cache's expected contribution to the memory usage - var fileCacheMemUsage uint64 - if s.fileCache != nil { - logger := logger.With(zap.String("fileCacheConnstr", s.fileCache.connStr)) - - if !s.fileCache.config.InMemory { - panic("file cache not in-memory unimplemented") - } - - // FIXME: make the timeout configurable - dbCtx, cancel := context.WithTimeout(ctx, time.Second) // for talking to the DB - defer cancel() - - // Update the size of the file cache - expectedUsage := s.fileCache.config.CalculateCacheSize(usableSystemMemory) - - logger.Info("Updating file cache size", zap.String("target", mib(expectedUsage)), zap.String("totalMemory", mib(newMem))) - - actualUsage, err := s.fileCache.SetFileCacheSize(dbCtx, logger, expectedUsage) - if err != nil { - return internalError(fmt.Errorf("Error setting file cache size: %w", err)) - } - - if actualUsage != expectedUsage { - logger.Warn( - "File cache size was set to a different value than we wanted", - zap.String("target", mib(expectedUsage)), - zap.String("actual", mib(actualUsage)), - ) - } - - fileCacheMemUsage = actualUsage - } - - if s.cgroup != nil { - logger := logger.With(zap.String("cgroup", s.cgroup.mgr.name)) - - availableMemory := usableSystemMemory - fileCacheMemUsage - - newMemHigh := s.cgroup.config.calculateMemoryHighValue(availableMemory) - logger.Info("Updating cgroup memory.high", zap.String("target", mib(newMemHigh)), zap.String("totalMemory", mib(newMem))) - - memLimits := memoryLimits{ - highBytes: newMemHigh, - maxBytes: availableMemory, - } - - if err := s.cgroup.mgr.SetMemLimits(memLimits); err != nil { - return internalError(fmt.Errorf("Error setting cgroup memory.high: %w", err)) - } - - s.cgroup.upscaleEventsSendr.Send() - } - - return &struct{}{}, 200, nil -} - -// UnregisterAgent unregisters the autoscaler-agent given by info, if it is currently registered -// -// If a different autoscaler-agent is currently registered, this method will do nothing. 
-// -// Returns: body (if successful), status code and error (if unsuccessful) -func (s *State) UnregisterAgent(ctx context.Context, logger *zap.Logger, info *api.AgentDesc) (*api.UnregisterAgent, int, error) { - agent, ok := s.agents.Get(info.AgentID) - if !ok { - return nil, 404, fmt.Errorf("No agent with ID %q", info.AgentID) - } else if agent.serverAddr != info.ServerAddr { - // On our side, log the address we're expecting, but don't give that to the client - logger.Warn(fmt.Sprintf( - "Agent serverAddr is incorrect, got %q but expected %q", - info.ServerAddr, agent.serverAddr, - )) - return nil, 400, fmt.Errorf("Agent serverAddr is incorrect, got %q", info.ServerAddr) - } - - wasActive := agent.EnsureUnregistered(logger) - return &api.UnregisterAgent{WasActive: wasActive}, 200, nil -} diff --git a/pkg/informant/filecache.go b/pkg/informant/filecache.go deleted file mode 100644 index 7653d2eb1..000000000 --- a/pkg/informant/filecache.go +++ /dev/null @@ -1,187 +0,0 @@ -package informant - -// Integration with Neon's postgres local file cache - -import ( - "context" - "database/sql" - "fmt" - - _ "github.com/lib/pq" - "go.uber.org/zap" - - "github.com/neondatabase/autoscaling/pkg/util" -) - -type FileCacheState struct { - connStr string - config FileCacheConfig -} - -type FileCacheConfig struct { - // InMemory indicates whether the file cache is *actually* stored in memory (e.g. by writing to - // a tmpfs or shmem file). If true, the size of the file cache will be counted against the - // memory available for the cgroup. - InMemory bool - - // ResourceMultiplier gives the size of the file cache, in terms of the size of the resource it - // consumes (currently: only memory) - // - // For example, setting ResourceMultiplier = 0.75 gives the cache a target size of 75% of total - // resources. - // - // This value must be strictly between 0 and 1. - ResourceMultiplier float64 - - // MinRemainingAfterCache gives the required minimum amount of memory, in bytes, that must - // remain available after subtracting the file cache. - // - // This value must be non-zero. - MinRemainingAfterCache uint64 - - // SpreadFactor controls the rate of increase in the file cache's size as it grows from zero - // (when total resources equals MinRemainingAfterCache) to the desired size based on - // ResourceMultiplier. - // - // A SpreadFactor of zero means that all additional resources will go to the cache until it - // reaches the desired size. Setting SpreadFactor to N roughly means "for every 1 byte added to - // the cache's size, N bytes are reserved for the rest of the system, until the cache gets to - // its desired size". - // - // This value must be >= 0, and must retain an increase that is more than what would be given by - // ResourceMultiplier. For example, setting ResourceMultiplier = 0.75 but SpreadFactor = 1 would - // be invalid, because SpreadFactor would induce only 50% usage - never reaching the 75% as - // desired by ResourceMultiplier. - // - // SpreadFactor is too large if (SpreadFactor+1) * ResourceMultiplier is >= 1. - SpreadFactor float64 -} - -func (c *FileCacheConfig) Validate() error { - // Check single-field validity - if !(0.0 < c.ResourceMultiplier && c.ResourceMultiplier < 1.0) { - return fmt.Errorf("ResourceMultiplier must be between 0.0 and 1.0, exclusive. 
Got %g", c.ResourceMultiplier) - } else if !(c.SpreadFactor >= 0.0) { - return fmt.Errorf("SpreadFactor must be >= 0, got: %g", c.SpreadFactor) - } else if c.MinRemainingAfterCache == 0 { - return fmt.Errorf("MinRemainingAfterCache must not be 0") - } - - // Check that ResourceMultiplier and SpreadFactor are valid w.r.t. each other. - // - // As shown in CalculateCacheSize, we have two lines resulting from ResourceMultiplier and - // SpreadFactor, respectively. They are: - // - // total MinRemainingAfterCache - // size = —————————————————— - ———————————————————————— - // SpreadFactor + 1 SpreadFactor + 1 - // - // and - // - // size = ResourceMultiplier × total - // - // .. where 'total' is the total resources. These are isomorphic to the typical 'y = mx + b' - // form, with y = "size" and x = "total". - // - // These lines intersect at: - // - // MinRemainingAfterCache - // ————————————————————————————————————————————— - // 1 - ResourceMultiplier × (SpreadFactor + 1) - // - // We want to ensure that this value (a) exists, and (b) is >= MinRemainingAfterCache. This is - // guaranteed when 'ResourceMultiplier × (SpreadFactor + 1)' is less than 1. - // (We also need it to be >= 0, but that's already guaranteed.) - - intersectFactor := c.ResourceMultiplier * (c.SpreadFactor + 1) - if !(intersectFactor < 1.0) { - return fmt.Errorf("incompatible ResourceMultiplier and SpreadFactor") - } - - return nil -} - -// CalculateCacheSize returns the desired size of the cache, given the total memory. -func (c *FileCacheConfig) CalculateCacheSize(total uint64) uint64 { - available := util.SaturatingSub(total, c.MinRemainingAfterCache) - - if available == 0 { - return 0 - } - - sizeFromSpread := uint64(util.Max(0, int64(float64(available)/(1.0+c.SpreadFactor)))) - // ^^^^^^^^^^^^^^^^^^^^^^^^ make sure we don't overflow from floating-point ops - sizeFromNormal := uint64(float64(total) * c.ResourceMultiplier) - - byteSize := util.Min(sizeFromSpread, sizeFromNormal) - var mib uint64 = 1 << 20 // 1 MiB = 1^20 bytes. - - // The file cache operates in units of mebibytes, so the sizes we produce should be rounded to a - // mebibyte. We round down to be conservative. - return byteSize / mib * mib -} - -// GetFileCacheSize returns the current size of the file cache, in bytes -func (s *FileCacheState) GetFileCacheSize(ctx context.Context) (uint64, error) { - db, err := sql.Open("postgres", s.connStr) - if err != nil { - return 0, fmt.Errorf("Error connecting to postgres: %w", err) - } - defer db.Close() - - // The file cache GUC variable is in MiB, but the conversion with pg_size_bytes means that the - // end result we get is in bytes. - var sizeInBytes uint64 - if err := db.QueryRowContext(ctx, `SELECT pg_size_bytes(current_setting('neon.file_cache_size_limit'));`).Scan(&sizeInBytes); err != nil { - return 0, fmt.Errorf("Error querying file cache size: %w", err) - } - - return sizeInBytes, nil -} - -// SetFileCacheSize sets the size of the file cache, returning the actual size it was set to -func (s *FileCacheState) SetFileCacheSize(ctx context.Context, logger *zap.Logger, sizeInBytes uint64) (uint64, error) { - db, err := sql.Open("postgres", s.connStr) - if err != nil { - return 0, fmt.Errorf("Error connecting to postgres: %w", err) - } - defer db.Close() - - logger.Info("Fetching maximum file cache size") - - var maxSizeInBytes uint64 - err = db.QueryRowContext(ctx, `SELECT pg_size_bytes(current_setting('neon.max_file_cache_size'));`). 
- Scan(&maxSizeInBytes) - if err != nil { - return 0, fmt.Errorf("Error querying max file cache size: %w", err) - } - - var maybeCapped string - if sizeInBytes > maxSizeInBytes { - sizeInBytes = maxSizeInBytes - maybeCapped = " (capped by maximum size)" - } - - logger.Info( - fmt.Sprintf("Updating file cache size %s", maybeCapped), - zap.String("size", mib(sizeInBytes)), - zap.String("max", mib(maxSizeInBytes)), - ) - - // note: even though the normal ways to get the cache size produce values with trailing "MB" - // (hence why we call pg_size_bytes in GetFileCacheSize's query), the format it expects to set - // the value is "integer number of MB" without trailing units. For some reason, this *really* - // wasn't working with normal arguments, so that's why we're constructing the query here. - sizeInMB := sizeInBytes / (1 << 20) - setQuery := fmt.Sprintf(`ALTER SYSTEM SET neon.file_cache_size_limit = %d;`, sizeInMB) - if _, err := db.ExecContext(ctx, setQuery); err != nil { - return 0, fmt.Errorf("Error changing cache setting: %w", err) - } - - // must use pg_reload_conf to have the settings change take effect - if _, err := db.ExecContext(ctx, `SELECT pg_reload_conf();`); err != nil { - return 0, fmt.Errorf("Error reloading config: %w", err) - } - - return sizeInMB * (1 << 20), nil -} diff --git a/pkg/plugin/config.go b/pkg/plugin/config.go index 40f3ff093..79a349758 100644 --- a/pkg/plugin/config.go +++ b/pkg/plugin/config.go @@ -7,6 +7,8 @@ import ( "math" "os" + "golang.org/x/exp/slices" + "k8s.io/apimachinery/pkg/api/resource" vmapi "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" @@ -39,12 +41,39 @@ type Config struct { // version handled. SchedulerName string `json:"schedulerName"` + // RandomizeScores, if true, will cause the scheduler to score a node with a random number in + // the range [minScore + 1, trueScore], instead of the trueScore + RandomizeScores bool `json:"randomizeScores"` + + // MigrationDeletionRetrySeconds gives the duration, in seconds, we should wait between retrying + // a failed attempt to delete a VirtualMachineMigration that's finished. + MigrationDeletionRetrySeconds uint `json:"migrationDeletionRetrySeconds"` + // DoMigration, if provided, allows VM migration to be disabled // // This flag is intended to be temporary, just until NeonVM supports mgirations and we can // re-enable it. DoMigration *bool `json:"doMigration"` + // K8sNodeGroupLabel, if provided, gives the label to use when recording k8s node groups in the + // metrics (like for autoscaling_plugin_node_{cpu,mem}_resources_current) + K8sNodeGroupLabel string `json:"k8sNodeGroupLabel"` + + // K8sAvailabilityZoneLabel, if provided, gives the label to use when recording nodes' + // availability zones in the metrics (like for autoscaling_plugin_node_{cpu,mem}_resources_current) + K8sAvailabilityZoneLabel string `json:"k8sAvailabilityZoneLabel"` + + // IgnoreNamespaces, if provided, gives a list of namespaces that the plugin should completely + // ignore, as if pods from those namespaces do not exist. + // + // This is specifically designed for our "overprovisioning" namespace, which creates paused pods + // to trigger cluster-autoscaler. + // + // The only exception to this rule is during Filter method calls, where we do still count the + // resources from such pods. The reason to do that is so that these overprovisioning pods can be + // evicted, which will allow cluster-autoscaler to trigger scale-up. 
+ IgnoreNamespaces []string `json:"ignoreNamespaces"` + // DumpState, if provided, enables a server to dump internal state DumpState *dumpStateConfig `json:"dumpState"` @@ -61,6 +90,28 @@ type nodeConfig struct { Cpu resourceConfig `json:"cpu"` Memory resourceConfig `json:"memory"` ComputeUnit api.Resources `json:"computeUnit"` + + // Details about node scoring: + // See also: https://www.desmos.com/calculator/wg8s0yn63s + // In the desmos, the value f(x,s) gives the score (from 0 to 1) of a node that's x amount full + // (where x is a fraction from 0 to 1), with a total size that is equal to the maximum size node + // times s (i.e. s (or: "scale") gives the ratio between this nodes's size and the biggest one). + + // MinUsageScore gives the ratio of the score at the minimum usage (i.e. 0) relative to the + // score at the midpoint, which will have the maximum. + // + // This corresponds to y₀ in the desmos link above. + MinUsageScore float64 `json:"minUsageScore"` + // MaxUsageScore gives the ratio of the score at the maximum usage (i.e. full) relative to the + // score at the midpoint, which will have the maximum. + // + // This corresponds to y₁ in the desmos link above. + MaxUsageScore float64 `json:"maxUsageScore"` + // ScorePeak gives the fraction at which the "target" or highest score should be, with the score + // sloping down on either side towards MinUsageScore at 0 and MaxUsageScore at 1. + // + // This corresponds to xₚ in the desmos link. + ScorePeak float64 `json:"scorePeak"` } // resourceConfig configures the amount of a particular resource we're willing to allocate to VMs, @@ -74,9 +125,6 @@ type resourceConfig struct { // The word "watermark" was originally used by @zoete as a temporary stand-in term during a // meeting, and so it has intentionally been made permanent to spite the concept of "temporary" 😛 Watermark float32 `json:"watermark,omitempty"` - // System is the absolute amount of the resource allocated to non-user node functions, like - // Kubernetes daemons - System resource.Quantity `json:"system,omitempty"` } func (c *Config) migrationEnabled() bool { @@ -115,6 +163,10 @@ func (c *Config) validate() (string, error) { } } + if c.MigrationDeletionRetrySeconds == 0 { + return "migrationDeletionRetrySeconds", errors.New("value must be > 0") + } + return "", nil } @@ -131,32 +183,34 @@ func (s *overrideSet) validate() (string, error) { } func (c *nodeConfig) validate() (string, error) { - if path, err := c.Cpu.validate(false); err != nil { + if path, err := c.Cpu.validate(); err != nil { return fmt.Sprintf("cpu.%s", path), err } - if path, err := c.Memory.validate(true); err != nil { + if path, err := c.Memory.validate(); err != nil { return fmt.Sprintf("memory.%s", path), err } if err := c.ComputeUnit.ValidateNonZero(); err != nil { return "computeUnit", err } + if c.MinUsageScore < 0 || c.MinUsageScore > 1 { + return "minUsageScore", errors.New("value must be between 0 and 1, inclusive") + } else if c.MaxUsageScore < 0 || c.MaxUsageScore > 1 { + return "maxUsageScore", errors.New("value must be between 0 and 1, inclusive") + } else if c.ScorePeak < 0 || c.ScorePeak > 1 { + return "scorePeak", errors.New("value must be between 0 and 1, inclusive") + } + return "", nil } -func (c *resourceConfig) validate(isMemory bool) (string, error) { +func (c *resourceConfig) validate() (string, error) { if c.Watermark <= 0.0 { return "watermark", errors.New("value must be > 0") } else if c.Watermark > 1.0 { return "watermark", errors.New("value must be <= 1") } - if 
c.System.Value() <= 0 { - return "system", errors.New("value must be > 0") - } else if isMemory && c.System.Value() < math.MaxInt64 && c.System.MilliValue()%1000 != 0 { - return "system", errors.New("value cannot have milli-precision") - } - return "", nil } @@ -191,6 +245,11 @@ func ReadConfig(path string) (*Config, error) { // HELPER METHODS FOR USING CONFIGS // ////////////////////////////////////// +// ignoredNamespace returns whether items in the namespace should be treated as if they don't exist +func (c *Config) ignoredNamespace(namespace string) bool { + return slices.Contains(c.IgnoreNamespaces, namespace) +} + // forNode returns the individual nodeConfig for a node with a particular name, taking override // settings into account func (c *Config) forNode(nodeName string) *nodeConfig { @@ -206,32 +265,13 @@ func (c *Config) forNode(nodeName string) *nodeConfig { } func (c *nodeConfig) vCpuLimits(total *resource.Quantity) (_ nodeResourceState[vmapi.MilliCPU], margin *resource.Quantity, _ error) { - // We check both Value and MilliValue here in case the value overflows an int64 when - // multiplied by 1000, which is possible if c.Cpu.System is not in units of milli-CPU - if c.Cpu.System.Value() > total.Value() || c.Cpu.System.MilliValue() > total.MilliValue() { - err := fmt.Errorf("desired system vCPU %v greater than node total %v", &c.Cpu.System, total) - return nodeResourceState[vmapi.MilliCPU]{}, nil, err - } + totalMilli := total.MilliValue() - totalRounded := total.MilliValue() / 1000 - - // system CPU usage isn't measured directly, but as the number of additional *full* CPUs - // reserved for system functions *that we'd otherwise have available*. - // - // So if c.Cpu.System is less than the difference between total.MilliValue() and - // 1000*total.Value(), then systemCpus will be zero. - systemCpus := totalRounded - (total.MilliValue()-c.Cpu.System.MilliValue())/1000 - - reservableCpus := totalRounded - systemCpus - unreservableCpuMillis := total.MilliValue() - 1000*reservableCpus - - margin = resource.NewMilliQuantity(unreservableCpuMillis, c.Cpu.System.Format) - margin.Sub(c.Cpu.System) + margin = resource.NewMilliQuantity(0, total.Format) return nodeResourceState[vmapi.MilliCPU]{ - Total: vmapi.MilliCPU(totalRounded * 1000), - System: vmapi.MilliCPU(systemCpus * 1000), - Watermark: vmapi.MilliCPU(c.Cpu.Watermark * float32(reservableCpus) * 1000), + Total: vmapi.MilliCPU(totalMilli), + Watermark: vmapi.MilliCPU(c.Cpu.Watermark * float32(totalMilli)), Reserved: 0, Buffer: 0, CapacityPressure: 0, @@ -243,13 +283,7 @@ func (c *nodeConfig) memoryLimits( total *resource.Quantity, slotSize *resource.Quantity, ) (_ nodeResourceState[uint16], margin *resource.Quantity, _ error) { - if c.Memory.System.Cmp(*total) == 1 /* if c.Memory.System > total */ { - err := fmt.Errorf( - "desired system memory %v greater than node total %v", - &c.Memory.System, total, - ) - return nodeResourceState[uint16]{}, nil, err - } else if slotSize.Cmp(*total) == 1 /* if slotSize > total */ { + if slotSize.Cmp(*total) == 1 /* if slotSize > total */ { err := fmt.Errorf("slotSize %v greater than node total %v", slotSize, total) return nodeResourceState[uint16]{}, nil, err } @@ -260,23 +294,13 @@ func (c *nodeConfig) memoryLimits( return nodeResourceState[uint16]{}, nil, err } - // systemSlots isn't measured directly, but as the number of additional slots reserved for - // system functions *that we'd otherwise have available*. 
- // - // So if c.Memory.System is less than the leftover space between totalSlots*slotSize and total, - // then systemSlots will be zero. - systemSlots := totalSlots - (total.Value()-c.Memory.System.Value())/slotSize.Value() - - reservableSlots := totalSlots - systemSlots - unreservable := total.Value() - slotSize.Value()*reservableSlots + unreservable := total.Value() - slotSize.Value()*totalSlots margin = resource.NewQuantity(unreservable, total.Format) - margin.Sub(c.Memory.System) return nodeResourceState[uint16]{ Total: uint16(totalSlots), - System: uint16(systemSlots), - Watermark: uint16(c.Memory.Watermark * float32(reservableSlots)), + Watermark: uint16(c.Memory.Watermark * float32(totalSlots)), Reserved: 0, Buffer: 0, CapacityPressure: 0, diff --git a/pkg/plugin/dumpstate.go b/pkg/plugin/dumpstate.go index 0f9da2570..d4e297e56 100644 --- a/pkg/plugin/dumpstate.go +++ b/pkg/plugin/dumpstate.go @@ -99,6 +99,8 @@ type keyed[K any, V any] struct { } type pluginStateDump struct { + OngoingMigrationDeletions []keyed[util.NamespacedName, int] `json:"ongoingMigrationDeletions"` + Nodes []keyed[string, nodeStateDump] `json:"nodes"` VMPods []podNameAndPointer `json:"vmPods"` @@ -118,13 +120,15 @@ type podNameAndPointer struct { type pointerString string type nodeStateDump struct { - Obj pointerString `json:"obj"` - Name string `json:"name"` - VCPU nodeResourceState[vmapi.MilliCPU] `json:"vCPU"` - MemSlots nodeResourceState[uint16] `json:"memSlots"` - Pods []keyed[util.NamespacedName, podStateDump] `json:"pods"` - OtherPods []keyed[util.NamespacedName, otherPodStateDump] `json:"otherPods"` - Mq []*podNameAndPointer `json:"mq"` + Obj pointerString `json:"obj"` + Name string `json:"name"` + NodeGroup string `json:"nodeGroup"` + AvailabilityZone string `json:"availabilityZone"` + VCPU nodeResourceState[vmapi.MilliCPU] `json:"vCPU"` + MemSlots nodeResourceState[uint16] `json:"memSlots"` + Pods []keyed[util.NamespacedName, podStateDump] `json:"pods"` + OtherPods []keyed[util.NamespacedName, otherPodStateDump] `json:"otherPods"` + Mq []*podNameAndPointer `json:"mq"` } type podStateDump struct { @@ -189,7 +193,14 @@ func (s *pluginState) dump(ctx context.Context) (*pluginStateDump, error) { return kvx.Key < kvy.Key }) + ongoingMigrationDeletions := make([]keyed[util.NamespacedName, int], 0, len(s.ongoingMigrationDeletions)) + for k, count := range s.ongoingMigrationDeletions { + ongoingMigrationDeletions = append(ongoingMigrationDeletions, keyed[util.NamespacedName, int]{Key: k, Value: count}) + } + sortSliceByPodName(ongoingMigrationDeletions, func(kv keyed[util.NamespacedName, int]) util.NamespacedName { return kv.Key }) + return &pluginStateDump{ + OngoingMigrationDeletions: ongoingMigrationDeletions, Nodes: nodes, VMPods: vmPods, OtherPods: otherPods, @@ -223,13 +234,15 @@ func (s *nodeState) dump() nodeStateDump { } return nodeStateDump{ - Obj: makePointerString(s), - Name: s.name, - VCPU: s.vCPU, - MemSlots: s.memSlots, - Pods: pods, - OtherPods: otherPods, - Mq: mq, + Obj: makePointerString(s), + Name: s.name, + NodeGroup: s.nodeGroup, + AvailabilityZone: s.availabilityZone, + VCPU: s.vCPU, + MemSlots: s.memSlots, + Pods: pods, + OtherPods: otherPods, + Mq: mq, } } diff --git a/pkg/plugin/plugin.go b/pkg/plugin/plugin.go index f6e9eb47b..ad9521f2c 100644 --- a/pkg/plugin/plugin.go +++ b/pkg/plugin/plugin.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "math/rand" "time" "github.com/tychoish/fun/pubsub" @@ -25,6 +26,7 @@ import ( const Name = "AutoscaleEnforcer" const LabelVM = 
vmapi.VirtualMachineNameLabel +const LabelPluginCreatedMigration = "autoscaling.neon.tech/created-by-scheduler" const ConfigMapNamespace = "kube-system" const ConfigMapName = "scheduler-plugin-config" const ConfigMapKey = "autoscaler-enforcer-config.json" @@ -100,8 +102,9 @@ func makeAutoscaleEnforcerPlugin( vmClient: vmClient, // remaining fields are set by p.readClusterState and p.makePrometheusRegistry state: pluginState{ //nolint:exhaustruct // see above. - lock: util.NewChanMutex(), - conf: config, + lock: util.NewChanMutex(), + ongoingMigrationDeletions: make(map[util.NamespacedName]int), + conf: config, }, metrics: PromMetrics{}, //nolint:exhaustruct // set by makePrometheusRegistry vmStore: IndexedVMStore{}, //nolint:exhaustruct // set below @@ -130,6 +133,9 @@ func makeAutoscaleEnforcerPlugin( }, } pwc := podWatchCallbacks{ + submitPodStarted: func(logger *zap.Logger, pod *corev1.Pod) { + pushToQueue(logger, func() { p.handlePodStarted(hlogger, pod) }) + }, submitVMDeletion: func(logger *zap.Logger, pod util.NamespacedName) { pushToQueue(logger, func() { p.handleVMDeletion(hlogger, pod) }) }, @@ -154,6 +160,13 @@ func makeAutoscaleEnforcerPlugin( pushToQueue(logger, func() { p.handleNonAutoscalingUsageChange(hlogger, vm, podName) }) }, } + mwc := migrationWatchCallbacks{ + submitMigrationFinished: func(vmm *vmapi.VirtualMachineMigration) { + // When cleaning up migrations, we don't want to process those events synchronously. + // So instead, we'll spawn a goroutine to delete the completed migration. + go p.cleanupMigration(hlogger, vmm) + }, + } watchMetrics := watch.NewMetrics("autoscaling_plugin_watchers") @@ -166,16 +179,24 @@ func makeAutoscaleEnforcerPlugin( p.nodeStore = watch.NewIndexedStore(nodeStore, watch.NewFlatNameIndex[corev1.Node]()) logger.Info("Starting pod watcher") - if err := p.watchPodEvents(ctx, logger, watchMetrics, pwc); err != nil { + podStore, err := p.watchPodEvents(ctx, logger, watchMetrics, pwc) + if err != nil { return nil, fmt.Errorf("Error starting pod watcher: %w", err) } + podIndex := watch.NewIndexedStore(podStore, watch.NewNameIndex[corev1.Pod]()) + logger.Info("Starting VM watcher") - vmStore, err := p.watchVMEvents(ctx, logger, watchMetrics, vwc) + vmStore, err := p.watchVMEvents(ctx, logger, watchMetrics, vwc, podIndex) if err != nil { return nil, fmt.Errorf("Error starting VM watcher: %w", err) } + logger.Info("Starting VM Migration watcher") + if _, err := p.watchMigrationEvents(ctx, logger, watchMetrics, mwc); err != nil { + return nil, fmt.Errorf("Error starting VM Migration watcher: %w", err) + } + p.vmStore = watch.NewIndexedStore(vmStore, watch.NewNameIndex[vmapi.VirtualMachine]()) // makePrometheusRegistry sets p.metrics, which we need to do before calling readClusterState, @@ -190,14 +211,15 @@ func makeAutoscaleEnforcerPlugin( } go func() { - iter := queue.Iterator() - for iter.Next(ctx) { - callback := iter.Value() + for { + callback, err := queue.Wait(ctx) // NB: Wait pulls from the front of the queue + if err != nil { + logger.Info("Stopped waiting on pod/VM queue", zap.Error(err)) + break + } + callback() } - if err := iter.Close(); err != nil { - logger.Info("Stopped waiting on pod/VM queue", zap.Error(err)) - } }() if err := util.StartPrometheusMetricsServer(ctx, logger.Named("prometheus"), 9100, promReg); err != nil { @@ -233,7 +255,7 @@ func (e *AutoscaleEnforcer) Name() string { // getVmInfo is a helper for the plugin-related functions // // This function returns nil, nil if the pod is not associated with a NeonVM virtual 
machine. -func getVmInfo(logger *zap.Logger, vmStore IndexedVMStore, pod *corev1.Pod) (*api.VmInfo, error) { +func (e *AutoscaleEnforcer) getVmInfo(logger *zap.Logger, pod *corev1.Pod, action string) (*api.VmInfo, error) { var vmName util.NamespacedName vmName.Namespace = pod.Namespace @@ -247,7 +269,7 @@ func getVmInfo(logger *zap.Logger, vmStore IndexedVMStore, pod *corev1.Pod) (*ap return index.Get(vmName.Namespace, vmName.Name) } - vm, ok := vmStore.GetIndexed(accessor) + vm, ok := e.vmStore.GetIndexed(accessor) if !ok { logger.Warn( "VM is missing from local store. Relisting", @@ -264,13 +286,13 @@ func getVmInfo(logger *zap.Logger, vmStore IndexedVMStore, pod *corev1.Pod) (*ap defer timer.Stop() select { - case <-vmStore.Relist(): + case <-e.vmStore.Relist(): case <-timer.C: return nil, fmt.Errorf("Timed out waiting on VM store relist (timeout = %s)", timeout) } // retry fetching the VM, now that we know it's been synced. - vm, ok = vmStore.GetIndexed(accessor) + vm, ok = e.vmStore.GetIndexed(accessor) if !ok { // if the VM is still not present after relisting, then either it's already been deleted // or there's a deeper problem. @@ -280,6 +302,15 @@ func getVmInfo(logger *zap.Logger, vmStore IndexedVMStore, pod *corev1.Pod) (*ap vmInfo, err := api.ExtractVmInfo(logger, vm) if err != nil { + e.handle.EventRecorder().Eventf( + vm, // regarding + pod, // related + "Warning", // eventtype + "ExtractVmInfo", // reason + action, // action + "Failed to extract autoscaling info about VM: %s", // node + err, + ) return nil, fmt.Errorf("Error extracting VM info: %w", err) } @@ -310,9 +341,11 @@ func (e *AutoscaleEnforcer) PreFilter( state *framework.CycleState, pod *corev1.Pod, ) (_ *framework.PreFilterResult, status *framework.Status) { - e.metrics.pluginCalls.WithLabelValues("PreFilter").Inc() + ignored := e.state.conf.ignoredNamespace(pod.Namespace) + + e.metrics.IncMethodCall("PreFilter", ignored) defer func() { - e.metrics.IncFailIfNotSuccess("PreFilter", status) + e.metrics.IncFailIfNotSuccess("PreFilter", ignored, status) }() return nil, nil @@ -338,11 +371,16 @@ func (e *AutoscaleEnforcer) PostFilter( pod *corev1.Pod, filteredNodeStatusMap framework.NodeToStatusMap, ) (_ *framework.PostFilterResult, status *framework.Status) { - e.metrics.pluginCalls.WithLabelValues("PostFilter").Inc() + ignored := e.state.conf.ignoredNamespace(pod.Namespace) + + e.metrics.IncMethodCall("PostFilter", ignored) defer func() { - e.metrics.IncFailIfNotSuccess("PostFilter", status) + e.metrics.IncFailIfNotSuccess("PostFilter", ignored, status) }() + logger := e.logger.With(zap.String("method", "Filter"), util.PodNameFields(pod)) + logger.Error("Pod rejected by all Filter method calls") + return nil, nil // PostFilterResult is optional, nil Status is success. } @@ -355,9 +393,11 @@ func (e *AutoscaleEnforcer) Filter( pod *corev1.Pod, nodeInfo *framework.NodeInfo, ) (status *framework.Status) { - e.metrics.pluginCalls.WithLabelValues("Filter").Inc() + ignored := e.state.conf.ignoredNamespace(pod.Namespace) + + e.metrics.IncMethodCall("Filter", ignored) defer func() { - e.metrics.IncFailIfNotSuccess("Filter", status) + e.metrics.IncFailIfNotSuccess("Filter", ignored, status) }() nodeName := nodeInfo.Node().Name // TODO: nodes also have namespaces? are they used at all? 
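The relist fallback in `getVmInfo` above follows a common pattern: kick off a refresh of the store, then wait for it with a bounded timeout so a scheduler callback can never hang indefinitely. Below is a minimal sketch of that pattern; the plain channel stands in for `vmStore.Relist()`.

```go
package main

import (
	"fmt"
	"time"
)

// waitForRelist waits for a refresh signal (here modelled as a channel that is
// closed when the refresh finishes), but gives up after a timeout instead of
// blocking forever.
func waitForRelist(relisted <-chan struct{}, timeout time.Duration) error {
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	select {
	case <-relisted:
		return nil
	case <-timer.C:
		return fmt.Errorf("timed out waiting on store relist (timeout = %s)", timeout)
	}
}

func main() {
	relisted := make(chan struct{})
	go func() {
		time.Sleep(10 * time.Millisecond) // pretend the relist takes a little while
		close(relisted)
	}()

	if err := waitForRelist(relisted, 5*time.Second); err != nil {
		fmt.Println("relist failed:", err)
		return
	}
	fmt.Println("store synced; retry the lookup")
}
```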
@@ -365,7 +405,11 @@ func (e *AutoscaleEnforcer) Filter( logger := e.logger.With(zap.String("method", "Filter"), zap.String("node", nodeName), util.PodNameFields(pod)) logger.Info("Handling Filter request") - vmInfo, err := getVmInfo(logger, e.vmStore, pod) + if ignored { + logger.Warn("Received Filter request for pod in ignored namespace, continuing anyways.") + } + + vmInfo, err := e.getVmInfo(logger, pod, "Filter") if err != nil { logger.Error("Error getting VM info for Pod", zap.Error(err)) return framework.NewStatus( @@ -429,17 +473,82 @@ func (e *AutoscaleEnforcer) Filter( otherResources.MarginCPU = node.otherResources.MarginCPU otherResources.MarginMemory = node.otherResources.MarginMemory + // As we process all pods, we should record all the pods that aren't present in both nodeInfo + // and e.state's maps, so that we can log any inconsistencies instead of silently using + // *potentially* bad data. Some differences are expected, but on the whole this extra + // information should be helpful. + missedPods := make(map[util.NamespacedName]struct{}) + for name := range node.pods { + missedPods[name] = struct{}{} + } + for name := range node.otherPods { + missedPods[name] = struct{}{} + } + + var includedIgnoredPods []util.NamespacedName + for _, podInfo := range nodeInfo.Pods { pn := util.NamespacedName{Name: podInfo.Pod.Name, Namespace: podInfo.Pod.Namespace} if podState, ok := e.state.podMap[pn]; ok { totalNodeVCPU += podState.vCPU.Reserved totalNodeMem += podState.memSlots.Reserved + delete(missedPods, pn) } else if otherPodState, ok := e.state.otherPods[pn]; ok { oldRes := otherResources otherResources = oldRes.addPod(&e.state.conf.MemSlotSize, otherPodState.resources) totalNodeVCPU += otherResources.ReservedCPU - oldRes.ReservedCPU totalNodeMem += otherResources.ReservedMemSlots - oldRes.ReservedMemSlots + delete(missedPods, pn) + } else { + name := util.GetNamespacedName(podInfo.Pod) + + if util.PodCompleted(podInfo.Pod) { + logger.Warn( + "Skipping completed Pod in Filter node's pods", + zap.Object("pod", name), + zap.String("phase", string(podInfo.Pod.Status.Phase)), + ) + continue + } + + if !e.state.conf.ignoredNamespace(podInfo.Pod.Namespace) { + // FIXME: this gets us duplicated "pod" fields. Not great. But we're using + // logger.With pretty pervasively, and it's hard to avoid this while using that. + // For now, we can get around this by including the pod name in an error. + logger.Error( + "Unknown-but-not-ignored Pod in Filter node's pods", + zap.Object("pod", name), + zap.Error(fmt.Errorf("Pod %v is unknown but not ignored", name)), + ) + } else { + includedIgnoredPods = append(includedIgnoredPods, name) + } + + // We *also* need to count pods in ignored namespaces + resources, err := extractPodOtherPodResourceState(podInfo.Pod) + if err != nil { + // FIXME: Same duplicate "pod" field issue as above; same temporary solution. 
+ logger.Error( + "Error extracting resource state for non-VM Pod", + zap.Object("pod", name), + zap.Error(fmt.Errorf("Error extracting resource state for %v: %w", name, err)), + ) + continue + } + + oldRes := otherResources + otherResources = oldRes.addPod(&e.state.conf.MemSlotSize, resources) + totalNodeVCPU += otherResources.ReservedCPU - oldRes.ReservedCPU + totalNodeMem += otherResources.ReservedMemSlots - oldRes.ReservedMemSlots + } + } + + if len(missedPods) != 0 { + var missedPodsList []util.NamespacedName + for name := range missedPods { + missedPodsList = append(missedPodsList, name) } + logger.Warn("Some known Pods weren't included in Filter NodeInfo", zap.Objects("missedPods", missedPodsList)) } nodeTotalReservableCPU := node.totalReservableCPU() @@ -506,14 +615,18 @@ func (e *AutoscaleEnforcer) Filter( } var message string + var logFunc func(string, ...zap.Field) if allowing { message = "Allowing Pod" + logFunc = logger.Info } else { message = "Rejecting Pod" + logFunc = logger.Warn } - logger.Info( + logFunc( message, + zap.Objects("includedIgnoredPods", includedIgnoredPods), zap.Object("verdict", verdictSet{ cpu: cpuMsg, mem: memMsg, @@ -545,9 +658,11 @@ func (e *AutoscaleEnforcer) Score( pod *corev1.Pod, nodeName string, ) (_ int64, status *framework.Status) { - e.metrics.pluginCalls.WithLabelValues("Score").Inc() + ignored := e.state.conf.ignoredNamespace(pod.Namespace) + + e.metrics.IncMethodCall("Score", ignored) defer func() { - e.metrics.IncFailIfNotSuccess("Score", status) + e.metrics.IncFailIfNotSuccess("Score", ignored, status) }() logger := e.logger.With(zap.String("method", "Score"), zap.String("node", nodeName), util.PodNameFields(pod)) @@ -555,7 +670,7 @@ func (e *AutoscaleEnforcer) Score( scoreLen := framework.MaxNodeScore - framework.MinNodeScore - vmInfo, err := getVmInfo(logger, e.vmStore, pod) + vmInfo, err := e.getVmInfo(logger, pod, "Score") if err != nil { logger.Error("Error getting VM info for Pod", zap.Error(err)) return 0, framework.NewStatus(framework.Error, "Error getting info for pod") @@ -583,32 +698,122 @@ func (e *AutoscaleEnforcer) Score( (vmInfo.Cpu.Use > node.remainingReservableCPU() || vmInfo.Mem.Use > node.remainingReservableMemSlots()) if noRoom { - return framework.MinNodeScore, nil + score := framework.MinNodeScore + logger.Warn("No room on node, giving minimum score (typically handled by Filter method)", zap.Int64("score", score)) + return score, nil } - totalMilliCpu := int64(node.totalReservableCPU()) - totalMem := int64(node.totalReservableMemSlots()) - maxTotalMilliCpu := int64(e.state.maxTotalReservableCPU) - maxTotalMem := int64(e.state.maxTotalReservableMemSlots) + cpuRemaining := node.remainingReservableCPU() + cpuTotal := node.totalReservableCPU() + memRemaining := node.remainingReservableMemSlots() + memTotal := node.totalReservableMemSlots() + + cpuFraction := 1 - cpuRemaining.AsFloat64()/cpuTotal.AsFloat64() + memFraction := 1 - float64(memRemaining)/float64(memTotal) + cpuScale := node.totalReservableCPU().AsFloat64() / e.state.maxTotalReservableCPU.AsFloat64() + memScale := float64(node.totalReservableMemSlots()) / float64(e.state.maxTotalReservableMemSlots) + + nodeConf := e.state.conf.forNode(nodeName) + + // Refer to the comments in nodeConfig for more. 
Also, see: https://www.desmos.com/calculator/wg8s0yn63s + calculateScore := func(fraction, scale float64) (float64, int64) { + y0 := nodeConf.MinUsageScore + y1 := nodeConf.MaxUsageScore + xp := nodeConf.ScorePeak + + score := float64(1) // if fraction == nodeConf.ScorePeak + if fraction < nodeConf.ScorePeak { + score = y0 + (1-y0)/xp*fraction + } else if fraction > nodeConf.ScorePeak { + score = y1 + (1-y1)/(1-xp)*(1-fraction) + } - // The ordering of multiplying before dividing is intentional; it allows us to get an exact - // result, because scoreLen and total will both be small (i.e. their product fits within an int64) - scoreCpu := framework.MinNodeScore + scoreLen*totalMilliCpu/maxTotalMilliCpu - scoreMem := framework.MinNodeScore + scoreLen*totalMem/maxTotalMem + score *= scale - // return the minimum of the two resources scores - if scoreCpu < scoreMem { - return scoreCpu, nil - } else { - return scoreMem, nil + return score, framework.MinNodeScore + int64(float64(scoreLen)*score) } + + cpuFScore, cpuIScore := calculateScore(cpuFraction, cpuScale) + memFScore, memIScore := calculateScore(memFraction, memScale) + + score := util.Min(cpuIScore, memIScore) + logger.Info( + "Scored pod placement for node", + zap.Int64("score", score), + zap.Object("verdict", verdictSet{ + cpu: fmt.Sprintf( + "%d remaining reservable of %d total => fraction=%g, scale=%g => score=(%g :: %d)", + cpuRemaining, cpuTotal, cpuFraction, cpuScale, cpuFScore, cpuIScore, + ), + mem: fmt.Sprintf( + "%d remaining reservable of %d total => fraction=%g, scale=%g => score=(%g :: %d)", + memRemaining, memTotal, memFraction, memScale, memFScore, memIScore, + ), + }), + ) + + return score, nil } -// ScoreExtensions is required for framework.ScorePlugin, and can return nil if it's not used -func (e *AutoscaleEnforcer) ScoreExtensions() framework.ScoreExtensions { +// NormalizeScore weights scores uniformly in the range [minScore, trueScore], where +// minScore is framework.MinNodeScore + 1. +func (e *AutoscaleEnforcer) NormalizeScore( + ctx context.Context, + state *framework.CycleState, + pod *corev1.Pod, + scores framework.NodeScoreList, +) (status *framework.Status) { + ignored := e.state.conf.ignoredNamespace(pod.Namespace) + + e.metrics.IncMethodCall("NormalizeScore", ignored) + defer func() { + e.metrics.IncFailIfNotSuccess("NormalizeScore", ignored, status) + }() + + logger := e.logger.With(zap.String("method", "NormalizeScore"), util.PodNameFields(pod)) + logger.Info("Handling NormalizeScore request") + + for _, node := range scores { + nodeScore := node.Score + nodeName := node.Name + + // rand.Intn will panic if we pass in 0 + if nodeScore == 0 { + logger.Info("Ignoring node as it was assigned a score of 0", zap.String("node", nodeName)) + continue + } + + // This is different from framework.MinNodeScore. We use framework.MinNodeScore + // to indicate that a pod should not be placed on a node. 
The lowest + // actual score we assign a node is thus framework.MinNodeScore + 1 + minScore := framework.MinNodeScore + 1 + + // We want to pick a score in the range [minScore, score], so use + // score _+ 1_ - minscore, as rand.Intn picks a number in the _half open_ + // range [0, n) + newScore := int64(rand.Intn(int(nodeScore+1-minScore))) + minScore + logger.Info( + "Randomly choosing newScore from range [minScore, trueScore]", + zap.String("node", nodeName), + zap.Int64("newScore", newScore), + zap.Int64("minScore", minScore), + zap.Int64("trueScore", nodeScore), + ) + node.Score = newScore + } return nil } +// ScoreExtensions is required for framework.ScorePlugin, and can return nil if it's not used. +// However, we do use it, to randomize scores. +func (e *AutoscaleEnforcer) ScoreExtensions() framework.ScoreExtensions { + if e.state.conf.RandomizeScores { + return e + } else { + return nil + } +} + // Reserve signals to our plugin that a particular pod will (probably) be bound to a node, giving us // a chance to both (a) reserve the resources it needs within the node and (b) reject the pod if // there aren't enough. @@ -620,9 +825,11 @@ func (e *AutoscaleEnforcer) Reserve( pod *corev1.Pod, nodeName string, ) (status *framework.Status) { - e.metrics.pluginCalls.WithLabelValues("Reserve").Inc() + ignored := e.state.conf.ignoredNamespace(pod.Namespace) + + e.metrics.IncMethodCall("Reserve", ignored) defer func() { - e.metrics.IncFailIfNotSuccess("Reserve", status) + e.metrics.IncFailIfNotSuccess("Reserve", ignored, status) }() pName := util.GetNamespacedName(pod) @@ -633,7 +840,13 @@ func (e *AutoscaleEnforcer) Reserve( logger.Info("Handling Reserve request") - vmInfo, err := getVmInfo(logger, e.vmStore, pod) + if ignored { + // Generally, we shouldn't be getting plugin requests for resources that are ignored. + logger.Warn("Ignoring Reserve request for pod in ignored namespace") + return nil // success; allow the Pod onto the node. 
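// ----- Illustrative sketch (editor's addition, not part of the patch) -----
// The Score method above replaces the old "fuller node => higher score" line with a piecewise
// linear curve that peaks at ScorePeak and is then weighted by the node's share of the largest
// node's capacity. This standalone version of that calculation uses made-up values for
// MinUsageScore (y0), MaxUsageScore (y1) and ScorePeak (xp); they are not defaults from the repo.
package main

import "fmt"

// usageScore is 1.0 at fraction == xp, falls off linearly to y0 at fraction == 0 and to y1 at
// fraction == 1, and is then scaled by the node's capacity relative to the biggest node.
func usageScore(fraction, scale, y0, y1, xp float64) float64 {
	score := 1.0 // value when fraction == xp
	if fraction < xp {
		score = y0 + (1-y0)/xp*fraction
	} else if fraction > xp {
		score = y1 + (1-y1)/(1-xp)*(1-fraction)
	}
	return score * scale
}

func main() {
	// Example: peak at 50% usage, endpoint scores of 0.5 (empty node) and 0.0 (full node), and a
	// node with 80% of the capacity of the largest node in the cluster.
	const scale, y0, y1, xp = 0.8, 0.5, 0.0, 0.5
	for _, frac := range []float64{0, 0.25, 0.5, 0.75, 1} {
		f := usageScore(frac, scale, y0, y1, xp)
		// Mapping to the framework's integer range, assuming the upstream MinNodeScore=0 and
		// MaxNodeScore=100 constants.
		fmt.Printf("fraction=%.2f -> score=%.3f -> int score=%d\n", frac, f, int64(100*f))
	}
}
// ----- end sketch -----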
+ } + + vmInfo, err := e.getVmInfo(logger, pod, "Reserve") if err != nil { logger.Error("Error getting VM info for pod", zap.Error(err)) return framework.NewStatus( @@ -737,12 +950,12 @@ func (e *AutoscaleEnforcer) Reserve( } cpuVerdict := fmt.Sprintf( - "need %v vCPU (%v -> %v raw), have %v available (%s)", - addCpu, &oldNodeRes.RawCPU, &newNodeRes.RawCPU, node.remainingReservableCPU(), cpuShortVerdict, + "need %v (%v -> %v raw), %v of %v used, so %v available (%s)", + addCpu, &oldNodeRes.RawCPU, &newNodeRes.RawCPU, node.vCPU.Reserved, node.totalReservableCPU(), node.remainingReservableCPU(), cpuShortVerdict, ) memVerdict := fmt.Sprintf( - "need %v mem slots (%v -> %v raw), have %d available (%s)", - addMem, &oldNodeRes.RawMemory, &newNodeRes.RawMemory, node.remainingReservableMemSlots(), memShortVerdict, + "need %v (%v -> %v raw), %v of %v used, so %v available (%s)", + addMem, &oldNodeRes.RawMemory, &newNodeRes.RawMemory, node.memSlots.Reserved, node.totalReservableMemSlots(), node.remainingReservableMemSlots(), memShortVerdict, ) logger.Error( @@ -819,8 +1032,14 @@ func (e *AutoscaleEnforcer) Reserve( memShortVerdict = "OK" } - cpuVerdict := fmt.Sprintf("need %v vCPU, have %v available (%s)", vmInfo.Cpu.Use, node.remainingReservableCPU(), cpuShortVerdict) - memVerdict := fmt.Sprintf("need %v mem slots, have %v available (%s)", vmInfo.Mem.Use, node.remainingReservableMemSlots(), memShortVerdict) + cpuVerdict := fmt.Sprintf( + "need %v, %v of %v used, so %v available (%s)", + vmInfo.Cpu.Use, node.vCPU.Reserved, node.totalReservableCPU(), node.remainingReservableCPU(), cpuShortVerdict, + ) + memVerdict := fmt.Sprintf( + "need %v, %v of %v used, so %v available (%s)", + vmInfo.Mem.Use, node.memSlots.Reserved, node.totalReservableMemSlots(), node.remainingReservableMemSlots(), memShortVerdict, + ) logger.Error( "Can't reserve VM pod (not enough resources)", @@ -846,13 +1065,20 @@ func (e *AutoscaleEnforcer) Unreserve( pod *corev1.Pod, nodeName string, ) { - e.metrics.pluginCalls.WithLabelValues("Unreserve").Inc() + ignored := e.state.conf.ignoredNamespace(pod.Namespace) + e.metrics.IncMethodCall("Unreserve", ignored) podName := util.GetNamespacedName(pod) logger := e.logger.With(zap.String("method", "Unreserve"), zap.String("node", nodeName), util.PodNameFields(pod)) logger.Info("Handling Unreserve request") + if ignored { + // Generally, we shouldn't be getting plugin requests for resources that are ignored. 
+ logger.Warn("Ignoring Unreserve request for pod in ignored namespace") + return + } + e.state.lock.Lock() defer e.state.lock.Unlock() @@ -898,7 +1124,7 @@ func (e *AutoscaleEnforcer) Unreserve( ps.node.updateMetrics(e.metrics, e.state.memSlotSizeBytes()) } else { - logger.Warn("Cannot find pod in podMap in otherPods") + logger.Warn("Cannot find pod in podMap or otherPods") return } } diff --git a/pkg/plugin/prommetrics.go b/pkg/plugin/prommetrics.go index 870479d9b..28ef2f46b 100644 --- a/pkg/plugin/prommetrics.go +++ b/pkg/plugin/prommetrics.go @@ -3,6 +3,8 @@ package plugin // defines prometheus metrics and provides the server, via (*AutoscaleEnforcer).startPrometheusServer() import ( + "strconv" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" @@ -18,6 +20,10 @@ type PromMetrics struct { validResourceRequests *prometheus.CounterVec nodeCPUResources *prometheus.GaugeVec nodeMemResources *prometheus.GaugeVec + migrationCreations prometheus.Counter + migrationDeletions *prometheus.CounterVec + migrationCreateFails prometheus.Counter + migrationDeleteFails *prometheus.CounterVec } func (p *AutoscaleEnforcer) makePrometheusRegistry() *prometheus.Registry { @@ -38,14 +44,14 @@ func (p *AutoscaleEnforcer) makePrometheusRegistry() *prometheus.Registry { Name: "autoscaling_plugin_extension_calls_total", Help: "Number of calls to scheduler plugin extension points", }, - []string{"method"}, + []string{"method", "ignored_namespace"}, )), pluginCallFails: util.RegisterMetric(reg, prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "autoscaling_plugin_extension_call_fails_total", Help: "Number of unsuccessful calls to scheduler plugin extension points", }, - []string{"method", "status"}, + []string{"method", "ignored_namespace", "status"}, )), resourceRequests: util.RegisterMetric(reg, prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -66,24 +72,54 @@ func (p *AutoscaleEnforcer) makePrometheusRegistry() *prometheus.Registry { Name: "autoscaling_plugin_node_cpu_resources_current", Help: "Current amount of CPU for 'nodeResourceState' fields", }, - []string{"node", "field"}, + []string{"node", "node_group", "availability_zone", "field"}, )), nodeMemResources: util.RegisterMetric(reg, prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "autoscaling_plugin_node_mem_resources_current", Help: "Current amount of memory (in bytes) for 'nodeResourceState' fields", }, - []string{"node", "field"}, + []string{"node", "node_group", "availability_zone", "field"}, + )), + migrationCreations: util.RegisterMetric(reg, prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "autoscaling_plugin_migrations_created_total", + Help: "Number of successful VirtualMachineMigration Create requests by the plugin", + }, + )), + migrationDeletions: util.RegisterMetric(reg, prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "autoscaling_plugin_migrations_deleted_total", + Help: "Number of successful VirtualMachineMigration Delete requests by the plugin", + }, + []string{"phase"}, + )), + migrationCreateFails: util.RegisterMetric(reg, prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "autoscaling_plugin_migration_create_fails_total", + Help: "Number of failed VirtualMachineMigration Create requests by the plugin", + }, + )), + migrationDeleteFails: util.RegisterMetric(reg, prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "autoscaling_plugin_migration_delete_fails_total", + Help: "Number of failed VirtualMachineMigration Delete requests 
by the plugin", + }, + []string{"phase"}, )), } return reg } -func (m *PromMetrics) IncFailIfNotSuccess(method string, status *framework.Status) { +func (m *PromMetrics) IncMethodCall(method string, ignored bool) { + m.pluginCalls.WithLabelValues(method, strconv.FormatBool(ignored)).Inc() +} + +func (m *PromMetrics) IncFailIfNotSuccess(method string, ignored bool, status *framework.Status) { if !status.IsSuccess() { return } - m.pluginCallFails.WithLabelValues(method, status.Code().String()) + m.pluginCallFails.WithLabelValues(method, strconv.FormatBool(ignored), status.Code().String()) } diff --git a/pkg/plugin/run.go b/pkg/plugin/run.go index 90ec312ba..33870d94b 100644 --- a/pkg/plugin/run.go +++ b/pkg/plugin/run.go @@ -180,11 +180,18 @@ func (e *AutoscaleEnforcer) handleAgentRequest( var migrateDecision *api.MigrateResponse if mustMigrate { - migrateDecision = &api.MigrateResponse{} - err = e.state.startMigration(context.Background(), logger, pod, e.vmClient) + created, err := e.startMigration(context.Background(), logger, pod) if err != nil { return nil, 500, fmt.Errorf("Error starting migration for pod %v: %w", pod.name, err) } + + // We should only signal to the autoscaler-agent that we've started migrating if we actually + // *created* the migration. We're not *supposed* to receive requests for a VM that's already + // migrating, so receiving one means that *something*'s gone wrong. If that's on us, we + // should try to avoid + if created { + migrateDecision = &api.MigrateResponse{} + } } resp := api.PluginResponse{ diff --git a/pkg/plugin/state.go b/pkg/plugin/state.go index 66c5772da..6dc8cedfd 100644 --- a/pkg/plugin/state.go +++ b/pkg/plugin/state.go @@ -19,7 +19,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" vmapi "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" - vmclient "github.com/neondatabase/autoscaling/neonvm/client/clientset/versioned" "github.com/neondatabase/autoscaling/pkg/api" "github.com/neondatabase/autoscaling/pkg/util" @@ -33,6 +32,8 @@ import ( type pluginState struct { lock util.ChanMutex + ongoingMigrationDeletions map[util.NamespacedName]int + podMap map[util.NamespacedName]*podState nodeMap map[string]*nodeState @@ -60,6 +61,12 @@ type nodeState struct { // name is the name of the node, guaranteed by kubernetes to be unique name string + // nodeGroup, if present, gives the node group that this node belongs to. + nodeGroup string + + // availabilityZone, if present, gives the availability zone that this node is in. 
+ availabilityZone string + // vCPU tracks the state of vCPU resources -- what's available and how vCPU nodeResourceState[vmapi.MilliCPU] // memSlots tracks the state of memory slots -- what's available and how @@ -90,7 +97,6 @@ type nodeResourceStateField[T any] struct { func (s *nodeResourceState[T]) fields() []nodeResourceStateField[T] { return []nodeResourceStateField[T]{ {"Total", s.Total}, - {"System", s.System}, {"Watermark", s.Watermark}, {"Reserved", s.Reserved}, {"Buffer", s.Buffer}, @@ -100,15 +106,21 @@ func (s *nodeResourceState[T]) fields() []nodeResourceStateField[T] { } func (s *nodeState) updateMetrics(metrics PromMetrics, memSlotSizeBytes uint64) { - s.vCPU.updateMetrics(metrics.nodeCPUResources, s.name, vmapi.MilliCPU.AsFloat64) - s.memSlots.updateMetrics(metrics.nodeMemResources, s.name, func(memSlots uint16) float64 { + s.vCPU.updateMetrics(metrics.nodeCPUResources, s.name, s.nodeGroup, s.availabilityZone, vmapi.MilliCPU.AsFloat64) + s.memSlots.updateMetrics(metrics.nodeMemResources, s.name, s.nodeGroup, s.availabilityZone, func(memSlots uint16) float64 { return float64(uint64(memSlots) * memSlotSizeBytes) // convert memSlots -> bytes }) } -func (s *nodeResourceState[T]) updateMetrics(metric *prometheus.GaugeVec, nodeName string, convert func(T) float64) { +func (s *nodeResourceState[T]) updateMetrics( + metric *prometheus.GaugeVec, + nodeName string, + nodeGroup string, + availabilityZone string, + convert func(T) float64, +) { for _, f := range s.fields() { - metric.WithLabelValues(nodeName, f.valueName).Set(convert(f.value)) + metric.WithLabelValues(nodeName, nodeGroup, availabilityZone, f.valueName).Set(convert(f.value)) } } @@ -118,7 +130,7 @@ func (s *nodeState) removeMetrics(metrics PromMetrics) { for _, g := range gauges { for _, f := range fields { - g.DeleteLabelValues(s.name, f.valueName) + g.DeleteLabelValues(s.name, s.nodeGroup, s.availabilityZone, f.valueName) } } } @@ -127,23 +139,15 @@ func (s *nodeState) removeMetrics(metrics PromMetrics) { type nodeResourceState[T any] struct { // Total is the Total amount of T available on the node. This value does not change. Total T `json:"total"` - // System is the amount of T pre-reserved for system functions, and cannot be handed out to pods - // on the node. This amount CAN change on config updates, which may result in more of T than - // we'd like being already provided to the pods. - // - // This is equivalent to the value of this resource's resourceConfig.System, rounded up to the - // nearest size of the units of T. - System T `json:"system"` // Watermark is the amount of T reserved to pods above which we attempt to reduce usage via // migration. Watermark T `json:"watermark"` // Reserved is the current amount of T reserved to pods. It SHOULD be less than or equal to - // (Total - System), and we take active measures reduce it once it is above watermark. + // Total), and we take active measures reduce it once it is above Watermark. // // Reserved MAY be greater than Total on scheduler restart (because of buffering with VM scaling // maximums), but (Reserved - Buffer) MUST be less than Total. In general, (Reserved - Buffer) - // SHOULD be less than or equal to (Total - System), but this can be temporarily violated after - // restart or config change. + // SHOULD be less than or equal to Total, but this can be temporarily violated after restart. // // For more information, refer to the ARCHITECTURE.md file in this directory. 
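// ----- Illustrative sketch (editor's addition, not part of the patch) -----
// The per-node gauges above gain node_group and availability_zone labels, populated from the
// K8sNodeGroupLabel / K8sAvailabilityZoneLabel node labels (empty string when missing). Below is
// a minimal standalone example of what one such series looks like; the metric and label names
// match the diff, while the sample values are invented and their units depend on the converter
// passed to updateMetrics.
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	reg := prometheus.NewRegistry()
	nodeCPU := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "autoscaling_plugin_node_cpu_resources_current",
		Help: "Current amount of CPU for 'nodeResourceState' fields",
	}, []string{"node", "node_group", "availability_zone", "field"})
	reg.MustRegister(nodeCPU)

	// One time series per (node, field) pair, now also keyed by group and zone.
	nodeCPU.WithLabelValues("node-a", "workers", "us-east-1a", "Total").Set(8)
	nodeCPU.WithLabelValues("node-a", "workers", "us-east-1a", "Reserved").Set(3.5)

	families, _ := reg.Gather()
	for _, mf := range families {
		fmt.Printf("%s: %d series\n", mf.GetName(), len(mf.GetMetric()))
	}
}
// ----- end sketch -----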
// @@ -183,7 +187,7 @@ type nodeOtherResourceState struct { ReservedMemSlots uint16 `json:"reservedMemSlots"` // MarginCPU and MarginMemory track the amount of other resources we can get "for free" because - // they were left out when rounding the System usage to fit in integer units of CPUs or memory + // they were left out when rounding the Total usage to fit in integer units of CPUs or memory // slots // // These values are both only changed by configuration changes. @@ -377,27 +381,25 @@ func (r *nodeOtherResourceState) calculateReserved(memSlotSize *resource.Quantit } } -// totalReservableCPU returns the amount of node CPU that may be allocated to VM pods -- i.e., -// excluding the CPU pre-reserved for system tasks. +// totalReservableCPU returns the amount of node CPU that may be allocated to VM pods func (s *nodeState) totalReservableCPU() vmapi.MilliCPU { - return s.vCPU.Total - s.vCPU.System + return s.vCPU.Total } -// totalReservableMemSlots returns the number of memory slots that may be allocated to VM pods -- -// i.e., excluding the memory pre-reserved for system tasks. +// totalReservableMemSlots returns the number of memory slots that may be allocated to VM pods func (s *nodeState) totalReservableMemSlots() uint16 { - return s.memSlots.Total - s.memSlots.System + return s.memSlots.Total } // remainingReservableCPU returns the remaining CPU that can be allocated to VM pods func (s *nodeState) remainingReservableCPU() vmapi.MilliCPU { - return s.totalReservableCPU() - s.vCPU.Reserved + return util.SaturatingSub(s.totalReservableCPU(), s.vCPU.Reserved) } // remainingReservableMemSlots returns the remaining number of memory slots that can be allocated to // VM pods func (s *nodeState) remainingReservableMemSlots() uint16 { - return s.totalReservableMemSlots() - s.memSlots.Reserved + return util.SaturatingSub(s.totalReservableMemSlots(), s.memSlots.Reserved) } // tooMuchPressure is used to signal whether the node should start migrating pods out in order to @@ -615,12 +617,32 @@ func buildInitialNodeState(logger *zap.Logger, node *corev1.Node, conf *Config) return nil, fmt.Errorf("Error calculating memory slot limits for node %s: %w", node.Name, err) } + var nodeGroup string + if conf.K8sNodeGroupLabel != "" { + var ok bool + nodeGroup, ok = node.Labels[conf.K8sNodeGroupLabel] + if !ok { + logger.Warn("Node does not have node group label", zap.String("label", conf.K8sNodeGroupLabel)) + } + } + + var availabilityZone string + if conf.K8sAvailabilityZoneLabel != "" { + var ok bool + availabilityZone, ok = node.Labels[conf.K8sAvailabilityZoneLabel] + if !ok { + logger.Warn("Node does not have availability zone label", zap.String("label", conf.K8sAvailabilityZoneLabel)) + } + } + n := &nodeState{ - name: node.Name, - vCPU: vCPU, - memSlots: memSlots, - pods: make(map[util.NamespacedName]*podState), - otherPods: make(map[util.NamespacedName]*otherPodState), + name: node.Name, + nodeGroup: nodeGroup, + availabilityZone: availabilityZone, + vCPU: vCPU, + memSlots: memSlots, + pods: make(map[util.NamespacedName]*podState), + otherPods: make(map[util.NamespacedName]*otherPodState), otherResources: nodeOtherResourceState{ RawCPU: resource.Quantity{}, RawMemory: resource.Quantity{}, @@ -666,28 +688,14 @@ func extractPodOtherPodResourceState(pod *corev1.Pod) (podOtherResourceState, er var cpu resource.Quantity var mem resource.Quantity - for i, container := range pod.Spec.Containers { - // For each resource, use requests if it's provided, or fallback on the limit. 
- - cpuRequest := container.Resources.Requests.Cpu() - cpuLimit := container.Resources.Limits.Cpu() - if cpuRequest.IsZero() && cpuLimit.IsZero() { - err := fmt.Errorf("containers[%d] (%q) missing resources.requests.cpu AND resources.limits.cpu", i, container.Name) - return podOtherResourceState{}, err - } else if cpuRequest.IsZero() /* && !cpuLimit.IsZero() */ { - cpuRequest = cpuLimit - } - cpu.Add(*cpuRequest) - - memRequest := container.Resources.Requests.Memory() - memLimit := container.Resources.Limits.Memory() - if memRequest.IsZero() && memLimit.IsZero() { - err := fmt.Errorf("containers[%d] (%q) missing resources.limits.memory", i, container.Name) - return podOtherResourceState{}, err - } else if memRequest.IsZero() /* && !memLimit.IsZero() */ { - memRequest = memLimit - } - mem.Add(*memRequest) + for _, container := range pod.Spec.Containers { + // For each resource, add the requests, if they're provided. We use this because it matches + // what cluster-autoscaler uses. + // + // NB: .Cpu() returns a pointer to a value equal to zero if the resource is not present. So + // we can just add it either way. + cpu.Add(*container.Resources.Requests.Cpu()) + mem.Add(*container.Resources.Requests.Memory()) } return podOtherResourceState{RawCPU: cpu, RawMemory: mem}, nil @@ -733,6 +741,90 @@ func (e *AutoscaleEnforcer) handleNodeDeletion(logger *zap.Logger, nodeName stri logger.Info("Deleted node") } +func (e *AutoscaleEnforcer) handlePodStarted(logger *zap.Logger, pod *corev1.Pod) { + podName := util.GetNamespacedName(pod) + nodeName := pod.Spec.NodeName + + logger = logger.With( + zap.String("action", "Pod started"), + zap.Object("pod", podName), + zap.String("node", nodeName), + ) + + if pod.Spec.SchedulerName == e.state.conf.SchedulerName { + logger.Info("Got non-VM pod start event for pod assigned to this scheduler; nothing to do") + return + } + + logger.Info("Handling non-VM pod start event") + + podResources, err := extractPodOtherPodResourceState(pod) + if err != nil { + logger.Error("Error extracting resource state for non-VM pod", zap.Error(err)) + return + } + + e.state.lock.Lock() + defer e.state.lock.Unlock() + + if _, ok := e.state.otherPods[podName]; ok { + logger.Info("Pod is already known") // will happen during startup + return + } + + // Pod is not known - let's get information about the node! + node, err := e.state.getOrFetchNodeState(context.TODO(), logger, e.metrics, e.nodeStore, nodeName) + if err != nil { + logger.Error("Failed to state for node", zap.Error(err)) + } + + // TODO: this is pretty similar to the Reserve method. Maybe we should join them into one. 
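// ----- Illustrative sketch (editor's addition, not part of the patch) -----
// The remainingReservable* helpers above switch to util.SaturatingSub because Reserved can
// temporarily exceed Total (e.g. right after a scheduler restart, per the comments in the diff).
// util.SaturatingSub itself isn't shown; presumably it clamps at zero instead of wrapping, along
// the lines of this sketch.
package main

import "fmt"

type unsigned interface {
	~uint | ~uint16 | ~uint32 | ~uint64
}

// saturatingSub returns a - b, or 0 when b > a, avoiding unsigned wrap-around.
func saturatingSub[T unsigned](a, b T) T {
	if b > a {
		return 0
	}
	return a - b
}

func main() {
	var total, reserved uint16 = 16, 20        // reserved > total, as can happen after a restart
	fmt.Println(saturatingSub(total, reserved)) // 0, rather than the wrapped value
	fmt.Println(total - reserved)               // 65532: what plain subtraction would call "remaining"
}
// ----- end sketch -----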
+ oldNodeRes := node.otherResources + newNodeRes := node.otherResources.addPod(&e.state.conf.MemSlotSize, podResources) + + addCPU := newNodeRes.ReservedCPU - oldNodeRes.ReservedCPU + addMem := newNodeRes.ReservedMemSlots - oldNodeRes.ReservedMemSlots + + oldNodeCPUReserved := node.vCPU.Reserved + oldNodeMemReserved := node.memSlots.Reserved + + node.otherResources = newNodeRes + node.vCPU.Reserved += addCPU + node.memSlots.Reserved += addMem + + ps := &otherPodState{ + name: podName, + node: node, + resources: podResources, + } + node.otherPods[podName] = ps + e.state.otherPods[podName] = ps + + cpuVerdict := fmt.Sprintf( + "node reserved %d -> %d / %d, node other resources %d -> %d rounded (%v -> %v raw, %v margin)", + oldNodeCPUReserved, node.vCPU.Reserved, node.vCPU.Total, oldNodeRes.ReservedCPU, newNodeRes.ReservedCPU, &oldNodeRes.RawCPU, &newNodeRes.RawCPU, newNodeRes.MarginCPU, + ) + memVerdict := fmt.Sprintf( + "node reserved %d -> %d / %d, node other resources %d -> %d slots (%v -> %v raw, %v margin)", + oldNodeMemReserved, node.memSlots.Reserved, node.memSlots.Total, oldNodeRes.ReservedMemSlots, newNodeRes.ReservedMemSlots, &oldNodeRes.RawMemory, &newNodeRes.RawMemory, newNodeRes.MarginMemory, + ) + + log := logger.Info + if node.vCPU.Reserved > node.vCPU.Total || node.memSlots.Reserved > node.memSlots.Total { + log = logger.Warn + } + + log( + "Handled new non-VM pod", + zap.Object("verdict", verdictSet{ + cpu: cpuVerdict, + mem: memVerdict, + }), + ) + + node.updateMetrics(e.metrics, e.state.memSlotSizeBytes()) +} + // This method is /basically/ the same as e.Unreserve, but the API is different and it has different // logs, so IMO it's worthwhile to have this separate. func (e *AutoscaleEnforcer) handleVMDeletion(logger *zap.Logger, podName util.NamespacedName) { @@ -986,6 +1078,101 @@ func (e *AutoscaleEnforcer) handleNonAutoscalingUsageChange(logger *zap.Logger, ) } +// NB: expected to be run in its own thread. +func (e *AutoscaleEnforcer) cleanupMigration(logger *zap.Logger, vmm *vmapi.VirtualMachineMigration) { + vmmName := util.GetNamespacedName(vmm) + + logger = logger.With( + // note: use the "virtualmachinemigration" key here for just the name, because it mirrors + // what we log in startMigration. + zap.Object("virtualmachinemigration", vmmName), + // also include the VM, for better association. + zap.Object("virtualmachine", util.NamespacedName{ + Name: vmm.Spec.VmName, + Namespace: vmm.Namespace, + }), + ) + // Failed migrations should be noisy. Everything to do with cleaning up a failed migration + // should be logged at "Warn" or higher. + var logInfo func(string, ...zap.Field) + if vmm.Status.Phase == vmapi.VmmSucceeded { + logInfo = logger.Info + } else { + logInfo = logger.Warn + } + logInfo( + "Going to delete VirtualMachineMigration", + // Explicitly include "phase" here because we have metrics for it. + zap.String("phase", string(vmm.Status.Phase)), + // ... and then log the rest of the information about the migration: + zap.Any("spec", vmm.Spec), + zap.Any("status", vmm.Status), + ) + + // mark the operation as ongoing + func() { + e.state.lock.Lock() + defer e.state.lock.Unlock() + + newCount := e.state.ongoingMigrationDeletions[vmmName] + 1 + if newCount != 1 { + // context included by logger + logger.Error( + "More than one ongoing deletion for VirtualMachineMigration", + zap.Int("count", newCount), + ) + } + e.state.ongoingMigrationDeletions[vmmName] = newCount + }() + // ... 
and remember to clean up when we're done: + defer func() { + e.state.lock.Lock() + defer e.state.lock.Unlock() + + newCount := e.state.ongoingMigrationDeletions[vmmName] - 1 + if newCount == 0 { + delete(e.state.ongoingMigrationDeletions, vmmName) + } else { + // context included by logger + logger.Error( + "More than one ongoing deletion for VirtualMachineMigration", + zap.Int("count", newCount), + ) + e.state.ongoingMigrationDeletions[vmmName] = newCount + } + }() + + // Continually retry the operation, until we're successful (or the VM doesn't exist anymore) + + retryWait := time.Second * time.Duration(e.state.conf.MigrationDeletionRetrySeconds) + + for { + logInfo("Attempting to delete VirtualMachineMigration") + err := e.vmClient.NeonvmV1(). + VirtualMachineMigrations(vmmName.Namespace). + Delete(context.TODO(), vmmName.Name, metav1.DeleteOptions{}) + if err == nil /* NB! This condition is inverted! */ { + logInfo("Successfully deleted VirtualMachineMigration") + e.metrics.migrationDeletions.WithLabelValues(string(vmm.Status.Phase)).Inc() + return + } else if apierrors.IsNotFound(err) { + logger.Warn("Deletion was handled for us; VirtualMachineMigration no longer exists") + return + } + + logger.Error( + "Failed to delete VirtualMachineMigration, will try again after delay", + zap.Duration("delay", retryWait), + zap.Error(err), + ) + e.metrics.migrationDeleteFails.WithLabelValues(string(vmm.Status.Phase)).Inc() + + // retry after a delay + time.Sleep(retryWait) + continue + } +} + func (s *podState) isBetterMigrationTarget(other *podState) bool { // TODO: this deprioritizes VMs whose metrics we can't collect. Maybe we don't want that? if s.metrics == nil || other.metrics == nil { @@ -1000,14 +1187,14 @@ func (s *podState) isBetterMigrationTarget(other *podState) bool { // send requests to the API server // // A lock will ALWAYS be held on return from this function. -func (s *pluginState) startMigration(ctx context.Context, logger *zap.Logger, pod *podState, vmClient *vmclient.Clientset) error { +func (e *AutoscaleEnforcer) startMigration(ctx context.Context, logger *zap.Logger, pod *podState) (created bool, _ error) { if pod.currentlyMigrating() { - return fmt.Errorf("Pod is already migrating") + return false, fmt.Errorf("Pod is already migrating") } // Unlock to make the API request(s), then make sure we're locked on return. - s.lock.Unlock() - defer s.lock.Lock() + e.state.lock.Unlock() + defer e.state.lock.Lock() vmmName := util.NamespacedName{ Name: fmt.Sprintf("schedplugin-%s", pod.vmName.Name), @@ -1024,17 +1211,23 @@ func (s *pluginState) startMigration(ctx context.Context, logger *zap.Logger, po // We technically don't *need* this additional request here (because we can check the return // from the Create request with apierrors.IsAlreadyExists). However: the benefit we get from // this is that the logs are significantly clearer. - _, err := vmClient.NeonvmV1(). + _, err := e.vmClient.NeonvmV1(). VirtualMachineMigrations(pod.name.Namespace). Get(ctx, vmmName.Name, metav1.GetOptions{}) if err == nil { logger.Warn("VirtualMachineMigration already exists, nothing to do") - return nil + return false, nil } else if !apierrors.IsNotFound(err) { // We're *expecting* to get IsNotFound = true; if err != nil and isn't NotFound, then // there's some unexpected error. 
logger.Error("Unexpected error doing Get request to check if migration already exists", zap.Error(err)) - return fmt.Errorf("Error checking if migration exists: %w", err) + return false, fmt.Errorf("Error checking if migration exists: %w", err) + } + + gitVersion := util.GetBuildInfo().GitInfo + // FIXME: make this not depend on GetBuildInfo() internals. + if gitVersion == "" { + gitVersion = "unknown" } vmm := &vmapi.VirtualMachineMigration{ @@ -1043,6 +1236,14 @@ func (s *pluginState) startMigration(ctx context.Context, logger *zap.Logger, po // should do if that happens. Name: vmmName.Name, Namespace: pod.name.Namespace, + Labels: map[string]string{ + // NB: There's requirements on what constitutes a valid label. Thankfully, the + // output of `git describe` always will. + // + // See also: + // https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set + LabelPluginCreatedMigration: gitVersion, + }, }, Spec: vmapi.VirtualMachineMigrationSpec{ VmName: pod.vmName.Name, @@ -1059,15 +1260,17 @@ func (s *pluginState) startMigration(ctx context.Context, logger *zap.Logger, po } logger.Info("Migration doesn't already exist, creating one for VM", zap.Any("spec", vmm.Spec)) - _, err = vmClient.NeonvmV1().VirtualMachineMigrations(pod.name.Namespace).Create(ctx, vmm, metav1.CreateOptions{}) + _, err = e.vmClient.NeonvmV1().VirtualMachineMigrations(pod.name.Namespace).Create(ctx, vmm, metav1.CreateOptions{}) if err != nil { + e.metrics.migrationCreateFails.Inc() // log here, while the logger's fields are in scope logger.Error("Unexpected error doing Create request for new migration", zap.Error(err)) - return fmt.Errorf("Error creating migration: %w", err) + return false, fmt.Errorf("Error creating migration: %w", err) } + e.metrics.migrationCreations.Inc() logger.Info("VM migration request successful") - return nil + return true, nil } // readClusterState sets the initial node and pod maps for the plugin's state, getting its @@ -1154,8 +1357,8 @@ func (p *AutoscaleEnforcer) readClusterState(ctx context.Context, logger *zap.Lo skippedVms += 1 } - if pod.Spec.SchedulerName != p.state.conf.SchedulerName { - logSkip("Spec.SchedulerName %q != our config.SchedulerName %q", pod.Spec.SchedulerName, p.state.conf.SchedulerName) + if p.state.conf.ignoredNamespace(pod.Namespace) { + logSkip("VM is in ignored namespace") continue } else if pod.Spec.NodeName == "" { logSkip("VM pod's Spec.NodeName = \"\" (maybe it hasn't been scheduled yet?)") @@ -1236,7 +1439,9 @@ func (p *AutoscaleEnforcer) readClusterState(ctx context.Context, logger *zap.Lo oldNodeMemBuffer := ns.memSlots.Buffer ns.vCPU.Reserved += ps.vCPU.Reserved + ns.vCPU.Buffer += ps.vCPU.Buffer ns.memSlots.Reserved += ps.memSlots.Reserved + ns.memSlots.Buffer += ps.memSlots.Buffer cpuVerdict := fmt.Sprintf( "pod = %v/%v (node %v -> %v / %v, %v -> %v buffer)", @@ -1255,6 +1460,8 @@ func (p *AutoscaleEnforcer) readClusterState(ctx context.Context, logger *zap.Lo }), ) + ns.updateMetrics(p.metrics, p.state.memSlotSizeBytes()) + ns.pods[podName] = ps p.state.podMap[podName] = ps } @@ -1288,8 +1495,8 @@ func (p *AutoscaleEnforcer) readClusterState(ctx context.Context, logger *zap.Lo if _, ok := p.state.podMap[podName]; ok { continue - } else if pod.Spec.SchedulerName != p.state.conf.SchedulerName { - logSkip("Spec.SchedulerName %q != our config.SchedulerName %q", pod.Spec.SchedulerName, p.state.conf.SchedulerName) + } else if p.state.conf.ignoredNamespace(pod.Namespace) { + logSkip("non-VM pod is in ignored 
namespace") continue } @@ -1347,6 +1554,8 @@ func (p *AutoscaleEnforcer) readClusterState(ctx context.Context, logger *zap.Lo }), ) + ns.updateMetrics(p.metrics, p.state.memSlotSizeBytes()) + ns.otherPods[podName] = ps p.state.otherPods[podName] = ps } diff --git a/pkg/plugin/trans.go b/pkg/plugin/trans.go index 9734f40ba..6cc6482a2 100644 --- a/pkg/plugin/trans.go +++ b/pkg/plugin/trans.go @@ -85,7 +85,7 @@ func (s verdictSet) MarshalLogObject(enc zapcore.ObjectEncoder) error { // // A pretty-formatted summary of the outcome is returned as the verdict, for logging. func (r resourceTransition[T]) handleRequested(requested T, startingMigration bool, onlyThousands bool) (verdict string) { - totalReservable := r.node.Total - r.node.System + totalReservable := r.node.Total // note: it's possible to temporarily have reserved > totalReservable, after loading state or // config change; we have to use SaturatingSub here to account for that. remainingReservable := util.SaturatingSub(totalReservable, r.oldNode.reserved) @@ -184,15 +184,20 @@ func (r resourceTransition[T]) handleRequested(requested T, startingMigration bo } fmtString := "Register %d%s -> %d%s (pressure %d -> %d); " + - "node reserved %d -> %d (of %d), " + + "node reserved %d%s -> %d%s (of %d), " + "node capacityPressure %d -> %d (%d -> %d spoken for)" - var buffer string + var podBuffer string + var oldNodeBuffer string + var newNodeBuffer string if r.pod.Buffer != 0 { - buffer = fmt.Sprintf(" (buffer %d)", r.pod.Buffer) + podBuffer = fmt.Sprintf(" [buffer %d]", r.pod.Buffer) + oldNodeBuffer = fmt.Sprintf(" [buffer %d]", r.oldNode.buffer) r.node.Buffer -= r.pod.Buffer r.pod.Buffer = 0 + + newNodeBuffer = fmt.Sprintf(" [buffer %d]", r.node.Buffer) } var wanted string @@ -203,9 +208,9 @@ func (r resourceTransition[T]) handleRequested(requested T, startingMigration bo verdict = fmt.Sprintf( fmtString, // Register %d%s -> %d%s (pressure %d -> %d) - r.oldPod.reserved, buffer, r.pod.Reserved, wanted, r.oldPod.capacityPressure, r.pod.CapacityPressure, - // node reserved %d -> %d (of %d) - r.oldNode.reserved, r.node.Reserved, totalReservable, + r.oldPod.reserved, podBuffer, r.pod.Reserved, wanted, r.oldPod.capacityPressure, r.pod.CapacityPressure, + // node reserved %d%s -> %d%s (of %d) + r.oldNode.reserved, oldNodeBuffer, r.node.Reserved, newNodeBuffer, totalReservable, // node capacityPressure %d -> %d (%d -> %d spoken for) r.oldNode.capacityPressure, r.node.CapacityPressure, r.oldNode.pressureAccountedFor, r.node.PressureAccountedFor, ) @@ -223,12 +228,23 @@ func (r resourceTransition[T]) handleDeleted(currentlyMigrating bool) (verdict s r.node.PressureAccountedFor -= r.pod.Reserved + r.pod.CapacityPressure } - fmtString := "pod had %d; node reserved %d -> %d, " + + var podBuffer string + var oldNodeBuffer string + var newNodeBuffer string + if r.pod.Buffer != 0 { + r.node.Buffer -= r.pod.Buffer + + podBuffer = fmt.Sprintf(" [buffer %d]", r.pod.Buffer) + oldNodeBuffer = fmt.Sprintf(" [buffer %d]", r.oldNode.buffer) + newNodeBuffer = fmt.Sprintf(" [buffer %d]", r.node.Buffer) + } + + fmtString := "pod had %d%s; node reserved %d%s -> %d%s, " + "node capacityPressure %d -> %d (%d -> %d spoken for)" verdict = fmt.Sprintf( fmtString, - // pod had %d; node reserved %d -> %d - r.pod.Reserved, r.oldNode.reserved, r.node.Reserved, + // pod had %d%s; node reserved %d%s -> %d%s + r.pod.Reserved, podBuffer, r.oldNode.reserved, oldNodeBuffer, r.node.Reserved, newNodeBuffer, // node capacityPressure %d -> %d (%d -> %d spoken for) 
r.oldNode.capacityPressure, r.node.CapacityPressure, r.oldNode.pressureAccountedFor, r.node.PressureAccountedFor, ) @@ -261,14 +277,19 @@ func (r resourceTransition[T]) handleAutoscalingDisabled() (verdict string) { r.node.CapacityPressure -= r.pod.CapacityPressure r.pod.CapacityPressure = 0 + var nodeBufferChange string + if r.oldPod.buffer != 0 { + nodeBufferChange = fmt.Sprintf(" [buffer %d -> %d]", r.oldNode.buffer, r.node.Buffer) + } + fmtString := "pod had buffer %d, capacityPressure %d; " + - "node reserved %d -> %d, capacityPressure %d -> %d" + "node reserved %d -> %d%s, capacityPressure %d -> %d" verdict = fmt.Sprintf( fmtString, // pod had buffer %d, capacityPressure %d; r.oldPod.buffer, r.oldPod.capacityPressure, - // node reserved %d -> %d, capacityPressure %d -> %d - r.oldNode.reserved, r.node.Reserved, r.oldNode.capacityPressure, r.node.CapacityPressure, + // node reserved %d -> %d%s, capacityPressure %d -> %d + r.oldNode.reserved, r.node.Reserved, nodeBufferChange, r.oldNode.capacityPressure, r.node.CapacityPressure, ) return verdict } diff --git a/pkg/plugin/watch.go b/pkg/plugin/watch.go index de1b649ad..e28055d29 100644 --- a/pkg/plugin/watch.go +++ b/pkg/plugin/watch.go @@ -63,6 +63,7 @@ func (e *AutoscaleEnforcer) watchNodeEvents( } type podWatchCallbacks struct { + submitPodStarted func(*zap.Logger, *corev1.Pod) submitVMDeletion func(*zap.Logger, util.NamespacedName) submitPodDeletion func(*zap.Logger, util.NamespacedName) submitPodStartMigration func(_ *zap.Logger, podName, migrationName util.NamespacedName, source bool) @@ -81,10 +82,10 @@ func (e *AutoscaleEnforcer) watchPodEvents( parentLogger *zap.Logger, metrics watch.Metrics, callbacks podWatchCallbacks, -) error { +) (*watch.Store[corev1.Pod], error) { logger := parentLogger.Named("pod-watch") - _, err := watch.Watch( + return watch.Watch( ctx, logger.Named("watch"), e.handle.ClientSet().CoreV1().Pods(corev1.NamespaceAll), @@ -106,14 +107,49 @@ func (e *AutoscaleEnforcer) watchPodEvents( watch.InitModeSync, // note: doesn't matter, because AddFunc = nil. metav1.ListOptions{}, watch.HandlerFuncs[*corev1.Pod]{ + AddFunc: func(pod *corev1.Pod, preexisting bool) { + name := util.GetNamespacedName(pod) + + if e.state.conf.ignoredNamespace(pod.Namespace) { + logger.Info("Received add event for ignored pod", zap.Object("pod", name)) + return + } + + _, isVM := pod.Labels[LabelVM] + + // Generate events for all non-VM pods that are running + if !isVM && pod.Status.Phase == corev1.PodRunning { + if !preexisting { + // Generally pods shouldn't be immediately running, so we log this as a + // warning. If it was preexisting, then it'll be handled on the initial + // cluster read already (but we generate the events anyways so that we + // definitely don't miss anything). + logger.Warn("Received add event for new non-VM pod already running", zap.Object("pod", name)) + } + callbacks.submitPodStarted(logger, pod) + } + }, UpdateFunc: func(oldPod *corev1.Pod, newPod *corev1.Pod) { name := util.GetNamespacedName(newPod) + if e.state.conf.ignoredNamespace(newPod.Namespace) { + logger.Info("Received update event for ignored pod", zap.Object("pod", name)) + return + } + + _, isVM := newPod.Labels[LabelVM] + + // Check if a non-VM pod is now running. 
+ if !isVM && oldPod.Status.Phase == corev1.PodPending && newPod.Status.Phase == corev1.PodRunning { + logger.Info("Received update event for non-VM pod now running", zap.Object("pod", name)) + callbacks.submitPodStarted(logger, newPod) + } + // Check if pod is "completed" - handle that the same as deletion. if !util.PodCompleted(oldPod) && util.PodCompleted(newPod) { logger.Info("Received update event for completion of pod", zap.Object("pod", name)) - if _, ok := newPod.Labels[LabelVM]; ok { + if isVM { callbacks.submitVMDeletion(logger, name) } else { callbacks.submitPodDeletion(logger, name) @@ -136,6 +172,11 @@ func (e *AutoscaleEnforcer) watchPodEvents( DeleteFunc: func(pod *corev1.Pod, mayBeStale bool) { name := util.GetNamespacedName(pod) + if e.state.conf.ignoredNamespace(pod.Namespace) { + logger.Info("Received delete event for ignored pod", zap.Object("pod", name)) + return + } + if util.PodCompleted(pod) { logger.Info("Received delete event for completed pod", zap.Object("pod", name)) } else { @@ -149,7 +190,6 @@ func (e *AutoscaleEnforcer) watchPodEvents( }, }, ) - return err } // tryMigrationOwnerReference returns the name of the owning migration, if this pod *is* owned by a @@ -228,6 +268,7 @@ func (e *AutoscaleEnforcer) watchVMEvents( parentLogger *zap.Logger, metrics watch.Metrics, callbacks vmWatchCallbacks, + podIndex watch.IndexedStore[corev1.Pod, *watch.NameIndex[corev1.Pod]], ) (*watch.Store[vmapi.VirtualMachine], error) { logger := parentLogger.Named("vm-watch") @@ -252,14 +293,39 @@ func (e *AutoscaleEnforcer) watchVMEvents( metav1.ListOptions{}, watch.HandlerFuncs[*vmapi.VirtualMachine]{ UpdateFunc: func(oldVM, newVM *vmapi.VirtualMachine) { - oldInfo, err := api.ExtractVmInfo(logger, oldVM) - if err != nil { - logger.Error("Failed to extract VM info in update for old VM", util.VMNameFields(oldVM), zap.Error(err)) + if e.state.conf.ignoredNamespace(newVM.Namespace) { + logger.Info("Received update event for ignored VM", util.VMNameFields(newVM)) return } + newInfo, err := api.ExtractVmInfo(logger, newVM) if err != nil { + // Try to get the runner pod associated with the VM, if we can, but don't worry + // about it if we can't. + var runnerPod *corev1.Pod + if podName := newVM.Status.PodName; podName != "" { + // NB: index.Get returns nil if not found, so we only have a non-nil + // runnerPod if it's currently known. + runnerPod, _ = podIndex.GetIndexed(func(index *watch.NameIndex[corev1.Pod]) (*corev1.Pod, bool) { + return index.Get(newVM.Namespace, podName) + }) + } + logger.Error("Failed to extract VM info in update for new VM", util.VMNameFields(newVM), zap.Error(err)) + e.handle.EventRecorder().Eventf( + newVM, // regarding + runnerPod, // related + "Warning", // eventtype + "ExtractVmInfo", // reason + "HandleVmUpdate", // action + "Failed to extract autoscaling info about VM: %s", // note + err, + ) + return + } + oldInfo, err := api.ExtractVmInfo(logger, oldVM) + if err != nil { + logger.Error("Failed to extract VM info in update for old VM", util.VMNameFields(oldVM), zap.Error(err)) return } @@ -298,3 +364,71 @@ func (e *AutoscaleEnforcer) watchVMEvents( }, ) } + +type migrationWatchCallbacks struct { + submitMigrationFinished func(*vmapi.VirtualMachineMigration) +} + +// watchMigrationEvents *only* looks at migrations that were created by the scheduler plugin (or a +// previous version of it). 
+// +// We use this to trigger cleaning up migrations once they're finished, because they don't +// auto-delete, and our deterministic naming means that each we won't be able to create a new +// migration for the same VM until the old one's gone. +// +// Tracking whether a migration was created by the scheduler plugin is done by adding the label +// 'autoscaling.neon.tech/created-by-scheduler' to every migration we create. +func (e *AutoscaleEnforcer) watchMigrationEvents( + ctx context.Context, + parentLogger *zap.Logger, + metrics watch.Metrics, + callbacks migrationWatchCallbacks, +) (*watch.Store[vmapi.VirtualMachineMigration], error) { + logger := parentLogger.Named("vmm-watch") + + return watch.Watch( + ctx, + logger.Named("watch"), + e.vmClient.NeonvmV1().VirtualMachineMigrations(corev1.NamespaceAll), + watch.Config{ + ObjectNameLogField: "virtualmachinemigration", + Metrics: watch.MetricsConfig{ + Metrics: metrics, + Instance: "VirtualMachineMigrations", + }, + // FIXME: make these durations configurable. + RetryRelistAfter: util.NewTimeRange(time.Second, 3, 5), + RetryWatchAfter: util.NewTimeRange(time.Second, 3, 5), + }, + watch.Accessors[*vmapi.VirtualMachineMigrationList, vmapi.VirtualMachineMigration]{ + Items: func(list *vmapi.VirtualMachineMigrationList) []vmapi.VirtualMachineMigration { return list.Items }, + }, + watch.InitModeSync, + metav1.ListOptions{ + // NB: Including just the label itself means that we select for objects that *have* the + // label, without caring about the actual value. + // + // See also: + // https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#set-based-requirement + LabelSelector: LabelPluginCreatedMigration, + }, + watch.HandlerFuncs[*vmapi.VirtualMachineMigration]{ + UpdateFunc: func(oldObj, newObj *vmapi.VirtualMachineMigration) { + if e.state.conf.ignoredNamespace(newObj.Namespace) { + logger.Info( + "Received update event for ignored VM Migration", + zap.Object("virtualmachinemigration", util.GetNamespacedName(newObj)), + ) + return + } + + shouldDelete := newObj.Status.Phase != oldObj.Status.Phase && + (newObj.Status.Phase == vmapi.VmmSucceeded || newObj.Status.Phase == vmapi.VmmFailed) + + if shouldDelete { + callbacks.submitMigrationFinished(newObj) + } + }, + }, + ) +} diff --git a/pkg/util/buildinfo.go b/pkg/util/buildinfo.go index c69e9b02d..207813e08 100644 --- a/pkg/util/buildinfo.go +++ b/pkg/util/buildinfo.go @@ -33,6 +33,8 @@ func GetBuildInfo() BuildInfo { } } + // FIXME: the "" string is depended upon by the plugin's VirtualMachineMigration + // creation process. We should expose something better here. 
gitInfo := BuildGitInfo if BuildGitInfo == "" { gitInfo = "" diff --git a/pkg/util/signal.go b/pkg/util/signal.go index f83707d22..45b9b691f 100644 --- a/pkg/util/signal.go +++ b/pkg/util/signal.go @@ -7,32 +7,39 @@ import ( "sync" ) -func NewSingleSignalPair() (SignalSender, SignalReceiver) { - sigCh := make(chan struct{}) +func NewSingleSignalPair[T any]() (SignalSender[T], SignalReceiver[T]) { + sigCh := make(chan T, 1) once := &sync.Once{} closeSigCh := func() { once.Do(func() { close(sigCh) }) } - return SignalSender{send: closeSigCh}, SignalReceiver{sigCh: sigCh, closeSigCh: closeSigCh} + return SignalSender[T]{ + send: func(data T) { + once.Do(func() { + sigCh <- data + close(sigCh) + }) + }, + }, SignalReceiver[T]{sigCh: sigCh, closeSigCh: closeSigCh} } -type SignalSender struct { - send func() +type SignalSender[T any] struct { + send func(T) } -type SignalReceiver struct { - sigCh chan struct{} +type SignalReceiver[T any] struct { + sigCh chan T closeSigCh func() } -func (s SignalSender) Send() { - s.send() +func (s SignalSender[T]) Send(data T) { + s.send(data) } -func (s SignalReceiver) Recv() chan struct{} { +func (s SignalReceiver[T]) Recv() <-chan T { return s.sigCh } -func (s SignalReceiver) Close() { +func (s SignalReceiver[T]) Close() { s.closeSigCh() } diff --git a/pkg/util/watch/watch.go b/pkg/util/watch/watch.go index 859c38b96..d0c9ee080 100644 --- a/pkg/util/watch/watch.go +++ b/pkg/util/watch/watch.go @@ -146,7 +146,7 @@ func Watch[C Client[L], L metav1.ListMetaAccessor, T any, P Object[T]]( // the initial list opts.ResourceVersion = initialList.GetListMeta().GetResourceVersion() - sendStop, stopSignal := util.NewSingleSignalPair() + sendStop, stopSignal := util.NewSingleSignalPair[struct{}]() store := Store[T]{ mutex: sync.Mutex{}, @@ -356,6 +356,7 @@ func Watch[C Client[L], L metav1.ListMetaAccessor, T any, P Object[T]]( retryAfter := config.RetryRelistAfter.Random() logger.Info("Retrying relist after delay", zap.Duration("delay", retryAfter)) + store.failing.Store(true) config.Metrics.failing() select { @@ -371,6 +372,7 @@ func Watch[C Client[L], L metav1.ListMetaAccessor, T any, P Object[T]]( } } + store.failing.Store(false) config.Metrics.unfailing() // err == nil, process relistList @@ -452,6 +454,7 @@ func Watch[C Client[L], L metav1.ListMetaAccessor, T any, P Object[T]]( retryAfter := config.RetryWatchAfter.Random() logger.Info("Retrying re-watch after delay", zap.Duration("delay", retryAfter)) + store.failing.Store(true) config.Metrics.failing() select { @@ -468,6 +471,7 @@ func Watch[C Client[L], L metav1.ListMetaAccessor, T any, P Object[T]]( } // err == nil + store.failing.Store(false) config.Metrics.unfailing() break newWatcher } @@ -554,8 +558,9 @@ type Store[T any] struct { nextIndexID uint64 indexes map[uint64]Index[T] - stopSignal util.SignalSender + stopSignal util.SignalSender[struct{}] stopped atomic.Bool + failing atomic.Bool } // Relist triggers re-listing the WatchStore, returning a channel that will be closed once the @@ -573,10 +578,14 @@ func (w *Store[T]) Relist() <-chan struct{} { } func (w *Store[T]) Stop() { - w.stopSignal.Send() + w.stopSignal.Send(struct{}{}) w.stopped.Store(true) } +func (w *Store[T]) Failing() bool { + return w.failing.Load() +} + func (w *Store[T]) Stopped() bool { return w.stopped.Load() } diff --git a/tests/e2e/autoscaling/00-create-vm.yaml b/tests/e2e/autoscaling/00-create-vm.yaml index b9152e95a..006406785 100644 --- a/tests/e2e/autoscaling/00-create-vm.yaml +++ b/tests/e2e/autoscaling/00-create-vm.yaml @@ 
-51,7 +51,7 @@ spec: port: 5432 - name: host-metrics port: 9100 - - name: informant + - name: monitor port: 10301 extraNetwork: enable: true diff --git a/vm-deploy.yaml b/vm-deploy.yaml index d7a69403c..93fa211cd 100644 --- a/vm-deploy.yaml +++ b/vm-deploy.yaml @@ -23,4 +23,4 @@ spec: - port: 22 # ssh - port: 5432 # postgres - port: 9100 # metrics - - port: 10301 # informant + - port: 10301 # monitor diff --git a/vm-examples/pg14-disk-test/Dockerfile.vmdata b/vm-examples/pg14-disk-test/Dockerfile.vmdata index ff4d04b79..f15bd1ada 100644 --- a/vm-examples/pg14-disk-test/Dockerfile.vmdata +++ b/vm-examples/pg14-disk-test/Dockerfile.vmdata @@ -1,5 +1,5 @@ -FROM vm-informant:dev as informant -# ^ don't do anything with this; we just want it around for later use. +FROM vm-monitor:dev as monitor +# ^ don't do anything with these; we just want it around for later use. # Build the allocation tester: FROM alpine:3.16 AS allocate-loop-builder @@ -33,12 +33,10 @@ RUN echo '::sysinit:/usr/sbin/cgconfigparser -l /etc/cgconfig.conf -s 1664' >> / # Add the allocate-loop tester COPY --from=allocate-loop-builder /bin/allocate-loop /bin/allocate-loop -# Add the vm-informant -COPY --from=informant /usr/bin/vm-informant /bin/vm-informant -RUN adduser vm-informant --disabled-password --no-create-home -# note: Use 'respawn' and '--auto-restart' so that the logs are noisy if the arguments are bad, -# but we still have proper handling around cgroups, etc. -RUN echo "::respawn:su vm-informant -c '/bin/vm-informant --auto-restart --cgroup=neon-test'" >> /etc/inittab +# Add the vm-monitor +COPY --from=monitor /usr/bin/vm-monitor /bin/vm-monitor +RUN adduser vm-monitor --disabled-password --no-create-home +RUN echo "::respawn:su vm-monitor -c 'RUST_LOG=info /bin/vm-monitor --cgroup=neon-test --addr=\"0.0.0.0:10301\"'" >> /etc/inittab # Install vector.dev binary RUN set -e \ diff --git a/vm-examples/pg14-disk-test/cgconfig.conf b/vm-examples/pg14-disk-test/cgconfig.conf index 290630bca..24514a3ac 100644 --- a/vm-examples/pg14-disk-test/cgconfig.conf +++ b/vm-examples/pg14-disk-test/cgconfig.conf @@ -4,7 +4,7 @@ group neon-test { perm { admin { - uid = vm-informant; + uid = vm-monitor; } task { gid = users; diff --git a/vm-examples/postgres-minimal/Dockerfile b/vm-examples/postgres-minimal/Dockerfile index 84d406471..0ce897bfa 100644 --- a/vm-examples/postgres-minimal/Dockerfile +++ b/vm-examples/postgres-minimal/Dockerfile @@ -1,5 +1,5 @@ -FROM vm-informant:dev as informant -# ^ don't do anything with this; we just want it around for later use. +FROM vm-monitor:dev as monitor +# ^ don't do anything with these; we just want it around for later use. FROM postgres:15-bullseye @@ -18,9 +18,7 @@ RUN set -e \ ADD cgconfig.conf /etc/cgconfig.conf RUN echo '::sysinit:/usr/sbin/cgconfigparser -l /etc/cgconfig.conf -s 1664' >> /etc/inittab -# Add the vm-informant -COPY --from=informant /usr/bin/vm-informant /bin/vm-informant -RUN adduser vm-informant --disabled-password --no-create-home -# note: Use 'respawn' and '--auto-restart' so that the logs are noisy if the arguments are bad, -# but we still have proper handling around cgroups, etc. 
-RUN echo "::respawn:su vm-informant -c '/bin/vm-informant --auto-restart --cgroup=neon-test'" >> /etc/inittab +# Add the vm-monitor +COPY --from=monitor /usr/bin/vm-monitor /bin/vm-monitor +RUN adduser vm-monitor --disabled-password --no-create-home +RUN echo "::respawn:su vm-monitor -c 'RUST_LOG=info /bin/vm-monitor --cgroup=neon-test'" >> /etc/inittab diff --git a/vm-examples/postgres-minimal/cgconfig.conf b/vm-examples/postgres-minimal/cgconfig.conf index 290630bca..24514a3ac 100644 --- a/vm-examples/postgres-minimal/cgconfig.conf +++ b/vm-examples/postgres-minimal/cgconfig.conf @@ -4,7 +4,7 @@ group neon-test { perm { admin { - uid = vm-informant; + uid = vm-monitor; } task { gid = users;
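// ----- Illustrative sketch (editor's addition, not part of the patch) -----
// Usage of the now-generic single-use signal pair from the pkg/util/signal.go hunk above. The
// type definitions are reproduced from the diff so the example compiles on its own; the string
// payload is made up for illustration -- in this patch, the watch package instantiates the pair
// with struct{} to keep the old "signal only" behaviour.
package main

import (
	"fmt"
	"sync"
)

type SignalSender[T any] struct{ send func(T) }

type SignalReceiver[T any] struct {
	sigCh      chan T
	closeSigCh func()
}

func NewSingleSignalPair[T any]() (SignalSender[T], SignalReceiver[T]) {
	sigCh := make(chan T, 1)
	once := &sync.Once{}
	closeSigCh := func() { once.Do(func() { close(sigCh) }) }
	return SignalSender[T]{
		send: func(data T) {
			once.Do(func() {
				sigCh <- data
				close(sigCh)
			})
		},
	}, SignalReceiver[T]{sigCh: sigCh, closeSigCh: closeSigCh}
}

func (s SignalSender[T]) Send(data T)      { s.send(data) }
func (s SignalReceiver[T]) Recv() <-chan T { return s.sigCh }
func (s SignalReceiver[T]) Close()         { s.closeSigCh() }

func main() {
	sender, receiver := NewSingleSignalPair[string]()
	sender.Send("stop requested")
	sender.Send("ignored")         // only the first Send is delivered; later Sends are no-ops
	fmt.Println(<-receiver.Recv()) // "stop requested"
	receiver.Close()               // safe after Send: the shared sync.Once makes this a no-op
}
// ----- end sketch -----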