From 0faed631e467b4a25744a091892940513045aec4 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Mon, 3 Jul 2023 07:40:48 -0700 Subject: [PATCH 01/59] agent: Rewrite scaling logic into pkg/agent/{core,executor} At a very high level, this work replaces (*Runner).handleVMResources(), moving from an imperative style to an explicit state machine. This new version is more complicated, but ultimately more flexible and easier to extend. The decision-making "core" of the scaling logic is implemented by (*core.State).NextActions(), which returns an ActionSet indicating what the "caller" should do. NextActions() is a pure function, making this easier to test - at least, in theory. That method is called and cached by executor.ExecutorCore, where there's a few different threads (each defined in exec_*.go) responsible for implementing the communications with the other components - namely, the scheduler plugin, vm-informant, and NeonVM k8s API. The various "executor" threads are written generically, using dedicated interfaces (e.g. PluginInterface / PluginHandle) that are implemented in pkg/agent/execbridge.go. --- .golangci.yml | 8 + deploy/agent/config_map.yaml | 3 + pkg/agent/config.go | 48 +- pkg/agent/core/action.go | 40 + pkg/agent/core/dumpstate.go | 142 ++++ pkg/agent/core/state.go | 710 +++++++++++++++++ pkg/agent/execbridge.go | 200 +++++ pkg/agent/executor/core.go | 155 ++++ pkg/agent/executor/exec_informant.go | 266 +++++++ pkg/agent/executor/exec_neonvm.go | 94 +++ pkg/agent/executor/exec_plugin.go | 138 ++++ pkg/agent/executor/exec_sleeper.go | 68 ++ pkg/agent/globalstate.go | 17 +- pkg/agent/informant.go | 164 ++-- pkg/agent/runner.go | 1052 +++----------------------- pkg/plugin/config.go | 2 + pkg/util/broadcast.go | 72 ++ pkg/util/watch/watch.go | 2 + 18 files changed, 2136 insertions(+), 1045 deletions(-) create mode 100644 pkg/agent/core/action.go create mode 100644 pkg/agent/core/dumpstate.go create mode 100644 pkg/agent/core/state.go create mode 100644 pkg/agent/execbridge.go create mode 100644 pkg/agent/executor/core.go create mode 100644 pkg/agent/executor/exec_informant.go create mode 100644 pkg/agent/executor/exec_neonvm.go create mode 100644 pkg/agent/executor/exec_plugin.go create mode 100644 pkg/agent/executor/exec_sleeper.go create mode 100644 pkg/util/broadcast.go diff --git a/.golangci.yml b/.golangci.yml index b6b093edc..40fa640ae 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -6,6 +6,11 @@ run: skip-dirs: - neonvm +issues: + exclude: + # ChanMutex contains only a channel, which *is* safe to copy + - 'copylocks: return copies lock value: github\.com/neondatabase/autoscaling/pkg/util\.ChanMutex' + output: format: colored-line-number print-issued-lines: true @@ -56,8 +61,11 @@ linters-settings: - '^github.com/prometheus/client_golang/prometheus(/.*)?\.\w+Opts$' - '^github\.com/containerd/cgroups/v3/cgroup2\.(Resources|Memory)' - '^github\.com/tychoish/fun/pubsub\.BrokerOptions$' + - '^github\.com/neondatabase/autoscaling/pkg/util\.JSONPatch$' + - '^github\.com/neondatabase/autoscaling/pkg/util/watch\.HandlerFuncs$' # vmapi.{VirtualMachine,VirtualMachineSpec,VirtualMachineMigration,VirtualMachineMigrationSpec} - '^github\.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1\.VirtualMachine(Migration)?(Spec)?$' + - '^github\.com/neondatabase/autoscaling/pkg/agent/core\.Action$' # see: gci: diff --git a/deploy/agent/config_map.yaml b/deploy/agent/config_map.yaml index 125f0d0aa..280f8d2b5 100644 --- a/deploy/agent/config_map.yaml +++ b/deploy/agent/config_map.yaml @@ -16,6 
+16,8 @@ data: "serverPort": 10301, "retryServerMinWaitSeconds": 5, "retryServerNormalWaitSeconds": 5, + "retryDeniedDownscaleSeconds": 5, + "retryFailedRequestSeconds": 3, "registerRetrySeconds": 5, "requestTimeoutSeconds": 1, "registerTimeoutSeconds": 2, @@ -31,6 +33,7 @@ data: "scheduler": { "schedulerName": "autoscale-scheduler", "requestTimeoutSeconds": 2, + "requestAtLeastEverySeconds": 5, "requestPort": 10299 }, "dumpState": { diff --git a/pkg/agent/config.go b/pkg/agent/config.go index 7ac480782..8ab7d0329 100644 --- a/pkg/agent/config.go +++ b/pkg/agent/config.go @@ -12,12 +12,12 @@ import ( ) type Config struct { + DumpState *DumpStateConfig `json:"dumpState"` Scaling ScalingConfig `json:"scaling"` Informant InformantConfig `json:"informant"` Metrics MetricsConfig `json:"metrics"` Scheduler SchedulerConfig `json:"scheduler"` Billing *billing.Config `json:"billing,omitempty"` - DumpState *DumpStateConfig `json:"dumpState"` } // DumpStateConfig configures the endpoint to dump all internal state @@ -54,6 +54,13 @@ type InformantConfig struct { // register request. RegisterRetrySeconds uint `json:"registerRetrySeconds"` + // RetryFailedRequestSeconds gives the duration, in seconds, that we must wait before retrying a + // request that previously failed. + RetryFailedRequestSeconds uint `json:"retryFailedRequestSeconds"` + // RetryDeniedDownscaleSeconds gives the duration, in seconds, that we must wait before retrying + // a downscale request that was previously denied + RetryDeniedDownscaleSeconds uint `json:"retryDeniedDownscaleSeconds"` + // RequestTimeoutSeconds gives the timeout for any individual request to the informant, except // for those with separately-defined values below. RequestTimeoutSeconds uint `json:"requestTimeoutSeconds"` @@ -98,6 +105,9 @@ type SchedulerConfig struct { // // If zero, requests will have no timeout. RequestTimeoutSeconds uint `json:"requestTimeoutSeconds"` + // RequestAtLeastEverySeconds gives the maximum duration we should go without attempting a + // request to the scheduler, even if nothing's changed. 
+ RequestAtLeastEverySeconds uint `json:"requestAtLeastEverySeconds"` // RequestPort defines the port to access the scheduler's ✨special✨ API with RequestPort uint16 `json:"requestPort"` } @@ -131,31 +141,35 @@ func (c *Config) validate() error { zeroTmpl = "field %q cannot be zero" ) - erc.Whenf(ec, c.Billing != nil && c.Billing.ActiveTimeMetricName == "", emptyTmpl, ".billing.activeTimeMetricName") - erc.Whenf(ec, c.Billing != nil && c.Billing.CPUMetricName == "", emptyTmpl, ".billing.cpuMetricName") - erc.Whenf(ec, c.Billing != nil && c.Billing.CollectEverySeconds == 0, zeroTmpl, ".billing.collectEverySeconds") - erc.Whenf(ec, c.Billing != nil && c.Billing.PushEverySeconds == 0, zeroTmpl, ".billing.pushEverySeconds") - erc.Whenf(ec, c.Billing != nil && c.Billing.PushTimeoutSeconds == 0, zeroTmpl, ".billing.pushTimeoutSeconds") - erc.Whenf(ec, c.Billing != nil && c.Billing.URL == "", emptyTmpl, ".billing.url") erc.Whenf(ec, c.DumpState != nil && c.DumpState.Port == 0, zeroTmpl, ".dumpState.port") erc.Whenf(ec, c.DumpState != nil && c.DumpState.TimeoutSeconds == 0, zeroTmpl, ".dumpState.timeoutSeconds") - erc.Whenf(ec, c.Informant.DownscaleTimeoutSeconds == 0, zeroTmpl, ".informant.downscaleTimeoutSeconds") - erc.Whenf(ec, c.Informant.RegisterRetrySeconds == 0, zeroTmpl, ".informant.registerRetrySeconds") - erc.Whenf(ec, c.Informant.RegisterTimeoutSeconds == 0, zeroTmpl, ".informant.registerTimeoutSeconds") - erc.Whenf(ec, c.Informant.RequestTimeoutSeconds == 0, zeroTmpl, ".informant.requestTimeoutSeconds") + erc.Whenf(ec, c.Scaling.RequestTimeoutSeconds == 0, zeroTmpl, ".scaling.requestTimeoutSeconds") + // add all errors if there are any: https://github.com/neondatabase/autoscaling/pull/195#discussion_r1170893494 + ec.Add(c.Scaling.DefaultConfig.Validate()) + erc.Whenf(ec, c.Informant.ServerPort == 0, zeroTmpl, ".informant.serverPort") erc.Whenf(ec, c.Informant.RetryServerMinWaitSeconds == 0, zeroTmpl, ".informant.retryServerMinWaitSeconds") erc.Whenf(ec, c.Informant.RetryServerNormalWaitSeconds == 0, zeroTmpl, ".informant.retryServerNormalWaitSeconds") - erc.Whenf(ec, c.Informant.ServerPort == 0, zeroTmpl, ".informant.serverPort") + erc.Whenf(ec, c.Informant.RegisterRetrySeconds == 0, zeroTmpl, ".informant.registerRetrySeconds") + erc.Whenf(ec, c.Informant.RetryFailedRequestSeconds == 0, zeroTmpl, ".informant.retryFailedRequestSeconds") + erc.Whenf(ec, c.Informant.RetryDeniedDownscaleSeconds == 0, zeroTmpl, ".informant.retryDeniedDownscaleSeconds") + erc.Whenf(ec, c.Informant.RequestTimeoutSeconds == 0, zeroTmpl, ".informant.requestTimeoutSeconds") + erc.Whenf(ec, c.Informant.RegisterTimeoutSeconds == 0, zeroTmpl, ".informant.registerTimeoutSeconds") + erc.Whenf(ec, c.Informant.DownscaleTimeoutSeconds == 0, zeroTmpl, ".informant.downscaleTimeoutSeconds") erc.Whenf(ec, c.Informant.UnhealthyAfterSilenceDurationSeconds == 0, zeroTmpl, ".informant.unhealthyAfterSilenceDurationSeconds") erc.Whenf(ec, c.Informant.UnhealthyStartupGracePeriodSeconds == 0, zeroTmpl, ".informant.unhealthyStartupGracePeriodSeconds") erc.Whenf(ec, c.Metrics.LoadMetricPrefix == "", emptyTmpl, ".metrics.loadMetricPrefix") + erc.Whenf(ec, c.Metrics.RequestTimeoutSeconds == 0, zeroTmpl, ".metrics.requestTimeoutSeconds") erc.Whenf(ec, c.Metrics.SecondsBetweenRequests == 0, zeroTmpl, ".metrics.secondsBetweenRequests") - erc.Whenf(ec, c.Scaling.RequestTimeoutSeconds == 0, zeroTmpl, ".scaling.requestTimeoutSeconds") - // add all errors if there are any: 
https://github.com/neondatabase/autoscaling/pull/195#discussion_r1170893494 - ec.Add(c.Scaling.DefaultConfig.Validate()) - erc.Whenf(ec, c.Scheduler.RequestPort == 0, zeroTmpl, ".scheduler.requestPort") - erc.Whenf(ec, c.Scheduler.RequestTimeoutSeconds == 0, zeroTmpl, ".scheduler.requestTimeoutSeconds") erc.Whenf(ec, c.Scheduler.SchedulerName == "", emptyTmpl, ".scheduler.schedulerName") + // note: c.Scheduler.RequestTimeoutSeconds == 0 is valid + erc.Whenf(ec, c.Scheduler.RequestAtLeastEverySeconds == 0, zeroTmpl, ".scheduler.requestAtLeastEverySeconds") + erc.Whenf(ec, c.Scheduler.RequestPort == 0, zeroTmpl, ".scheduler.requestPort") + erc.Whenf(ec, c.Billing != nil && c.Billing.URL == "", emptyTmpl, ".billing.url") + erc.Whenf(ec, c.Billing != nil && c.Billing.CPUMetricName == "", emptyTmpl, ".billing.cpuMetricName") + erc.Whenf(ec, c.Billing != nil && c.Billing.ActiveTimeMetricName == "", emptyTmpl, ".billing.activeTimeMetricName") + erc.Whenf(ec, c.Billing != nil && c.Billing.CollectEverySeconds == 0, zeroTmpl, ".billing.collectEverySeconds") + erc.Whenf(ec, c.Billing != nil && c.Billing.PushEverySeconds == 0, zeroTmpl, ".billing.pushEverySeconds") + erc.Whenf(ec, c.Billing != nil && c.Billing.PushTimeoutSeconds == 0, zeroTmpl, ".billing.pushTimeoutSeconds") return ec.Resolve() } diff --git a/pkg/agent/core/action.go b/pkg/agent/core/action.go new file mode 100644 index 000000000..990064d62 --- /dev/null +++ b/pkg/agent/core/action.go @@ -0,0 +1,40 @@ +package core + +import ( + "time" + + "github.com/neondatabase/autoscaling/pkg/api" +) + +type ActionSet struct { + Wait *ActionWait `json:"wait,omitempty"` + PluginRequest *ActionPluginRequest `json:"pluginRequest,omitempty"` + NeonVMRequest *ActionNeonVMRequest `json:"neonvmRequest,omitempty"` + InformantDownscale *ActionInformantDownscale `json:"informantDownscale,omitempty"` + InformantUpscale *ActionInformantUpscale `json:"informantUpscale,omitempty"` +} + +type ActionWait struct { + Duration time.Duration `json:"duration"` +} + +type ActionPluginRequest struct { + LastPermit *api.Resources `json:"current"` + Target api.Resources `json:"target"` + Metrics *api.Metrics `json:"metrics"` +} + +type ActionNeonVMRequest struct { + Current api.Resources `json:"current"` + Target api.Resources `json:"target"` +} + +type ActionInformantDownscale struct { + Current api.Resources `json:"current"` + Target api.Resources `json:"target"` +} + +type ActionInformantUpscale struct { + Current api.Resources `json:"current"` + Target api.Resources `json:"target"` +} diff --git a/pkg/agent/core/dumpstate.go b/pkg/agent/core/dumpstate.go new file mode 100644 index 000000000..b36874a6a --- /dev/null +++ b/pkg/agent/core/dumpstate.go @@ -0,0 +1,142 @@ +package core + +// Implementation of (*UpdateState).Dump() + +import ( + "time" + + "github.com/neondatabase/autoscaling/pkg/api" +) + +func shallowCopy[T any](ptr *T) *T { + if ptr == nil { + return nil + } else { + x := *ptr + return &x + } +} + +// StateDump provides introspection into the current values of the fields of State +type StateDump struct { + Config Config `json:"config"` + VM api.VmInfo `json:"vm"` + Plugin pluginStateDump `json:"plugin"` + Informant informantStateDump `json:"informant"` + NeonVM neonvmStateDump `json:"neonvm"` + Metrics *api.Metrics `json:"metrics"` +} + +// Dump produces a JSON-serializable representation of the State +func (s *State) Dump() StateDump { + return StateDump{ + Config: s.config, + VM: s.vm, + Plugin: s.plugin.dump(), + Informant: s.informant.dump(), + 
NeonVM: s.neonvm.dump(), + Metrics: shallowCopy(s.metrics), + } +} + +type pluginStateDump struct { + Alive bool `json:"alive"` + OngoingRequest bool `json:"ongoingRequest"` + ComputeUnit *api.Resources `json:"computeUnit"` + LastRequest *pluginRequestedDump `json:"lastRequest"` + Permit *api.Resources `json:"permit"` +} +type pluginRequestedDump struct { + At time.Time `json:"time"` + Resources api.Resources `json:"resources"` +} + +func (s *pluginState) dump() pluginStateDump { + var lastRequest *pluginRequestedDump + if s.lastRequest != nil { + lastRequest = &pluginRequestedDump{ + At: s.lastRequest.at, + Resources: s.lastRequest.resources, + } + } + + return pluginStateDump{ + Alive: s.alive, + OngoingRequest: s.ongoingRequest, + ComputeUnit: shallowCopy(s.computeUnit), + LastRequest: lastRequest, + Permit: shallowCopy(s.permit), + } +} + +type informantStateDump struct { + Active bool `json:"active"` + OngoingRequest *OngoingInformantRequestDump `json:"ongoingRequest"` + RequestedUpscale *requestedUpscaleDump `json:"requestedUpscale"` + DeniedDownscale *deniedDownscaleDump `json:"deniedDownscale"` + Approved *api.Resources `json:"approved"` + DownscaleFailureAt *time.Time `json:"downscaleFailureAt"` + UpscaleFailureAt *time.Time `json:"upscaleFailureAt"` +} +type OngoingInformantRequestDump struct { + Kind informantRequestKind `json:"kind"` +} +type requestedUpscaleDump struct { + At time.Time `json:"at"` + Base api.Resources `json:"base"` + Requested api.MoreResources `json:"requested"` +} +type deniedDownscaleDump struct { + At time.Time `json:"at"` + Requested api.Resources `json:"requested"` +} + +func (s *informantState) dump() informantStateDump { + var requestedUpscale *requestedUpscaleDump + if s.requestedUpscale != nil { + requestedUpscale = &requestedUpscaleDump{ + At: s.requestedUpscale.at, + Base: s.requestedUpscale.base, + Requested: s.requestedUpscale.requested, + } + } + + var deniedDownscale *deniedDownscaleDump + if s.deniedDownscale != nil { + deniedDownscale = &deniedDownscaleDump{ + At: s.deniedDownscale.at, + Requested: s.deniedDownscale.requested, + } + } + + var ongoingRequest *OngoingInformantRequestDump + if s.ongoingRequest != nil { + ongoingRequest = &OngoingInformantRequestDump{ + Kind: s.ongoingRequest.kind, + } + } + + return informantStateDump{ + Active: s.active, + OngoingRequest: ongoingRequest, + RequestedUpscale: requestedUpscale, + DeniedDownscale: deniedDownscale, + Approved: shallowCopy(s.approved), + DownscaleFailureAt: shallowCopy(s.downscaleFailureAt), + UpscaleFailureAt: shallowCopy(s.upscaleFailureAt), + } +} + +type neonvmStateDump struct { + LastSuccess *api.Resources `json:"lastSuccess"` + OngoingRequested *api.Resources `json:"ongoingRequested"` + RequestFailedAt *time.Time `json:"requestFailedAt"` +} + +func (s *neonvmState) dump() neonvmStateDump { + return neonvmStateDump{ + LastSuccess: shallowCopy(s.lastSuccess), + OngoingRequested: shallowCopy(s.ongoingRequested), + RequestFailedAt: shallowCopy(s.requestFailedAt), + } +} diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go new file mode 100644 index 000000000..287a92a48 --- /dev/null +++ b/pkg/agent/core/state.go @@ -0,0 +1,710 @@ +package core + +// The core scaling logic at the heart of the autoscaler-agent. This file implements everything with +// mostly pure-ish functions, so that all the making & receiving requests can be done elsewhere. +// +// Broadly our strategy is to mimic the kind of eventual consistency that is itself used in +// Kubernetes. 
The scaling logic wasn't always implemented like this, but because the +// autoscaler-agent *fundamentally* exists in an eventual consistency world, we have to either: +// (a) make assumptions that we know are false; or +// (b) design our system so it assumes less. +// We used to solve this by (a). We ran into¹ issues² going that way, because sometimes those false +// assumptions come back to haunt you. +// +// That said, there's still some tricky semantics we want to maintain. Internally, the +// autoscaler-agent must be designed around eventual consistency, but the API we expose to the +// vm-informant is strictly synchonous. As such, there's some subtle logic to make sure that we're +// not violating our own guarantees. +// +// --- +// ¹ https://github.com/neondatabase/autoscaling/issues/23 +// ² https://github.com/neondatabase/autoscaling/issues/350 + +import ( + "fmt" + "math" + "strings" + "time" + + "github.com/neondatabase/autoscaling/pkg/api" + "github.com/neondatabase/autoscaling/pkg/util" +) + +// Config represents some of the static configuration underlying the decision-making of State +type Config struct { + // DefaultScalingConfig is just copied from the global autoscaler-agent config. + // If the VM's ScalingConfig is nil, we use this field instead. + DefaultScalingConfig api.ScalingConfig + + // PluginRequestTick gives the period at which we should be making requests to the scheduler + // plugin, even if nothing's changed. + PluginRequestTick time.Duration + + // InformantDeniedDownscaleCooldown gives the time we must wait between making duplicate + // downscale requests to the vm-informant where the previous failed. + InformantDeniedDownscaleCooldown time.Duration + + // InformantRetryWait gives the amount of time to wait to retry after a *failed* request. + InformantRetryWait time.Duration + + // Warn provides an outlet for (*State).Next() to give warnings about conditions that are + // impeding its ability to execute. (e.g. "wanted to do X but couldn't because of Y") + Warn func(string, ...any) `json:"-"` +} + +// State holds all of the necessary internal state for a VM in order to make scaling +// decisions +type State struct { + // ANY CHANGED FIELDS MUST BE UPDATED IN dump.go AS WELL + + config Config + + // vm gives the current state of the VM - or at least, the state of the fields we care about. + // + // NB: any contents behind pointers in vm are immutable. Any time the field is updated, we + // replace it with a fresh object. + vm api.VmInfo + + // plugin records all state relevant to communications with the scheduler plugin + plugin pluginState + + // informant records all state relevant to communications with the vm-informant + informant informantState + + // neonvm records all state relevant to the NeonVM k8s API + neonvm neonvmState + + metrics *api.Metrics +} + +type pluginState struct { + alive bool + // ongoingRequest is true iff there is currently an ongoing request to *this* scheduler plugin. + ongoingRequest bool + // computeUnit, if not nil, gives the value of the compute unit we most recently got from a + // PluginResponse + computeUnit *api.Resources + // lastRequest, if not nil, gives information about the most recently started request to the + // plugin (maybe unfinished!) + lastRequest *pluginRequested + // permit, if not nil, stores the Permit in the most recent PluginResponse. This field will be + // nil if we have not been able to contact *any* scheduler. If we switch schedulers, we trust + // the old one. 
+ permit *api.Resources +} + +type pluginRequested struct { + at time.Time + resources api.Resources +} + +type informantState struct { + // active is true iff the agent is currently "confirmed" and not "suspended" by the informant. + // Otherwise, we shouldn't be making any kind of scaling requests. + active bool + + ongoingRequest *ongoingInformantRequest + + // requestedUpscale, if not nil, stores the most recent *unresolved* upscaling requested by the + // vm-informant, along with the time at which it occurred. + requestedUpscale *requestedUpscale + + // deniedDownscale, if not nil, stores the result of the lastest denied /downscale request. + deniedDownscale *deniedDownscale + + // approved stores the most recent Resources associated with either (a) an accepted downscale + // request, or (b) a successful upscale notification. + approved *api.Resources + + downscaleFailureAt *time.Time + upscaleFailureAt *time.Time +} + +type ongoingInformantRequest struct { + kind informantRequestKind +} + +type informantRequestKind string + +const ( + informantRequestKindDownscale informantRequestKind = "downscale" + informantRequestKindUpscale informantRequestKind = "upscale" +) + +type requestedUpscale struct { + at time.Time + base api.Resources + requested api.MoreResources +} + +type deniedDownscale struct { + at time.Time + requested api.Resources +} + +type neonvmState struct { + lastSuccess *api.Resources + // ongoingRequested, if not nil, gives the resources requested + ongoingRequested *api.Resources + requestFailedAt *time.Time +} + +func NewState(vm api.VmInfo, config Config) *State { + return &State{ + config: config, + vm: vm, + plugin: pluginState{ + alive: false, + ongoingRequest: false, + computeUnit: nil, + lastRequest: nil, + permit: nil, + }, + informant: informantState{ + active: false, + ongoingRequest: nil, + requestedUpscale: nil, + deniedDownscale: nil, + approved: nil, + downscaleFailureAt: nil, + upscaleFailureAt: nil, + }, + neonvm: neonvmState{ + lastSuccess: nil, + ongoingRequested: nil, + requestFailedAt: nil, + }, + metrics: nil, + } +} + +// NextActions is used to implement the state machine. It's a pure function that *just* indicates +// what the executor should do. +func (s *State) NextActions(now time.Time) ActionSet { + var actions ActionSet + + using := s.vm.Using() + + var desiredResources api.Resources + + if s.informant.active { + desiredResources = s.desiredResourcesFromMetricsOrRequestedUpscaling() + } else { + // If we're not deemed "active" by the informant, then we shouldn't be making any kind of + // scaling requests on its behalf. + // + // We'll still talk to the scheduler to inform it about the current resource usage though, + // to mitigate any reliability issues - much of the informant is built (as of 2023-07-09) + // under the assumption that we could, in theory, have multiple autoscaler-agents on the + // same node at the same time. That's... not really true, so an informant that isn't + // "active" is more likely to just be crash-looping due to a bug. + // + // *In theory* if we had mutliple autoscaler-agents talking to a single informant, this + // would be incorrect; we'd override another one's scaling requests. But this should be + // fine. 
+ desiredResources = using + } + + desiredResourcesApprovedByInformant := s.boundResourcesByInformantApproved(desiredResources) + desiredResourcesApprovedByPlugin := s.boundResourcesByPluginApproved(desiredResources) + // NB: informant approved provides a lower bound + approvedDesiredResources := desiredResourcesApprovedByPlugin.Max(desiredResourcesApprovedByInformant) + + ongoingNeonVMRequest := s.neonvm.ongoingRequested != nil + + var requestForPlugin api.Resources + if s.plugin.permit == nil { + // If we haven't yet gotten a proper plugin response, then we aren't allowed to ask for + // anything beyond our current usage. + requestForPlugin = using + } else { + // ... Otherwise, we should: + // 1. "inform" the plugin of any downscaling since the previous permit + // 2. "request" any desired upscaling relative to to the previous permit + // with (2) taking priority over (1), if there's any conflicts. + requestForPlugin = desiredResources.Max(using) // ignore "desired" downscaling with .Max(using) + } + + // We want to make a request to the scheduler plugin if: + // 1. we've waited long enough since the previous request; or + // 2.a. we want to request resources / inform it of downscale; and + // b. there isn't any ongoing, conflicting request + timeForNewPluginRequest := s.plugin.lastRequest == nil || now.Sub(s.plugin.lastRequest.at) >= s.config.PluginRequestTick + shouldUpdatePlugin := s.plugin.lastRequest != nil && + // "we haven't tried requesting *these* resources from it yet, or we can retry requesting" + (s.plugin.lastRequest.resources != requestForPlugin || timeForNewPluginRequest) && + !ongoingNeonVMRequest + + if !s.plugin.ongoingRequest && (timeForNewPluginRequest || shouldUpdatePlugin) && s.plugin.alive { + if !shouldUpdatePlugin { + // If we shouldn't "update" the plugin, then just inform it about the current resources + // and metrics. + actions.PluginRequest = &ActionPluginRequest{ + LastPermit: s.plugin.permit, + Target: using, + Metrics: s.metrics, + } + } else { + // ... Otherwise, we should try requesting something new form it. + actions.PluginRequest = &ActionPluginRequest{ + LastPermit: s.plugin.permit, + Target: desiredResourcesApprovedByInformant, + Metrics: s.metrics, + } + } + } else if timeForNewPluginRequest || shouldUpdatePlugin { + if s.plugin.alive { + s.config.Warn("Wanted to make a request to the plugin, but there's already one ongoing") + } else { + s.config.Warn("Wanted to make a request to the plugin, but there isn't one active right now") + } + } + + // We want to make a request to NeonVM if we've been approved for a change in resources that + // we're not currently using. + if approvedDesiredResources != using { + // ... but we can't make one if there's already a request ongoing, either via the NeonVM API + // or to the scheduler plugin, because they require taking out the request lock. 
+ if !ongoingNeonVMRequest && !s.plugin.ongoingRequest { + actions.NeonVMRequest = &ActionNeonVMRequest{ + Current: using, + Target: approvedDesiredResources, + } + } else { + var reqs []string + if s.plugin.ongoingRequest { + reqs = append(reqs, "plugin request") + } + if ongoingNeonVMRequest && *s.neonvm.ongoingRequested != approvedDesiredResources { + reqs = append(reqs, "NeonVM request (for different resources)") + } + + if len(reqs) != 0 { + s.config.Warn("Wanted to make a request to NeonVM API, but there's already %s ongoing", strings.Join(reqs, " and ")) + } + } + } + + // We should make an upscale request to the informant if we've upscaled and the informant + // doesn't know about it. + wantInformantUpscaleRequest := s.informant.approved != nil && *s.informant.approved != desiredResources.Max(*s.informant.approved) + // However, we may need to wait before retrying (or for any ongoing requests to finish) + makeInformantUpscaleRequest := wantInformantUpscaleRequest && + s.informant.active && + s.informant.ongoingRequest == nil && + (s.informant.upscaleFailureAt == nil || + now.Sub(*s.informant.upscaleFailureAt) >= s.config.InformantRetryWait) + if wantInformantUpscaleRequest { + if makeInformantUpscaleRequest { + actions.InformantUpscale = &ActionInformantUpscale{ + Current: *s.informant.approved, + Target: desiredResources.Max(*s.informant.approved), + } + } else if !s.informant.active { + s.config.Warn("Wanted to send informant upscale request, but not active") + } else if s.informant.ongoingRequest != nil && s.informant.ongoingRequest.kind != informantRequestKindUpscale { + s.config.Warn("Wanted to send informant upscale request, but waiting other ongoing %s request", s.informant.ongoingRequest.kind) + } else if s.informant.ongoingRequest == nil { + s.config.Warn("Wanted to send informant upscale request, but waiting on retry rate limit") + } + } + + // We should make a downscale request to the informant if we want to downscale but haven't been + // approved for it. 
+ var resourcesForInformantDownscale api.Resources + if s.informant.approved != nil { + resourcesForInformantDownscale = desiredResources.Min(*s.informant.approved) + } else { + resourcesForInformantDownscale = desiredResources.Min(using) + } + wantInformantDownscaleRequest := s.informant.approved != nil && *s.informant.approved != resourcesForInformantDownscale + if s.informant.approved == nil && resourcesForInformantDownscale != using { + s.config.Warn("Wanted to send informant downscale request, but haven't yet gotten information about its resources") + } + // However, we may need to wait before retrying (or for any ongoing requests to finish) + makeInformantDownscaleRequest := wantInformantDownscaleRequest && + s.informant.active && + s.informant.ongoingRequest == nil && + (s.informant.deniedDownscale == nil || + s.informant.deniedDownscale.requested != desiredResources.Min(using) || + now.Sub(s.informant.deniedDownscale.at) >= s.config.InformantDeniedDownscaleCooldown) && + (s.informant.downscaleFailureAt == nil || + now.Sub(*s.informant.downscaleFailureAt) >= s.config.InformantRetryWait) + + if wantInformantDownscaleRequest { + if makeInformantDownscaleRequest { + actions.InformantDownscale = &ActionInformantDownscale{ + Current: *s.informant.approved, + Target: resourcesForInformantDownscale, + } + } else if !s.informant.active { + s.config.Warn("Wanted to send informant downscale request, but not active") + } else if s.informant.ongoingRequest != nil && s.informant.ongoingRequest.kind != informantRequestKindDownscale { + s.config.Warn("Wanted to send informant downscale request, but waiting on other ongoing %s request", s.informant.ongoingRequest.kind) + } else if s.informant.ongoingRequest == nil { + s.config.Warn("Wanted to send informant downscale request, but waiting on retry rate limit") + } + } + + // --- and that's all the request types! --- + + // If there's anything waiting, we should also note how long we should wait for. + // There's two components we could be waiting on: the scheduler plugin, and the vm-informant. + maximumDuration := time.Duration(int64(uint64(1)<<63 - 1)) + requiredWait := maximumDuration + + // We always need to periodically send messages to the plugin. If actions.PluginRequest == nil, + // we know that either: + // + // (a) s.plugin.lastRequestAt != nil (otherwise timeForNewPluginRequest == true); or + // (b) s.plugin.ongoingRequest == true (the only reason why we wouldn't've exited earlier) + // + // So we actually only need to explicitly wait if there's not an ongoing request - otherwise + // we'll be notified anyways when the request is done. + if actions.PluginRequest == nil && s.plugin.alive && !s.plugin.ongoingRequest { + requiredWait = util.Min(requiredWait, now.Sub(s.plugin.lastRequest.at)) + } + + // For the vm-informant: + // if we wanted to make EITHER a downscale or upscale request, but we previously couldn't + // because of retry timeouts, we should wait for s.config.InformantRetryWait before trying + // again. + // OR if we wanted to downscale but got denied, we should wait for + // s.config.InformantDownscaleCooldown before retrying. 
+ if s.informant.ongoingRequest == nil { + // Retry upscale on failure + if wantInformantUpscaleRequest && s.informant.upscaleFailureAt != nil { + if wait := now.Sub(*s.informant.upscaleFailureAt); wait >= s.config.InformantRetryWait { + requiredWait = util.Min(requiredWait, wait) + } + } + // Retry downscale on failure + if wantInformantDownscaleRequest && s.informant.downscaleFailureAt != nil { + if wait := now.Sub(*s.informant.downscaleFailureAt); wait >= s.config.InformantRetryWait { + requiredWait = util.Min(requiredWait, wait) + } + } + // Retry downscale if denied + if wantInformantDownscaleRequest && s.informant.deniedDownscale != nil && resourcesForInformantDownscale == s.informant.deniedDownscale.requested { + if wait := now.Sub(s.informant.deniedDownscale.at); wait >= s.config.InformantDeniedDownscaleCooldown { + requiredWait = util.Min(requiredWait, wait) + } + } + } + + // If we're waiting on anything, add the action. + if requiredWait != maximumDuration { + actions.Wait = &ActionWait{Duration: requiredWait} + } + + return actions +} + +func (s *State) scalingConfig() api.ScalingConfig { + if s.vm.ScalingConfig != nil { + return *s.vm.ScalingConfig + } else { + return s.config.DefaultScalingConfig + } +} + +func (s *State) desiredResourcesFromMetricsOrRequestedUpscaling() api.Resources { + // There's some annoying edge cases that this function has to be able to handle properly. For + // the sake of completeness, they are: + // + // 1. s.vm.Using() is not a multiple of s.computeUnit + // 2. s.vm.Max() is less than s.computeUnit (or: has at least one resource that is) + // 3. s.vm.Using() is a fractional multiple of s.computeUnit, but !allowDecrease and rounding up + // is greater than s.vm.Max() + // 4. s.vm.Using() is much larger than s.vm.Min() and not a multiple of s.computeUnit, but load + // is low so we should just decrease *anyways*. + // + // --- + // + // Broadly, the implementation works like this: + // 1. Based on load average, calculate the "goal" number of CPUs (and therefore compute units) + // 2. Cap the goal CU by min/max, etc + // 3. that's it! + + // If we don't know + if s.plugin.computeUnit == nil { + return s.vm.Using() + } + + var goalCU uint32 + if s.metrics != nil { + // Goal compute unit is at the point where (CPUs) × (LoadAverageFractionTarget) == (load + // average), + // which we can get by dividing LA by LAFT. + goalCU = uint32(math.Round(float64(s.metrics.LoadAverage1Min) / s.scalingConfig().LoadAverageFractionTarget)) + } + + // Update goalCU based on any requested upscaling + goalCU = util.Max(goalCU, s.requiredCUForRequestedUpscaling(*s.plugin.computeUnit)) + + // resources for the desired "goal" compute units + var goalResources api.Resources + + // If there's no constraints from s.metrics or s.informant.requestedUpscale, then we'd prefer to + // keep things as-is, rather than scaling down (because otherwise goalCU = 0). + if s.metrics == nil && s.informant.requestedUpscale == nil { + goalResources = s.vm.Using() + } else { + goalResources = s.plugin.computeUnit.Mul(uint16(goalCU)) + } + + // bound goal by the minimum and maximum resource amounts for the VM + result := goalResources.Min(s.vm.Max()).Max(s.vm.Min()) + + // Check that the result is sound. + // + // With the current (naive) implementation, this is trivially ok. In future versions, it might + // not be so simple, so it's good to have this integrity check here. 
+ if result.HasFieldGreaterThan(s.vm.Max()) { + panic(fmt.Errorf( + "produced invalid desiredVMState: result has field greater than max. this = %+v", s, + )) + } else if result.HasFieldLessThan(s.vm.Min()) { + panic(fmt.Errorf( + "produced invalid desiredVMState: result has field less than min. this = %+v", s, + )) + } + + return result +} + +// NB: we could just use s.plugin.computeUnit, but that's sometimes nil. This way, it's clear that +// it's the caller's responsibility to ensure that s.plugin.computeUnit != nil. +func (s *State) requiredCUForRequestedUpscaling(computeUnit api.Resources) uint32 { + if s.informant.requestedUpscale == nil { + return 0 + } + + var required uint32 + requested := s.informant.requestedUpscale.requested + + // note: floor(x / M) + 1 gives the minimum integer value greater than x / M. + + if requested.Cpu { + required = util.Max(required, uint32(s.vm.Cpu.Use/computeUnit.VCPU)+1) + } + if requested.Memory { + required = util.Max(required, uint32(s.vm.Mem.Use/computeUnit.Mem)+1) + } + + return required +} + +func (s *State) boundResourcesByInformantApproved(resources api.Resources) api.Resources { + var lowerBound api.Resources + if s.informant.approved != nil { + lowerBound = *s.informant.approved + } else { + lowerBound = s.vm.Using() + } + return resources.Max(lowerBound) +} + +func (s *State) boundResourcesByPluginApproved(resources api.Resources) api.Resources { + var upperBound api.Resources + if s.plugin.permit != nil { + upperBound = *s.plugin.permit + } else { + upperBound = s.vm.Using() + } + return resources.Min(upperBound) +} + +////////////////////////////////////////// +// PUBLIC FUNCTIONS TO UPDATE THE STATE // +////////////////////////////////////////// + +func (s *State) UpdatedVM(vm api.VmInfo) { + s.vm = vm +} + +func (s *State) UpdateMetrics(metrics api.Metrics) { + s.metrics = &metrics +} + +// PluginHandle provides write access to the scheduler plugin pieces of an UpdateState +type PluginHandle struct { + s *State +} + +func (s *State) Plugin() PluginHandle { + return PluginHandle{s} +} + +func (h PluginHandle) NewScheduler() { + h.s.plugin = pluginState{ + alive: true, + ongoingRequest: false, + computeUnit: nil, + lastRequest: nil, + permit: h.s.plugin.permit, // Keep this; trust the previous scheduler. + } +} + +func (h PluginHandle) SchedulerGone() { + h.s.plugin = pluginState{ + alive: false, + ongoingRequest: false, + computeUnit: nil, + lastRequest: nil, + permit: h.s.plugin.permit, // Keep this; trust the previous scheduler. + } +} + +func (h PluginHandle) StartingRequest(now time.Time, resources api.Resources) { + h.s.plugin.lastRequest = &pluginRequested{ + at: now, + resources: resources, + } + h.s.plugin.ongoingRequest = true +} + +func (h PluginHandle) RequestFailed(now time.Time) { + h.s.plugin.ongoingRequest = false +} + +func (h PluginHandle) RequestSuccessful(now time.Time, resp api.PluginResponse) error { + h.s.plugin.ongoingRequest = false + + if err := resp.Permit.ValidateNonZero(); err != nil { + return fmt.Errorf("Invalid permit: %w", err) + } + if err := resp.ComputeUnit.ValidateNonZero(); err != nil { + return fmt.Errorf("Invalid compute unit: %w", err) + } + + // Errors from resp in connection with the prior request + if resp.Permit.HasFieldGreaterThan(h.s.plugin.lastRequest.resources) { + return fmt.Errorf( + "Permit has resources greater than request (%+v vs. 
%+v)", + resp.Permit, h.s.plugin.lastRequest.resources, + ) + } + + // Errors from resp in connection with the prior request AND the VM state + if vmUsing := h.s.vm.Using(); resp.Permit.HasFieldLessThan(vmUsing) { + return fmt.Errorf("Permit has resources less than VM (%+v vs %+v)", resp.Permit, vmUsing) + } + + // All good - set everything. + + h.s.plugin.computeUnit = &resp.ComputeUnit + h.s.plugin.permit = &resp.Permit + return nil +} + +// InformantHandle provides write access to the vm-informant pieces of an UpdateState +type InformantHandle struct { + s *State +} + +func (s *State) Informant() InformantHandle { + return InformantHandle{s} +} + +func (h InformantHandle) Reset() { + h.s.informant = informantState{ + active: false, + ongoingRequest: nil, + requestedUpscale: nil, + deniedDownscale: nil, + approved: nil, + downscaleFailureAt: nil, + upscaleFailureAt: nil, + } +} + +func (h InformantHandle) Active(active bool) { + h.s.informant.active = active +} + +func (h InformantHandle) SuccessfullyRegistered() { + using := h.s.vm.Using() + h.s.informant.approved = &using // TODO: this is racy (although... informant synchronization should help *some* with this?) +} + +func (h InformantHandle) UpscaleRequested(now time.Time, resources api.MoreResources) { + h.s.informant.requestedUpscale = &requestedUpscale{ + at: now, + base: h.s.vm.Using(), // TODO: this is racy (maybe the resources were different when the informant originally made the request) + requested: resources, + } +} + +func (h InformantHandle) StartingUpscaleRequest(now time.Time) { + h.s.informant.ongoingRequest = &ongoingInformantRequest{kind: informantRequestKindUpscale} + h.s.informant.upscaleFailureAt = nil +} + +func (h InformantHandle) UpscaleRequestSuccessful(now time.Time, resources api.Resources) { + h.s.informant.ongoingRequest = nil + h.s.informant.approved = &resources +} + +func (h InformantHandle) UpscaleRequestFailed(now time.Time) { + h.s.informant.ongoingRequest = nil + h.s.informant.upscaleFailureAt = &now +} + +func (h InformantHandle) StartingDownscaleRequest(now time.Time) { + h.s.informant.ongoingRequest = &ongoingInformantRequest{kind: informantRequestKindDownscale} + h.s.informant.downscaleFailureAt = nil +} + +func (h InformantHandle) DownscaleRequestAllowed(now time.Time, requested api.Resources) { + h.s.informant.ongoingRequest = nil + h.s.informant.approved = &requested + h.s.informant.deniedDownscale = nil +} + +// Downscale request was successful but the informant denied our request. +func (h InformantHandle) DownscaleRequestDenied(now time.Time, requested api.Resources) { + h.s.informant.ongoingRequest = nil + h.s.informant.deniedDownscale = &deniedDownscale{ + at: now, + requested: requested, + } +} + +func (h InformantHandle) DownscaleRequestFailed(now time.Time) { + h.s.informant.ongoingRequest = nil + h.s.informant.downscaleFailureAt = &now +} + +type NeonVMHandle struct { + s *State +} + +func (s *State) NeonVM() NeonVMHandle { + return NeonVMHandle{s} +} + +func (h NeonVMHandle) StartingRequest(now time.Time, resources api.Resources) { + // FIXME: add time to ongoing request info (or maybe only in RequestFailed?) 
+ h.s.neonvm.ongoingRequested = &resources +} + +func (h NeonVMHandle) RequestSuccessful(now time.Time) { + if h.s.neonvm.ongoingRequested == nil { + panic("received NeonVM().RequestSuccessful() update without ongoing request") + } + + resources := *h.s.neonvm.ongoingRequested + + // FIXME: This is actually incorrect; we shouldn't trust that the VM has already been updated + // just because the request completed. It takes longer for the reconcile cycle(s) to make the + // necessary changes. + h.s.vm.Cpu.Use = resources.VCPU + h.s.vm.Mem.Use = resources.Mem + + h.s.neonvm.ongoingRequested = nil +} + +func (h NeonVMHandle) RequestFailed(now time.Time) { + h.s.neonvm.ongoingRequested = nil +} diff --git a/pkg/agent/execbridge.go b/pkg/agent/execbridge.go new file mode 100644 index 000000000..9eba23565 --- /dev/null +++ b/pkg/agent/execbridge.go @@ -0,0 +1,200 @@ +package agent + +// Implementations of the interfaces used by & defined in pkg/agent/executor +// +// This file is essentially the bridge between 'runner.go' and 'executor/' + +import ( + "context" + "fmt" + + "go.uber.org/zap" + + "github.com/neondatabase/autoscaling/pkg/agent/executor" + "github.com/neondatabase/autoscaling/pkg/api" + "github.com/neondatabase/autoscaling/pkg/util" +) + +var ( + _ executor.PluginInterface = (*execPluginInterface)(nil) + _ executor.NeonVMInterface = (*execNeonVMInterface)(nil) + _ executor.InformantInterface = (*execInformantInterface)(nil) +) + +///////////////////////////////////////////////////////////// +// Scheduler Plugin -related interfaces and implementation // +///////////////////////////////////////////////////////////// + +type execPluginInterface struct { + runner *Runner + core *executor.ExecutorCore +} + +func makePluginInterface(r *Runner, core *executor.ExecutorCore) *execPluginInterface { + return &execPluginInterface{runner: r, core: core} +} + +// EmptyID implements executor.PluginInterface +func (iface *execPluginInterface) EmptyID() string { + return "" +} + +// RequestLock implements executor.PluginInterface +func (iface *execPluginInterface) RequestLock() util.ChanMutex { + return iface.runner.requestLock +} + +// GetHandle implements executor.PluginInterface +func (iface *execPluginInterface) GetHandle() executor.PluginHandle { + scheduler := iface.runner.scheduler.Load() + + if scheduler == nil { + return nil + } + + return &execPluginHandle{ + runner: iface.runner, + scheduler: scheduler, + } +} + +type execPluginHandle struct { + runner *Runner + scheduler *Scheduler +} + +// ID implements executor.PluginHandle +func (h *execPluginHandle) ID() string { + return string(h.scheduler.info.UID) +} + +// Request implements executor.PluginHandle +func (h *execPluginHandle) Request( + ctx context.Context, + logger *zap.Logger, + lastPermit *api.Resources, + target api.Resources, + metrics *api.Metrics, +) (*api.PluginResponse, error) { + if lastPermit != nil { + h.runner.recordResourceChange(*lastPermit, target, h.runner.global.metrics.schedulerRequestedChange) + } + + resp, err := h.scheduler.DoRequest(ctx, logger, target, metrics) + + if err != nil && lastPermit != nil { + h.runner.recordResourceChange(*lastPermit, target, h.runner.global.metrics.schedulerApprovedChange) + } + + return resp, err +} + +///////////////////////////////////////////////// +// NeonVM-related interface and implementation // +///////////////////////////////////////////////// + +type execNeonVMInterface struct { + runner *Runner +} + +func makeNeonVMInterface(r *Runner) *execNeonVMInterface { + return 
&execNeonVMInterface{runner: r} +} + +// RequestLock implements executor.NeonVMInterface +func (iface *execNeonVMInterface) RequestLock() util.ChanMutex { + return iface.runner.requestLock +} + +// Request implements executor.NeonVMInterface +func (iface *execNeonVMInterface) Request(ctx context.Context, logger *zap.Logger, current, target api.Resources) error { + iface.runner.recordResourceChange(current, target, iface.runner.global.metrics.neonvmRequestedChange) + + err := iface.runner.doNeonVMRequest(ctx, target) + if err != nil { + return fmt.Errorf("Error making VM patch request: %w", err) + } + + return nil +} + +//////////////////////////////////////////////////// +// Informant-related interface and implementation // +//////////////////////////////////////////////////// + +type execInformantInterface struct { + runner *Runner + core *executor.ExecutorCore +} + +func makeInformantInterface(r *Runner, core *executor.ExecutorCore) *execInformantInterface { + return &execInformantInterface{runner: r, core: core} +} + +// EmptyID implements executor.InformantInterface +func (iface *execInformantInterface) EmptyID() string { + return "" +} + +func (iface *execInformantInterface) GetHandle() executor.InformantHandle { + server := iface.runner.server.Load() + + if server == nil || server.ExitStatus() != nil { + return nil + } + + return &execInformantHandle{server: server} +} + +type execInformantHandle struct { + server *InformantServer +} + +func (h *execInformantHandle) ID() string { + return h.server.desc.AgentID.String() +} + +func (h *execInformantHandle) RequestLock() util.ChanMutex { + return h.server.requestLock +} + +func (h *execInformantHandle) Downscale( + ctx context.Context, + logger *zap.Logger, + current api.Resources, + target api.Resources, +) (*api.DownscaleResult, error) { + // Check validity of the message we're sending + if target.HasFieldGreaterThan(current) { + innerMsg := fmt.Errorf("%+v has field greater than %+v", target, current) + panic(fmt.Errorf("(*execInformantHandle).Downscale() called with target greater than current: %w", innerMsg)) + } + + h.server.runner.recordResourceChange(current, target, h.server.runner.global.metrics.informantRequestedChange) + + result, err := h.server.Downscale(ctx, logger, target) + + if err != nil && result.Ok { + h.server.runner.recordResourceChange(current, target, h.server.runner.global.metrics.informantApprovedChange) + } + + return result, err +} + +func (h *execInformantHandle) Upscale(ctx context.Context, logger *zap.Logger, current, target api.Resources) error { + // Check validity of the message we're sending + if target.HasFieldLessThan(current) { + innerMsg := fmt.Errorf("%+v has field less than %+v", target, current) + panic(fmt.Errorf("(*execInformantHandle).Upscale() called with target less than current: %w", innerMsg)) + } + + h.server.runner.recordResourceChange(current, target, h.server.runner.global.metrics.informantRequestedChange) + + err := h.server.Upscale(ctx, logger, target) + + if err != nil { + h.server.runner.recordResourceChange(current, target, h.server.runner.global.metrics.informantApprovedChange) + } + + return err +} diff --git a/pkg/agent/executor/core.go b/pkg/agent/executor/core.go new file mode 100644 index 000000000..5ae6cfd10 --- /dev/null +++ b/pkg/agent/executor/core.go @@ -0,0 +1,155 @@ +package executor + +// Consumers of pkg/agent/core, implementing the "executors" for each type of action. 
These are +// wrapped up into a single ExecutorCore type, which exposes some methods for the various executors. +// +// The executors use various abstract interfaces for the scheudler / NeonVM / informant. The +// implementations of those interfaces are defiend in ifaces.go + +import ( + "sync" + "time" + + "go.uber.org/zap" + + "github.com/neondatabase/autoscaling/pkg/agent/core" + "github.com/neondatabase/autoscaling/pkg/api" + "github.com/neondatabase/autoscaling/pkg/util" +) + +type Config = core.Config + +type ExecutorCore struct { + mu sync.Mutex + + stateLogger *zap.Logger + + core *core.State + actions *timedActions + + updates *util.Broadcaster +} + +type ClientSet struct { + Plugin PluginInterface + NeonVM NeonVMInterface + Informant InformantInterface +} + +func NewExecutorCore(stateLogger *zap.Logger, vm api.VmInfo, config core.Config) *ExecutorCore { + return &ExecutorCore{ + mu: sync.Mutex{}, + stateLogger: stateLogger, + core: core.NewState(vm, config), + actions: nil, // (*ExecutorCore).getActions() checks if this is nil + updates: util.NewBroadcaster(), + } +} + +type ExecutorCoreWithClients struct { + *ExecutorCore + + clients ClientSet +} + +func (c *ExecutorCore) WithClients(clients ClientSet) ExecutorCoreWithClients { + return ExecutorCoreWithClients{ + ExecutorCore: c, + clients: clients, + } +} + +type timedActions struct { + calculatedAt time.Time + actions core.ActionSet +} + +func (c *ExecutorCore) getActions() timedActions { + c.mu.Lock() + defer c.mu.Unlock() + + if c.actions == nil { + // NOTE: Even though we cache the actions generated using time.Now(), it's *generally* ok. + now := time.Now() + c.stateLogger.Info("Recalculating ActionSet", zap.Time("now", now), zap.Any("state", c.core.Dump())) + c.actions = &timedActions{calculatedAt: now, actions: c.core.NextActions(now)} + } + + return *c.actions +} + +func (c *ExecutorCore) update(with func(*core.State)) { + c.mu.Lock() + defer c.mu.Unlock() + + // NB: We broadcast the update *before* calling with() because this gets us nicer ordering + // guarantees in some cases. 
+ c.updates.Broadcast() + c.actions = nil + with(c.core) +} + +// Updater returns a handle on the object used for making external changes to the ExecutorCore, +// beyond what's provided by the various client (ish) interfaces +func (c *ExecutorCore) Updater() ExecutorCoreUpdater { + return ExecutorCoreUpdater{c} +} + +// ExecutorCoreUpdater provides a common interface for external changes to the ExecutorCore +type ExecutorCoreUpdater struct { + core *ExecutorCore +} + +func (c ExecutorCoreUpdater) UpdateMetrics(metrics api.Metrics, withLock func()) { + c.core.update(func(state *core.State) { + state.UpdateMetrics(metrics) + withLock() + }) +} + +// NewScheduler updates the inner state, calling (*core.State).Plugin().NewScheduler() +func (c ExecutorCoreUpdater) NewScheduler(withLock func()) { + c.core.update(func(state *core.State) { + state.Plugin().NewScheduler() + withLock() + }) +} + +// SchedulerGone updates the inner state, calling (*core.State).Plugin().SchedulerGone() +func (c ExecutorCoreUpdater) SchedulerGone(withLock func()) { + c.core.update(func(state *core.State) { + state.Plugin().SchedulerGone() + withLock() + }) +} + +func (c ExecutorCoreUpdater) ResetInformant(withLock func()) { + c.core.update(func(state *core.State) { + state.Informant().Reset() + withLock() + }) +} + +func (c ExecutorCoreUpdater) UpscaleRequested(resources api.MoreResources, withLock func()) { + c.core.update(func(state *core.State) { + state.Informant().UpscaleRequested(time.Now(), resources) + withLock() + }) +} + +func (c ExecutorCoreUpdater) InformantRegistered(active bool, withLock func()) { + c.core.update(func(state *core.State) { + state.Informant().SuccessfullyRegistered() + if active { + state.Informant().Active(active) + } + withLock() + }) +} + +func (c ExecutorCoreUpdater) InformantActive(active bool, withLock func()) { + c.core.update(func(state *core.State) { + state.Informant().Active(active) + withLock() + }) +} diff --git a/pkg/agent/executor/exec_informant.go b/pkg/agent/executor/exec_informant.go new file mode 100644 index 000000000..6f758d079 --- /dev/null +++ b/pkg/agent/executor/exec_informant.go @@ -0,0 +1,266 @@ +package executor + +import ( + "context" + "errors" + "time" + + "go.uber.org/zap" + + "github.com/neondatabase/autoscaling/pkg/agent/core" + "github.com/neondatabase/autoscaling/pkg/api" + "github.com/neondatabase/autoscaling/pkg/util" +) + +type InformantInterface interface { + EmptyID() string + GetHandle() InformantHandle +} + +type InformantHandle interface { + ID() string + RequestLock() util.ChanMutex + Downscale(_ context.Context, _ *zap.Logger, current, target api.Resources) (*api.DownscaleResult, error) + Upscale(_ context.Context, _ *zap.Logger, current, target api.Resources) error +} + +func (c *ExecutorCoreWithClients) DoInformantDownscales(ctx context.Context, logger *zap.Logger) { + var ( + updates util.BroadcastReceiver = c.updates.NewReceiver() + requestLock util.ChanMutex = util.NewChanMutex() + ifaceLogger *zap.Logger = logger.Named("client") + ) + + holdingRequestLock := false + releaseRequestLockIfHolding := func() { + if holdingRequestLock { + requestLock.Unlock() + holdingRequestLock = false + } + } + defer releaseRequestLockIfHolding() + + // meant to be called while holding c's lock + idUnchanged := func(current string) bool { + if h := c.clients.Informant.GetHandle(); h != nil { + return current == h.ID() + } else { + return current == c.clients.Informant.EmptyID() + } + } + + last := c.getActions() + for { + releaseRequestLockIfHolding() + + // 
Always receive an update if there is one. This helps with reliability (better guarantees + // about not missing updates) and means that the switch statements can be simpler. + select { + case <-updates.Wait(): + updates.Awake() + last = c.getActions() + default: + } + + // Wait until we're supposed to make a request. + if last.actions.InformantDownscale == nil { + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + // NB: don't .Awake(); allow that to be handled at the top of the loop. + continue + } + } + + action := *last.actions.InformantDownscale + + informant := c.clients.Informant.GetHandle() + + if informant != nil { + requestLock = informant.RequestLock() + + // Try to acquire the request lock, but if something happens while we're waiting, we'll + // abort & retry on the next loop iteration (or maybe not, if last.actions changed). + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + // NB: don't .Awake(); allow that to be handled at the top of the loop. + continue + case <-requestLock.WaitLock(): + holdingRequestLock = true + } + } + + var startTime time.Time + c.update(func(state *core.State) { + logger.Info("Starting informant downscale request", zap.Any("action", action)) + startTime = time.Now() + state.Informant().StartingDownscaleRequest(startTime) + }) + + result, err := doSingleInformantDownscaleRequest(ctx, ifaceLogger, informant, action) + endTime := time.Now() + + c.update(func(state *core.State) { + unchanged := idUnchanged(informant.ID()) + logFields := []zap.Field{ + zap.Any("action", action), + zap.Duration("duration", endTime.Sub(startTime)), + zap.Bool("unchanged", unchanged), + } + + if err != nil { + logger.Error("Informant downscale request failed", append(logFields, zap.Error(err))...) + if unchanged { + state.Informant().DownscaleRequestFailed(endTime) + } + return + } + + logFields = append(logFields, zap.Any("response", result)) + + if !result.Ok { + logger.Warn("Informant denied downscale", logFields...) + if unchanged { + state.Informant().DownscaleRequestDenied(endTime, action.Target) + } + } else { + logger.Info("Informant approved downscale", logFields...) + if unchanged { + state.Informant().DownscaleRequestAllowed(endTime, action.Target) + } + } + }) + } +} + +func doSingleInformantDownscaleRequest( + ctx context.Context, + logger *zap.Logger, + iface InformantHandle, + action core.ActionInformantDownscale, +) (*api.DownscaleResult, error) { + if iface == nil { + return nil, errors.New("No currently active informant") + } + + return iface.Downscale(ctx, logger, action.Current, action.Target) +} + +func (c *ExecutorCoreWithClients) DoInformantUpscales(ctx context.Context, logger *zap.Logger) { + var ( + updates util.BroadcastReceiver = c.updates.NewReceiver() + requestLock util.ChanMutex = util.NewChanMutex() + ifaceLogger *zap.Logger = logger.Named("client") + ) + + holdingRequestLock := false + releaseRequestLockIfHolding := func() { + if holdingRequestLock { + requestLock.Unlock() + holdingRequestLock = false + } + } + defer releaseRequestLockIfHolding() + + // meant to be called while holding c's lock + idUnchanged := func(current string) bool { + if h := c.clients.Informant.GetHandle(); h != nil { + return current == h.ID() + } else { + return current == c.clients.Informant.EmptyID() + } + } + + last := c.getActions() + for { + releaseRequestLockIfHolding() + + // Always receive an update if there is one. 
This helps with reliability (better guarantees + // about not missing updates) and means that the switch statements can be simpler. + select { + case <-updates.Wait(): + updates.Awake() + last = c.getActions() + default: + } + + // Wait until we're supposed to make a request. + if last.actions.InformantUpscale == nil { + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + // NB: don't .Awake(); allow that to be handled at the top of the loop. + continue + } + } + + action := *last.actions.InformantUpscale + + informant := c.clients.Informant.GetHandle() + + if informant != nil { + requestLock = informant.RequestLock() + + // Try to acquire the request lock, but if something happens while we're waiting, we'll + // abort & retry on the next loop iteration (or maybe not, if last.actions changed). + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + // NB: don't .Awake(); allow that to be handled at the top of the loop. + continue + case <-requestLock.WaitLock(): + holdingRequestLock = true + } + } + + var startTime time.Time + c.update(func(state *core.State) { + logger.Info("Starting informant upscale request", zap.Any("action", action)) + startTime = time.Now() + state.Informant().StartingUpscaleRequest(startTime) + }) + + err := doSingleInformantUpscaleRequest(ctx, ifaceLogger, informant, action) + endTime := time.Now() + + c.update(func(state *core.State) { + unchanged := idUnchanged(informant.ID()) + logFields := []zap.Field{ + zap.Any("action", action), + zap.Duration("duration", endTime.Sub(startTime)), + zap.Bool("unchanged", unchanged), + } + + if err != nil { + logger.Error("Informant upscale request failed", append(logFields, zap.Error(err))...) + if unchanged { + state.Informant().UpscaleRequestFailed(endTime) + } + return + } + + logger.Info("Informant upscale request successful", logFields...) + if unchanged { + state.Informant().UpscaleRequestSuccessful(endTime, action.Target) + } + }) + } +} + +func doSingleInformantUpscaleRequest( + ctx context.Context, + logger *zap.Logger, + iface InformantHandle, + action core.ActionInformantUpscale, +) error { + if iface == nil { + return errors.New("No currently active informant") + } + + return iface.Upscale(ctx, logger, action.Current, action.Target) +} diff --git a/pkg/agent/executor/exec_neonvm.go b/pkg/agent/executor/exec_neonvm.go new file mode 100644 index 000000000..62f89d988 --- /dev/null +++ b/pkg/agent/executor/exec_neonvm.go @@ -0,0 +1,94 @@ +package executor + +import ( + "context" + "time" + + "go.uber.org/zap" + + "github.com/neondatabase/autoscaling/pkg/agent/core" + "github.com/neondatabase/autoscaling/pkg/api" + "github.com/neondatabase/autoscaling/pkg/util" +) + +type NeonVMInterface interface { + RequestLock() util.ChanMutex + Request(_ context.Context, _ *zap.Logger, current, target api.Resources) error +} + +func (c *ExecutorCoreWithClients) DoNeonVMRequests(ctx context.Context, logger *zap.Logger) { + var ( + updates util.BroadcastReceiver = c.updates.NewReceiver() + requestLock util.ChanMutex = c.clients.NeonVM.RequestLock() + ifaceLogger *zap.Logger = logger.Named("client") + ) + + holdingRequestLock := false + releaseRequestLockIfHolding := func() { + if holdingRequestLock { + requestLock.Unlock() + holdingRequestLock = false + } + } + defer releaseRequestLockIfHolding() + + last := c.getActions() + for { + releaseRequestLockIfHolding() + + // Always receive an update if there is one. 
This helps with reliability (better guarantees + // about not missing updates) and means that the switch statements can be simpler. + select { + case <-updates.Wait(): + updates.Awake() + last = c.getActions() + default: + } + + // Wait until we're supposed to make a request. + if last.actions.NeonVMRequest == nil { + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + // NB: don't .Awake(); allow that to be handled at the top of the loop. + continue + } + } + + action := *last.actions.NeonVMRequest + + // Try to acquire the request lock, but if something happens while we're waiting, we'll + // abort & retry on the next loop iteration (or maybe not, if last.actions changed). + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + // NB: don't .Awake(); allow that to be handled at the top of the loop. + continue + case <-requestLock.WaitLock(): + holdingRequestLock = true + } + + var startTime time.Time + c.update(func(state *core.State) { + logger.Info("Starting NeonVM request", zap.Any("action", action)) + startTime = time.Now() + state.NeonVM().StartingRequest(startTime, action.Target) + }) + + err := c.clients.NeonVM.Request(ctx, ifaceLogger, action.Current, action.Target) + endTime := time.Now() + logFields := []zap.Field{zap.Any("action", action), zap.Duration("duration", endTime.Sub(startTime))} + + c.update(func(state *core.State) { + if err != nil { + logger.Error("NeonVM request failed", append(logFields, zap.Error(err))...) + state.NeonVM().RequestFailed(endTime) + } else /* err == nil */ { + logger.Info("NeonVM request successful", logFields...) + state.NeonVM().RequestSuccessful(endTime) + } + }) + } +} diff --git a/pkg/agent/executor/exec_plugin.go b/pkg/agent/executor/exec_plugin.go new file mode 100644 index 000000000..bcf5b7670 --- /dev/null +++ b/pkg/agent/executor/exec_plugin.go @@ -0,0 +1,138 @@ +package executor + +import ( + "context" + "errors" + "time" + + "go.uber.org/zap" + + "github.com/neondatabase/autoscaling/pkg/agent/core" + "github.com/neondatabase/autoscaling/pkg/api" + "github.com/neondatabase/autoscaling/pkg/util" +) + +type PluginInterface interface { + EmptyID() string + RequestLock() util.ChanMutex + GetHandle() PluginHandle +} + +type PluginHandle interface { + ID() string + Request(_ context.Context, _ *zap.Logger, lastPermit *api.Resources, target api.Resources, _ *api.Metrics) (*api.PluginResponse, error) +} + +func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger *zap.Logger) { + var ( + updates util.BroadcastReceiver = c.updates.NewReceiver() + requestLock util.ChanMutex = c.clients.Plugin.RequestLock() + ifaceLogger *zap.Logger = logger.Named("client") + ) + + holdingRequestLock := false + releaseRequestLockIfHolding := func() { + if holdingRequestLock { + requestLock.Unlock() + holdingRequestLock = false + } + } + defer releaseRequestLockIfHolding() + + idUnchanged := func(current string) bool { + if h := c.clients.Plugin.GetHandle(); h != nil { + return current == h.ID() + } else { + return current == c.clients.Plugin.EmptyID() + } + } + + last := c.getActions() + for { + releaseRequestLockIfHolding() + + // Always receive an update if there is one. This helps with reliability (better guarantees + // about not missing updates) and means that the switch statements can be simpler. + select { + case <-updates.Wait(): + updates.Awake() + last = c.getActions() + default: + } + + // Wait until we're supposed to make a request. 
+ if last.actions.PluginRequest == nil { + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + // NB: don't .Awake(); allow that to be handled at the top of the loop. + continue + } + } + + action := *last.actions.PluginRequest + + pluginIface := c.clients.Plugin.GetHandle() + + // Try to acquire the request lock, but if something happens while we're waiting, we'll + // abort & retry on the next loop iteration (or maybe not, if last.actions changed). + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + // NB: don't .Awake(); allow that to be handled at the top of the loop. + continue + case <-requestLock.WaitLock(): + holdingRequestLock = true + } + + // update the state to indicate that the request is starting. + var startTime time.Time + c.update(func(state *core.State) { + logger.Info("Starting plugin request", zap.Any("action", action)) + startTime = time.Now() + state.Plugin().StartingRequest(startTime, action.Target) + }) + + resp, err := doSinglePluginRequest(ctx, ifaceLogger, pluginIface, action) + endTime := time.Now() + + c.update(func(state *core.State) { + unchanged := idUnchanged(pluginIface.ID()) + logFields := []zap.Field{ + zap.Any("action", action), + zap.Duration("duration", endTime.Sub(startTime)), + zap.Bool("unchanged", unchanged), + } + + if err != nil { + logger.Error("Plugin request failed", append(logFields, zap.Error(err))...) + if unchanged { + state.Plugin().RequestFailed(endTime) + } + } else { + logFields = append(logFields, zap.Any("response", resp)) + logger.Info("Plugin request successful", logFields...) + if unchanged { + if err := state.Plugin().RequestSuccessful(endTime, *resp); err != nil { + logger.Error("Plugin response validation failed", append(logFields, zap.Error(err))...) + } + } + } + }) + } +} + +func doSinglePluginRequest( + ctx context.Context, + logger *zap.Logger, + iface PluginHandle, + action core.ActionPluginRequest, +) (*api.PluginResponse, error) { + if iface == nil { + return nil, errors.New("No currently enabled plugin handle") + } + + return iface.Request(ctx, logger, action.LastPermit, action.Target, action.Metrics) +} diff --git a/pkg/agent/executor/exec_sleeper.go b/pkg/agent/executor/exec_sleeper.go new file mode 100644 index 000000000..4208491df --- /dev/null +++ b/pkg/agent/executor/exec_sleeper.go @@ -0,0 +1,68 @@ +package executor + +import ( + "context" + "time" + + "go.uber.org/zap" + + "github.com/neondatabase/autoscaling/pkg/agent/core" +) + +func (c *ExecutorCore) DoSleeper(ctx context.Context, logger *zap.Logger) { + updates := c.updates.NewReceiver() + + // preallocate the timer. We clear it at the top of the loop; the 0 duration is just because we + // need *some* value, so it might as well be zero. + timer := time.NewTimer(0) + defer timer.Stop() + + last := c.getActions() + for { + // Ensure the timer is cleared at the top of the loop + if !timer.Stop() { + <-timer.C + } + + // If NOT waiting for a particular duration: + if last.actions.Wait == nil { + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + updates.Awake() + last = c.getActions() + } + } + + // If YES waiting for a particular duration + if last.actions.Wait != nil { + // NB: It's possible for last.calculatedAt to be somewhat out of date. 
It's *probably* + // fine, because we'll be given a notification any time the state has changed, so we + // should wake from a select soon enough to get here + timer.Reset(last.actions.Wait.Duration) + + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + updates.Awake() + + last = c.getActions() + case <-timer.C: + select { + // If there's also an update, then let that take preference: + case <-updates.Wait(): + updates.Awake() + last = c.getActions() + // Otherwise, trigger cache invalidation because we've waited for the requested + // amount of time: + default: + c.update(func(*core.State) {}) + updates.Awake() + last = c.getActions() + } + } + } + } +} diff --git a/pkg/agent/globalstate.go b/pkg/agent/globalstate.go index 41381992b..db877e634 100644 --- a/pkg/agent/globalstate.go +++ b/pkg/agent/globalstate.go @@ -329,17 +329,16 @@ func (s *agentState) newRunner(vmInfo api.VmInfo, podName util.NamespacedName, p status: nil, // set by calller schedulerRespondedWithMigration: false, - shutdown: nil, // set by (*Runner).Run - vm: vmInfo, - podName: podName, - podIP: podIP, - lock: util.NewChanMutex(), - requestLock: util.NewChanMutex(), - requestedUpscale: api.MoreResources{Cpu: false, Memory: false}, + shutdown: nil, // set by (*Runner).Run + vm: vmInfo, + podName: podName, + podIP: podIP, + lock: util.NewChanMutex(), + requestLock: util.NewChanMutex(), lastMetrics: nil, - scheduler: nil, - server: nil, + scheduler: atomic.Pointer[Scheduler]{}, + server: atomic.Pointer[InformantServer]{}, informant: nil, computeUnit: nil, lastApproved: nil, diff --git a/pkg/agent/informant.go b/pkg/agent/informant.go index a37269724..9b38895da 100644 --- a/pkg/agent/informant.go +++ b/pkg/agent/informant.go @@ -11,6 +11,7 @@ import ( "net/http" "strconv" "strings" + "sync/atomic" "time" "github.com/google/uuid" @@ -70,13 +71,8 @@ type InformantServer struct { // This field MAY be read while holding EITHER runner.lock OR requestLock. mode InformantServerMode - // updatedInformant is signalled once, when the InformantServer's register request completes, - // and the value of runner.informant is updated. - updatedInformant util.CondChannelSender - - // upscaleRequested is signalled whenever a valid request on /try-upscale is received, with at - // least one field set to true (i.e., at least one resource is being requested). - upscaleRequested util.CondChannelSender + // callbacks provide an abstraction for + callbacks informantStateCallbacks // requestLock guards requests to the VM informant to make sure that only one request is being // made at a time. @@ -86,7 +82,7 @@ type InformantServer struct { requestLock util.ChanMutex // exitStatus holds some information about why the server exited - exitStatus *InformantServerExitStatus + exitStatus atomic.Pointer[InformantServerExitStatus] // exit signals that the server should shut down, and sets exitStatus to status. 
// @@ -130,8 +126,7 @@ func NewInformantServer( ctx context.Context, logger *zap.Logger, runner *Runner, - updatedInformant util.CondChannelSender, - upscaleRequested util.CondChannelSender, + callbacks informantStateCallbacks, ) (*InformantServer, util.SignalReceiver, error) { // Manually start the TCP listener so that we can see the port it's assigned addr := net.TCPAddr{IP: net.IPv4zero, Port: 0 /* 0 means it'll be assigned any(-ish) port */} @@ -157,16 +152,15 @@ func NewInformantServer( MinProtoVersion: MinInformantProtocolVersion, MaxProtoVersion: MaxInformantProtocolVersion, }, - seqNum: 0, - receivedIDCheck: false, - madeContact: false, - protoVersion: nil, - mode: InformantServerUnconfirmed, - updatedInformant: updatedInformant, - upscaleRequested: upscaleRequested, - requestLock: util.NewChanMutex(), - exitStatus: nil, - exit: nil, // see below. + seqNum: 0, + receivedIDCheck: false, + madeContact: false, + protoVersion: nil, + mode: InformantServerUnconfirmed, + callbacks: callbacks, + requestLock: util.NewChanMutex(), + exitStatus: atomic.Pointer[InformantServerExitStatus]{}, + exit: nil, // see below. } logger = logger.With(zap.Object("server", server.desc)) @@ -188,8 +182,7 @@ func NewInformantServer( cancelBackground() // Set server.exitStatus if isn't already - if server.exitStatus == nil { - server.exitStatus = &status + if swapped := server.exitStatus.CompareAndSwap(nil, &status); swapped { logFunc := logger.Warn if status.RetryShouldFix { logFunc = logger.Info @@ -240,15 +233,10 @@ func NewInformantServer( // set server.exitStatus if it isn't already -- generally this should only occur if err // isn't http.ErrServerClosed, because other server exits should be controlled by - runner.lock.Lock() - defer runner.lock.Unlock() - - if server.exitStatus == nil { - server.exitStatus = &InformantServerExitStatus{ - Err: fmt.Errorf("Unexpected exit: %w", err), - RetryShouldFix: false, - } - } + server.exitStatus.CompareAndSwap(nil, &InformantServerExitStatus{ + Err: fmt.Errorf("Unexpected exit: %w", err), + RetryShouldFix: false, + }) }) // Thread waiting for the context to be canceled so we can use it to shut down the server @@ -319,7 +307,7 @@ func IsNormalInformantError(err error) bool { // // This method MUST be called while holding s.runner.lock. func (s *InformantServer) valid() error { - if s.exitStatus != nil { + if s.exitStatus.Load() != nil { return InformantServerAlreadyExitedError } @@ -334,7 +322,7 @@ func (s *InformantServer) valid() error { panic(fmt.Errorf("Unexpected InformantServerMode %q", s.mode)) } - if s.runner.server != s { + if s.runner.server.Load() != s { return InformantServerNotCurrentError } return nil @@ -342,13 +330,8 @@ func (s *InformantServer) valid() error { // ExitStatus returns the InformantServerExitStatus associated with the server, if it has been // instructed to exit -// -// This method MUST NOT be called while holding s.runner.lock. 
func (s *InformantServer) ExitStatus() *InformantServerExitStatus { - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - return s.exitStatus + return s.exitStatus.Load() } // setLastInformantError is a helper method to abbreviate setting the Runner's lastInformantError @@ -362,7 +345,7 @@ func (s *InformantServer) setLastInformantError(err error, runnerLocked bool) { defer s.runner.lock.Unlock() } - if s.runner.server == s { + if s.runner.server.Load() == s { s.runner.lastInformantError = err } } @@ -396,7 +379,7 @@ func (s *InformantServer) RegisterWithInformant(ctx context.Context, logger *zap panic(fmt.Errorf("Unexpected InformantServerMode %q", s.mode)) } - if s.exitStatus != nil { + if s.ExitStatus() != nil { err := InformantServerAlreadyExitedError s.setLastInformantError(err, true) return err @@ -468,22 +451,25 @@ func (s *InformantServer) RegisterWithInformant(ctx context.Context, logger *zap s.mode = InformantServerSuspended s.protoVersion = &resp.ProtoVersion - if s.runner.server == s { - oldInformant := s.runner.informant - s.runner.informant = resp - s.updatedInformant.Send() // signal we've changed the informant - - if oldInformant == nil { - logger.Info("Registered with informant", zap.Any("informant", *resp)) - } else if *oldInformant != *resp { - logger.Info( - "Re-registered with informant, InformantDesc changed", - zap.Any("oldInformant", *oldInformant), - zap.Any("informant", *resp), - ) - } else { - logger.Info("Re-registered with informant; InformantDesc unchanged", zap.Any("informant", *oldInformant)) - } + if s.runner.server.Load() == s { + // signal we've changed the informant, and do the logging while we're at it, so there's + // a synchronous record of what happened. + s.callbacks.registered(false, func() { + oldInformant := s.runner.informant + s.runner.informant = resp + + if oldInformant == nil { + logger.Info("Registered with informant", zap.Any("informant", *resp)) + } else if *oldInformant != *resp { + logger.Info( + "Re-registered with informant, InformantDesc changed", + zap.Any("oldInformant", *oldInformant), + zap.Any("informant", *resp), + ) + } else { + logger.Info("Re-registered with informant; InformantDesc unchanged", zap.Any("informant", *oldInformant)) + } + }) } else { logger.Warn("Registering with informant completed but the server has already been replaced") } @@ -687,7 +673,7 @@ func (s *InformantServer) handleID(ctx context.Context, _ *zap.Logger, body *str s.receivedIDCheck = true - if s.exitStatus != nil { + if s.ExitStatus() != nil { return nil, 404, errors.New("Server has already exited") } @@ -726,7 +712,7 @@ func (s *InformantServer) handleResume( s.runner.lock.Lock() defer s.runner.lock.Unlock() - if s.exitStatus != nil { + if s.ExitStatus() != nil { return nil, 404, errors.New("Server has already exited") } @@ -737,12 +723,14 @@ func (s *InformantServer) handleResume( switch s.mode { case InformantServerSuspended: s.mode = InformantServerRunning - logger.Info( - "Informant server mode updated", - zap.String("action", "resume"), - zap.String("oldMode", string(InformantServerSuspended)), - zap.String("newMode", string(InformantServerRunning)), - ) + s.callbacks.setActive(true, func() { + logger.Info( + "Informant server mode updated", + zap.String("action", "resume"), + zap.String("oldMode", string(InformantServerSuspended)), + zap.String("newMode", string(InformantServerRunning)), + ) + }) case InformantServerRunning: internalErr := errors.New("Got /resume request for server, but it is already running") logger.Warn("Protocol 
violation", zap.Error(internalErr)) @@ -799,19 +787,21 @@ func (s *InformantServer) handleSuspend( } }() - if s.exitStatus != nil { + if s.ExitStatus() != nil { return nil, 404, errors.New("Server has already exited") } switch s.mode { case InformantServerRunning: s.mode = InformantServerSuspended - logger.Info( - "Informant server mode updated", - zap.String("action", "suspend"), - zap.String("oldMode", string(InformantServerRunning)), - zap.String("newMode", string(InformantServerSuspended)), - ) + s.callbacks.setActive(false, func() { + logger.Info( + "Informant server mode updated", + zap.String("action", "suspend"), + zap.String("oldMode", string(InformantServerRunning)), + zap.String("newMode", string(InformantServerSuspended)), + ) + }) case InformantServerSuspended: internalErr := errors.New("Got /suspend request for server, but it is already suspended") logger.Warn("Protocol violation", zap.Error(internalErr)) @@ -876,7 +866,7 @@ func (s *InformantServer) handleTryUpscale( s.runner.lock.Lock() defer s.runner.lock.Unlock() - if s.exitStatus != nil { + if s.ExitStatus() != nil { return nil, 404, errors.New("Server has already exited") } @@ -887,18 +877,16 @@ func (s *InformantServer) handleTryUpscale( return nil, 400, err } - if body.MoreResources.Cpu || body.MoreResources.Memory { - s.upscaleRequested.Send() - } else { - logger.Warn("Received try-upscale request that has no resources selected") - } + s.callbacks.upscaleRequested(body.MoreResources, func() { + if !body.MoreResources.Cpu && !body.MoreResources.Memory { + logger.Warn("Received try-upscale request that has no resources selected") + } - logger.Info( - "Updating requested upscale", - zap.Any("oldRequested", s.runner.requestedUpscale), - zap.Any("newRequested", body.MoreResources), - ) - s.runner.requestedUpscale = body.MoreResources + logger.Info( + "Updating requested upscale", + zap.Any("requested", body.MoreResources), + ) + }) return &api.AgentIdentificationMessage{ Data: api.AgentIdentification{AgentID: s.desc.AgentID}, @@ -982,11 +970,8 @@ func (s *InformantServer) HealthCheck(ctx context.Context, logger *zap.Logger) ( // Downscale makes a request to the informant's /downscale endpoint with the api.Resources // -// This method MUST NOT be called while holding i.server.runner.lock OR i.server.requestLock. +// This method MUST NOT be called while holding i.server.runner.lock. 
func (s *InformantServer) Downscale(ctx context.Context, logger *zap.Logger, to api.Resources) (*api.DownscaleResult, error) { - s.requestLock.Lock() - defer s.requestLock.Unlock() - err := func() error { s.runner.lock.Lock() defer s.runner.lock.Unlock() @@ -1039,9 +1024,6 @@ func (s *InformantServer) Downscale(ctx context.Context, logger *zap.Logger, to } func (s *InformantServer) Upscale(ctx context.Context, logger *zap.Logger, to api.Resources) error { - s.requestLock.Lock() - defer s.requestLock.Unlock() - err := func() error { s.runner.lock.Lock() defer s.runner.lock.Unlock() diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index bbb321237..f14a8ec81 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -52,7 +52,6 @@ import ( "errors" "fmt" "io" - "math" "net/http" "runtime/debug" "strconv" @@ -64,6 +63,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ktypes "k8s.io/apimachinery/pkg/types" + "github.com/neondatabase/autoscaling/pkg/agent/executor" "github.com/neondatabase/autoscaling/pkg/agent/schedwatch" "github.com/neondatabase/autoscaling/pkg/api" "github.com/neondatabase/autoscaling/pkg/util" @@ -129,16 +129,12 @@ type Runner struct { // from non-nil to nil. The data behind each pointer is immutable, but the value of the pointer // itself is not. lastMetrics *api.Metrics - // requestedUpscale provides information about any requested upscaling by a VM informant - // - // This value is reset whenever we start a new informant server - requestedUpscale api.MoreResources // scheduler is the current scheduler that we're communicating with, or nil if there isn't one. // Each scheduler's info field is immutable. When a scheduler is replaced, only the pointer // value here is updated; the original Scheduler remains unchanged. - scheduler *Scheduler - server *InformantServer + scheduler atomic.Pointer[Scheduler] + server atomic.Pointer[InformantServer] // informant holds the most recent InformantDesc that an InformantServer has received in its // normal operation. If there has been at least one InformantDesc received, this field will not // be nil. 
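The hunks above and below replace lock-guarded pointer fields (scheduler, server, exitStatus) with sync/atomic's atomic.Pointer, using CompareAndSwap so that only the first writer records an exit status and readers no longer need runner.lock. A minimal, self-contained sketch of that pattern, under the assumption that the struct below is only a stand-in for the type of the same name in the patch and setExitStatusOnce is an illustrative helper, not code from this change:

package main

import (
	"errors"
	"fmt"
	"sync/atomic"
)

// InformantServerExitStatus is a local stand-in for the struct of the same name in the patch.
type InformantServerExitStatus struct {
	Err            error
	RetryShouldFix bool
}

type server struct {
	// exitStatus is read and written without any surrounding mutex.
	exitStatus atomic.Pointer[InformantServerExitStatus]
}

// setExitStatusOnce records status only if no status has been recorded yet,
// mirroring the CompareAndSwap(nil, &status) calls in NewInformantServer.
func (s *server) setExitStatusOnce(status InformantServerExitStatus) bool {
	return s.exitStatus.CompareAndSwap(nil, &status)
}

func main() {
	var s server

	fmt.Println(s.setExitStatusOnce(InformantServerExitStatus{Err: errors.New("first"), RetryShouldFix: true}))  // true: stored
	fmt.Println(s.setExitStatusOnce(InformantServerExitStatus{Err: errors.New("second"), RetryShouldFix: false})) // false: already set

	// Readers just Load(); a nil result means no exit status has been set.
	if st := s.exitStatus.Load(); st != nil {
		fmt.Println(st.Err) // prints "first"
	}
}

CompareAndSwap(nil, &status) gives the same "set once" behavior the old code got from checking exitStatus == nil under runner.lock, without taking the lock on the read path.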
@@ -243,24 +239,24 @@ func (r *Runner) State(ctx context.Context) (*RunnerState, error) { defer r.lock.Unlock() var scheduler *SchedulerState - if r.scheduler != nil { + if sched := r.scheduler.Load(); sched != nil { scheduler = &SchedulerState{ - Info: r.scheduler.info, - Registered: r.scheduler.registered, - FatalError: r.scheduler.fatalError, + Info: sched.info, + Registered: sched.registered, + FatalError: sched.fatalError, } } var serverState *InformantServerState - if r.server != nil { + if server := r.server.Load(); server != nil { serverState = &InformantServerState{ - Desc: r.server.desc, - SeqNum: r.server.seqNum, - ReceivedIDCheck: r.server.receivedIDCheck, - MadeContact: r.server.madeContact, - ProtoVersion: r.server.protoVersion, - Mode: r.server.mode, - ExitStatus: r.server.exitStatus, + Desc: server.desc, + SeqNum: server.seqNum, + ReceivedIDCheck: server.receivedIDCheck, + MadeContact: server.madeContact, + ProtoVersion: server.protoVersion, + Mode: server.mode, + ExitStatus: server.exitStatus.Load(), } } @@ -349,14 +345,29 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util schedulerWatch.Using(*scheduler) } - // signal when r.lastMetrics is updated - sendMetricsSignal, recvMetricsSignal := util.NewCondChannelPair() - // signal when new schedulers are *registered* - sendSchedSignal, recvSchedSignal := util.NewCondChannelPair() - // signal when r.informant is updated - sendInformantUpd, recvInformantUpd := util.NewCondChannelPair() - // signal when the informant requests upscaling - sendUpscaleRequested, recvUpscaleRequested := util.NewCondChannelPair() + execLogger := logger.Named("exec") + + coreExecLogger := execLogger.Named("core") + executorCore := executor.NewExecutorCore(coreExecLogger.Named("state"), r.vm, executor.Config{ + DefaultScalingConfig: r.global.config.Scaling.DefaultConfig, + PluginRequestTick: time.Second * time.Duration(r.global.config.Scheduler.RequestAtLeastEverySeconds), + InformantDeniedDownscaleCooldown: time.Second * time.Duration(r.global.config.Informant.RetryDeniedDownscaleSeconds), + InformantRetryWait: time.Second * time.Duration(r.global.config.Informant.RetryFailedRequestSeconds), + Warn: func(msg string, args ...any) { + coreExecLogger.Warn(fmt.Sprintf(msg, args...)) + }, + }) + + pluginIface := makePluginInterface(r, executorCore) + neonvmIface := makeNeonVMInterface(r) + informantIface := makeInformantInterface(r, executorCore) + + // "ecwc" stands for "ExecutorCoreWithClients" + ecwc := executorCore.WithClients(executor.ClientSet{ + Plugin: pluginIface, + NeonVM: neonvmIface, + Informant: informantIface, + }) logger.Info("Starting background workers") @@ -367,17 +378,44 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util r.spawnBackgroundWorker(ctx, logger, "deadlock checker (main)", ignoreLogger(mainDeadlockChecker)) r.spawnBackgroundWorker(ctx, logger, "deadlock checker (request lock)", ignoreLogger(reqDeadlockChecker)) r.spawnBackgroundWorker(ctx, logger, "track scheduler", func(c context.Context, l *zap.Logger) { - r.trackSchedulerLoop(c, l, scheduler, schedulerWatch, sendSchedSignal) + r.trackSchedulerLoop(c, l, scheduler, schedulerWatch, func(withLock func()) { + ecwc.Updater().NewScheduler(withLock) + }) }) + sendInformantUpd, recvInformantUpd := util.NewCondChannelPair() r.spawnBackgroundWorker(ctx, logger, "get metrics", func(c context.Context, l *zap.Logger) { - r.getMetricsLoop(c, l, sendMetricsSignal, recvInformantUpd) - }) - r.spawnBackgroundWorker(ctx, logger, 
"handle VM resources", func(c context.Context, l *zap.Logger) { - r.handleVMResources(c, l, recvMetricsSignal, recvUpscaleRequested, recvSchedSignal, vmInfoUpdated) + r.getMetricsLoop(c, l, recvInformantUpd, func(metrics api.Metrics, withLock func()) { + ecwc.Updater().UpdateMetrics(metrics, withLock) + }) }) r.spawnBackgroundWorker(ctx, logger, "informant server loop", func(c context.Context, l *zap.Logger) { - r.serveInformantLoop(c, l, sendInformantUpd, sendUpscaleRequested) + r.serveInformantLoop( + c, + l, + informantStateCallbacks{ + resetInformant: func(withLock func()) { + ecwc.Updater().ResetInformant(withLock) + }, + upscaleRequested: func(request api.MoreResources, withLock func()) { + ecwc.Updater().UpscaleRequested(request, withLock) + }, + registered: func(active bool, withLock func()) { + ecwc.Updater().InformantRegistered(active, func() { + sendInformantUpd.Send() + withLock() + }) + }, + setActive: func(active bool, withLock func()) { + ecwc.Updater().InformantActive(active, withLock) + }, + }, + ) }) + r.spawnBackgroundWorker(ctx, execLogger.Named("sleeper"), "executor: sleeper", ecwc.DoSleeper) + r.spawnBackgroundWorker(ctx, execLogger.Named("plugin"), "executor: plugin", ecwc.DoPluginRequests) + r.spawnBackgroundWorker(ctx, execLogger.Named("neonvm"), "executor: neonvm", ecwc.DoNeonVMRequests) + r.spawnBackgroundWorker(ctx, execLogger.Named("informant-downscale"), "executor: informant downscale", ecwc.DoInformantDownscales) + r.spawnBackgroundWorker(ctx, execLogger.Named("informant-upscale"), "executor: informant upscale", ecwc.DoInformantUpscales) // Note: Run doesn't terminate unless the parent context is cancelled - either because the VM // pod was deleted, or the autoscaler-agent is exiting. @@ -456,8 +494,8 @@ func (r *Runner) spawnBackgroundWorker(ctx context.Context, logger *zap.Logger, func (r *Runner) getMetricsLoop( ctx context.Context, logger *zap.Logger, - newMetrics util.CondChannelSender, updatedInformant util.CondChannelReceiver, + newMetrics func(metrics api.Metrics, withLock func()), ) { timeout := time.Second * time.Duration(r.global.config.Metrics.RequestTimeoutSeconds) waitBetweenDuration := time.Second * time.Duration(r.global.config.Metrics.SecondsBetweenRequests) @@ -474,14 +512,9 @@ func (r *Runner) getMetricsLoop( goto next } - logger.Info("Got metrics", zap.Any("metrics", *metrics)) - - func() { - r.lock.Lock() - defer r.lock.Unlock() - r.lastMetrics = metrics - newMetrics.Send() - }() + newMetrics(*metrics, func() { + logger.Info("Updated metrics", zap.Any("metrics", *metrics)) + }) next: waitBetween := time.After(waitBetweenDuration) @@ -506,138 +539,11 @@ func (r *Runner) getMetricsLoop( } } -// handleVMResources is the primary background worker responsible for updating the desired state of -// the VM and communicating with the other components to make that happen, if possible. -// -// A new desired state is calculated when signalled on updatedMetrics or newScheduler. -// -// It may not be obvious at first, so: The reason why we try again when signalled on newScheduler, -// even though scheduler registration is handled separately, is that we might've had a prior desired -// increase that wasn't possible at the time (because the scheduler was unavailable) but is now -// possible, without the metrics being updated. 
-func (r *Runner) handleVMResources( - ctx context.Context, - logger *zap.Logger, - updatedMetrics util.CondChannelReceiver, - upscaleRequested util.CondChannelReceiver, - registeredScheduler util.CondChannelReceiver, - vmInfoUpdated util.CondChannelReceiver, -) { - for { - var reason VMUpdateReason - - select { - case <-ctx.Done(): - return - case <-updatedMetrics.Recv(): - reason = UpdatedMetrics - case <-upscaleRequested.Recv(): - reason = UpscaleRequested - case <-registeredScheduler.Recv(): - reason = RegisteredScheduler - case <-vmInfoUpdated.Recv(): - // Only actually do the update if something we care about changed: - newVMInfo := func() api.VmInfo { - r.status.mu.Lock() - defer r.status.mu.Unlock() - return r.status.vmInfo - }() - - if !newVMInfo.ScalingEnabled { - // This shouldn't happen because any update to the VM object that has - // ScalingEnabled=false should get translated into a "deletion" so the runner stops. - // So we shoudln't get an "update" event, and if we do, something's gone very wrong. - panic("explicit VM update given but scaling is disabled") - } - - // Update r.vm and r.lastApproved (see comment explaining why) - if changed := func() (changed bool) { - r.lock.Lock() - defer r.lock.Unlock() - - if r.vm.Mem.SlotSize.Cmp(*newVMInfo.Mem.SlotSize) != 0 { - // VM memory slot sizes can't change at runtime, at time of writing (2023-04-12). - // It's worth checking it here though, because something must have gone horribly - // wrong elsewhere for the memory slots size to change that it's worth aborting - // before anything else goes wrong - and if, in future, we allow them to change, - // it's better to panic than have subtly incorrect logic. - panic("VM changed memory slot size") - } - - // Create vm, which is r.vm with some fields taken from newVMInfo. - // - // Instead of copying r.vm, we create the entire struct explicitly so that we can - // have field exhaustiveness checking make sure that we don't forget anything when - // fields are added to api.VmInfo. - vm := api.VmInfo{ - Name: r.vm.Name, - Namespace: r.vm.Namespace, - Cpu: api.VmCpuInfo{ - Min: newVMInfo.Cpu.Min, - Use: r.vm.Cpu.Use, // TODO: Eventually we should explicitly take this as input, use newVMInfo - Max: newVMInfo.Cpu.Max, - }, - Mem: api.VmMemInfo{ - Min: newVMInfo.Mem.Min, - Use: r.vm.Mem.Use, // TODO: Eventually we should explicitly take this as input, use newVMInfo - Max: newVMInfo.Mem.Max, - - SlotSize: r.vm.Mem.SlotSize, // checked for equality above. - }, - - ScalingConfig: newVMInfo.ScalingConfig, - AlwaysMigrate: newVMInfo.AlwaysMigrate, - ScalingEnabled: newVMInfo.ScalingEnabled, // note: see above, checking newVMInfo.ScalingEnabled != false - } - - changed = vm != r.vm - r.vm = vm - - // As a final (necessary) precaution, update lastApproved so that it isn't possible - // for the scheduler to observe a temporary low upper bound that causes it to - // have state that's inconsistent with us (potentially causing overallocation). If - // we didn't handle this, the following sequence of actions would cause inconsistent - // state: - // - // 1. VM is at 4 CPU (of max 4), runner & scheduler agree - // 2. Scheduler dies - // 3. Runner loses contact with scheduler - // 4. VM Cpu.Max gets set to 2 - // 5. Runner observes Cpu.Max = 2 and forces downscale to 2 CPU - // 6. New scheduler appears, observes Cpu.Max = 2 - // 7. VM Cpu.Max gets set to 4 - // 8. Runner observes Cpu.Max = 4 (lastApproved is still 4) - // <-- INCONSISTENT STATE --> - // 9. 
Scheduler observes Cpu.Max = 4 - // - // If the runner observes the updated state before the scheduler, it's entirely - // possible for the runner to make a request that *it* thinks is just informative, - // but that the scheduler thinks is requesting more resources. At that point, the - // request can unexpectedly fail, or the scheduler can over-allocate, etc. - if r.lastApproved != nil { - *r.lastApproved = r.lastApproved.Min(vm.Max()) - } - - return - }(); !changed { - continue - } - - reason = UpdatedVMInfo - } - - err := r.updateVMResources( - ctx, logger, reason, updatedMetrics.Consume, registeredScheduler.Consume, - ) - if err != nil { - if ctx.Err() != nil { - logger.Warn("Error updating VM resources (but context already expired)", zap.Error(err)) - return - } - - logger.Error("Error updating VM resources", zap.Error(err)) - } - } +type informantStateCallbacks struct { + resetInformant func(withLock func()) + upscaleRequested func(request api.MoreResources, withLock func()) + registered func(active bool, withLock func()) + setActive func(active bool, withLock func()) } // serveInformantLoop repeatedly creates an InformantServer to handle communications with the VM @@ -647,8 +553,7 @@ func (r *Runner) handleVMResources( func (r *Runner) serveInformantLoop( ctx context.Context, logger *zap.Logger, - updatedInformant util.CondChannelSender, - upscaleRequested util.CondChannelSender, + callbacks informantStateCallbacks, ) { // variables set & accessed across loop iterations var ( @@ -664,13 +569,6 @@ func (r *Runner) serveInformantLoop( retryServer: for { - // On each (re)try, unset the informant's requested upscale. We need to do this *before* - // starting the server, because otherwise it's possible for a racy /try-upscale request to - // sneak in before we reset it, which would cause us to incorrectly ignore the request. - if upscaleRequested.Unsend() { - logger.Info("Cancelled existing 'upscale requested' signal due to informant server restart") - } - if normalRetryWait != nil { logger.Info("Retrying informant server after delay", zap.Duration("delay", normalWait)) select { @@ -701,7 +599,7 @@ retryServer: minRetryWait = time.After(minWait) lastStart = time.Now() - server, exited, err := NewInformantServer(ctx, logger, r, updatedInformant, upscaleRequested) + server, exited, err := NewInformantServer(ctx, logger, r, callbacks) if ctx.Err() != nil { if err != nil { logger.Warn("Error starting informant server (but context canceled)", zap.Error(err)) @@ -719,14 +617,14 @@ retryServer: defer r.lock.Unlock() var kind string - if r.server == nil { + if r.server.Load() == nil { kind = "Setting" } else { kind = "Updating" } logger.Info(fmt.Sprintf("%s initial informant server", kind), zap.Object("server", server.desc)) - r.server = server + r.server.Store(server) }() logger.Info("Registering with informant") @@ -790,7 +688,7 @@ func (r *Runner) trackSchedulerLoop( logger *zap.Logger, init *schedwatch.SchedulerInfo, schedulerWatch schedwatch.SchedulerWatch, - registeredScheduler util.CondChannelSender, + newScheduler func(withLock func()), ) { // pre-declare a bunch of variables because we have some gotos here. var ( @@ -819,9 +717,9 @@ startScheduler: fatal = func() util.SignalReceiver { logger := logger.With(zap.Object("scheduler", currentInfo)) - // Print info about a new scheduler, unless this is the first one. 
+ verb := "Setting" if init == nil || init.UID != currentInfo.UID { - logger.Info("Updating scheduler pod") + verb = "Updating" } sendFatal, recvFatal := util.NewSingleSignalPair() @@ -834,31 +732,14 @@ startScheduler: fatal: sendFatal, } - func() { - r.lock.Lock() - defer r.lock.Unlock() - - r.scheduler = sched - r.lastSchedulerError = nil - }() - - r.spawnBackgroundWorker(ctx, logger, "Scheduler.Register()", func(c context.Context, logger *zap.Logger) { - r.requestLock.Lock() - defer r.requestLock.Unlock() + r.lock.Lock() + defer r.lock.Unlock() - // It's possible for another thread to take responsibility for registering the - // scheduler, instead of us. Don't need to double-register. - if sched.registered { - return - } + newScheduler(func() { + logger.Info(fmt.Sprintf("%s scheduler pod", verb)) - if err := sched.Register(c, logger, registeredScheduler.Send); err != nil { - if c.Err() != nil { - logger.Warn("Error registering with scheduler (but context is done)", zap.Error(err)) - } else { - logger.Error("Error registering with scheduler", zap.Error(err)) - } - } + r.scheduler.Store(sched) + r.lastSchedulerError = nil }) return recvFatal @@ -881,20 +762,22 @@ startScheduler: r.lock.Lock() defer r.lock.Unlock() - if r.scheduler.info.UID != info.UID { + scheduler := r.scheduler.Load() + + if scheduler.info.UID != info.UID { logger.Info( "Scheduler candidate pod was deleted, but we aren't using it yet", - zap.Object("scheduler", r.scheduler.info), zap.Object("candidate", info), + zap.Object("scheduler", scheduler.info), zap.Object("candidate", info), ) return false } logger.Info( "Scheduler pod was deleted. Aborting further communication", - zap.Object("scheduler", r.scheduler.info), + zap.Object("scheduler", scheduler.info), ) - r.scheduler = nil + r.scheduler.Store(nil) return true }() @@ -975,10 +858,10 @@ func (r *Runner) doMetricsRequestIfEnabled( // nice to have have the guarantees around not racing. clearNewInformantSignal() - if r.server == nil || r.server.mode != InformantServerRunning { + if server := r.server.Load(); server == nil || server.mode != InformantServerRunning { var state = "unset" - if r.server != nil { - state = string(r.server.mode) + if server != nil { + state = string(server.mode) } logger.Info(fmt.Sprintf("Cannot make metrics request because informant server is %s", state)) @@ -1041,454 +924,7 @@ func (r *Runner) doMetricsRequestIfEnabled( return handle(body) } -// VMUpdateReason provides context to (*Runner).updateVMResources about why an update to the VM's -// resources has been requested -type VMUpdateReason string - -const ( - UpdatedMetrics VMUpdateReason = "metrics" - UpscaleRequested VMUpdateReason = "upscale requested" - RegisteredScheduler VMUpdateReason = "scheduler" - UpdatedVMInfo VMUpdateReason = "updated VM info" -) - -// atomicUpdateState holds some pre-validated data for (*Runner).updateVMResources, fetched -// atomically (i.e. all at once, while holding r.lock) with the (*Runner).atomicState method -// -// Because atomicState is able to return nil when there isn't yet enough information to update the -// VM's resources, some validation is already guaranteed by representing the data without pointers. 
-type atomicUpdateState struct { - computeUnit api.Resources - metrics api.Metrics - vm api.VmInfo - lastApproved api.Resources - requestedUpscale api.MoreResources - config api.ScalingConfig -} - -// updateVMResources is responsible for the high-level logic that orchestrates a single update to -// the VM's resources - or possibly just informing the scheduler that nothing's changed. -// -// This method sometimes returns nil if the reason we couldn't perform the update was solely because -// other information was missing (e.g., we haven't yet contacted a scheduler). In these cases, an -// appropriate message is logged. -func (r *Runner) updateVMResources( - ctx context.Context, - logger *zap.Logger, - reason VMUpdateReason, - clearUpdatedMetricsSignal func(), - clearNewSchedulerSignal func(), -) error { - // Acquiring this lock *may* take a while, so we'll allow it to be interrupted by ctx - // - // We'll need the lock for access to the scheduler and NeonVM, and holding it across all the - // request means that our logic can be a little simpler :) - if err := r.requestLock.TryLock(ctx); err != nil { - return err - } - defer r.requestLock.Unlock() - - logger.Info("Updating VM resources", zap.String("reason", string(reason))) - - // A /suspend request from a VM informant will wait until requestLock returns. So we're good to - // make whatever requests we need as long as the informant is here at the start. - // - // The reason we care about the informant server being "enabled" is that the VM informant uses - // it to ensure that there's at most one autoscaler-agent that's making requests on its behalf. - if err := r.validateInformant(); err != nil { - logger.Warn("Unable to update VM resources because informant server is disabled", zap.Error(err)) - return nil - } - - // state variables - var ( - start api.Resources // r.vm.Using(), at the time of the start of this function - for metrics. - target api.Resources - capped api.Resources // target, but capped by r.lastApproved - ) - - if r.schedulerRespondedWithMigration { - logger.Info("Aborting VM resource update because scheduler previously said VM is migrating") - return nil - } - - state, err := func() (*atomicUpdateState, error) { - r.lock.Lock() - defer r.lock.Unlock() - - clearUpdatedMetricsSignal() - - state := r.getStateForVMUpdate(logger, reason) - if state == nil { - // if state == nil, the reason why we can't do the operation was already logged. - return nil, nil - } else if r.scheduler != nil && r.scheduler.fatalError != nil { - logger.Warn("Unable to update VM resources because scheduler had a prior fatal error") - return nil, nil - } - - // Calculate the current and desired state of the VM - target = state.desiredVMState(true) // note: this sets the state value in the loop body - - current := state.vm.Using() - start = current - - msg := "Target VM state is equal to current" - if target != current { - msg = "Target VM state different from current" - } - logger.Info(msg, zap.Object("current", current), zap.Object("target", target)) - - // Check if there's resources that can (or must) be updated before talking to the scheduler. - // - // During typical operation, this only occurs when the target state corresponds to fewer - // compute units than the current state. 
However, this can also happen when: - // - // * lastApproved and target are both greater than the VM's state; or - // * VM's state doesn't match the compute unit and only one resource is being decreased - // - // To make handling these edge-cases smooth, the code here is more generic than typical - // operation requires. - - // note: r.atomicState already checks the validity of r.lastApproved - namely that it has no - // values less than r.vm.Using(). - capped = target.Min(state.lastApproved) // note: this sets the state value in the loop body - - return state, nil - }() - - // note: state == nil means that there's some other reason we couldn't do the operation that - // was already logged. - if err != nil || state == nil { - return err - } - - // If there's an update that can be done immediately, do it! Typically, capped will - // represent the resources we'd like to downscale. - if capped != state.vm.Using() { - // If our downscale gets rejected, calculate a new target - rejectedDownscale := func() (newTarget api.Resources, _ error) { - target = state.desiredVMState(false /* don't allow downscaling */) - return target.Min(state.lastApproved), nil - } - - nowUsing, err := r.doVMUpdate(ctx, logger, state.vm.Using(), capped, rejectedDownscale) - if err != nil { - return fmt.Errorf("Error doing VM update 1: %w", err) - } else if nowUsing == nil { - // From the comment above doVMUpdate: - // - // > If the VM informant is required and unavailable (or becomes unavailable), this - // > method will: return nil, nil; log an appropriate warning; and reset the VM's - // > state to its current value. - // - // So we should just return nil. We can't update right now, and there isn't anything - // left to log. - return nil - } - - state.vm.SetUsing(*nowUsing) - } - - // Fetch the scheduler, to (a) inform it of the current state, and (b) request an - // increase, if we want one. - sched := func() *Scheduler { - r.lock.Lock() - defer r.lock.Unlock() - - clearNewSchedulerSignal() - return r.scheduler - }() - - // If we can't reach the scheduler, then we've already done everything we can. Emit a - // warning and exit. We'll get notified to retry when a new one comes online. - if sched == nil { - logger.Warn("Unable to complete updating VM resources", zap.Error(errors.New("no scheduler registered"))) - return nil - } - - // If the scheduler isn't registered yet, then either the initial register request failed, or it - // hasn't gotten a chance to send it yet. - if !sched.registered { - if err := sched.Register(ctx, logger, func() {}); err != nil { - logger.Error("Error registering with scheduler", zap.Object("scheduler", sched.info), zap.Error(err)) - logger.Warn("Unable to complete updating VM resources", zap.Error(errors.New("scheduler Register request failed"))) - return nil - } - } - - r.recordResourceChange(start, target, r.global.metrics.schedulerRequestedChange) - - request := api.AgentRequest{ - ProtoVersion: PluginProtocolVersion, - Pod: r.podName, - Resources: target, - Metrics: &state.metrics, // FIXME: the metrics here *might* be a little out of date. 
- } - response, err := sched.DoRequest(ctx, logger, &request) - if err != nil { - logger.Error("Scheduler request failed", zap.Object("scheduler", sched.info), zap.Error(err)) - logger.Warn("Unable to complete updating VM resources", zap.Error(errors.New("scheduler request failed"))) - return nil - } else if response.Migrate != nil { - // info about migration has already been logged by DoRequest - return nil - } - - permit := response.Permit - r.recordResourceChange(start, permit, r.global.metrics.schedulerApprovedChange) - - // sched.DoRequest should have validated the permit, meaning that it's not less than the - // current resource usage. - vmUsing := state.vm.Using() - if permit.HasFieldLessThan(vmUsing) { - panic(errors.New("invalid state: permit less than what's in use")) - } else if permit.HasFieldGreaterThan(target) { - panic(errors.New("invalid state: permit greater than target")) - } - - if permit == vmUsing { - if vmUsing != target { - logger.Info("Scheduler denied increase, staying at current", zap.Object("current", vmUsing)) - } - - // nothing to do - return nil - } else /* permit > vmUsing */ { - if permit != target { - logger.Warn("Scheduler capped increase to permit", zap.Object("permit", permit)) - } else { - logger.Info("Scheduler allowed increase to permit", zap.Object("permit", permit)) - } - - rejectedDownscale := func() (newTarget api.Resources, _ error) { - panic(errors.New("rejectedDownscale called but request should be increasing, not decreasing")) - } - if _, err := r.doVMUpdate(ctx, logger, vmUsing, permit, rejectedDownscale); err != nil { - return fmt.Errorf("Error doing VM update 2: %w", err) - } - - return nil - } -} - -// getStateForVMUpdate produces the atomicUpdateState for updateVMResources -// -// This method MUST be called while holding r.lock. -func (r *Runner) getStateForVMUpdate(logger *zap.Logger, updateReason VMUpdateReason) *atomicUpdateState { - if r.lastMetrics == nil { - if updateReason == UpdatedMetrics { - panic(errors.New("invalid state: metrics signalled but r.lastMetrics == nil")) - } - - logger.Warn("Unable to update VM resources because we haven't received metrics yet") - return nil - } else if r.computeUnit == nil { - if updateReason == RegisteredScheduler { - // note: the scheduler that was registered might not be the scheduler we just got! - // However, r.computeUnit is never supposed to go from non-nil to nil, so that doesn't - // actually matter. - panic(errors.New("invalid state: registered scheduler signalled but r.computeUnit == nil")) - } - - // note: as per the docs on r.computeUnit, this should only occur when we haven't yet talked - // to a scheduler. 
- logger.Warn("Unable to update VM resources because r.computeUnit hasn't been set yet") - return nil - } else if r.lastApproved == nil { - panic(errors.New("invalid state: r.computeUnit != nil but r.lastApproved == nil")) - } - - // Check that the VM's current usage is <= lastApproved - if vmUsing := r.vm.Using(); vmUsing.HasFieldGreaterThan(*r.lastApproved) { - panic(fmt.Errorf( - "invalid state: r.vm has resources greater than r.lastApproved (%+v vs %+v)", - vmUsing, *r.lastApproved, - )) - } - - config := r.global.config.Scaling.DefaultConfig - if r.vm.ScalingConfig != nil { - config = *r.vm.ScalingConfig - } - - return &atomicUpdateState{ - computeUnit: *r.computeUnit, - metrics: *r.lastMetrics, - vm: r.vm, - lastApproved: *r.lastApproved, - requestedUpscale: r.requestedUpscale, - config: config, - } -} - -// desiredVMState calculates what the resource allocation to the VM should be, given the metrics and -// current state. -func (s *atomicUpdateState) desiredVMState(allowDecrease bool) api.Resources { - // There's some annoying edge cases that this function has to be able to handle properly. For - // the sake of completeness, they are: - // - // 1. s.vm.Using() is not a multiple of s.computeUnit - // 2. s.vm.Max() is less than s.computeUnit (or: has at least one resource that is) - // 3. s.vm.Using() is a fractional multiple of s.computeUnit, but !allowDecrease and rounding up - // is greater than s.vm.Max() - // 4. s.vm.Using() is much larger than s.vm.Min() and not a multiple of s.computeUnit, but load - // is low so we should just decrease *anyways*. - // - // --- - // - // Broadly, the implementation works like this: - // 1. Based on load average, calculate the "goal" number of CPUs (and therefore compute units) - // 2. Cap the goal CU by min/max, etc - // 3. that's it! - - // Goal compute unit is at the point where (CPUs) × (LoadAverageFractionTarget) == (load - // average), - // which we can get by dividing LA by LAFT. - goalCU := uint32(math.Round(float64(s.metrics.LoadAverage1Min) / s.config.LoadAverageFractionTarget)) - - // Update goalCU based on any requested upscaling - goalCU = util.Max(goalCU, s.requiredCUForRequestedUpscaling()) - - // new CU must be >= current CU if !allowDecrease - if !allowDecrease { - _, upperBoundCU := s.computeUnitsBounds() - goalCU = util.Max(goalCU, upperBoundCU) - } - - // resources for the desired "goal" compute units - goal := s.computeUnit.Mul(uint16(goalCU)) - - // bound goal by the minimum and maximum resource amounts for the VM - result := goal.Min(s.vm.Max()).Max(s.vm.Min()) - - // Check that the result is sound. - // - // With the current (naive) implementation, this is trivially ok. In future versions, it might - // not be so simple, so it's good to have this integrity check here. - if result.HasFieldGreaterThan(s.vm.Max()) { - panic(fmt.Errorf( - "produced invalid desiredVMState: result has field greater than max. this = %+v", *s, - )) - } else if result.HasFieldLessThan(s.vm.Min()) { - panic(fmt.Errorf( - "produced invalid desiredVMState: result has field less than min. this = %+v", *s, - )) - } - - return result -} - -// computeUnitsBounds returns the minimum and maximum number of Compute Units required to fit each -// resource for the VM's current allocation -// -// Under typical operation, this will just return two equal values, both of which are equal to the -// VM's current number of Compute Units. 
However, if the VM's resource allocation doesn't cleanly -// divide to a multiple of the Compute Unit, the upper and lower bounds will be different. This can -// happen when the Compute Unit is changed, or when the VM's maximum or minimum resource allocations -// has previously prevented it from being set to a multiple of the Compute Unit. -func (s *atomicUpdateState) computeUnitsBounds() (uint32, uint32) { - // (x + M-1) / M is equivalent to ceil(x/M), as long as M != 0, which is already guaranteed by - // the - minCPUUnits := (uint32(s.vm.Cpu.Use) + uint32(s.computeUnit.VCPU) - 1) / uint32(s.computeUnit.VCPU) - minMemUnits := uint32((s.vm.Mem.Use + s.computeUnit.Mem - 1) / s.computeUnit.Mem) - - return util.Min(minCPUUnits, minMemUnits), util.Max(minCPUUnits, minMemUnits) -} - -// requiredCUForRequestedUpscaling returns the minimum Compute Units required to abide by the -// requested upscaling, if there is any. -// -// If there's no requested upscaling, then this method will return zero. -// -// This method does not respect any bounds on Compute Units placed by the VM's maximum or minimum -// resource allocation. -func (s *atomicUpdateState) requiredCUForRequestedUpscaling() uint32 { - var required uint32 - - // note: floor(x / M) + 1 gives the minimum integer value greater than x / M. - - if s.requestedUpscale.Cpu { - required = util.Max(required, uint32(s.vm.Cpu.Use/s.computeUnit.VCPU)+1) - } - if s.requestedUpscale.Memory { - required = util.Max(required, uint32(s.vm.Mem.Use/s.computeUnit.Mem)+1) - } - - return required -} - -// doVMUpdate handles updating the VM's resources from current to target WITHOUT CHECKING WITH THE -// SCHEDULER. It is the caller's responsibility to ensure that target is not greater than -// r.lastApproved, and check with the scheduler if necessary. -// -// If the VM informant is required and unavailable (or becomes unavailable), this method will: -// return nil, nil; log an appropriate warning; and reset the VM's state to its current value. -// -// If some resources in target are less than current, and the VM informant rejects the proposed -// downscaling, rejectedDownscale will be called. If it returns an error, that error will be -// returned and the update will be aborted. Otherwise, the returned newTarget will be used. -// -// This method MUST be called while holding r.requestLock AND NOT r.lock. -func (r *Runner) doVMUpdate( - ctx context.Context, - logger *zap.Logger, - current api.Resources, - target api.Resources, - rejectedDownscale func() (newTarget api.Resources, _ error), -) (*api.Resources, error) { - logger.Info("Attempting VM update", zap.Object("current", current), zap.Object("target", target)) - - // helper handling function to reset r.vm to reflect the actual current state. Must not be - // called while holding r.lock. - resetVMTo := func(amount api.Resources) { - r.lock.Lock() - defer r.lock.Unlock() - - r.vm.SetUsing(amount) - } - - if err := r.validateInformant(); err != nil { - logger.Warn("Aborting VM update because informant server is not valid", zap.Error(err)) - resetVMTo(current) - return nil, nil - } - - // If there's any fields that are being downscaled, request that from the VM informant. 
- downscaled := current.Min(target) - if downscaled != current { - r.recordResourceChange(current, downscaled, r.global.metrics.informantRequestedChange) - - resp, err := r.doInformantDownscale(ctx, logger, downscaled) - if err != nil || resp == nil /* resp = nil && err = nil when the error has been handled */ { - return nil, err - } - - if resp.Ok { - r.recordResourceChange(current, downscaled, r.global.metrics.informantApprovedChange) - } else { - newTarget, err := rejectedDownscale() - if err != nil { - resetVMTo(current) - return nil, err - } else if newTarget.HasFieldLessThan(current) { - panic(fmt.Errorf( - "rejectedDownscale returned new target less than current: %+v has field less than %+v", - newTarget, current, - )) - } - - if newTarget != target { - logger.Info("VM update: rejected downscale changed target", zap.Object("target", newTarget)) - } - - target = newTarget - } - } - - r.recordResourceChange(downscaled, target, r.global.metrics.neonvmRequestedChange) - - // Make the NeonVM request +func (r *Runner) doNeonVMRequest(ctx context.Context, target api.Resources) error { patches := []util.JSONPatch{{ Op: util.PatchReplace, Path: "/spec/guest/cpus/use", @@ -1499,8 +935,6 @@ func (r *Runner) doVMUpdate( Value: target.Mem, }} - logger.Info("Making NeonVM request for resources", zap.Object("target", target), zap.Any("patches", patches)) - patchPayload, err := json.Marshal(patches) if err != nil { panic(fmt.Errorf("Error marshalling JSON patch: %w", err)) @@ -1516,58 +950,13 @@ func (r *Runner) doVMUpdate( _, err = r.global.vmClient.NeonvmV1().VirtualMachines(r.vm.Namespace). Patch(requestCtx, r.vm.Name, ktypes.JSONPatchType, patchPayload, metav1.PatchOptions{}) - // We couldn't update the VM if err != nil { r.global.metrics.neonvmRequestsOutbound.WithLabelValues(fmt.Sprintf("[error: %s]", util.RootError(err))).Inc() - - // If the context was cancelled, we generally don't need to worry about whether setting r.vm - // back to current is sound. All operations on this VM are done anyways. - if ctx.Err() != nil { - resetVMTo(current) // FIXME: yeah, even though the comment above says "don't worry", maybe worry? - return nil, fmt.Errorf("Error making VM patch request: %w", err) - } - - // Otherwise, something went wrong *in the request itself*. This probably leaves us in an - // inconsistent state, so we're best off ending all further operations. The correct way to - // fatally error is by panicking - our infra here ensures it won't take down any other - // runners. - panic(fmt.Errorf("Unexpected VM patch request failure: %w", err)) + return err } r.global.metrics.neonvmRequestsOutbound.WithLabelValues("ok").Inc() - - // We scaled. If we run into an issue around further communications with the informant, then - // it'll be left with an inconsistent state - there's not really anything we can do about that, - // unfortunately. - resetVMTo(target) - - upscaled := target // we already handled downscaling; only upscaling can be left - if upscaled.HasFieldGreaterThan(current) { - // Unset fields in r.requestedUpscale if we've handled it. 
- // - // Essentially, for each field F, set: - // - // r.requestedUpscale.F = r.requestedUpscale && !(upscaled.F > current.F) - func() { - r.lock.Lock() - defer r.lock.Unlock() - - r.requestedUpscale = r.requestedUpscale.And(upscaled.IncreaseFrom(current).Not()) - }() - - r.recordResourceChange(downscaled, upscaled, r.global.metrics.informantRequestedChange) - - if ok, err := r.doInformantUpscale(ctx, logger, upscaled); err != nil || !ok { - return nil, err - } - - r.recordResourceChange(downscaled, upscaled, r.global.metrics.informantApprovedChange) - } - - logger.Info("Updated VM resources", zap.Object("current", current), zap.Object("target", target)) - - // Everything successful. - return &target, nil + return nil } func (r *Runner) recordResourceChange(current, target api.Resources, metrics resourceChangePair) { @@ -1601,141 +990,26 @@ func (r *Runner) recordResourceChange(current, target api.Resources, metrics res } } -// validateInformant checks that the Runner's informant server is present AND active (i.e. not -// suspended). -// -// If either condition is false, this method returns error. This is typically used to check that the -// Runner is enabled before making a request to NeonVM or the scheduler, in which case holding -// r.requestLock is advised. -// -// This method MUST NOT be called while holding r.lock. -func (r *Runner) validateInformant() error { - r.lock.Lock() - defer r.lock.Unlock() - - if r.server == nil { - return errors.New("no informant server set") - } - return r.server.valid() -} - -// doInformantDownscale is a convenience wrapper around (*InformantServer).Downscale that locks r, -// checks if r.server is nil, and does the request. -// -// Some errors are logged by this method instead of being returned. If that happens, this method -// returns nil, nil. -// -// This method MUST NOT be called while holding r.lock. -func (r *Runner) doInformantDownscale(ctx context.Context, logger *zap.Logger, to api.Resources) (*api.DownscaleResult, error) { - msg := "Error requesting informant downscale" - - server := func() *InformantServer { - r.lock.Lock() - defer r.lock.Unlock() - return r.server - }() - if server == nil { - return nil, fmt.Errorf("%s: InformantServer is not set (this should not occur after startup)", msg) - } - - resp, err := server.Downscale(ctx, logger, to) - if err != nil { - if IsNormalInformantError(err) { - logger.Warn(msg, zap.Object("server", server.desc), zap.Error(err)) - return nil, nil - } else { - return nil, fmt.Errorf("%s: %w", msg, err) - } - } - - return resp, nil -} - -// doInformantDownscale is a convenience wrapper around (*InformantServer).Upscale that locks r, -// checks if r.server is nil, and does the request. -// -// Some errors are logged by this method instead of being returned. If that happens, this method -// returns false, nil. -// -// This method MUST NOT be called while holding r.lock. 
-func (r *Runner) doInformantUpscale(ctx context.Context, logger *zap.Logger, to api.Resources) (ok bool, _ error) { - msg := "Error notifying informant of upscale" - - server := func() *InformantServer { - r.lock.Lock() - defer r.lock.Unlock() - return r.server - }() - if server == nil { - return false, fmt.Errorf("%s: InformantServer is not set (this should not occur after startup)", msg) - } - - if err := server.Upscale(ctx, logger, to); err != nil { - if IsNormalInformantError(err) { - logger.Warn(msg, zap.Error(err)) - return false, nil - } else { - return false, fmt.Errorf("%s: %w", msg, err) - } - } - - return true, nil -} - -// Register performs the initial request required to register with a scheduler -// -// This method is called immediately after the Scheduler is created, and may be called -// subsequent times if the initial request fails. -// -// signalOk will be called if the request succeeds, with s.runner.lock held - but only if -// s.runner.scheduler == s. -// -// This method MUST be called while holding s.runner.requestLock AND NOT s.runner.lock -func (s *Scheduler) Register(ctx context.Context, logger *zap.Logger, signalOk func()) error { - metrics, resources := func() (*api.Metrics, api.Resources) { - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - return s.runner.lastMetrics, s.runner.vm.Using() - }() - - req := api.AgentRequest{ +// DoRequest sends a request to the scheduler and does not validate the response. +func (s *Scheduler) DoRequest( + ctx context.Context, + logger *zap.Logger, + resources api.Resources, + metrics *api.Metrics, +) (_ *api.PluginResponse, err error) { + reqData := &api.AgentRequest{ ProtoVersion: PluginProtocolVersion, Pod: s.runner.podName, Resources: resources, Metrics: metrics, } - if _, err := s.DoRequest(ctx, logger, &req); err != nil { - return err - } - - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - s.registered = true - if s.runner.scheduler == s { - signalOk() - } - - return nil -} -// SendRequest implements all of the tricky logic for requests sent to the scheduler plugin -// -// This method checks: -// * That the response is semantically valid -// * That the response matches with the state of s.runner.vm, if s.runner.scheduler == s -// -// This method may set: -// - s.fatalError -// - s.runner.{computeUnit,lastApproved,lastSchedulerError,schedulerRespondedWithMigration}, -// if s.runner.scheduler == s. -// -// This method MAY ALSO call s.runner.shutdown(), if s.runner.scheduler == s. -// -// This method MUST be called while holding s.runner.requestLock AND NOT s.runner.lock. 
-func (s *Scheduler) DoRequest(ctx context.Context, logger *zap.Logger, reqData *api.AgentRequest) (*api.PluginResponse, error) { - logger = logger.With(zap.Object("scheduler", s.info)) + // make sure we log any error we're returning: + defer func() { + if err != nil { + logger.Error("Scheduler request failed", zap.Error(err)) + } + }() reqBody, err := json.Marshal(reqData) if err != nil { @@ -1754,7 +1028,7 @@ func (s *Scheduler) DoRequest(ctx context.Context, logger *zap.Logger, reqData * } request.Header.Set("content-type", "application/json") - logger.Info("Sending AgentRequest", zap.Any("request", reqData)) + logger.Info("Sending request to scheduler", zap.Any("request", reqData)) response, err := http.DefaultClient.Do(request) if err != nil { @@ -1794,89 +1068,11 @@ func (s *Scheduler) DoRequest(ctx context.Context, logger *zap.Logger, reqData * return nil, s.handleRequestError(reqData, fmt.Errorf("Bad JSON response: %w", err)) } - logger.Info("Received PluginResponse", zap.Any("response", respData)) - - s.runner.lock.Lock() - locked := true - defer func() { - if locked { - s.runner.lock.Unlock() - } - }() - - if err := s.validatePluginResponse(logger, reqData, &respData); err != nil { - // Must unlock before handling because it's required by validatePluginResponse, but - // handleFatalError is required not to have it. - locked = false - s.runner.lock.Unlock() - - // Fatal, because an invalid response indicates mismatched state, so we can't assume - // anything about the plugin's state. - return nil, s.handleFatalError(reqData, fmt.Errorf("Semantically invalid response: %w", err)) - } - - // if this scheduler is still current, update all the relevant fields in s.runner - if s.runner.scheduler == s { - s.runner.computeUnit = &respData.ComputeUnit - s.runner.lastApproved = &respData.Permit - s.runner.lastSchedulerError = nil - if respData.Migrate != nil { - logger.Info("Shutting down Runner because scheduler response indicated migration started") - s.runner.schedulerRespondedWithMigration = true - s.runner.shutdown() - } - } + logger.Info("Received response from scheduler", zap.Any("response", respData)) return &respData, nil } -// validatePluginResponse checks that the PluginResponse is valid for the AgentRequest that was -// sent. -// -// This method will not update any fields in s or s.runner. -// -// This method MUST be called while holding s.runner.requestLock AND s.runner.lock. -func (s *Scheduler) validatePluginResponse( - logger *zap.Logger, - req *api.AgentRequest, - resp *api.PluginResponse, -) error { - isCurrent := s.runner.scheduler == s - - if err := req.Resources.ValidateNonZero(); err != nil { - panic(fmt.Errorf("we created an invalid AgentRequest.Resources: %w", err)) - } - - // Errors from resp alone - if err := resp.Permit.ValidateNonZero(); err != nil { - return fmt.Errorf("Invalid permit: %w", err) - } - if err := resp.ComputeUnit.ValidateNonZero(); err != nil { - return fmt.Errorf("Invalid compute unit: %w", err) - } - - // Errors from resp in connection with the prior request - if resp.Permit.HasFieldGreaterThan(req.Resources) { - return fmt.Errorf( - "Permit has resources greater than request (%+v vs. 
%+v)", - resp.Permit, req.Resources, - ) - } - - // Errors from resp in connection with the prior request AND the VM state - if isCurrent { - if vmUsing := s.runner.vm.Using(); resp.Permit.HasFieldLessThan(vmUsing) { - return fmt.Errorf("Permit has resources less than VM (%+v vs %+v)", resp.Permit, vmUsing) - } - } - - if !isCurrent && resp.Migrate != nil { - logger.Warn("scheduler is no longer current, but its response signalled migration") - } - - return nil -} - // handlePreRequestError appropriately handles updating the Scheduler and its Runner's state to // reflect that an error occurred. It returns the error passed to it // @@ -1891,7 +1087,7 @@ func (s *Scheduler) handlePreRequestError(err error) error { s.runner.lock.Lock() defer s.runner.lock.Unlock() - if s.runner.scheduler == s { + if s.runner.scheduler.Load() == s { s.runner.lastSchedulerError = err } @@ -1912,7 +1108,7 @@ func (s *Scheduler) handleRequestError(req *api.AgentRequest, err error) error { s.runner.lock.Lock() defer s.runner.lock.Unlock() - if s.runner.scheduler == s { + if s.runner.scheduler.Load() == s { s.runner.lastSchedulerError = err // Because downscaling s.runner.vm must be done before any request that decreases its @@ -1947,7 +1143,7 @@ func (s *Scheduler) handleFatalError(req *api.AgentRequest, err error) error { s.fatalError = err - if s.runner.scheduler == s { + if s.runner.scheduler.Load() == s { s.runner.lastSchedulerError = err // for reasoning on lastApproved, see handleRequestError. lastApproved := s.runner.vm.Using() diff --git a/pkg/plugin/config.go b/pkg/plugin/config.go index cfb7b1090..40f3ff093 100644 --- a/pkg/plugin/config.go +++ b/pkg/plugin/config.go @@ -233,6 +233,7 @@ func (c *nodeConfig) vCpuLimits(total *resource.Quantity) (_ nodeResourceState[v System: vmapi.MilliCPU(systemCpus * 1000), Watermark: vmapi.MilliCPU(c.Cpu.Watermark * float32(reservableCpus) * 1000), Reserved: 0, + Buffer: 0, CapacityPressure: 0, PressureAccountedFor: 0, }, margin, nil @@ -277,6 +278,7 @@ func (c *nodeConfig) memoryLimits( System: uint16(systemSlots), Watermark: uint16(c.Memory.Watermark * float32(reservableSlots)), Reserved: 0, + Buffer: 0, CapacityPressure: 0, PressureAccountedFor: 0, }, margin, nil diff --git a/pkg/util/broadcast.go b/pkg/util/broadcast.go new file mode 100644 index 000000000..a11d8e03a --- /dev/null +++ b/pkg/util/broadcast.go @@ -0,0 +1,72 @@ +package util + +// A channel-based sync.Cond-like interface, with support for broadcast operations (but some +// additional restrictions) + +import ( + "sync" +) + +func NewBroadcaster() *Broadcaster { + return &Broadcaster{ + mu: sync.Mutex{}, + ch: make(chan struct{}, 1), + sent: 0, + } +} + +type Broadcaster struct { + mu sync.Mutex + ch chan struct{} + + sent uint64 +} + +type BroadcastReceiver struct { + b *Broadcaster + + viewed uint64 +} + +func (b *Broadcaster) Broadcast() { + b.mu.Lock() + defer b.mu.Unlock() + + close(b.ch) + b.ch = make(chan struct{}, 1) + b.sent += 1 +} + +func (b *Broadcaster) NewReceiver() BroadcastReceiver { + b.mu.Lock() + defer b.mu.Unlock() + + return BroadcastReceiver{ + b: b, + viewed: b.sent, + } +} + +var closedChannel = func() <-chan struct{} { + ch := make(chan struct{}) + close(ch) + return ch +}() + +func (r *BroadcastReceiver) Wait() <-chan struct{} { + r.b.mu.Lock() + defer r.b.mu.Unlock() + + if r.b.sent == r.viewed { + return r.b.ch + } else { + return closedChannel + } +} + +func (r *BroadcastReceiver) Awake() { + r.b.mu.Lock() + defer r.b.mu.Unlock() + + r.viewed = r.b.sent +} diff --git 
a/pkg/util/watch/watch.go b/pkg/util/watch/watch.go index c8b319f89..859c38b96 100644 --- a/pkg/util/watch/watch.go +++ b/pkg/util/watch/watch.go @@ -149,12 +149,14 @@ func Watch[C Client[L], L metav1.ListMetaAccessor, T any, P Object[T]]( sendStop, stopSignal := util.NewSingleSignalPair() store := Store[T]{ + mutex: sync.Mutex{}, objects: make(map[types.UID]*T), triggerRelist: make(chan struct{}, 1), // ensure sends are non-blocking relisted: make(chan struct{}), nextIndexID: 0, indexes: make(map[uint64]Index[T]), stopSignal: sendStop, + stopped: atomic.Bool{}, } items := accessors.Items(initialList) From 91d6cc85d069a799271059e73c425a2544164378 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Thu, 28 Sep 2023 16:54:03 -0700 Subject: [PATCH 02/59] Remove make test/build/run dependence on `go vet` --- Makefile | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 9e51582dc..6c66f2e12 100644 --- a/Makefile +++ b/Makefile @@ -96,15 +96,12 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust fmt: ## Run go fmt against code. go fmt ./... -.PHONY: vet -vet: ## Run go vet against code. - # `go vet` requires gcc - # ref https://github.com/golang/go/issues/56755 - CGO_ENABLED=0 go vet ./... - +.PHONY: lint +lint: ## Run golangci-lint against code. + golangci-lint run .PHONY: test -test: fmt vet envtest ## Run tests. +test: fmt envtest ## Run tests. # chmodding KUBEBUILDER_ASSETS dir to make it deletable by owner, # otherwise it fails with actions/checkout on self-hosted GitHub runners # ref: https://github.com/kubernetes-sigs/controller-runtime/pull/2245 @@ -116,7 +113,7 @@ test: fmt vet envtest ## Run tests. ##@ Build .PHONY: build -build: fmt vet bin/vm-builder bin/vm-builder-generic ## Build all neonvm binaries. +build: fmt bin/vm-builder bin/vm-builder-generic ## Build all neonvm binaries. go build -o bin/controller neonvm/main.go go build -o bin/vxlan-controller neonvm/tools/vxlan/controller/main.go go build -o bin/runner neonvm/runner/main.go @@ -130,7 +127,7 @@ bin/vm-builder-generic: ## Build vm-builder-generic binary. CGO_ENABLED=0 go build -o bin/vm-builder-generic neonvm/tools/vm-builder-generic/main.go .PHONY: run -run: fmt vet ## Run a controller from your host. +run: fmt ## Run a controller from your host. go run ./neonvm/main.go .PHONY: vm-monitor From 1021a78dd51404a43d0663f783ebbdd9c39d5b32 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Thu, 28 Sep 2023 17:58:48 -0700 Subject: [PATCH 03/59] Update (*core.State).DesiredResources... for recent changes in main --- pkg/agent/core/state.go | 53 ++++++++++++++++++++++++++++++------ pkg/agent/core/state_test.go | 2 +- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 637c8eb01..8f0cc2c67 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -404,26 +404,53 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling() api.Resources // --- // // Broadly, the implementation works like this: - // 1. Based on load average, calculate the "goal" number of CPUs (and therefore compute units) + // For CPU: + // Based on load average, calculate the "goal" number of CPUs (and therefore compute units) + // + // For Memory: + // Based on memory usage, calculate the VM's desired memory allocation and extrapolate a + // goal number of CUs from that. + // + // 1. Take the maximum of these two goal CUs to create a unified goal CU // 2. Cap the goal CU by min/max, etc // 3. 
that's it! - // If we don't know + // if we don't know what the compute unit is, don't do anything. if s.plugin.computeUnit == nil { return s.vm.Using() } var goalCU uint32 if s.metrics != nil { + // For CPU: // Goal compute unit is at the point where (CPUs) × (LoadAverageFractionTarget) == (load // average), - // which we can get by dividing LA by LAFT. - goalCU = uint32(math.Round(float64(s.metrics.LoadAverage1Min) / s.scalingConfig().LoadAverageFractionTarget)) + // which we can get by dividing LA by LAFT, and then dividing by the number of CPUs per CU + goalCPUs := float64(s.metrics.LoadAverage1Min) / s.scalingConfig().LoadAverageFractionTarget + cpuGoalCU := uint32(math.Round(goalCPUs / s.plugin.computeUnit.VCPU.AsFloat64())) + + // For Mem: + // Goal compute unit is at the point where (Mem) * (MemoryUsageFractionTarget) == (Mem Usage) + // We can get the desired memory allocation in bytes by dividing MU by MUFT, and then convert + // that to CUs + // + // NOTE: use uint64 for calculations on bytes as uint32 can overflow + memGoalBytes := uint64(math.Round(float64(s.metrics.MemoryUsageBytes) / s.scalingConfig().MemoryUsageFractionTarget)) + bytesPerCU := uint64(int64(s.plugin.computeUnit.Mem) * s.vm.Mem.SlotSize.Value()) + memGoalCU := uint32(memGoalBytes / bytesPerCU) + + goalCU = util.Max(cpuGoalCU, memGoalCU) } // Update goalCU based on any requested upscaling goalCU = util.Max(goalCU, s.requiredCUForRequestedUpscaling(*s.plugin.computeUnit)) + // // new CU must be >= current CU if !allowDecrease + // if !allowDecrease { + // _, upperBoundCU := s.computeUnitsBounds() + // goalCU = util.Max(goalCU, upperBoundCU) + // } + // resources for the desired "goal" compute units var goalResources api.Resources @@ -435,20 +462,30 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling() api.Resources goalResources = s.plugin.computeUnit.Mul(uint16(goalCU)) } - // bound goal by the minimum and maximum resource amounts for the VM + // bound goalResources by the minimum and maximum resource amounts for the VM result := goalResources.Min(s.vm.Max()).Max(s.vm.Min()) + // FIXME: re-enable this logic. + // // If no decreases are allowed, then we *must* make sure that the VM's usage value has not + // // decreased, even if it's greater than the VM maximum. + // // + // // We can run into situtations like this when VM scale-down on bounds change fails, so we end up + // // with a usage value greater than the maximum. + // if !allowDecrease { + // result = result.Max(s.VM.Using()) + // } + // Check that the result is sound. // // With the current (naive) implementation, this is trivially ok. In future versions, it might // not be so simple, so it's good to have this integrity check here. - if result.HasFieldGreaterThan(s.vm.Max()) { + if /* FIXME: re-enable: allowDecrease && */ result.HasFieldGreaterThan(s.vm.Max()) { panic(fmt.Errorf( - "produced invalid desiredVMState: result has field greater than max. this = %+v", s, + "produced invalid desired state: result has field greater than max. this = %+v", *s, )) } else if result.HasFieldLessThan(s.vm.Min()) { panic(fmt.Errorf( - "produced invalid desiredVMState: result has field less than min. this = %+v", s, + "produced invalid desired state: result has field less than min. 
this = %+v", *s, )) } diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 8abc19c7e..37b7aeba9 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -10,7 +10,7 @@ import ( "github.com/neondatabase/autoscaling/pkg/api" ) -func Test_desiredVMState(t *testing.T) { +func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { cases := []struct { name string From 5f4ff2eacae00c6bac05faac86fabd73a1e93dab Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Thu, 28 Sep 2023 18:08:05 -0700 Subject: [PATCH 04/59] fix core/state_test State initialization --- pkg/agent/core/state_test.go | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 37b7aeba9..7d559a9d9 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -15,9 +15,10 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { name string // helpers for setting fields (ish) of State: - metrics api.Metrics - vmUsing api.Resources - requestedUpscale api.MoreResources + metrics api.Metrics + vmUsing api.Resources + schedulerApproved api.Resources + requestedUpscale api.MoreResources // expected output from (*State).DesiredResourcesFromMetricsOrRequestedUpscaling() expected api.Resources @@ -29,8 +30,9 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { LoadAverage5Min: 0.0, // unused MemoryUsageBytes: 0.0, }, - vmUsing: api.Resources{VCPU: 250, Mem: 1}, - requestedUpscale: api.MoreResources{Cpu: false, Memory: false}, + vmUsing: api.Resources{VCPU: 250, Mem: 1}, + schedulerApproved: api.Resources{VCPU: 250, Mem: 1}, + requestedUpscale: api.MoreResources{Cpu: false, Memory: false}, expected: api.Resources{VCPU: 500, Mem: 2}, }, @@ -70,10 +72,25 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { }, ) - // set the metrics - state.UpdateMetrics(c.metrics) + computeUnit := api.Resources{VCPU: 250, Mem: 1} t.Run(c.name, func(t *testing.T) { + // set the metrics + state.UpdateMetrics(c.metrics) + + // set the compute unit and lastApproved by simulating a scheduler request/response + state.Plugin().NewScheduler() + state.Plugin().StartingRequest(time.Now(), c.schedulerApproved) + err := state.Plugin().RequestSuccessful(time.Now(), api.PluginResponse{ + Permit: c.schedulerApproved, + Migrate: nil, + ComputeUnit: computeUnit, + }) + if err != nil { + t.Errorf("state.Plugin().RequestSuccessful() failed: %s", err) + return + } + actual := state.DesiredResourcesFromMetricsOrRequestedUpscaling() if actual != c.expected { t.Errorf("expected output %+v but got %+v", c.expected, actual) From e2b7075b392987eb32af596c8185060a637af51f Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Thu, 28 Sep 2023 18:51:34 -0700 Subject: [PATCH 05/59] small improvement to computeUnit availability --- pkg/agent/core/state.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 8f0cc2c67..98edc23c9 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -417,6 +417,7 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling() api.Resources // if we don't know what the compute unit is, don't do anything. 
if s.plugin.computeUnit == nil { + s.config.Warn("Can't determine desired resources because compute unit hasn't been set yet") return s.vm.Using() } @@ -569,7 +570,7 @@ func (h PluginHandle) SchedulerGone() { h.s.plugin = pluginState{ alive: false, ongoingRequest: false, - computeUnit: nil, + computeUnit: h.s.plugin.computeUnit, lastRequest: nil, permit: h.s.plugin.permit, // Keep this; trust the previous scheduler. } From bf18987a36e97485da1543c2addda7f4d57338a3 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Thu, 28 Sep 2023 19:39:28 -0700 Subject: [PATCH 06/59] switch arithmetic ordering --- pkg/agent/core/state.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 98edc23c9..1795e0a0b 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -503,13 +503,13 @@ func (s *State) requiredCUForRequestedUpscaling(computeUnit api.Resources) uint3 var required uint32 requested := s.monitor.requestedUpscale.requested - // note: floor(x / M) + 1 gives the minimum integer value greater than x / M. + // note: 1 + floor(x / M) gives the minimum integer value greater than x / M. if requested.Cpu { - required = util.Max(required, uint32(s.vm.Cpu.Use/computeUnit.VCPU)+1) + required = util.Max(required, 1+uint32(s.vm.Cpu.Use/computeUnit.VCPU)) } if requested.Memory { - required = util.Max(required, uint32(s.vm.Mem.Use/computeUnit.Mem)+1) + required = util.Max(required, 1+uint32(s.vm.Mem.Use/computeUnit.Mem)) } return required From 7ef0d41309fd4fed4c9f7e430dc34732384e069c Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Thu, 28 Sep 2023 19:41:23 -0700 Subject: [PATCH 07/59] respect vm-monitor's denied downscaling This commit was actually really fun to write. By doing it this way, we actually automatically get "retry with a slightly larger downscale" for free, and hooking everything up was super simple! Also, while we're here, it's worth adding back the other tests from before this PR, now that they pass. 
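
To illustrate what "slightly larger" means here, a standalone sketch (plain
integers, not the real api.Resources / compute-unit types): a denied downscale
puts a floor on later desired-resources calculations at the smallest
compute-unit multiple strictly above the denied amount, so the next attempt is
automatically one CU larger than whatever was just refused.

    package main

    import "fmt"

    // nextFloorAboveDenied returns the smallest multiple of cuSize strictly
    // greater than denied, i.e. (1 + floor(denied/cuSize)) * cuSize.
    func nextFloorAboveDenied(denied, cuSize uint32) uint32 {
        return (1 + denied/cuSize) * cuSize
    }

    func main() {
        const cu = 250 // e.g. milli-CPU per compute unit
        fmt.Println(nextFloorAboveDenied(250, cu)) // 500: denied at 250m, so don't ask below 500m next time
        fmt.Println(nextFloorAboveDenied(600, cu)) // 750
    }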
--- pkg/agent/core/dumpstate.go | 2 + pkg/agent/core/state.go | 83 ++++++++++++++++++++++-------- pkg/agent/core/state_test.go | 67 ++++++++++++++++++++++-- pkg/agent/executor/exec_monitor.go | 2 +- 4 files changed, 128 insertions(+), 26 deletions(-) diff --git a/pkg/agent/core/dumpstate.go b/pkg/agent/core/dumpstate.go index 03c06dce6..bbb833e5e 100644 --- a/pkg/agent/core/dumpstate.go +++ b/pkg/agent/core/dumpstate.go @@ -88,6 +88,7 @@ type requestedUpscaleDump struct { } type deniedDownscaleDump struct { At time.Time `json:"at"` + Current api.Resources `json:"current"` Requested api.Resources `json:"requested"` } @@ -105,6 +106,7 @@ func (s *monitorState) dump() monitorStateDump { if s.deniedDownscale != nil { deniedDownscale = &deniedDownscaleDump{ At: s.deniedDownscale.at, + Current: s.deniedDownscale.current, Requested: s.deniedDownscale.requested, } } diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 1795e0a0b..008098e83 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -26,6 +26,8 @@ import ( "strings" "time" + vmapi "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" + "github.com/neondatabase/autoscaling/pkg/api" "github.com/neondatabase/autoscaling/pkg/util" ) @@ -139,6 +141,7 @@ type requestedUpscale struct { type deniedDownscale struct { at time.Time + current api.Resources requested api.Resources } @@ -185,7 +188,7 @@ func (s *State) NextActions(now time.Time) ActionSet { using := s.vm.Using() - desiredResources := s.DesiredResourcesFromMetricsOrRequestedUpscaling() + desiredResources := s.DesiredResourcesFromMetricsOrRequestedUpscaling(now) desiredResourcesApprovedByMonitor := s.boundResourcesByMonitorApproved(desiredResources) desiredResourcesApprovedByPlugin := s.boundResourcesByPluginApproved(desiredResources) @@ -390,7 +393,7 @@ func (s *State) scalingConfig() api.ScalingConfig { } } -func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling() api.Resources { +func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) api.Resources { // There's some annoying edge cases that this function has to be able to handle properly. For // the sake of completeness, they are: // @@ -443,21 +446,19 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling() api.Resources goalCU = util.Max(cpuGoalCU, memGoalCU) } - // Update goalCU based on any requested upscaling + // Update goalCU based on any requested upscaling or downscaling that was previously denied goalCU = util.Max(goalCU, s.requiredCUForRequestedUpscaling(*s.plugin.computeUnit)) - - // // new CU must be >= current CU if !allowDecrease - // if !allowDecrease { - // _, upperBoundCU := s.computeUnitsBounds() - // goalCU = util.Max(goalCU, upperBoundCU) - // } + deniedDownscaleInEffect := s.deniedDownscaleInEffect(now) + if deniedDownscaleInEffect { + goalCU = util.Max(goalCU, s.requiredCUForDeniedDownscale(*s.plugin.computeUnit, s.monitor.deniedDownscale.requested)) + } // resources for the desired "goal" compute units var goalResources api.Resources // If there's no constraints from s.metrics or s.monitor.requestedUpscale, then we'd prefer to // keep things as-is, rather than scaling down (because otherwise goalCU = 0). 
- if s.metrics == nil && s.monitor.requestedUpscale == nil {
+ if s.metrics == nil && s.monitor.requestedUpscale == nil && !deniedDownscaleInEffect {
 goalResources = s.vm.Using()
 } else {
 goalResources = s.plugin.computeUnit.Mul(uint16(goalCU))
@@ -466,21 +467,29 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling() api.Resources
 // bound goalResources by the minimum and maximum resource amounts for the VM
 result := goalResources.Min(s.vm.Max()).Max(s.vm.Min())

- // FIXME: re-enable this logic.
- // // If no decreases are allowed, then we *must* make sure that the VM's usage value has not
- // // decreased, even if it's greater than the VM maximum.
- // //
- // // We can run into situtations like this when VM scale-down on bounds change fails, so we end up
- // // with a usage value greater than the maximum.
- // if !allowDecrease {
- // result = result.Max(s.VM.Using())
- // }
+ // ... but if we aren't allowed to downscale, then we *must* make sure that the VM's usage value
+ // won't decrease to the previously denied amount, even if it's greater than the maximum.
+ //
+ // We can run into situations like this when VM scale-down on bounds change fails, so we end up
+ // with a usage value greater than the maximum.
+ //
+ // It's not a great situation to be in, but it's easier to make the policy "give the users a
+ // little extra if we mess up" than "oops we OOM-killed your DB, hope you weren't doing anything".
+ if deniedDownscaleInEffect {
+ // roughly equivalent to "result >= s.monitor.deniedDownscale.requested"
+ if !result.HasFieldGreaterThan(s.monitor.deniedDownscale.requested) {
+ // This can only happen if s.vm.Max() is less than goalResources, because otherwise this
+ // would have been factored into goalCU, affecting goalResources. Hence, the warning.
+ s.config.Warn("Can't decrease desired resources to within VM maximum because of vm-monitor previously denied downscale request")
+ }
+ result = result.Max(s.minRequiredResourcesForDeniedDownscale(*s.plugin.computeUnit, *s.monitor.deniedDownscale))
+ }

 // Check that the result is sound.
 //
 // With the current (naive) implementation, this is trivially ok. In future versions, it might
 // not be so simple, so it's good to have this integrity check here.
- if /* FIXME: re-enable: allowDecrease && */ result.HasFieldGreaterThan(s.vm.Max()) {
+ if !deniedDownscaleInEffect && result.HasFieldGreaterThan(s.vm.Max()) {
 panic(fmt.Errorf(
 "produced invalid desired state: result has field greater than max. this = %+v", *s,
 ))
@@ -515,6 +524,37 @@ func (s *State) requiredCUForRequestedUpscaling(computeUnit api.Resources) uint3
 return required
 }

+func (s *State) deniedDownscaleInEffect(now time.Time) bool {
+ return s.monitor.deniedDownscale != nil &&
+ // Previous denied downscaling attempts are in effect until the cooldown expires
+ now.Before(s.monitor.deniedDownscale.at.Add(s.config.MonitorDeniedDownscaleCooldown))
+}
+
+// NB: like requiredCUForRequestedUpscaling, we make the caller provide the values so that it's
+// more clear that it's the caller's responsibility to ensure the values are non-nil.
+func (s *State) requiredCUForDeniedDownscale(computeUnit, deniedResources api.Resources) uint32 {
+ // note: floor(x / M) + 1 gives the minimum integer value greater than x / M.
+ requiredFromCPU := 1 + uint32(deniedResources.VCPU/computeUnit.VCPU) + requiredFromMem := 1 + uint32(deniedResources.Mem/computeUnit.Mem) + + return util.Max(requiredFromCPU, requiredFromMem) +} + +func (s *State) minRequiredResourcesForDeniedDownscale(computeUnit api.Resources, denied deniedDownscale) api.Resources { + var res api.Resources + + if denied.requested.VCPU < denied.current.VCPU { + // increase the value by one CU's worth + res.VCPU = computeUnit.VCPU * vmapi.MilliCPU(1+uint32(denied.requested.VCPU/computeUnit.VCPU)) + } + + if denied.requested.Mem < denied.current.Mem { + res.Mem = computeUnit.Mem * (1 + uint16(denied.requested.Mem/computeUnit.Mem)) + } + + return res +} + func (s *State) boundResourcesByMonitorApproved(resources api.Resources) api.Resources { var lowerBound api.Resources if s.monitor.approved != nil { @@ -678,10 +718,11 @@ func (h MonitorHandle) DownscaleRequestAllowed(now time.Time, requested api.Reso } // Downscale request was successful but the monitor denied our request. -func (h MonitorHandle) DownscaleRequestDenied(now time.Time, requested api.Resources) { +func (h MonitorHandle) DownscaleRequestDenied(now time.Time, current, requested api.Resources) { h.s.monitor.ongoingRequest = nil h.s.monitor.deniedDownscale = &deniedDownscale{ at: now, + current: current, requested: requested, } } diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 7d559a9d9..f103454dc 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -1,9 +1,12 @@ package core_test import ( + "fmt" "testing" "time" + "golang.org/x/exp/slices" + "k8s.io/apimachinery/pkg/api/resource" "github.com/neondatabase/autoscaling/pkg/agent/core" @@ -19,9 +22,11 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { vmUsing api.Resources schedulerApproved api.Resources requestedUpscale api.MoreResources + deniedDownscale *api.Resources // expected output from (*State).DesiredResourcesFromMetricsOrRequestedUpscaling() expected api.Resources + warnings []string }{ { name: "BasicScaleup", @@ -33,12 +38,50 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { vmUsing: api.Resources{VCPU: 250, Mem: 1}, schedulerApproved: api.Resources{VCPU: 250, Mem: 1}, requestedUpscale: api.MoreResources{Cpu: false, Memory: false}, + deniedDownscale: nil, + + expected: api.Resources{VCPU: 500, Mem: 2}, + warnings: nil, + }, + { + name: "MismatchedApprovedNoScaledown", + metrics: api.Metrics{ + LoadAverage1Min: 0.0, // ordinarily would like to scale down + LoadAverage5Min: 0.0, + MemoryUsageBytes: 0.0, + }, + vmUsing: api.Resources{VCPU: 250, Mem: 2}, + schedulerApproved: api.Resources{VCPU: 250, Mem: 2}, + requestedUpscale: api.MoreResources{Cpu: false, Memory: false}, + deniedDownscale: &api.Resources{VCPU: 250, Mem: 1}, + // need to scale up because vmUsing is mismatched and otherwise we'd be scaling down. expected: api.Resources{VCPU: 500, Mem: 2}, + warnings: nil, + }, + { + // ref https://github.com/neondatabase/autoscaling/issues/512 + name: "MismatchedApprovedNoScaledownButVMAtMaximum", + metrics: api.Metrics{ + LoadAverage1Min: 0.0, // ordinarily would like to scale down + LoadAverage5Min: 0.0, + MemoryUsageBytes: 0.0, + }, + vmUsing: api.Resources{VCPU: 1000, Mem: 5}, // note: mem greater than maximum. 
It can happen when scaling bounds change + schedulerApproved: api.Resources{VCPU: 1000, Mem: 5}, // unused + requestedUpscale: api.MoreResources{Cpu: false, Memory: false}, + deniedDownscale: &api.Resources{VCPU: 1000, Mem: 4}, + + expected: api.Resources{VCPU: 1000, Mem: 5}, + warnings: []string{ + "Can't decrease desired resources to within VM maximum because of vm-monitor previously denied downscale request", + }, }, } for _, c := range cases { + warnings := []string{} + state := core.NewState( api.VmInfo{ Name: "test", @@ -68,7 +111,9 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { PluginRequestTick: time.Second, MonitorDeniedDownscaleCooldown: time.Second, MonitorRetryWait: time.Second, - Warn: nil, + Warn: func(format string, args ...any) { + warnings = append(warnings, fmt.Sprintf(format, args...)) + }, }, ) @@ -78,10 +123,12 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { // set the metrics state.UpdateMetrics(c.metrics) + now := time.Now() + // set the compute unit and lastApproved by simulating a scheduler request/response state.Plugin().NewScheduler() - state.Plugin().StartingRequest(time.Now(), c.schedulerApproved) - err := state.Plugin().RequestSuccessful(time.Now(), api.PluginResponse{ + state.Plugin().StartingRequest(now, c.schedulerApproved) + err := state.Plugin().RequestSuccessful(now, api.PluginResponse{ Permit: c.schedulerApproved, Migrate: nil, ComputeUnit: computeUnit, @@ -91,10 +138,22 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { return } - actual := state.DesiredResourcesFromMetricsOrRequestedUpscaling() + // set deniedDownscale (if needed) by simulating a vm-monitor request/response + if c.deniedDownscale != nil { + state.Monitor().Reset() + state.Monitor().Active(true) + state.Monitor().StartingDownscaleRequest(now) + state.Monitor().DownscaleRequestDenied(now, c.vmUsing, *c.deniedDownscale) + } + + actual := state.DesiredResourcesFromMetricsOrRequestedUpscaling(now) if actual != c.expected { t.Errorf("expected output %+v but got %+v", c.expected, actual) } + + if !slices.Equal(c.warnings, warnings) { + t.Errorf("expected warnings %+v but got %+v", c.warnings, warnings) + } }) } } diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 817a6213c..3c1450786 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -124,7 +124,7 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge if !result.Ok { logger.Warn("vm-monitor denied downscale", logFields...) if unchanged { - state.Monitor().DownscaleRequestDenied(endTime, action.Target) + state.Monitor().DownscaleRequestDenied(endTime, action.Current, action.Target) } } else { logger.Info("vm-monitor approved downscale", logFields...) 
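
A note on the deniedDownscaleInEffect check above: a denied downscale only
constrains the desired resources until MonitorDeniedDownscaleCooldown has
elapsed; after that, the agent is free to attempt the original downscale again.
A minimal sketch of that check, using plain time values rather than the agent's
state types:

    package main

    import (
        "fmt"
        "time"
    )

    // deniedStillInEffect reports whether a previously denied downscale should
    // still act as a floor on desired resources: true until the cooldown expires.
    func deniedStillInEffect(deniedAt, now time.Time, cooldown time.Duration) bool {
        return now.Before(deniedAt.Add(cooldown))
    }

    func main() {
        deniedAt := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)
        cooldown := 5 * time.Second

        fmt.Println(deniedStillInEffect(deniedAt, deniedAt.Add(2*time.Second), cooldown)) // true: keep the floor
        fmt.Println(deniedStillInEffect(deniedAt, deniedAt.Add(6*time.Second), cooldown)) // false: ok to retry the downscale
    }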
From d2f09921a1447c8c9cf21f273751fd96d5e7dd75 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Thu, 28 Sep 2023 19:55:15 -0700 Subject: [PATCH 08/59] simplify condition --- pkg/agent/core/state.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 008098e83..9b7b432c7 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -456,9 +456,9 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) a // resources for the desired "goal" compute units var goalResources api.Resources - // If there's no constraints from s.metrics or s.monitor.requestedUpscale, then we'd prefer to - // keep things as-is, rather than scaling down (because otherwise goalCU = 0). - if s.metrics == nil && s.monitor.requestedUpscale == nil && !deniedDownscaleInEffect { + // If there's no constraints and s.metrics is nil, then we'll end up with goalCU = 0. + // But if we have no metrics, we'd prefer to keep things as-is, rather than scaling down. + if s.metrics == nil && goalCU == 0 { goalResources = s.vm.Using() } else { goalResources = s.plugin.computeUnit.Mul(uint16(goalCU)) From 724ae6bf7c83c2f85e6a78770ac9fcc56c94040c Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 29 Sep 2023 08:11:55 -0700 Subject: [PATCH 09/59] add more thorough tests (not yet passing) --- go.mod | 3 +- pkg/agent/core/state_test.go | 262 +++++++++++++++++++++++++++++++++++ 2 files changed, 264 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b91fed8c1..9ed852478 100644 --- a/go.mod +++ b/go.mod @@ -58,6 +58,7 @@ require ( github.com/onsi/gomega v1.24.2 github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 github.com/prometheus/client_golang v1.14.0 + github.com/stretchr/testify v1.8.1 github.com/tychoish/fun v0.8.5 github.com/vishvananda/netlink v1.1.1-0.20220125195016-0639e7e787ba go.uber.org/zap v1.24.0 @@ -139,13 +140,13 @@ require ( github.com/opencontainers/image-spec v1.1.0-rc2 // indirect github.com/opencontainers/selinux v1.10.0 // indirect github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.3.0 // indirect github.com/prometheus/common v0.37.0 // indirect github.com/prometheus/procfs v0.8.0 // indirect github.com/sirupsen/logrus v1.9.0 // indirect github.com/spf13/cobra v1.6.1 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/stretchr/testify v1.8.1 // indirect github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 // indirect go.etcd.io/etcd/api/v3 v3.5.6 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.6 // indirect diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index f103454dc..1506f1289 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -5,10 +5,13 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" "golang.org/x/exp/slices" "k8s.io/apimachinery/pkg/api/resource" + vmapi "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" + "github.com/neondatabase/autoscaling/pkg/agent/core" "github.com/neondatabase/autoscaling/pkg/api" ) @@ -157,3 +160,262 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { }) } } + +type initialStateParams struct { + computeUnit api.Resources + minCU uint16 + maxCU uint16 +} + +type initialStateOpt struct { + preCreate func(*initialStateParams) + postCreate func(*api.VmInfo, *core.Config) +} + +func withComputeUnit(cu api.Resources) (o initialStateOpt) { + 
o.preCreate = func(p *initialStateParams) { p.computeUnit = cu } + return +} + +func withSizeRange(minCU, maxCU uint16) (o initialStateOpt) { + o.preCreate = func(p *initialStateParams) { + p.minCU = minCU + p.maxCU = maxCU + } + return +} + +func withVMUsing(res api.Resources) (o initialStateOpt) { + o.postCreate = func(vm *api.VmInfo, _ *core.Config) { + vm.Cpu.Use = res.VCPU + vm.Mem.Use = res.Mem + } + return +} + +func withStoredWarnings(warnings *[]string) (o initialStateOpt) { + o.postCreate = func(_ *api.VmInfo, config *core.Config) { + config.Warn = func(format string, args ...any) { + *warnings = append(*warnings, fmt.Sprintf(format, args...)) + } + } + return +} + +func createInitialState(opts ...initialStateOpt) *core.State { + pre := initialStateParams{ + computeUnit: api.Resources{VCPU: 250, Mem: 1}, + minCU: 1, + maxCU: 4, + } + for _, o := range opts { + if o.preCreate != nil { + o.preCreate(&pre) + } + } + + vm := api.VmInfo{ + Name: "test", + Namespace: "test", + Cpu: api.VmCpuInfo{ + Min: vmapi.MilliCPU(pre.minCU) * pre.computeUnit.VCPU, + Use: vmapi.MilliCPU(pre.minCU) * pre.computeUnit.VCPU, + Max: vmapi.MilliCPU(pre.maxCU) * pre.computeUnit.VCPU, + }, + Mem: api.VmMemInfo{ + SlotSize: resource.NewQuantity(1<<30 /* 1 Gi */, resource.BinarySI), + Min: pre.minCU * pre.computeUnit.Mem, + Use: pre.minCU * pre.computeUnit.Mem, + Max: pre.maxCU * pre.computeUnit.Mem, + }, + ScalingConfig: nil, + AlwaysMigrate: false, + ScalingEnabled: true, + } + + config := core.Config{ + DefaultScalingConfig: api.ScalingConfig{ + LoadAverageFractionTarget: 0.5, + MemoryUsageFractionTarget: 0.5, + }, + PluginRequestTick: 5 * time.Second, + MonitorDeniedDownscaleCooldown: 5 * time.Second, + MonitorRetryWait: 5 * time.Second, + Warn: func(string, ...any) {}, + } + + for _, o := range opts { + if o.postCreate != nil { + o.postCreate(&vm, &config) + } + } + + return core.NewState(vm, config) +} + +type fakeClock struct { + base time.Time + now time.Time +} + +func newFakeClock() *fakeClock { + base, err := time.Parse(time.RFC3339, "2000-01-01T00:00:00Z") // a nice round number, to make things easier + if err != nil { + panic(err) + } + + return &fakeClock{base: base, now: base} +} + +func (c *fakeClock) inc(duration time.Duration) { + c.now = c.now.Add(duration) +} + +func (c *fakeClock) elapsed() time.Duration { + return c.now.Sub(c.base) +} + +func Test_NextActions(t *testing.T) { + simulateInitialSchedulerRequest := func(t *testing.T, state *core.State, clock *fakeClock, reqTime time.Duration) { + state.Plugin().NewScheduler() + + actions := state.NextActions(clock.now) + assert.NotNil(t, actions.PluginRequest) + action := actions.PluginRequest + assert.Nil(t, action.LastPermit) + state.Plugin().StartingRequest(clock.now, action.Target) + clock.inc(reqTime) + assert.NoError(t, state.Plugin().RequestSuccessful(clock.now, api.PluginResponse{ + Permit: action.Target, + Migrate: nil, + ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, // TODO: make this configurable... somehow. 
+ })) + } + + // Thorough checks of a relatively simple flow + t.Run("BasicScaleupFlow", func(t *testing.T) { + warnings := []string{} + clock := newFakeClock() + state := createInitialState( + withStoredWarnings(&warnings), + ) + + hundredMillis := 100 * time.Millisecond + + state.Plugin().NewScheduler() + state.Monitor().Active(true) + + simulateInitialSchedulerRequest(t, state, clock, hundredMillis) + assert.Equal(t, warnings, []string{"Can't determine desired resources because compute unit hasn't been set yet"}) + warnings = nil // reset + + clock.inc(hundredMillis) + metrics := api.Metrics{ + LoadAverage1Min: 0.3, + LoadAverage5Min: 0.0, // unused + MemoryUsageBytes: 0.0, + } + state.UpdateMetrics(metrics) + + // double-check that we agree about the desired resources + assert.Equal( + t, + api.Resources{VCPU: 500, Mem: 2}, + state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.now), + ) + + // Now that the initial scheduler request is done, and we have metrics that indicate + // scale-up would be a good idea, we should be contacting the scheduler to get approval. + actions := state.NextActions(clock.now) + assert.Equal(t, core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: &api.Resources{VCPU: 250, Mem: 1}, + Target: api.Resources{VCPU: 500, Mem: 2}, + Metrics: &metrics, + }, + // shouldn't have anything to say to the other components + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + Wait: nil, // and, don't need to wait because plugin should be ongoing + }, actions) + assert.Empty(t, warnings) + // start the request: + state.Plugin().StartingRequest(clock.now, actions.PluginRequest.Target) + clock.inc(hundredMillis) + // should have nothing more to do; waiting on plugin request to come back + assert.Equal(t, core.ActionSet{ + Wait: nil, + PluginRequest: nil, + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + }, state.NextActions(clock.now)) + assert.Empty(t, warnings) + assert.NoError(t, state.Plugin().RequestSuccessful(clock.now, api.PluginResponse{ + Permit: api.Resources{VCPU: 500, Mem: 2}, + Migrate: nil, + ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, + })) + assert.Empty(t, warnings) + assert.Equal(t, clock.elapsed(), 2*hundredMillis) + + // Scheduler approval is done, now we should be making the request to NeonVM + actions = state.NextActions(clock.now) + assert.Equal(t, core.ActionSet{ + PluginRequest: nil, + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: api.Resources{VCPU: 250, Mem: 1}, + Target: api.Resources{VCPU: 500, Mem: 2}, + }, + MonitorDownscale: nil, + MonitorUpscale: nil, + Wait: nil, // don't need to wait because NeonVM should be ongoing + }, actions) + assert.Empty(t, warnings) + // start the request: + state.NeonVM().StartingRequest(clock.now, actions.NeonVMRequest.Target) + clock.inc(hundredMillis) + // should have nothing more to do; waiting on NeonVM request to come back + assert.Equal(t, core.ActionSet{ + Wait: nil, + PluginRequest: nil, + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + }, state.NextActions(clock.now)) + assert.Empty(t, warnings) + state.NeonVM().RequestSuccessful(clock.now) + assert.Empty(t, warnings) + assert.Equal(t, clock.elapsed(), 3*hundredMillis) + + // NeonVM change is done, now we should finish by notifying the vm-monitor + actions = state.NextActions(clock.now) + assert.Equal(t, core.ActionSet{ + PluginRequest: nil, + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: &core.ActionMonitorUpscale{ + Current: 
api.Resources{VCPU: 250, Mem: 1}, + Target: api.Resources{VCPU: 500, Mem: 2}, + }, + Wait: nil, // don't need to wait because monitor request should be ongoing + }, actions) + assert.Empty(t, warnings) + // start the request: + state.Monitor().StartingUpscaleRequest(clock.now) + clock.inc(hundredMillis) + // should have nothing more to do; waiting on vm-monitor request to come back + assert.Equal(t, core.ActionSet{ + Wait: nil, + PluginRequest: nil, + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + }, state.NextActions(clock.now)) + assert.Empty(t, warnings) + state.Monitor().UpscaleRequestSuccessful(clock.now, actions.MonitorUpscale.Target) + assert.Empty(t, warnings) + assert.Equal(t, clock.elapsed(), 4*hundredMillis) + }) +} From 5ac1b60cd61a9a883bc37d31e6c8067ef5d7c51c Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 29 Sep 2023 08:34:49 -0700 Subject: [PATCH 10/59] switch test from testify/assert to testify/require tl;dr of the difference is that require calls t.FailNow(), so we stop at the first error, which is more in line with what we want. --- pkg/agent/core/state_test.go | 50 ++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 1506f1289..8117314aa 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -5,7 +5,7 @@ import ( "testing" "time" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "golang.org/x/exp/slices" "k8s.io/apimachinery/pkg/api/resource" @@ -280,12 +280,12 @@ func Test_NextActions(t *testing.T) { state.Plugin().NewScheduler() actions := state.NextActions(clock.now) - assert.NotNil(t, actions.PluginRequest) + require.NotNil(t, actions.PluginRequest) action := actions.PluginRequest - assert.Nil(t, action.LastPermit) + require.Nil(t, action.LastPermit) state.Plugin().StartingRequest(clock.now, action.Target) clock.inc(reqTime) - assert.NoError(t, state.Plugin().RequestSuccessful(clock.now, api.PluginResponse{ + require.NoError(t, state.Plugin().RequestSuccessful(clock.now, api.PluginResponse{ Permit: action.Target, Migrate: nil, ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, // TODO: make this configurable... somehow. @@ -306,7 +306,7 @@ func Test_NextActions(t *testing.T) { state.Monitor().Active(true) simulateInitialSchedulerRequest(t, state, clock, hundredMillis) - assert.Equal(t, warnings, []string{"Can't determine desired resources because compute unit hasn't been set yet"}) + require.Equal(t, warnings, []string{"Can't determine desired resources because compute unit hasn't been set yet"}) warnings = nil // reset clock.inc(hundredMillis) @@ -318,7 +318,7 @@ func Test_NextActions(t *testing.T) { state.UpdateMetrics(metrics) // double-check that we agree about the desired resources - assert.Equal( + require.Equal( t, api.Resources{VCPU: 500, Mem: 2}, state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.now), @@ -327,7 +327,7 @@ func Test_NextActions(t *testing.T) { // Now that the initial scheduler request is done, and we have metrics that indicate // scale-up would be a good idea, we should be contacting the scheduler to get approval. 
actions := state.NextActions(clock.now) - assert.Equal(t, core.ActionSet{ + require.Equal(t, core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ LastPermit: &api.Resources{VCPU: 250, Mem: 1}, Target: api.Resources{VCPU: 500, Mem: 2}, @@ -339,30 +339,30 @@ func Test_NextActions(t *testing.T) { MonitorUpscale: nil, Wait: nil, // and, don't need to wait because plugin should be ongoing }, actions) - assert.Empty(t, warnings) + require.Empty(t, warnings) // start the request: state.Plugin().StartingRequest(clock.now, actions.PluginRequest.Target) clock.inc(hundredMillis) // should have nothing more to do; waiting on plugin request to come back - assert.Equal(t, core.ActionSet{ + require.Equal(t, core.ActionSet{ Wait: nil, PluginRequest: nil, NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, }, state.NextActions(clock.now)) - assert.Empty(t, warnings) - assert.NoError(t, state.Plugin().RequestSuccessful(clock.now, api.PluginResponse{ + require.Empty(t, warnings) + require.NoError(t, state.Plugin().RequestSuccessful(clock.now, api.PluginResponse{ Permit: api.Resources{VCPU: 500, Mem: 2}, Migrate: nil, ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, })) - assert.Empty(t, warnings) - assert.Equal(t, clock.elapsed(), 2*hundredMillis) + require.Empty(t, warnings) + require.Equal(t, clock.elapsed(), 2*hundredMillis) // Scheduler approval is done, now we should be making the request to NeonVM actions = state.NextActions(clock.now) - assert.Equal(t, core.ActionSet{ + require.Equal(t, core.ActionSet{ PluginRequest: nil, NeonVMRequest: &core.ActionNeonVMRequest{ Current: api.Resources{VCPU: 250, Mem: 1}, @@ -372,26 +372,26 @@ func Test_NextActions(t *testing.T) { MonitorUpscale: nil, Wait: nil, // don't need to wait because NeonVM should be ongoing }, actions) - assert.Empty(t, warnings) + require.Empty(t, warnings) // start the request: state.NeonVM().StartingRequest(clock.now, actions.NeonVMRequest.Target) clock.inc(hundredMillis) // should have nothing more to do; waiting on NeonVM request to come back - assert.Equal(t, core.ActionSet{ + require.Equal(t, core.ActionSet{ Wait: nil, PluginRequest: nil, NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, }, state.NextActions(clock.now)) - assert.Empty(t, warnings) + require.Empty(t, warnings) state.NeonVM().RequestSuccessful(clock.now) - assert.Empty(t, warnings) - assert.Equal(t, clock.elapsed(), 3*hundredMillis) + require.Empty(t, warnings) + require.Equal(t, clock.elapsed(), 3*hundredMillis) // NeonVM change is done, now we should finish by notifying the vm-monitor actions = state.NextActions(clock.now) - assert.Equal(t, core.ActionSet{ + require.Equal(t, core.ActionSet{ PluginRequest: nil, NeonVMRequest: nil, MonitorDownscale: nil, @@ -401,21 +401,21 @@ func Test_NextActions(t *testing.T) { }, Wait: nil, // don't need to wait because monitor request should be ongoing }, actions) - assert.Empty(t, warnings) + require.Empty(t, warnings) // start the request: state.Monitor().StartingUpscaleRequest(clock.now) clock.inc(hundredMillis) // should have nothing more to do; waiting on vm-monitor request to come back - assert.Equal(t, core.ActionSet{ + require.Equal(t, core.ActionSet{ Wait: nil, PluginRequest: nil, NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, }, state.NextActions(clock.now)) - assert.Empty(t, warnings) + require.Empty(t, warnings) state.Monitor().UpscaleRequestSuccessful(clock.now, actions.MonitorUpscale.Target) - assert.Empty(t, warnings) - assert.Equal(t, clock.elapsed(), 4*hundredMillis) + 
require.Empty(t, warnings) + require.Equal(t, clock.elapsed(), 4*hundredMillis) }) } From bcb8d96661e40faa47591091754314d336991a9b Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 29 Sep 2023 13:37:05 -0700 Subject: [PATCH 11/59] add State.debug for print debugging --- pkg/agent/core/state.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 9b7b432c7..67069eadd 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -61,6 +61,10 @@ type State struct { config Config + // unused. Exists to make it easier to add print debugging (via .config.Warn) for a single call + // to NextActions. + debug bool + // vm gives the current state of the VM - or at least, the state of the fields we care about. // // NB: any contents behind pointers in vm are immutable. Any time the field is updated, we @@ -155,6 +159,7 @@ type neonvmState struct { func NewState(vm api.VmInfo, config Config) *State { return &State{ config: config, + debug: false, vm: vm, plugin: pluginState{ alive: false, @@ -579,6 +584,10 @@ func (s *State) boundResourcesByPluginApproved(resources api.Resources) api.Reso // PUBLIC FUNCTIONS TO UPDATE THE STATE // ////////////////////////////////////////// +func (s *State) Debug(enabled bool) { + s.debug = enabled +} + func (s *State) UpdatedVM(vm api.VmInfo) { s.vm = vm } From 921a78ee071a324b477c50f2791bfbaca6a7c653 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 29 Sep 2023 14:54:10 -0700 Subject: [PATCH 12/59] fix warn log lines: s/informant/vm-monitor/ --- pkg/agent/core/state.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 67069eadd..a31cd82ce 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -291,11 +291,11 @@ func (s *State) NextActions(now time.Time) ActionSet { Target: desiredResources.Max(*s.monitor.approved), } } else if !s.monitor.active { - s.config.Warn("Wanted to send informant upscale request, but not active") + s.config.Warn("Wanted to send vm-monitor upscale request, but not active") } else if s.monitor.ongoingRequest != nil && s.monitor.ongoingRequest.kind != monitorRequestKindUpscale { - s.config.Warn("Wanted to send informant upscale request, but waiting other ongoing %s request", s.monitor.ongoingRequest.kind) + s.config.Warn("Wanted to send vm-monitor upscale request, but waiting other ongoing %s request", s.monitor.ongoingRequest.kind) } else if s.monitor.ongoingRequest == nil { - s.config.Warn("Wanted to send informant upscale request, but waiting on retry rate limit") + s.config.Warn("Wanted to send vm-monitor upscale request, but waiting on retry rate limit") } } @@ -309,7 +309,7 @@ func (s *State) NextActions(now time.Time) ActionSet { } wantMonitorDownscaleRequest := s.monitor.approved != nil && *s.monitor.approved != resourcesForMonitorDownscale if s.monitor.approved == nil && resourcesForMonitorDownscale != using { - s.config.Warn("Wanted to send informant downscale request, but haven't yet gotten information about its resources") + s.config.Warn("Wanted to send vm-monitor downscale request, but haven't yet gotten information about its resources") } // However, we may need to wait before retrying (or for any ongoing requests to finish) makeMonitorDownscaleRequest := wantMonitorDownscaleRequest && @@ -328,11 +328,11 @@ func (s *State) NextActions(now time.Time) ActionSet { Target: resourcesForMonitorDownscale, } } else if !s.monitor.active { - s.config.Warn("Wanted 
to send informant downscale request, but not active") + s.config.Warn("Wanted to send vm-monitor downscale request, but not active") } else if s.monitor.ongoingRequest != nil && s.monitor.ongoingRequest.kind != monitorRequestKindDownscale { - s.config.Warn("Wanted to send informant downscale request, but waiting on other ongoing %s request", s.monitor.ongoingRequest.kind) + s.config.Warn("Wanted to send vm-monitor downscale request, but waiting on other ongoing %s request", s.monitor.ongoingRequest.kind) } else if s.monitor.ongoingRequest == nil { - s.config.Warn("Wanted to send informant downscale request, but waiting on retry rate limit") + s.config.Warn("Wanted to send vm-monitor downscale request, but waiting on retry rate limit") } } From b7d205b5d5e13d6070458a42d943b54bc52f9d54 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 29 Sep 2023 15:48:58 -0700 Subject: [PATCH 13/59] fix comment --- pkg/agent/core/state.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index a31cd82ce..6506f87b1 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -57,7 +57,7 @@ type Config struct { // State holds all of the necessary internal state for a VM in order to make scaling // decisions type State struct { - // ANY CHANGED FIELDS MUST BE UPDATED IN dump.go AS WELL + // ANY CHANGED FIELDS MUST BE UPDATED IN dumpstate.go AS WELL config Config From f3f527fa18084fc0ed550fd3b3a491dc21c87aeb Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 29 Sep 2023 21:34:11 -0700 Subject: [PATCH 14/59] rewrite the rewrite, I guess --- deploy/agent/config_map.yaml | 1 + pkg/agent/config.go | 5 + pkg/agent/core/dumpstate.go | 8 +- pkg/agent/core/state.go | 635 +++++++++++++++++++---------- pkg/agent/core/state_test.go | 195 ++++++++- pkg/agent/executor/exec_monitor.go | 10 +- pkg/agent/executor/exec_plugin.go | 1 + pkg/agent/runner.go | 1 + 8 files changed, 610 insertions(+), 246 deletions(-) diff --git a/deploy/agent/config_map.yaml b/deploy/agent/config_map.yaml index 5f06d0ff8..40cea39fa 100644 --- a/deploy/agent/config_map.yaml +++ b/deploy/agent/config_map.yaml @@ -34,6 +34,7 @@ data: "schedulerName": "autoscale-scheduler", "requestTimeoutSeconds": 2, "requestAtLeastEverySeconds": 5, + "retryDeniedUpscaleSeconds": 2, "requestPort": 10299 }, "dumpState": { diff --git a/pkg/agent/config.go b/pkg/agent/config.go index ae13327e2..1a36e2d75 100644 --- a/pkg/agent/config.go +++ b/pkg/agent/config.go @@ -93,6 +93,9 @@ type SchedulerConfig struct { // RequestAtLeastEverySeconds gives the maximum duration we should go without attempting a // request to the scheduler, even if nothing's changed. 
RequestAtLeastEverySeconds uint `json:"requestAtLeastEverySeconds"` + // RetryDeniedUpscaleSeconds gives the duration, in seconds, that we must wait before resending + // a request for resources that were not approved + RetryDeniedUpscaleSeconds uint `json:"retryDeniedUpscaleSeconds"` // RequestPort defines the port to access the scheduler's ✨special✨ API with RequestPort uint16 `json:"requestPort"` } @@ -153,6 +156,8 @@ func (c *Config) validate() error { ec.Add(c.Scaling.DefaultConfig.Validate()) erc.Whenf(ec, c.Scheduler.RequestPort == 0, zeroTmpl, ".scheduler.requestPort") erc.Whenf(ec, c.Scheduler.RequestTimeoutSeconds == 0, zeroTmpl, ".scheduler.requestTimeoutSeconds") + erc.Whenf(ec, c.Scheduler.RequestAtLeastEverySeconds == 0, zeroTmpl, ".scheduler.requestAtLeastEverySeconds") + erc.Whenf(ec, c.Scheduler.RetryDeniedUpscaleSeconds == 0, zeroTmpl, ".scheduler.retryDeniedUpscaleSeconds") erc.Whenf(ec, c.Scheduler.SchedulerName == "", emptyTmpl, ".scheduler.schedulerName") return ec.Resolve() diff --git a/pkg/agent/core/dumpstate.go b/pkg/agent/core/dumpstate.go index bbb833e5e..f4c521d40 100644 --- a/pkg/agent/core/dumpstate.go +++ b/pkg/agent/core/dumpstate.go @@ -70,7 +70,6 @@ func (s *pluginState) dump() pluginStateDump { } type monitorStateDump struct { - Active bool `json:"active"` OngoingRequest *OngoingMonitorRequestDump `json:"ongoingRequest"` RequestedUpscale *requestedUpscaleDump `json:"requestedUpscale"` DeniedDownscale *deniedDownscaleDump `json:"deniedDownscale"` @@ -79,7 +78,8 @@ type monitorStateDump struct { UpscaleFailureAt *time.Time `json:"upscaleFailureAt"` } type OngoingMonitorRequestDump struct { - Kind monitorRequestKind `json:"kind"` + Kind monitorRequestKind `json:"kind"` + Requested api.Resources `json:"resources"` } type requestedUpscaleDump struct { At time.Time `json:"at"` @@ -114,12 +114,12 @@ func (s *monitorState) dump() monitorStateDump { var ongoingRequest *OngoingMonitorRequestDump if s.ongoingRequest != nil { ongoingRequest = &OngoingMonitorRequestDump{ - Kind: s.ongoingRequest.kind, + Kind: s.ongoingRequest.kind, + Requested: s.ongoingRequest.requested, } } return monitorStateDump{ - Active: s.active, OngoingRequest: ongoingRequest, RequestedUpscale: requestedUpscale, DeniedDownscale: deniedDownscale, diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 6506f87b1..2abe15e6a 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -21,6 +21,7 @@ package core // ² https://github.com/neondatabase/autoscaling/issues/350 import ( + "errors" "fmt" "math" "strings" @@ -42,6 +43,10 @@ type Config struct { // plugin, even if nothing's changed. PluginRequestTick time.Duration + // PluginDeniedRetryWait gives the amount of time we must wait before re-requesting resources + // that were not fully granted. + PluginDeniedRetryWait time.Duration + // MonitorDeniedDownscaleCooldown gives the time we must wait between making duplicate // downscale requests to the vm-monitor where the previous failed. MonitorDeniedDownscaleCooldown time.Duration @@ -105,10 +110,6 @@ type pluginRequested struct { } type monitorState struct { - // active is true iff the agent is currently "confirmed" and not "suspended" by the monitor. - // Otherwise, we shouldn't be making any kind of scaling requests. 
- active bool - ongoingRequest *ongoingMonitorRequest // requestedUpscale, if not nil, stores the most recent *unresolved* upscaling requested by the @@ -126,8 +127,13 @@ type monitorState struct { upscaleFailureAt *time.Time } +func (ms *monitorState) active() bool { + return ms.approved != nil +} + type ongoingMonitorRequest struct { - kind monitorRequestKind + kind monitorRequestKind + requested api.Resources } type monitorRequestKind string @@ -156,6 +162,10 @@ type neonvmState struct { requestFailedAt *time.Time } +func (ns *neonvmState) ongoingRequest() bool { + return ns.ongoingRequested != nil +} + func NewState(vm api.VmInfo, config Config) *State { return &State{ config: config, @@ -169,7 +179,6 @@ func NewState(vm api.VmInfo, config Config) *State { permit: nil, }, monitor: monitorState{ - active: false, ongoingRequest: nil, requestedUpscale: nil, deniedDownscale: nil, @@ -191,203 +200,345 @@ func NewState(vm api.VmInfo, config Config) *State { func (s *State) NextActions(now time.Time) ActionSet { var actions ActionSet - using := s.vm.Using() + desiredResources, desiredResourcesRequiredWait := s.DesiredResourcesFromMetricsOrRequestedUpscaling(now) - desiredResources := s.DesiredResourcesFromMetricsOrRequestedUpscaling(now) + // ---- + // Requests to the scheduler plugin: + var pluginRequiredWait *time.Duration + actions.PluginRequest, pluginRequiredWait = s.calculatePluginAction(now, desiredResources) - desiredResourcesApprovedByMonitor := s.boundResourcesByMonitorApproved(desiredResources) - desiredResourcesApprovedByPlugin := s.boundResourcesByPluginApproved(desiredResources) - // NB: monitor approved provides a lower bound - approvedDesiredResources := desiredResourcesApprovedByPlugin.Max(desiredResourcesApprovedByMonitor) + // ---- + // Requests to NeonVM: + var pluginRequested *api.Resources + var pluginRequestedPhase string = "" + if s.plugin.ongoingRequest { + pluginRequested = &s.plugin.lastRequest.resources + pluginRequestedPhase = "ongoing" + } else if actions.PluginRequest != nil { + pluginRequested = &actions.PluginRequest.Target + pluginRequestedPhase = "planned" + } + actions.NeonVMRequest = s.calculateNeonVMAction(now, desiredResources, pluginRequested, pluginRequestedPhase) - ongoingNeonVMRequest := s.neonvm.ongoingRequested != nil + // ---- + // Requests to vm-monitor (upscaling) + // + // NB: upscaling takes priority over downscaling requests, because otherwise we'd potentially + // forego notifying the vm-monitor of increased resources because we were busy asking if it + // could downscale. + // var monitorUpscaleRequestResources api.Resources - var requestForPlugin api.Resources - if s.plugin.permit == nil { - // If we haven't yet gotten a proper plugin response, then we aren't allowed to ask for - // anything beyond our current usage. - requestForPlugin = using - } else { - // ... Otherwise, we should: - // 1. "inform" the plugin of any downscaling since the previous permit - // 2. "request" any desired upscaling relative to to the previous permit - // with (2) taking priority over (1), if there's any conflicts. 
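For orientation, the ActionSet that the new NextActions() builds up here is meant to be consumed roughly like the sketch below. The handle* functions are hypothetical placeholders; the real consumers are the per-action executor loops, which also wake on state updates instead of sleeping.

    actions := state.NextActions(time.Now())
    if a := actions.PluginRequest; a != nil {
        handlePluginRequest(*a) // hypothetical
    }
    if a := actions.NeonVMRequest; a != nil {
        handleNeonVMRequest(*a) // hypothetical
    }
    if a := actions.MonitorUpscale; a != nil {
        handleMonitorUpscale(*a) // hypothetical
    }
    if a := actions.MonitorDownscale; a != nil {
        handleMonitorDownscale(*a) // hypothetical
    }
    if actions.Wait != nil {
        // nothing can be done right now; re-evaluate after this much time
        // (or earlier, if a request completes or new metrics arrive)
        time.Sleep(actions.Wait.Duration)
    }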
- requestForPlugin = desiredResources.Max(using) // ignore "desired" downscaling with .Max(using) + var monitorUpscaleRequiredWait *time.Duration + actions.MonitorUpscale, monitorUpscaleRequiredWait = s.calculateMonitorUpscaleAction(now, desiredResources) + + // ---- + // Requests to vm-monitor (downscaling) + plannedUpscale := actions.MonitorUpscale != nil + var monitorDownscaleRequiredWait *time.Duration + actions.MonitorDownscale, monitorDownscaleRequiredWait = s.calculateMonitorDownscaleAction(now, desiredResources, plannedUpscale) + + // --- and that's all the request types! --- + + // If there's anything waiting, we should also note how long we should wait for. + // There's two components we could be waiting on: the scheduler plugin, and the vm-monitor. + maximumDuration := time.Duration(int64(uint64(1)<<63 - 1)) + requiredWait := maximumDuration + + requiredWaits := []*time.Duration{ + desiredResourcesRequiredWait, + pluginRequiredWait, + monitorUpscaleRequiredWait, + monitorDownscaleRequiredWait, + } + for _, w := range requiredWaits { + if w != nil { + requiredWait = util.Min(requiredWait, *w) + } + } + + // If we're waiting on anything, add it as an action + if requiredWait != maximumDuration { + actions.Wait = &ActionWait{Duration: requiredWait} + } + + return actions +} + +func (s *State) calculatePluginAction( + now time.Time, + desiredResources api.Resources, +) (*ActionPluginRequest, *time.Duration) { + logFailureReason := func(reason string) { + s.config.Warn("Wanted to make a request to the scheduler plugin, but %s", reason) + } + + // additional resources we want to request OR previous downscaling we need to inform the plugin of + // NOTE: only valid if s.plugin.permit != nil AND there's no ongoing NeonVM request. + requestResources := s.clampResources( + s.vm.Using(), + desiredResources, + s.vm.Using(), // don't decrease below VM using (decrease happens *before* telling the plugin) + desiredResources, // but any increase is ok + ) + // resources if we're just informing the plugin of current resource usage. + currentResources := s.vm.Using() + if s.neonvm.ongoingRequested != nil { + // include any ongoing NeonVM request, because we're already using that. + currentResources = currentResources.Max(*s.neonvm.ongoingRequested) } // We want to make a request to the scheduler plugin if: - // 1. we've waited long enough since the previous request; or - // 2.a. we want to request resources / inform it of downscale; and - // b. there isn't any ongoing, conflicting request - timeForNewPluginRequest := s.plugin.lastRequest == nil || now.Sub(s.plugin.lastRequest.at) >= s.config.PluginRequestTick - shouldUpdatePlugin := s.plugin.lastRequest != nil && - // "we haven't tried requesting *these* resources from it yet, or we can retry requesting" - (s.plugin.lastRequest.resources != requestForPlugin || timeForNewPluginRequest) && - !ongoingNeonVMRequest - - if !s.plugin.ongoingRequest && (timeForNewPluginRequest || shouldUpdatePlugin) && s.plugin.alive { - if !shouldUpdatePlugin { - // If we shouldn't "update" the plugin, then just inform it about the current resources - // and metrics. - actions.PluginRequest = &ActionPluginRequest{ - LastPermit: s.plugin.permit, - Target: using, - Metrics: s.metrics, - } - } else { - // ... Otherwise, we should try requesting something new form it. 
- actions.PluginRequest = &ActionPluginRequest{ - LastPermit: s.plugin.permit, - Target: desiredResourcesApprovedByMonitor, - Metrics: s.metrics, - } - } - } else if timeForNewPluginRequest || shouldUpdatePlugin { - if s.plugin.alive { - s.config.Warn("Wanted to make a request to the plugin, but there's already one ongoing") - } else { - s.config.Warn("Wanted to make a request to the plugin, but there isn't one active right now") + // 1. it's been long enough since the previous request (so we're obligated by PluginRequestTick); or + // 2.a. we want to request resources / inform it of downscale; + // b. there isn't any ongoing, conflicting request; and + // c. we haven't recently been denied these resources + var timeUntilNextRequestTick time.Duration + if s.plugin.lastRequest != nil { + timeUntilNextRequestTick = s.config.PluginRequestTick - now.Sub(s.plugin.lastRequest.at) + } + + timeForRequest := timeUntilNextRequestTick <= 0 + + var timeUntilRetryBackoffExpires time.Duration + requestPreviouslyDenied := !s.plugin.ongoingRequest && + s.plugin.lastRequest != nil && + s.plugin.permit != nil && + s.plugin.lastRequest.resources.HasFieldGreaterThan(*s.plugin.permit) + if requestPreviouslyDenied { + timeUntilRetryBackoffExpires = s.plugin.lastRequest.at.Add(s.config.PluginDeniedRetryWait).Sub(now) + } + + waitingOnRetryBackoff := timeUntilRetryBackoffExpires > 0 + + // changing the resources we're requesting from the plugin + wantToRequestNewResources := s.plugin.lastRequest != nil && s.plugin.permit != nil && + requestResources != *s.plugin.permit + // ... and this isn't a duplicate (or, at least it's been long enough) + shouldRequestNewResources := wantToRequestNewResources && !waitingOnRetryBackoff + + permittedRequestResources := desiredResources + if !shouldRequestNewResources { + permittedRequestResources = currentResources + } + + // Can't make a request if the plugin isn't active/alive + if !s.plugin.alive { + if timeForRequest || shouldRequestNewResources { + logFailureReason("there isn't one active right now") } + return nil, nil } - // We want to make a request to NeonVM if we've been approved for a change in resources that - // we're not currently using. - if approvedDesiredResources != using { - // ... but we can't make one if there's already a request ongoing, either via the NeonVM API - // or to the scheduler plugin, because they require taking out the request lock. - if !ongoingNeonVMRequest && !s.plugin.ongoingRequest { - actions.NeonVMRequest = &ActionNeonVMRequest{ - Current: using, - Target: approvedDesiredResources, - } - } else { - var reqs []string - if s.plugin.ongoingRequest { - reqs = append(reqs, "plugin request") - } - if ongoingNeonVMRequest && *s.neonvm.ongoingRequested != approvedDesiredResources { - reqs = append(reqs, "NeonVM request (for different resources)") - } - - if len(reqs) != 0 { - s.config.Warn("Wanted to make a request to NeonVM API, but there's already %s ongoing", strings.Join(reqs, " and ")) - } + // Can't make a duplicate request + if s.plugin.ongoingRequest { + // ... but if the desired request is different from what we would be making, + // then it's worth logging + if s.plugin.lastRequest.resources != permittedRequestResources { + logFailureReason("there's already an ongoing request for different resources") } + return nil, nil } - // We should make an upscale request to the monitor if we've upscaled and the monitor - // doesn't know about it. 
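As a worked example of the two timers gating a plugin request (all values assumed: PluginRequestTick = 5s, PluginDeniedRetryWait = 2s, last request sent 3s ago and partially denied):

    now := time.Now()
    lastRequestAt := now.Add(-3 * time.Second)

    // Periodic request timer:
    timeUntilNextRequestTick := 5*time.Second - now.Sub(lastRequestAt) // 2s left
    timeForRequest := timeUntilNextRequestTick <= 0                    // false: not yet obligated

    // Denied-upscale retry timer:
    timeUntilRetryBackoffExpires := lastRequestAt.Add(2 * time.Second).Sub(now) // -1s
    waitingOnRetryBackoff := timeUntilRetryBackoffExpires > 0                   // false: backoff over

    _, _ = timeForRequest, waitingOnRetryBackoff

In that situation a request goes out immediately only if the resources we now want differ from the current permit; otherwise calculatePluginAction returns a 2s wait.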
- wantMonitorUpscaleRequest := s.monitor.approved != nil && *s.monitor.approved != desiredResources.Max(*s.monitor.approved) - // However, we may need to wait before retrying (or for any ongoing requests to finish) - makeMonitorUpscaleRequest := wantMonitorUpscaleRequest && - s.monitor.active && - s.monitor.ongoingRequest == nil && - (s.monitor.upscaleFailureAt == nil || - now.Sub(*s.monitor.upscaleFailureAt) >= s.config.MonitorRetryWait) - if wantMonitorUpscaleRequest { - if makeMonitorUpscaleRequest { - actions.MonitorUpscale = &ActionMonitorUpscale{ - Current: *s.monitor.approved, - Target: desiredResources.Max(*s.monitor.approved), - } - } else if !s.monitor.active { - s.config.Warn("Wanted to send vm-monitor upscale request, but not active") - } else if s.monitor.ongoingRequest != nil && s.monitor.ongoingRequest.kind != monitorRequestKindUpscale { - s.config.Warn("Wanted to send vm-monitor upscale request, but waiting other ongoing %s request", s.monitor.ongoingRequest.kind) - } else if s.monitor.ongoingRequest == nil { - s.config.Warn("Wanted to send vm-monitor upscale request, but waiting on retry rate limit") + // At this point, all that's left is either making the request, or saying to wait. + // The rest of the complication is just around accurate logging. + if timeForRequest || shouldRequestNewResources { + return &ActionPluginRequest{ + LastPermit: s.plugin.permit, + Target: permittedRequestResources, + Metrics: s.metrics, + }, nil + } else { + if wantToRequestNewResources && waitingOnRetryBackoff { + logFailureReason("but previous request for more resources was denied too recently") + } + waitTime := timeUntilNextRequestTick + if waitingOnRetryBackoff { + waitTime = util.Min(waitTime, timeUntilRetryBackoffExpires) } + return nil, &waitTime + } +} + +func (s *State) calculateNeonVMAction( + now time.Time, + desiredResources api.Resources, + pluginRequested *api.Resources, + pluginRequestedPhase string, +) *ActionNeonVMRequest { + // clamp desiredResources to what we're allowed to make a request for + desiredResources = s.clampResources( + s.vm.Using(), // current: what we're using already + desiredResources, // target: desired resources + desiredResources.Max(s.monitorApprovedLowerBound()), // lower bound: downscaling that the monitor has approved + desiredResources.Min(s.pluginApprovedUpperBound()), // upper bound: upscaling that the plugin has approved + ) + + // If we're already using the desired resources, then no need to make a request + if s.vm.Using() == desiredResources { + return nil } - // We should make a downscale request to the monitor if we want to downscale but haven't been - // approved for it. 
- var resourcesForMonitorDownscale api.Resources - if s.monitor.approved != nil { - resourcesForMonitorDownscale = desiredResources.Min(*s.monitor.approved) + conflictingPluginRequest := pluginRequested != nil && pluginRequested.HasFieldLessThan(desiredResources) + + if !s.neonvm.ongoingRequest() && !conflictingPluginRequest { + return &ActionNeonVMRequest{ + Current: s.vm.Using(), + Target: desiredResources, + } } else { - resourcesForMonitorDownscale = desiredResources.Min(using) - } - wantMonitorDownscaleRequest := s.monitor.approved != nil && *s.monitor.approved != resourcesForMonitorDownscale - if s.monitor.approved == nil && resourcesForMonitorDownscale != using { - s.config.Warn("Wanted to send vm-monitor downscale request, but haven't yet gotten information about its resources") - } - // However, we may need to wait before retrying (or for any ongoing requests to finish) - makeMonitorDownscaleRequest := wantMonitorDownscaleRequest && - s.monitor.active && - s.monitor.ongoingRequest == nil && - (s.monitor.deniedDownscale == nil || - s.monitor.deniedDownscale.requested != desiredResources.Min(using) || - now.Sub(s.monitor.deniedDownscale.at) >= s.config.MonitorDeniedDownscaleCooldown) && - (s.monitor.downscaleFailureAt == nil || - now.Sub(*s.monitor.downscaleFailureAt) >= s.config.MonitorRetryWait) - - if wantMonitorDownscaleRequest { - if makeMonitorDownscaleRequest { - actions.MonitorDownscale = &ActionMonitorDownscale{ - Current: *s.monitor.approved, - Target: resourcesForMonitorDownscale, - } - } else if !s.monitor.active { - s.config.Warn("Wanted to send vm-monitor downscale request, but not active") - } else if s.monitor.ongoingRequest != nil && s.monitor.ongoingRequest.kind != monitorRequestKindDownscale { - s.config.Warn("Wanted to send vm-monitor downscale request, but waiting on other ongoing %s request", s.monitor.ongoingRequest.kind) - } else if s.monitor.ongoingRequest == nil { - s.config.Warn("Wanted to send vm-monitor downscale request, but waiting on retry rate limit") + var reqs []string + if s.plugin.ongoingRequest { + reqs = append(reqs, fmt.Sprintf("plugin request %s", pluginRequestedPhase)) } + if s.neonvm.ongoingRequest() && *s.neonvm.ongoingRequested != desiredResources { + reqs = append(reqs, "NeonVM request (for different resources) ongoing") + } + + if len(reqs) != 0 { + s.config.Warn("Wanted to make a request to NeonVM API, but there's already %s ongoing", strings.Join(reqs, " and ")) + } + + return nil } +} - // --- and that's all the request types! --- +func (s *State) calculateMonitorUpscaleAction( + now time.Time, + desiredResources api.Resources, +) (*ActionMonitorUpscale, *time.Duration) { + // can't do anything if we don't have an active connection to the vm-monitor + if !s.monitor.active() { + return nil, nil + } - // If there's anything waiting, we should also note how long we should wait for. - // There's two components we could be waiting on: the scheduler plugin, and the vm-monitor. - maximumDuration := time.Duration(int64(uint64(1)<<63 - 1)) - requiredWait := maximumDuration + requestResources := s.clampResources( + *s.monitor.approved, // current: last resources we got the OK from the monitor on + s.vm.Using(), // target: what the VM is currently using + *s.monitor.approved, // don't decrease below what the monitor is currently set to (this is an *upscale* request) + desiredResources, // don't increase above desired resources + ) - // We always need to periodically send messages to the plugin. 
If actions.PluginRequest == nil, - // we know that either: - // - // (a) s.plugin.lastRequestAt != nil (otherwise timeForNewPluginRequest == true); or - // (b) s.plugin.ongoingRequest == true (the only reason why we wouldn't've exited earlier) - // - // So we actually only need to explicitly wait if there's not an ongoing request - otherwise - // we'll be notified anyways when the request is done. - if actions.PluginRequest == nil && s.plugin.alive && !s.plugin.ongoingRequest { - requiredWait = util.Min(requiredWait, now.Sub(s.plugin.lastRequest.at)) - } - - // For the vm-monitor: - // if we wanted to make EITHER a downscale or upscale request, but we previously couldn't - // because of retry timeouts, we should wait for s.config.MonitorRetryWait before trying - // again. - // OR if we wanted to downscale but got denied, we should wait for - // s.config.MonitorDownscaleCooldown before retrying. - if s.monitor.ongoingRequest == nil { - // Retry upscale on failure - if wantMonitorUpscaleRequest && s.monitor.upscaleFailureAt != nil { - if wait := now.Sub(*s.monitor.upscaleFailureAt); wait >= s.config.MonitorRetryWait { - requiredWait = util.Min(requiredWait, wait) - } + // Check validity of the request that we would send, before sending it + if requestResources.HasFieldLessThan(*s.monitor.approved) { + panic(fmt.Errorf( + "resources for vm-monitor upscaling are less than what was last approved: %+v has field less than %+v", + requestResources, + *s.monitor.approved, + )) + } + + wantToDoRequest := requestResources != *s.monitor.approved + if !wantToDoRequest { + return nil, nil + } + + // Can't make another request if there's already one ongoing + if s.monitor.ongoingRequest != nil { + var requestDescription string + if s.monitor.ongoingRequest.kind == monitorRequestKindUpscale && s.monitor.ongoingRequest.requested != requestResources { + requestDescription = "upscale request (for different resources)" + } else if s.monitor.ongoingRequest.kind == monitorRequestKindDownscale { + requestDescription = "downscale request" } - // Retry downscale on failure - if wantMonitorDownscaleRequest && s.monitor.downscaleFailureAt != nil { - if wait := now.Sub(*s.monitor.downscaleFailureAt); wait >= s.config.MonitorRetryWait { - requiredWait = util.Min(requiredWait, wait) - } + + if requestDescription != "" { + s.config.Warn("Wanted to send vm-monitor upscale request, but waiting on ongoing %s", requestDescription) } - // Retry downscale if denied - if wantMonitorDownscaleRequest && s.monitor.deniedDownscale != nil && resourcesForMonitorDownscale == s.monitor.deniedDownscale.requested { - if wait := now.Sub(s.monitor.deniedDownscale.at); wait >= s.config.MonitorDeniedDownscaleCooldown { - requiredWait = util.Min(requiredWait, wait) - } + return nil, nil + } + + // Can't make another request if we failed too recently: + if s.monitor.upscaleFailureAt != nil { + timeUntilFailureBackoffExpires := s.monitor.upscaleFailureAt.Add(s.config.MonitorRetryWait).Sub(now) + if timeUntilFailureBackoffExpires > 0 { + s.config.Warn("Wanted to send vm-monitor upscale request, but failed too recently") + return nil, &timeUntilFailureBackoffExpires } } - // If we're waiting on anything, add the action. 
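Note that HasFieldLessThan / HasFieldGreaterThan, used in the checks above, are per-field comparisons rather than a total order, so both can hold for the same pair of values. A small illustration (values assumed):

    a := api.Resources{VCPU: 500, Mem: 1}
    b := api.Resources{VCPU: 250, Mem: 2}
    aLess := a.HasFieldLessThan(b)       // true: Mem 1 < 2
    aGreater := a.HasFieldGreaterThan(b) // true: VCPU 500 > 250
    _, _ = aLess, aGreater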
- if requiredWait != maximumDuration { - actions.Wait = &ActionWait{Duration: requiredWait} + // Otherwise, we can make the request: + return &ActionMonitorUpscale{ + Current: *s.monitor.approved, + Target: requestResources, + }, nil +} + +func (s *State) calculateMonitorDownscaleAction( + now time.Time, + desiredResources api.Resources, + plannedUpscaleRequest bool, +) (*ActionMonitorDownscale, *time.Duration) { + // can't do anything if we don't have an active connection to the vm-monitor + if !s.monitor.active() { + if desiredResources.HasFieldLessThan(s.vm.Using()) { + s.config.Warn("Wanted to send vm-monitor downscale request, but there's no active connection") + } + return nil, nil } - return actions + requestResources := s.clampResources( + *s.monitor.approved, // current: what the monitor is already aware of + desiredResources, // target: what we'd like the VM to be using + desiredResources, // lower bound: any decrease is fine + *s.monitor.approved, // upper bound: don't increase (this is only downscaling!) + ) + + // Check validity of the request that we would send, before sending it + if requestResources.HasFieldGreaterThan(*s.monitor.approved) { + panic(fmt.Errorf( + "resources for vm-monitor downscaling are greater than what was last approved: %+v has field greater than %+v", + requestResources, + *s.monitor.approved, + )) + } + + wantToDoRequest := requestResources != *s.monitor.approved + if !wantToDoRequest { + return nil, nil + } + + // Can't make another request if there's already one ongoing (or if an upscaling request is + // planned) + if plannedUpscaleRequest { + s.config.Warn("Wanted to send vm-monitor downscale request, but waiting on other planned upscale request") + return nil, nil + } else if s.monitor.ongoingRequest != nil { + var requestDescription string + if s.monitor.ongoingRequest.kind == monitorRequestKindDownscale && s.monitor.ongoingRequest.requested != requestResources { + requestDescription = "downscale request (for different resources)" + } else if s.monitor.ongoingRequest.kind == monitorRequestKindUpscale { + requestDescription = "upscale request" + } + + if requestDescription != "" { + s.config.Warn("Wanted to send vm-monitor downscale request, but waiting on other ongoing %s", requestDescription) + } + return nil, nil + } + + // Can't make another request if we failed too recently: + if s.monitor.downscaleFailureAt != nil { + timeUntilFailureBackoffExpires := s.monitor.downscaleFailureAt.Add(s.config.MonitorRetryWait).Sub(now) + if timeUntilFailureBackoffExpires > 0 { + s.config.Warn("Wanted to send vm-monitor downscale request but failed too recently") + return nil, &timeUntilFailureBackoffExpires + } + } + + // Can't make another request if a recent request for resources less than or equal to the + // proposed request was denied. In general though, this should be handled by + // DesiredResourcesFromMetricsOrRequestedUpscaling, so we're better off panicking here.
+ if s.monitor.deniedDownscale != nil && !s.monitor.deniedDownscale.requested.HasFieldLessThan(requestResources) { + panic(errors.New( + "Wanted to send vm-monitor downscale request, but too soon after previously denied downscaling that should have been handled earlier", + )) + } + + // Nothing else to check, we're good to make the request + return &ActionMonitorDownscale{ + Current: *s.monitor.approved, + Target: requestResources, + }, nil } func (s *State) scalingConfig() api.ScalingConfig { @@ -398,7 +549,7 @@ func (s *State) scalingConfig() api.ScalingConfig { } } -func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) api.Resources { +func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) (api.Resources, *time.Duration) { // There's some annoying edge cases that this function has to be able to handle properly. For // the sake of completeness, they are: // @@ -426,7 +577,7 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) a // if we don't know what the compute unit is, don't do anything. if s.plugin.computeUnit == nil { s.config.Warn("Can't determine desired resources because compute unit hasn't been set yet") - return s.vm.Using() + return s.vm.Using(), nil } var goalCU uint32 @@ -453,9 +604,17 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) a // Update goalCU based on any requested upscaling or downscaling that was previously denied goalCU = util.Max(goalCU, s.requiredCUForRequestedUpscaling(*s.plugin.computeUnit)) - deniedDownscaleInEffect := s.deniedDownscaleInEffect(now) + + var deniedDownscaleAffectedResult bool + + timeUntilDeniedDownscaleExpired := s.timeUntilDeniedDownscaleExpired(now) + deniedDownscaleInEffect := timeUntilDeniedDownscaleExpired > 0 if deniedDownscaleInEffect { - goalCU = util.Max(goalCU, s.requiredCUForDeniedDownscale(*s.plugin.computeUnit, s.monitor.deniedDownscale.requested)) + reqCU := s.requiredCUForDeniedDownscale(*s.plugin.computeUnit, s.monitor.deniedDownscale.requested) + if reqCU > goalCU { + deniedDownscaleAffectedResult = true + goalCU = reqCU + } } // resources for the desired "goal" compute units @@ -487,14 +646,18 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) a // would have been factored into goalCU, affecting goalResources. Hence, the warning. s.config.Warn("Can't decrease desired resources to within VM maximum because of vm-monitor previously denied downscale request") } + preMaxResult := result result = result.Max(s.minRequiredResourcesForDeniedDownscale(*s.plugin.computeUnit, *s.monitor.deniedDownscale)) + if result != preMaxResult { + deniedDownscaleAffectedResult = true + } } // Check that the result is sound. // // With the current (naive) implementation, this is trivially ok. In future versions, it might // not be so simple, so it's good to have this integrity check here. - if !deniedDownscaleInEffect && result.HasFieldGreaterThan(s.vm.Max()) { + if !deniedDownscaleAffectedResult && result.HasFieldGreaterThan(s.vm.Max()) { panic(fmt.Errorf( "produced invalid desired state: result has field greater than max. this = %+v", *s, )) @@ -504,7 +667,12 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) a )) } - return result + var waitTime *time.Duration + if deniedDownscaleAffectedResult { + waitTime = &timeUntilDeniedDownscaleExpired + } + + return result, waitTime } // NB: we could just use s.plugin.computeUnit, but that's sometimes nil. 
This way, it's clear that @@ -529,10 +697,12 @@ func (s *State) requiredCUForRequestedUpscaling(computeUnit api.Resources) uint3 return required } -func (s *State) deniedDownscaleInEffect(now time.Time) bool { - return s.monitor.deniedDownscale != nil && - // Previous denied downscaling attempts are in effect until the cooldown expires - now.Before(s.monitor.deniedDownscale.at.Add(s.config.MonitorDeniedDownscaleCooldown)) +func (s *State) timeUntilDeniedDownscaleExpired(now time.Time) time.Duration { + if s.monitor.deniedDownscale != nil { + return s.monitor.deniedDownscale.at.Add(s.config.MonitorDeniedDownscaleCooldown).Sub(now) + } else { + return 0 + } } // NB: like requiredCUForRequestedUpscaling, we make the caller provide the values so that it's @@ -546,38 +716,61 @@ func (s *State) requiredCUForDeniedDownscale(computeUnit, deniedResources api.Re } func (s *State) minRequiredResourcesForDeniedDownscale(computeUnit api.Resources, denied deniedDownscale) api.Resources { - var res api.Resources - - if denied.requested.VCPU < denied.current.VCPU { - // increase the value by one CU's worth - res.VCPU = computeUnit.VCPU * vmapi.MilliCPU(1+uint32(denied.requested.VCPU/computeUnit.VCPU)) + // for each resource, increase the value by one CU's worth, but not greater than the value we + // were at while attempting to downscale. + // + // phrasing it like this cleanly handles some subtle edge cases when denied.current isn't a + // multiple of the compute unit. + // FIXME: add test + return api.Resources{ + VCPU: util.Min(denied.current.VCPU, computeUnit.VCPU*vmapi.MilliCPU(1+uint32(denied.requested.VCPU/computeUnit.VCPU))), + Mem: util.Min(denied.current.Mem, computeUnit.Mem*(1+uint16(denied.requested.Mem/computeUnit.Mem))), + } +} + +// clampResources uses the directionality of the difference between current and desired to +// clamp the desired resources with the upper *or* lower bound +func (s *State) clampResources( + current api.Resources, + desired api.Resources, + lowerBound api.Resources, + upperBound api.Resources, +) api.Resources { + var cpu vmapi.MilliCPU + if desired.VCPU < current.VCPU { + cpu = util.Max(desired.VCPU, lowerBound.VCPU) + } else if desired.VCPU > current.VCPU { + cpu = util.Min(desired.VCPU, upperBound.VCPU) + } else { + cpu = current.VCPU } - if denied.requested.Mem < denied.current.Mem { - res.Mem = computeUnit.Mem * (1 + uint16(denied.requested.Mem/computeUnit.Mem)) + var mem uint16 + if desired.Mem < current.Mem { + mem = util.Max(desired.Mem, lowerBound.Mem) + } else if desired.Mem > current.Mem { + mem = util.Min(desired.Mem, upperBound.Mem) + } else { + mem = current.Mem } - return res + return api.Resources{VCPU: cpu, Mem: mem} } -func (s *State) boundResourcesByMonitorApproved(resources api.Resources) api.Resources { - var lowerBound api.Resources +func (s *State) monitorApprovedLowerBound() api.Resources { if s.monitor.approved != nil { - lowerBound = *s.monitor.approved + return *s.monitor.approved } else { - lowerBound = s.vm.Using() + return s.vm.Using() } - return resources.Max(lowerBound) } -func (s *State) boundResourcesByPluginApproved(resources api.Resources) api.Resources { - var upperBound api.Resources +func (s *State) pluginApprovedUpperBound() api.Resources { if s.plugin.permit != nil { - upperBound = *s.plugin.permit + return *s.plugin.permit } else { - upperBound = s.vm.Using() + return s.vm.Using() // FIXME: this isn't quite correct; this wouldn't allow down-then-upscale without the scheduler.
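To make the clamping direction concrete, here's an illustrative call of clampResources with assumed values, where the VM is downscaling and the lower bound comes from what the vm-monitor has approved so far:

    result := s.clampResources(
        api.Resources{VCPU: 500, Mem: 2}, // current
        api.Resources{VCPU: 250, Mem: 1}, // desired: decrease both fields
        api.Resources{VCPU: 250, Mem: 2}, // lower bound: applies because desired < current
        api.Resources{VCPU: 500, Mem: 2}, // upper bound: irrelevant here, nothing is increasing
    )
    // result == api.Resources{VCPU: 250, Mem: 2}

Likewise for minRequiredResourcesForDeniedDownscale with a {250m CPU, 1 memory slot} compute unit (values assumed): if a downscale to {100m, 1} was denied while using {600m, 3}, the floor becomes {min(600, 250*(1+0)), min(3, 1*(1+1))} = {250m, 2}.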
} - return resources.Min(upperBound) } ////////////////////////////////////////// @@ -678,7 +871,6 @@ func (s *State) Monitor() MonitorHandle { func (h MonitorHandle) Reset() { h.s.monitor = monitorState{ - active: false, ongoingRequest: nil, requestedUpscale: nil, deniedDownscale: nil, @@ -689,7 +881,12 @@ func (h MonitorHandle) Reset() { } func (h MonitorHandle) Active(active bool) { - h.s.monitor.active = active + if active { + approved := h.s.vm.Using() + h.s.monitor.approved = &approved // TODO: this is racy + } else { + h.s.monitor.approved = nil + } } func (h MonitorHandle) UpscaleRequested(now time.Time, resources api.MoreResources) { @@ -700,14 +897,17 @@ func (h MonitorHandle) UpscaleRequested(now time.Time, resources api.MoreResourc } } -func (h MonitorHandle) StartingUpscaleRequest(now time.Time) { - h.s.monitor.ongoingRequest = &ongoingMonitorRequest{kind: monitorRequestKindUpscale} +func (h MonitorHandle) StartingUpscaleRequest(now time.Time, resources api.Resources) { + h.s.monitor.ongoingRequest = &ongoingMonitorRequest{ + kind: monitorRequestKindUpscale, + requested: resources, + } h.s.monitor.upscaleFailureAt = nil } -func (h MonitorHandle) UpscaleRequestSuccessful(now time.Time, resources api.Resources) { +func (h MonitorHandle) UpscaleRequestSuccessful(now time.Time) { + h.s.monitor.approved = &h.s.monitor.ongoingRequest.requested h.s.monitor.ongoingRequest = nil - h.s.monitor.approved = &resources } func (h MonitorHandle) UpscaleRequestFailed(now time.Time) { @@ -715,25 +915,28 @@ func (h MonitorHandle) UpscaleRequestFailed(now time.Time) { h.s.monitor.upscaleFailureAt = &now } -func (h MonitorHandle) StartingDownscaleRequest(now time.Time) { - h.s.monitor.ongoingRequest = &ongoingMonitorRequest{kind: monitorRequestKindDownscale} +func (h MonitorHandle) StartingDownscaleRequest(now time.Time, resources api.Resources) { + h.s.monitor.ongoingRequest = &ongoingMonitorRequest{ + kind: monitorRequestKindDownscale, + requested: resources, + } h.s.monitor.downscaleFailureAt = nil } -func (h MonitorHandle) DownscaleRequestAllowed(now time.Time, requested api.Resources) { +func (h MonitorHandle) DownscaleRequestAllowed(now time.Time) { + h.s.monitor.approved = &h.s.monitor.ongoingRequest.requested h.s.monitor.ongoingRequest = nil - h.s.monitor.approved = &requested h.s.monitor.deniedDownscale = nil } // Downscale request was successful but the monitor denied our request. 
-func (h MonitorHandle) DownscaleRequestDenied(now time.Time, current, requested api.Resources) { - h.s.monitor.ongoingRequest = nil +func (h MonitorHandle) DownscaleRequestDenied(now time.Time) { h.s.monitor.deniedDownscale = &deniedDownscale{ at: now, - current: current, - requested: requested, + current: *h.s.monitor.approved, + requested: h.s.monitor.ongoingRequest.requested, } + h.s.monitor.ongoingRequest = nil } func (h MonitorHandle) DownscaleRequestFailed(now time.Time) { diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 8117314aa..8d1795372 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -112,6 +112,7 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { }, // these don't really matter, because we're not using (*State).NextActions() PluginRequestTick: time.Second, + PluginDeniedRetryWait: time.Second, MonitorDeniedDownscaleCooldown: time.Second, MonitorRetryWait: time.Second, Warn: func(format string, args ...any) { @@ -145,11 +146,11 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { if c.deniedDownscale != nil { state.Monitor().Reset() state.Monitor().Active(true) - state.Monitor().StartingDownscaleRequest(now) - state.Monitor().DownscaleRequestDenied(now, c.vmUsing, *c.deniedDownscale) + state.Monitor().StartingDownscaleRequest(now, *c.deniedDownscale) + state.Monitor().DownscaleRequestDenied(now) } - actual := state.DesiredResourcesFromMetricsOrRequestedUpscaling(now) + actual, _ := state.DesiredResourcesFromMetricsOrRequestedUpscaling(now) if actual != c.expected { t.Errorf("expected output %+v but got %+v", c.expected, actual) } @@ -239,8 +240,9 @@ func createInitialState(opts ...initialStateOpt) *core.State { MemoryUsageFractionTarget: 0.5, }, PluginRequestTick: 5 * time.Second, + PluginDeniedRetryWait: 2 * time.Second, MonitorDeniedDownscaleCooldown: 5 * time.Second, - MonitorRetryWait: 5 * time.Second, + MonitorRetryWait: 3 * time.Second, Warn: func(string, ...any) {}, } @@ -293,7 +295,7 @@ func Test_NextActions(t *testing.T) { } // Thorough checks of a relatively simple flow - t.Run("BasicScaleupFlow", func(t *testing.T) { + t.Run("BasicScaleupAndDownFlow", func(t *testing.T) { warnings := []string{} clock := newFakeClock() state := createInitialState( @@ -316,18 +318,17 @@ func Test_NextActions(t *testing.T) { MemoryUsageBytes: 0.0, } state.UpdateMetrics(metrics) - + require.Equal(t, clock.elapsed(), 2*hundredMillis) // double-check that we agree about the desired resources - require.Equal( - t, - api.Resources{VCPU: 500, Mem: 2}, - state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.now), - ) + desiredResources, _ := state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.now) + require.Equal(t, api.Resources{VCPU: 500, Mem: 2}, desiredResources) + require.Empty(t, warnings) // Now that the initial scheduler request is done, and we have metrics that indicate // scale-up would be a good idea, we should be contacting the scheduler to get approval. 
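The clock helpers used throughout this test (newFakeClock, clock.inc, clock.elapsed) are defined elsewhere in state_test.go; a minimal sketch of the shape they imply, with the fixed start time being an assumption:

    type fakeClock struct {
        base time.Time
        now  time.Time
    }

    func newFakeClock() *fakeClock {
        t := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) // arbitrary fixed start
        return &fakeClock{base: t, now: t}
    }

    // inc advances the fake time without any real waiting.
    func (c *fakeClock) inc(d time.Duration) { c.now = c.now.Add(d) }

    // elapsed reports how much fake time has passed since the clock was created.
    func (c *fakeClock) elapsed() time.Duration { return c.now.Sub(c.base) }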
actions := state.NextActions(clock.now) require.Equal(t, core.ActionSet{ + Wait: nil, PluginRequest: &core.ActionPluginRequest{ LastPermit: &api.Resources{VCPU: 250, Mem: 1}, Target: api.Resources{VCPU: 500, Mem: 2}, @@ -337,7 +338,6 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - Wait: nil, // and, don't need to wait because plugin should be ongoing }, actions) require.Empty(t, warnings) // start the request: @@ -358,11 +358,17 @@ func Test_NextActions(t *testing.T) { ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, })) require.Empty(t, warnings) - require.Equal(t, clock.elapsed(), 2*hundredMillis) + require.Equal(t, clock.elapsed(), 3*hundredMillis) // Scheduler approval is done, now we should be making the request to NeonVM actions = state.NextActions(clock.now) require.Equal(t, core.ActionSet{ + // expected to make a scheduler request every 5s; it's been 100ms since the last one, so + // if the NeonVM request didn't come back in time, we'd need to get woken up to start + // the next scheduler request. + Wait: &core.ActionWait{ + Duration: 5*time.Second - hundredMillis, + }, PluginRequest: nil, NeonVMRequest: &core.ActionNeonVMRequest{ Current: api.Resources{VCPU: 250, Mem: 1}, @@ -370,7 +376,6 @@ func Test_NextActions(t *testing.T) { }, MonitorDownscale: nil, MonitorUpscale: nil, - Wait: nil, // don't need to wait because NeonVM should be ongoing }, actions) require.Empty(t, warnings) // start the request: @@ -378,7 +383,9 @@ func Test_NextActions(t *testing.T) { clock.inc(hundredMillis) // should have nothing more to do; waiting on NeonVM request to come back require.Equal(t, core.ActionSet{ - Wait: nil, + Wait: &core.ActionWait{ + Duration: 5*time.Second - 2*hundredMillis, + }, PluginRequest: nil, NeonVMRequest: nil, MonitorDownscale: nil, @@ -387,11 +394,14 @@ func Test_NextActions(t *testing.T) { require.Empty(t, warnings) state.NeonVM().RequestSuccessful(clock.now) require.Empty(t, warnings) - require.Equal(t, clock.elapsed(), 3*hundredMillis) + require.Equal(t, clock.elapsed(), 4*hundredMillis) // NeonVM change is done, now we should finish by notifying the vm-monitor actions = state.NextActions(clock.now) require.Equal(t, core.ActionSet{ + Wait: &core.ActionWait{ + Duration: 5*time.Second - 2*hundredMillis, // same as previous, clock hasn't changed + }, PluginRequest: nil, NeonVMRequest: nil, MonitorDownscale: nil, @@ -399,23 +409,166 @@ func Test_NextActions(t *testing.T) { Current: api.Resources{VCPU: 250, Mem: 1}, Target: api.Resources{VCPU: 500, Mem: 2}, }, - Wait: nil, // don't need to wait because monitor request should be ongoing }, actions) require.Empty(t, warnings) // start the request: - state.Monitor().StartingUpscaleRequest(clock.now) + state.Monitor().StartingUpscaleRequest(clock.now, actions.MonitorUpscale.Target) clock.inc(hundredMillis) // should have nothing more to do; waiting on vm-monitor request to come back require.Equal(t, core.ActionSet{ - Wait: nil, + Wait: &core.ActionWait{ + Duration: 5*time.Second - 3*hundredMillis, + }, PluginRequest: nil, NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, }, state.NextActions(clock.now)) require.Empty(t, warnings) - state.Monitor().UpscaleRequestSuccessful(clock.now, actions.MonitorUpscale.Target) + state.Monitor().UpscaleRequestSuccessful(clock.now) + require.Empty(t, warnings) + require.Equal(t, clock.elapsed(), 5*hundredMillis) + + // And now, double-check that there's no sneaky follow-up actions before we change the + // metrics + actions = 
state.NextActions(clock.now) + require.Equal(t, core.ActionSet{ + Wait: &core.ActionWait{ + Duration: 5*time.Second - 3*hundredMillis, // same as previous, clock hasn't changed + }, + PluginRequest: nil, + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + }, actions) + require.Empty(t, warnings) + + // ---- Scaledown !!! ---- + + clock.inc(hundredMillis) + require.Equal(t, clock.elapsed(), 6*hundredMillis) + + // Set metrics back so that desired resources should now be zero + metrics = api.Metrics{ + LoadAverage1Min: 0.0, + LoadAverage5Min: 0.0, // unused + MemoryUsageBytes: 0.0, + } + state.UpdateMetrics(metrics) + // double-check that we agree about the new desired resources + desiredResources, _ = state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.now) + require.Equal(t, api.Resources{VCPU: 250, Mem: 1}, desiredResources) + require.Empty(t, warnings) + + // First step in downscaling is getting approval from the vm-monitor: + actions = state.NextActions(clock.now) + require.Equal(t, core.ActionSet{ + Wait: &core.ActionWait{ + Duration: 5*time.Second - 4*hundredMillis, + }, + PluginRequest: nil, + NeonVMRequest: nil, + MonitorDownscale: &core.ActionMonitorDownscale{ + Current: api.Resources{VCPU: 500, Mem: 2}, + Target: api.Resources{VCPU: 250, Mem: 1}, + }, + MonitorUpscale: nil, + }, actions) + require.Empty(t, warnings) + state.Monitor().StartingDownscaleRequest(clock.now, actions.MonitorDownscale.Target) + clock.inc(hundredMillis) + // should have nothing more to do; waiting on vm-monitor request to come back + require.Equal(t, core.ActionSet{ + Wait: &core.ActionWait{ + Duration: 5*time.Second - 5*hundredMillis, + }, + PluginRequest: nil, + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + }, state.NextActions(clock.now)) + require.Empty(t, warnings) + state.Monitor().DownscaleRequestAllowed(clock.now) + require.Empty(t, warnings) + require.Equal(t, clock.elapsed(), 7*hundredMillis) + + // After getting approval from the vm-monitor, we make the request to NeonVM to carry it out + actions = state.NextActions(clock.now) + require.Equal(t, core.ActionSet{ + Wait: &core.ActionWait{ + Duration: 5*time.Second - 5*hundredMillis, // same as previous, clock hasn't changed + }, + PluginRequest: nil, + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: api.Resources{VCPU: 500, Mem: 2}, + Target: api.Resources{VCPU: 250, Mem: 1}, + }, + MonitorDownscale: nil, + MonitorUpscale: nil, + }, actions) + require.Empty(t, warnings) + state.NeonVM().StartingRequest(clock.now, actions.NeonVMRequest.Target) + clock.inc(hundredMillis) + // should have nothing more to do; waiting on NeonVM request to come back + require.Equal(t, core.ActionSet{ + Wait: &core.ActionWait{ + Duration: 5*time.Second - 6*hundredMillis, + }, + PluginRequest: nil, + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + }, state.NextActions(clock.now)) + require.Empty(t, warnings) + state.NeonVM().RequestSuccessful(clock.now) + require.Empty(t, warnings) + require.Equal(t, clock.elapsed(), 8*hundredMillis) + + // Request to NeonVM completed, it's time to inform the scheduler plugin: + actions = state.NextActions(clock.now) + require.Equal(t, core.ActionSet{ + Wait: nil, + PluginRequest: &core.ActionPluginRequest{ + LastPermit: &api.Resources{VCPU: 500, Mem: 2}, + Target: api.Resources{VCPU: 250, Mem: 1}, + Metrics: &metrics, + }, + // shouldn't have anything to say to the other components + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + }, 
actions) + require.Empty(t, warnings) + state.Plugin().StartingRequest(clock.now, actions.PluginRequest.Target) + clock.inc(hundredMillis) + // should have nothing more to do; waiting on plugin request to come back + require.Equal(t, core.ActionSet{ + Wait: nil, // and don't need to wait, because plugin req is ongoing + PluginRequest: nil, + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + }, state.NextActions(clock.now)) + require.Empty(t, warnings) + require.NoError(t, state.Plugin().RequestSuccessful(clock.now, api.PluginResponse{ + Permit: api.Resources{VCPU: 250, Mem: 1}, + Migrate: nil, + ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, + })) + require.Empty(t, warnings) + require.Equal(t, clock.elapsed(), 9*hundredMillis) + + // Finally, check there's no leftover actions: + actions = state.NextActions(clock.now) + require.Equal(t, core.ActionSet{ + Wait: &core.ActionWait{ + Duration: 5*time.Second - hundredMillis, // request that just finished was started 100ms ago + }, + PluginRequest: nil, + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + }, actions) require.Empty(t, warnings) - require.Equal(t, clock.elapsed(), 4*hundredMillis) }) } diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 3c1450786..047dbc6f5 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -97,7 +97,7 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge c.update(func(state *core.State) { logger.Info("Starting vm-monitor downscale request", zap.Any("action", action)) startTime = time.Now() - state.Monitor().StartingDownscaleRequest(startTime) + state.Monitor().StartingDownscaleRequest(startTime, action.Target) }) result, err := doSingleMonitorDownscaleRequest(ctx, ifaceLogger, monitor, action) @@ -124,12 +124,12 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge if !result.Ok { logger.Warn("vm-monitor denied downscale", logFields...) if unchanged { - state.Monitor().DownscaleRequestDenied(endTime, action.Current, action.Target) + state.Monitor().DownscaleRequestDenied(endTime) } } else { logger.Info("vm-monitor approved downscale", logFields...) if unchanged { - state.Monitor().DownscaleRequestAllowed(endTime, action.Target) + state.Monitor().DownscaleRequestAllowed(endTime) } } }) @@ -222,7 +222,7 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger c.update(func(state *core.State) { logger.Info("Starting vm-monitor upscale request", zap.Any("action", action)) startTime = time.Now() - state.Monitor().StartingUpscaleRequest(startTime) + state.Monitor().StartingUpscaleRequest(startTime, action.Target) }) err := doSingleMonitorUpscaleRequest(ctx, ifaceLogger, monitor, action) @@ -246,7 +246,7 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger logger.Info("vm-monitor upscale request successful", logFields...) 
if unchanged { - state.Monitor().UpscaleRequestSuccessful(endTime, action.Target) + state.Monitor().UpscaleRequestSuccessful(endTime) } }) } diff --git a/pkg/agent/executor/exec_plugin.go b/pkg/agent/executor/exec_plugin.go index bcf5b7670..f1d457350 100644 --- a/pkg/agent/executor/exec_plugin.go +++ b/pkg/agent/executor/exec_plugin.go @@ -77,6 +77,7 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger * // Try to acquire the request lock, but if something happens while we're waiting, we'll // abort & retry on the next loop iteration (or maybe not, if last.actions changed). + // FIXME: remove request lock select { case <-ctx.Done(): return diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index f463f92bb..09ba8c3ad 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -314,6 +314,7 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util executorCore := executor.NewExecutorCore(coreExecLogger.Named("state"), r.vm, executor.Config{ DefaultScalingConfig: r.global.config.Scaling.DefaultConfig, PluginRequestTick: time.Second * time.Duration(r.global.config.Scheduler.RequestAtLeastEverySeconds), + PluginDeniedRetryWait: time.Second * time.Duration(r.global.config.Scheduler.RetryDeniedUpscaleSeconds), MonitorDeniedDownscaleCooldown: time.Second * time.Duration(r.global.config.Monitor.RetryDeniedDownscaleSeconds), MonitorRetryWait: time.Second * time.Duration(r.global.config.Monitor.RetryFailedRequestSeconds), Warn: func(msg string, args ...any) { From 42929b68076e7b4bfed1dc750e515db147d35267 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 11:39:53 -0700 Subject: [PATCH 15/59] agent/executor: Remove request lock usage --- pkg/agent/execbridge.go | 32 ++++------------- pkg/agent/executor/exec_monitor.go | 57 ------------------------------ pkg/agent/executor/exec_neonvm.go | 25 ------------- pkg/agent/executor/exec_plugin.go | 26 -------------- 4 files changed, 7 insertions(+), 133 deletions(-) diff --git a/pkg/agent/execbridge.go b/pkg/agent/execbridge.go index 50d56a64c..0761a3dd1 100644 --- a/pkg/agent/execbridge.go +++ b/pkg/agent/execbridge.go @@ -12,7 +12,6 @@ import ( "github.com/neondatabase/autoscaling/pkg/agent/executor" "github.com/neondatabase/autoscaling/pkg/api" - "github.com/neondatabase/autoscaling/pkg/util" ) var ( @@ -39,11 +38,6 @@ func (iface *execPluginInterface) EmptyID() string { return "" } -// RequestLock implements executor.PluginInterface -func (iface *execPluginInterface) RequestLock() util.ChanMutex { - return iface.runner.requestLock -} - // GetHandle implements executor.PluginInterface func (iface *execPluginInterface) GetHandle() executor.PluginHandle { scheduler := iface.runner.scheduler.Load() @@ -101,11 +95,6 @@ func makeNeonVMInterface(r *Runner) *execNeonVMInterface { return &execNeonVMInterface{runner: r} } -// RequestLock implements executor.NeonVMInterface -func (iface *execNeonVMInterface) RequestLock() util.ChanMutex { - return iface.runner.requestLock -} - // Request implements executor.NeonVMInterface func (iface *execNeonVMInterface) Request(ctx context.Context, logger *zap.Logger, current, target api.Resources) error { iface.runner.recordResourceChange(current, target, iface.runner.global.metrics.neonvmRequestedChange) @@ -123,13 +112,12 @@ func (iface *execNeonVMInterface) Request(ctx context.Context, logger *zap.Logge //////////////////////////////////////////////////// type execMonitorInterface struct { - runner *Runner - core *executor.ExecutorCore - requestLock 
util.ChanMutex + runner *Runner + core *executor.ExecutorCore } func makeMonitorInterface(r *Runner, core *executor.ExecutorCore) *execMonitorInterface { - return &execMonitorInterface{runner: r, core: core, requestLock: util.NewChanMutex()} + return &execMonitorInterface{runner: r, core: core} } // EmptyID implements executor.MonitorInterface @@ -145,26 +133,20 @@ func (iface *execMonitorInterface) GetHandle() executor.MonitorHandle { } return &execMonitorHandle{ - runner: iface.runner, - dispatcher: dispatcher, - requestLock: iface.requestLock, + runner: iface.runner, + dispatcher: dispatcher, } } type execMonitorHandle struct { - runner *Runner - dispatcher *Dispatcher - requestLock util.ChanMutex + runner *Runner + dispatcher *Dispatcher } func (h *execMonitorHandle) ID() string { panic("todo") } -func (h *execMonitorHandle) RequestLock() util.ChanMutex { - return h.requestLock -} - func (h *execMonitorHandle) Downscale( ctx context.Context, logger *zap.Logger, diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 047dbc6f5..9c0ee9817 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -19,7 +19,6 @@ type MonitorInterface interface { type MonitorHandle interface { ID() string - RequestLock() util.ChanMutex Downscale(_ context.Context, _ *zap.Logger, current, target api.Resources) (*api.DownscaleResult, error) Upscale(_ context.Context, _ *zap.Logger, current, target api.Resources) error } @@ -27,19 +26,9 @@ type MonitorHandle interface { func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logger *zap.Logger) { var ( updates util.BroadcastReceiver = c.updates.NewReceiver() - requestLock util.ChanMutex = util.NewChanMutex() ifaceLogger *zap.Logger = logger.Named("client") ) - holdingRequestLock := false - releaseRequestLockIfHolding := func() { - if holdingRequestLock { - requestLock.Unlock() - holdingRequestLock = false - } - } - defer releaseRequestLockIfHolding() - // meant to be called while holding c's lock idUnchanged := func(current string) bool { if h := c.clients.Monitor.GetHandle(); h != nil { @@ -51,8 +40,6 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge last := c.getActions() for { - releaseRequestLockIfHolding() - // Always receive an update if there is one. This helps with reliability (better guarantees // about not missing updates) and means that the switch statements can be simpler. select { @@ -77,22 +64,6 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge monitor := c.clients.Monitor.GetHandle() - if monitor != nil { - requestLock = monitor.RequestLock() - - // Try to acquire the request lock, but if something happens while we're waiting, we'll - // abort & retry on the next loop iteration (or maybe not, if last.actions changed). - select { - case <-ctx.Done(): - return - case <-updates.Wait(): - // NB: don't .Awake(); allow that to be handled at the top of the loop. 
- continue - case <-requestLock.WaitLock(): - holdingRequestLock = true - } - } - var startTime time.Time c.update(func(state *core.State) { logger.Info("Starting vm-monitor downscale request", zap.Any("action", action)) @@ -152,19 +123,9 @@ func doSingleMonitorDownscaleRequest( func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger *zap.Logger) { var ( updates util.BroadcastReceiver = c.updates.NewReceiver() - requestLock util.ChanMutex = util.NewChanMutex() ifaceLogger *zap.Logger = logger.Named("client") ) - holdingRequestLock := false - releaseRequestLockIfHolding := func() { - if holdingRequestLock { - requestLock.Unlock() - holdingRequestLock = false - } - } - defer releaseRequestLockIfHolding() - // meant to be called while holding c's lock idUnchanged := func(current string) bool { if h := c.clients.Monitor.GetHandle(); h != nil { @@ -176,8 +137,6 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger last := c.getActions() for { - releaseRequestLockIfHolding() - // Always receive an update if there is one. This helps with reliability (better guarantees // about not missing updates) and means that the switch statements can be simpler. select { @@ -202,22 +161,6 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger monitor := c.clients.Monitor.GetHandle() - if monitor != nil { - requestLock = monitor.RequestLock() - - // Try to acquire the request lock, but if something happens while we're waiting, we'll - // abort & retry on the next loop iteration (or maybe not, if last.actions changed). - select { - case <-ctx.Done(): - return - case <-updates.Wait(): - // NB: don't .Awake(); allow that to be handled at the top of the loop. - continue - case <-requestLock.WaitLock(): - holdingRequestLock = true - } - } - var startTime time.Time c.update(func(state *core.State) { logger.Info("Starting vm-monitor upscale request", zap.Any("action", action)) diff --git a/pkg/agent/executor/exec_neonvm.go b/pkg/agent/executor/exec_neonvm.go index 62f89d988..2479b8f34 100644 --- a/pkg/agent/executor/exec_neonvm.go +++ b/pkg/agent/executor/exec_neonvm.go @@ -12,30 +12,17 @@ import ( ) type NeonVMInterface interface { - RequestLock() util.ChanMutex Request(_ context.Context, _ *zap.Logger, current, target api.Resources) error } func (c *ExecutorCoreWithClients) DoNeonVMRequests(ctx context.Context, logger *zap.Logger) { var ( updates util.BroadcastReceiver = c.updates.NewReceiver() - requestLock util.ChanMutex = c.clients.NeonVM.RequestLock() ifaceLogger *zap.Logger = logger.Named("client") ) - holdingRequestLock := false - releaseRequestLockIfHolding := func() { - if holdingRequestLock { - requestLock.Unlock() - holdingRequestLock = false - } - } - defer releaseRequestLockIfHolding() - last := c.getActions() for { - releaseRequestLockIfHolding() - // Always receive an update if there is one. This helps with reliability (better guarantees // about not missing updates) and means that the switch statements can be simpler. select { @@ -58,18 +45,6 @@ func (c *ExecutorCoreWithClients) DoNeonVMRequests(ctx context.Context, logger * action := *last.actions.NeonVMRequest - // Try to acquire the request lock, but if something happens while we're waiting, we'll - // abort & retry on the next loop iteration (or maybe not, if last.actions changed). - select { - case <-ctx.Done(): - return - case <-updates.Wait(): - // NB: don't .Awake(); allow that to be handled at the top of the loop. 
- continue - case <-requestLock.WaitLock(): - holdingRequestLock = true - } - var startTime time.Time c.update(func(state *core.State) { logger.Info("Starting NeonVM request", zap.Any("action", action)) diff --git a/pkg/agent/executor/exec_plugin.go b/pkg/agent/executor/exec_plugin.go index f1d457350..4f2f4826d 100644 --- a/pkg/agent/executor/exec_plugin.go +++ b/pkg/agent/executor/exec_plugin.go @@ -14,7 +14,6 @@ import ( type PluginInterface interface { EmptyID() string - RequestLock() util.ChanMutex GetHandle() PluginHandle } @@ -26,19 +25,9 @@ type PluginHandle interface { func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger *zap.Logger) { var ( updates util.BroadcastReceiver = c.updates.NewReceiver() - requestLock util.ChanMutex = c.clients.Plugin.RequestLock() ifaceLogger *zap.Logger = logger.Named("client") ) - holdingRequestLock := false - releaseRequestLockIfHolding := func() { - if holdingRequestLock { - requestLock.Unlock() - holdingRequestLock = false - } - } - defer releaseRequestLockIfHolding() - idUnchanged := func(current string) bool { if h := c.clients.Plugin.GetHandle(); h != nil { return current == h.ID() @@ -49,8 +38,6 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger * last := c.getActions() for { - releaseRequestLockIfHolding() - // Always receive an update if there is one. This helps with reliability (better guarantees // about not missing updates) and means that the switch statements can be simpler. select { @@ -75,19 +62,6 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger * pluginIface := c.clients.Plugin.GetHandle() - // Try to acquire the request lock, but if something happens while we're waiting, we'll - // abort & retry on the next loop iteration (or maybe not, if last.actions changed). - // FIXME: remove request lock - select { - case <-ctx.Done(): - return - case <-updates.Wait(): - // NB: don't .Awake(); allow that to be handled at the top of the loop. - continue - case <-requestLock.WaitLock(): - holdingRequestLock = true - } - // update the state to indicate that the request is starting. 
var startTime time.Time c.update(func(state *core.State) { From 6bfb7ceca2616af4c818925cf729ac526c98d791 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 11:40:44 -0700 Subject: [PATCH 16/59] agent/executor: Simplify request-if-iface-non-nil logic --- pkg/agent/executor/exec_monitor.go | 42 ++++++++++-------------------- pkg/agent/executor/exec_plugin.go | 22 ++++++---------- 2 files changed, 22 insertions(+), 42 deletions(-) diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 9c0ee9817..3ddf3c3fe 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -71,7 +71,14 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge state.Monitor().StartingDownscaleRequest(startTime, action.Target) }) - result, err := doSingleMonitorDownscaleRequest(ctx, ifaceLogger, monitor, action) + var result *api.DownscaleResult + var err error + + if monitor != nil { + result, err = monitor.Downscale(ctx, ifaceLogger, action.Current, action.Target) + } else { + err = errors.New("No currently active vm-monitor connection") + } endTime := time.Now() c.update(func(state *core.State) { @@ -107,19 +114,6 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge } } -func doSingleMonitorDownscaleRequest( - ctx context.Context, - logger *zap.Logger, - iface MonitorHandle, - action core.ActionMonitorDownscale, -) (*api.DownscaleResult, error) { - if iface == nil { - return nil, errors.New("No currently active vm-monitor connection") - } - - return iface.Downscale(ctx, logger, action.Current, action.Target) -} - func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger *zap.Logger) { var ( updates util.BroadcastReceiver = c.updates.NewReceiver() @@ -168,7 +162,12 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger state.Monitor().StartingUpscaleRequest(startTime, action.Target) }) - err := doSingleMonitorUpscaleRequest(ctx, ifaceLogger, monitor, action) + var err error + if monitor != nil { + err = monitor.Upscale(ctx, ifaceLogger, action.Current, action.Target) + } else { + err = errors.New("No currently active vm-monitor connection") + } endTime := time.Now() c.update(func(state *core.State) { @@ -194,16 +193,3 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger }) } } - -func doSingleMonitorUpscaleRequest( - ctx context.Context, - logger *zap.Logger, - iface MonitorHandle, - action core.ActionMonitorUpscale, -) error { - if iface == nil { - return errors.New("No currently active vm-monitor connection") - } - - return iface.Upscale(ctx, logger, action.Current, action.Target) -} diff --git a/pkg/agent/executor/exec_plugin.go b/pkg/agent/executor/exec_plugin.go index 4f2f4826d..735b9b235 100644 --- a/pkg/agent/executor/exec_plugin.go +++ b/pkg/agent/executor/exec_plugin.go @@ -70,7 +70,14 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger * state.Plugin().StartingRequest(startTime, action.Target) }) - resp, err := doSinglePluginRequest(ctx, ifaceLogger, pluginIface, action) + var resp *api.PluginResponse + var err error + + if pluginIface != nil { + resp, err = pluginIface.Request(ctx, ifaceLogger, action.LastPermit, action.Target, action.Metrics) + } else { + err = errors.New("No currently enabled plugin handle") + } endTime := time.Now() c.update(func(state *core.State) { @@ -98,16 +105,3 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, 
logger * }) } } - -func doSinglePluginRequest( - ctx context.Context, - logger *zap.Logger, - iface PluginHandle, - action core.ActionPluginRequest, -) (*api.PluginResponse, error) { - if iface == nil { - return nil, errors.New("No currently enabled plugin handle") - } - - return iface.Request(ctx, logger, action.LastPermit, action.Target, action.Metrics) -} From 6acf619f029994e4f69f0de49604cfee413f1595 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 11:47:53 -0700 Subject: [PATCH 17/59] agent: fix unimplemented (*execMonitorHandle).ID() --- pkg/agent/dispatcher.go | 10 ++++++++++ pkg/agent/execbridge.go | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pkg/agent/dispatcher.go b/pkg/agent/dispatcher.go index ac61f2567..682864936 100644 --- a/pkg/agent/dispatcher.go +++ b/pkg/agent/dispatcher.go @@ -12,6 +12,7 @@ import ( "sync/atomic" "time" + "github.com/google/uuid" "go.uber.org/zap" "nhooyr.io/websocket" "nhooyr.io/websocket/wsjson" @@ -39,6 +40,8 @@ type MonitorResult struct { // The Dispatcher is the main object managing the websocket connection to the // monitor. For more information on the protocol, see pkg/api/types.go type Dispatcher struct { + uniqueID string + // The underlying connection we are managing conn *websocket.Conn @@ -102,6 +105,7 @@ func NewDispatcher( } disp := &Dispatcher{ + uniqueID: uuid.NewString(), conn: conn, waiters: make(map[uint64]util.SignalSender[waiterResult]), runner: runner, @@ -256,6 +260,12 @@ func connectToMonitor( return c, &resp.Version, nil } +// UniqueID returns the unique ID assigned to this Dispatcher +// (it's a UUID) +func (disp *Dispatcher) UniqueID() string { + return disp.uniqueID +} + // ExitSignal returns a channel that is closed when the Dispatcher is no longer running func (disp *Dispatcher) ExitSignal() <-chan struct{} { return disp.exitSignal diff --git a/pkg/agent/execbridge.go b/pkg/agent/execbridge.go index 0761a3dd1..bdc6a8d3e 100644 --- a/pkg/agent/execbridge.go +++ b/pkg/agent/execbridge.go @@ -144,7 +144,7 @@ type execMonitorHandle struct { } func (h *execMonitorHandle) ID() string { - panic("todo") + return h.dispatcher.UniqueID() } func (h *execMonitorHandle) Downscale( From f490ebddfe1687572d19c3da67284db1bec84c08 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 13:15:42 -0700 Subject: [PATCH 18/59] agent/executor: also log ActionSet returned --- pkg/agent/executor/core.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/agent/executor/core.go b/pkg/agent/executor/core.go index 81c9573ec..1aa34d552 100644 --- a/pkg/agent/executor/core.go +++ b/pkg/agent/executor/core.go @@ -73,6 +73,7 @@ func (c *ExecutorCore) getActions() timedActions { now := time.Now() c.stateLogger.Info("Recalculating ActionSet", zap.Time("now", now), zap.Any("state", c.core.Dump())) c.actions = &timedActions{calculatedAt: now, actions: c.core.NextActions(now)} + c.stateLogger.Info("New ActionSet", zap.Time("now", now), zap.Any("actions", c.actions.actions)) } return *c.actions From 2dc63d36dbbdd02ef10d237cbbab2d46d187e0e5 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 14:44:07 -0700 Subject: [PATCH 19/59] agent/core: [temporary] remove unused initialStateOpt constructors --- pkg/agent/core/state_test.go | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 8d1795372..c397d6855 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -173,27 +173,6 @@ type 
initialStateOpt struct { postCreate func(*api.VmInfo, *core.Config) } -func withComputeUnit(cu api.Resources) (o initialStateOpt) { - o.preCreate = func(p *initialStateParams) { p.computeUnit = cu } - return -} - -func withSizeRange(minCU, maxCU uint16) (o initialStateOpt) { - o.preCreate = func(p *initialStateParams) { - p.minCU = minCU - p.maxCU = maxCU - } - return -} - -func withVMUsing(res api.Resources) (o initialStateOpt) { - o.postCreate = func(vm *api.VmInfo, _ *core.Config) { - vm.Cpu.Use = res.VCPU - vm.Mem.Use = res.Mem - } - return -} - func withStoredWarnings(warnings *[]string) (o initialStateOpt) { o.postCreate = func(_ *api.VmInfo, config *core.Config) { config.Warn = func(format string, args ...any) { From 77bd72c351fca6b3bd9224a3a251c9dd05a9b929 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 14:55:06 -0700 Subject: [PATCH 20/59] agent: clean up unused fields, propagate VM updates --- pkg/agent/core/state.go | 4 + pkg/agent/executor/core.go | 17 +++ pkg/agent/globalstate.go | 21 ++-- pkg/agent/runner.go | 235 +++++++++---------------------------- 4 files changed, 88 insertions(+), 189 deletions(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 2abe15e6a..de3d367dc 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -782,6 +782,10 @@ func (s *State) Debug(enabled bool) { } func (s *State) UpdatedVM(vm api.VmInfo) { + // FIXME: overriding this is required right now because we trust that a successful request to + // NeonVM means the VM was already updated, which... isn't true, and otherwise we could run into + // sync issues. + vm.SetUsing(s.vm.Using()) s.vm = vm } diff --git a/pkg/agent/executor/core.go b/pkg/agent/executor/core.go index 1aa34d552..8b60dcee7 100644 --- a/pkg/agent/executor/core.go +++ b/pkg/agent/executor/core.go @@ -90,6 +90,16 @@ func (c *ExecutorCore) update(with func(*core.State)) { with(c.core) } +// may change in the future +type StateDump = core.StateDump + +// StateDump copies and returns the current state inside the executor +func (c *ExecutorCore) StateDump() StateDump { + c.mu.Lock() + defer c.mu.Unlock() + return c.core.Dump() +} + // Updater returns a handle on the object used for making external changes to the ExecutorCore, // beyond what's provided by the various client (ish) interfaces func (c *ExecutorCore) Updater() ExecutorCoreUpdater { @@ -108,6 +118,13 @@ func (c ExecutorCoreUpdater) UpdateMetrics(metrics api.Metrics, withLock func()) }) } +func (c ExecutorCoreUpdater) UpdatedVM(vm api.VmInfo, withLock func()) { + c.core.update(func(state *core.State) { + state.UpdatedVM(vm) + withLock() + }) +} + // NewScheduler updates the inner state, calling (*core.State).Plugin().NewScheduler() func (c ExecutorCoreUpdater) NewScheduler(withLock func()) { c.core.update(func(state *core.State) { diff --git a/pkg/agent/globalstate.go b/pkg/agent/globalstate.go index 34e17996a..415a03ac5 100644 --- a/pkg/agent/globalstate.go +++ b/pkg/agent/globalstate.go @@ -345,23 +345,20 @@ func (s *agentState) loggerForRunner(vmName, podName util.NamespacedName) *zap.L // NB: caller must set Runner.status after creation func (s *agentState) newRunner(vmInfo api.VmInfo, podName util.NamespacedName, podIP string, restartCount int) *Runner { return &Runner{ - global: s, - status: nil, // set by calller - schedulerRespondedWithMigration: false, + global: s, + status: nil, // set by caller shutdown: nil, // set by (*Runner).Run - vm: vmInfo, + vmName: vmInfo.NamespacedName(), podName: podName, podIP: podIP, + 
memSlotSize: vmInfo.Mem.SlotSize, lock: util.NewChanMutex(), - requestLock: util.NewChanMutex(), - - lastMetrics: nil, - scheduler: atomic.Pointer[Scheduler]{}, - monitor: atomic.Pointer[Dispatcher]{}, - computeUnit: nil, - lastApproved: nil, - lastSchedulerError: nil, + + executorStateDump: nil, // set by (*Runner).Run + + scheduler: atomic.Pointer[Scheduler]{}, + monitor: atomic.Pointer[Dispatcher]{}, backgroundWorkerCount: atomic.Int64{}, backgroundPanic: make(chan error), diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index 09ba8c3ad..b04edffa1 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -47,7 +47,6 @@ import ( "bytes" "context" "encoding/json" - "errors" "fmt" "io" "net/http" @@ -58,6 +57,7 @@ import ( "go.uber.org/zap" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ktypes "k8s.io/apimachinery/pkg/types" @@ -88,45 +88,19 @@ type Runner struct { // is set exactly once, by (*Runner).Run shutdown context.CancelFunc - // vm stores some common information about the VM. - // - // This field MUST NOT be read or updated without holding lock. - vm api.VmInfo + vmName util.NamespacedName podName util.NamespacedName podIP string - // schedulerRespondedWithMigration is true iff the scheduler has returned an api.PluginResponse - // indicating that it was prompted to start migrating the VM. - // - // This field MUST NOT be updated without holding BOTH lock and requestLock. - // - // This field MAY be read while holding EITHER lock or requestLock. - schedulerRespondedWithMigration bool - - // lock guards the values of all mutable fields. The immutable fields are: - // - global - // - status - // - podName - // - podIP - // - logger - // - backgroundPanic - // lock MUST NOT be held while interacting with the network. The appropriate synchronization to - // ensure we don't send conflicting requests is provided by requestLock. - lock util.ChanMutex + memSlotSize *resource.Quantity - // requestLock must be held during any request to the scheduler plugin or any patch request to - // NeonVM. - // - // requestLock MUST NOT be held while performing any interactions with the network, apart from - // those listed above. - requestLock util.ChanMutex + // lock guards the values of all mutable fields - namely, scheduler and monitor (which may be + // read without the lock, but the lock must be acquired to lock them). + lock util.ChanMutex - // lastMetrics stores the most recent metrics we've received from the VM - // - // This field is exclusively set by the getMetricsLoop background worker, and will never change - // from non-nil to nil. The data behind each pointer is immutable, but the value of the pointer - // itself is not. - lastMetrics *api.Metrics + // executorStateDump is set by (*Runner).Run and provides a way to get the state of the + // "executor" + executorStateDump func() executor.StateDump // scheduler is the current scheduler that we're communicating with, or nil if there isn't one. // Each scheduler's info field is immutable. When a scheduler is replaced, only the pointer @@ -135,19 +109,6 @@ type Runner struct { // monitor, if non nil, stores the current Dispatcher in use for communicating with the // vm-monitor monitor atomic.Pointer[Dispatcher] - // computeUnit is the latest Compute Unit reported by a scheduler. It may be nil, if we haven't - // been able to contact one yet. - // - // This field MUST NOT be updated without holding BOTH lock and requestLock. 
- computeUnit *api.Resources - - // lastApproved is the last resource allocation that a scheduler has approved. It may be nil, if - // we haven't been able to contact one yet. - lastApproved *api.Resources - - // lastSchedulerError provides the error that occurred - if any - during the most recent request - // to the current scheduler. This field is not nil only when scheduler is not nil. - lastSchedulerError error // backgroundWorkerCount tracks the current number of background workers. It is exclusively // updated by r.spawnBackgroundWorker @@ -196,16 +157,10 @@ type Scheduler struct { // RunnerState is the serializable state of the Runner, extracted by its State method type RunnerState struct { - PodIP string `json:"podIP"` - VM api.VmInfo `json:"vm"` - LastMetrics *api.Metrics `json:"lastMetrics"` - Scheduler *SchedulerState `json:"scheduler"` - ComputeUnit *api.Resources `json:"computeUnit"` - LastApproved *api.Resources `json:"lastApproved"` - LastSchedulerError error `json:"lastSchedulerError"` - BackgroundWorkerCount int64 `json:"backgroundWorkerCount"` - - SchedulerRespondedWithMigration bool `json:"migrationStarted"` + PodIP string `json:"podIP"` + ExecutorState executor.StateDump `json:"executorState"` + Scheduler *SchedulerState `json:"scheduler"` + BackgroundWorkerCount int64 `json:"backgroundWorkerCount"` } // SchedulerState is the state of a Scheduler, constructed as part of a Runner's State Method @@ -230,17 +185,17 @@ func (r *Runner) State(ctx context.Context) (*RunnerState, error) { } } + var executorState *executor.StateDump + if r.executorStateDump != nil /* may be nil if r.Run() hasn't fully started yet */ { + s := r.executorStateDump() + executorState = &s + } + return &RunnerState{ - LastMetrics: r.lastMetrics, - Scheduler: scheduler, - ComputeUnit: r.computeUnit, - LastApproved: r.lastApproved, - LastSchedulerError: r.lastSchedulerError, - VM: r.vm, PodIP: r.podIP, + ExecutorState: *executorState, + Scheduler: scheduler, BackgroundWorkerCount: r.backgroundWorkerCount.Load(), - - SchedulerRespondedWithMigration: r.schedulerRespondedWithMigration, }, nil } @@ -253,7 +208,7 @@ func (r *Runner) Spawn(ctx context.Context, logger *zap.Logger, vmInfoUpdated ut r.status.update(r.global, func(stat podStatus) podStatus { stat.endState = &podStatusEndState{ ExitKind: podStatusExitPanicked, - Error: fmt.Errorf("Runner %v panicked: %v", r.vm.NamespacedName(), err), + Error: fmt.Errorf("Runner %v panicked: %v", stat.vmInfo.NamespacedName(), err), Time: now, } return stat @@ -308,10 +263,16 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util schedulerWatch.Using(*scheduler) } + getVmInfo := func() api.VmInfo { + r.status.mu.Lock() + defer r.status.mu.Unlock() + return r.status.vmInfo + } + execLogger := logger.Named("exec") coreExecLogger := execLogger.Named("core") - executorCore := executor.NewExecutorCore(coreExecLogger.Named("state"), r.vm, executor.Config{ + executorCore := executor.NewExecutorCore(coreExecLogger, getVmInfo(), executor.Config{ DefaultScalingConfig: r.global.config.Scaling.DefaultConfig, PluginRequestTick: time.Second * time.Duration(r.global.config.Scheduler.RequestAtLeastEverySeconds), PluginDeniedRetryWait: time.Second * time.Duration(r.global.config.Scheduler.RetryDeniedUpscaleSeconds), @@ -322,6 +283,8 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util }, }) + r.executorStateDump = executorCore.StateDump + pluginIface := makePluginInterface(r, executorCore) neonvmIface := 
makeNeonVMInterface(r) monitorIface := makeMonitorInterface(r, executorCore) @@ -335,15 +298,26 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util logger.Info("Starting background workers") - // FIXME: make these timeouts/delays separately defined constants, or configurable + // FIXME: make this timeout/delay a separately defined constant, or configurable mainDeadlockChecker := r.lock.DeadlockChecker(250*time.Millisecond, time.Second) - reqDeadlockChecker := r.requestLock.DeadlockChecker(5*time.Second, time.Second) + r.spawnBackgroundWorker(ctx, logger, "deadlock checker", ignoreLogger(mainDeadlockChecker)) r.spawnBackgroundWorker(ctx, logger, "podStatus updater", func(c context.Context, l *zap.Logger) { r.status.periodicallyRefreshState(c, l, r.global) }) - r.spawnBackgroundWorker(ctx, logger, "deadlock checker (main)", ignoreLogger(mainDeadlockChecker)) - r.spawnBackgroundWorker(ctx, logger, "deadlock checker (request lock)", ignoreLogger(reqDeadlockChecker)) + r.spawnBackgroundWorker(ctx, logger, "VmInfo updater", func(c context.Context, l *zap.Logger) { + for { + select { + case <-ctx.Done(): + return + case <-vmInfoUpdated.Recv(): + vm := getVmInfo() + ecwc.Updater().UpdatedVM(vm, func() { + l.Info("VmInfo updated", zap.Any("vmInfo", vm)) + }) + } + } + }) r.spawnBackgroundWorker(ctx, logger, "track scheduler", func(c context.Context, l *zap.Logger) { r.trackSchedulerLoop(c, l, scheduler, schedulerWatch, func(withLock func()) { ecwc.Updater().NewScheduler(withLock) @@ -654,10 +628,8 @@ startScheduler: defer r.lock.Unlock() newScheduler(func() { - logger.Info(fmt.Sprintf("%s scheduler pod", verb)) - r.scheduler.Store(sched) - r.lastSchedulerError = nil + logger.Info(fmt.Sprintf("%s scheduler pod", verb)) }) return recvFatal @@ -814,8 +786,8 @@ func (r *Runner) doNeonVMRequest(ctx context.Context, target api.Resources) erro // FIXME: We should check the returned VM object here, in case the values are different. // // Also relevant: - _, err = r.global.vmClient.NeonvmV1().VirtualMachines(r.vm.Namespace). - Patch(requestCtx, r.vm.Name, ktypes.JSONPatchType, patchPayload, metav1.PatchOptions{}) + _, err = r.global.vmClient.NeonvmV1().VirtualMachines(r.vmName.Namespace). + Patch(requestCtx, r.vmName.Name, ktypes.JSONPatchType, patchPayload, metav1.PatchOptions{}) if err != nil { r.global.metrics.neonvmRequestsOutbound.WithLabelValues(fmt.Sprintf("[error: %s]", util.RootError(err))).Inc() @@ -849,7 +821,7 @@ func (r *Runner) recordResourceChange(current, target api.Resources, metrics res direction := getDirection(target.Mem > current.Mem) // Avoid floating-point inaccuracy. 
- byteTotal := r.vm.Mem.SlotSize.Value() * int64(abs.Mem) + byteTotal := r.memSlotSize.Value() * int64(abs.Mem) mib := int64(1 << 20) floatMB := float64(byteTotal/mib) + float64(byteTotal%mib)/float64(mib) @@ -864,7 +836,7 @@ func doMonitorDownscale( target api.Resources, ) (*api.DownscaleResult, error) { r := dispatcher.runner - rawResources := target.ConvertToAllocation(r.vm.Mem.SlotSize) + rawResources := target.ConvertToAllocation(r.memSlotSize) timeout := time.Second * time.Duration(r.global.config.Monitor.ResponseTimeoutSeconds) @@ -885,7 +857,7 @@ func doMonitorUpscale( target api.Resources, ) error { r := dispatcher.runner - rawResources := target.ConvertToAllocation(r.vm.Mem.SlotSize) + rawResources := target.ConvertToAllocation(r.memSlotSize) timeout := time.Second * time.Duration(r.global.config.Monitor.ResponseTimeoutSeconds) @@ -918,7 +890,7 @@ func (s *Scheduler) DoRequest( reqBody, err := json.Marshal(reqData) if err != nil { - return nil, s.handlePreRequestError(fmt.Errorf("Error encoding request JSON: %w", err)) + return nil, fmt.Errorf("Error encoding request JSON: %w", err) } timeout := time.Second * time.Duration(s.runner.global.config.Scaling.RequestTimeoutSeconds) @@ -929,7 +901,7 @@ func (s *Scheduler) DoRequest( request, err := http.NewRequestWithContext(reqCtx, http.MethodPost, url, bytes.NewReader(reqBody)) if err != nil { - return nil, s.handlePreRequestError(fmt.Errorf("Error building request to %q: %w", url, err)) + return nil, fmt.Errorf("Error building request to %q: %w", url, err) } request.Header.Set("content-type", "application/json") @@ -939,7 +911,7 @@ func (s *Scheduler) DoRequest( if err != nil { description := fmt.Sprintf("[error doing request: %s]", util.RootError(err)) s.runner.global.metrics.schedulerRequests.WithLabelValues(description).Inc() - return nil, s.handleRequestError(reqData, fmt.Errorf("Error doing request: %w", err)) + return nil, fmt.Errorf("Error doing request: %w", err) } defer response.Body.Close() @@ -947,113 +919,22 @@ func (s *Scheduler) DoRequest( respBody, err := io.ReadAll(response.Body) if err != nil { - var handle func(*api.AgentRequest, error) error - if response.StatusCode == 200 { - handle = s.handleRequestError - } else { - // if status != 200, fatal for the same reasons as the != 200 check lower down - handle = s.handleFatalError - } - - return nil, handle(reqData, fmt.Errorf("Error reading body for response: %w", err)) + return nil, fmt.Errorf("Error reading body for response: %w", err) } if response.StatusCode != 200 { // Fatal because 4XX implies our state doesn't match theirs, 5XX means we can't assume // current contents of the state, and anything other than 200, 4XX, or 5XX shouldn't happen - return nil, s.handleFatalError( - reqData, - fmt.Errorf("Received response status %d body %q", response.StatusCode, string(respBody)), - ) + return nil, fmt.Errorf("Received response status %d body %q", response.StatusCode, string(respBody)) } var respData api.PluginResponse if err := json.Unmarshal(respBody, &respData); err != nil { // Fatal because invalid JSON might also be semantically invalid - return nil, s.handleRequestError(reqData, fmt.Errorf("Bad JSON response: %w", err)) + return nil, fmt.Errorf("Bad JSON response: %w", err) } logger.Info("Received response from scheduler", zap.Any("response", respData)) return &respData, nil } - -// handlePreRequestError appropriately handles updating the Scheduler and its Runner's state to -// reflect that an error occurred. 
It returns the error passed to it -// -// This method will update s.runner.lastSchedulerError if s.runner.scheduler == s. -// -// This method MUST be called while holding s.runner.requestLock AND NOT s.runner.lock. -func (s *Scheduler) handlePreRequestError(err error) error { - if err == nil { - panic(errors.New("handlePreRequestError called with nil error")) - } - - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - if s.runner.scheduler.Load() == s { - s.runner.lastSchedulerError = err - } - - return err -} - -// handleRequestError appropriately handles updating the Scheduler and its Runner's state to reflect -// that an error occurred while making a request. It returns the error passed to it -// -// This method will update s.runner.{lastApproved,lastSchedulerError} if s.runner.scheduler == s. -// -// This method MUST be called while holding s.runner.requestLock AND NOT s.runner.lock. -func (s *Scheduler) handleRequestError(req *api.AgentRequest, err error) error { - if err == nil { - panic(errors.New("handleRequestError called with nil error")) - } - - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - if s.runner.scheduler.Load() == s { - s.runner.lastSchedulerError = err - - // Because downscaling s.runner.vm must be done before any request that decreases its - // resources, any request greater than the current usage must be an increase, which the - // scheduler may or may not have approved. So: If we decreased and the scheduler failed, we - // can't assume it didn't register the decrease. If we want to increase and the scheduler - // failed, we can't assume it *did* register the increase. In both cases, the registered - // state for a well-behaved scheduler will be >= our state. - // - // note: this is also replicated below, in handleFatalError. - lastApproved := s.runner.vm.Using() - s.runner.lastApproved = &lastApproved - } - - return err -} - -// handleError appropriately handles updating the Scheduler and its Runner's state to reflect that -// a fatal error occurred. It returns the error passed to it -// -// This method will update s.runner.{lastApproved,lastSchedulerError} if s.runner.scheduler == s, in -// addition to s.fatalError. -// -// This method MUST be called while holding s.runner.requestLock AND NOT s.runner.lock. -func (s *Scheduler) handleFatalError(req *api.AgentRequest, err error) error { - if err == nil { - panic(errors.New("handleFatalError called with nil error")) - } - - s.runner.lock.Lock() - defer s.runner.lock.Unlock() - - s.fatalError = err - - if s.runner.scheduler.Load() == s { - s.runner.lastSchedulerError = err - // for reasoning on lastApproved, see handleRequestError. 
- lastApproved := s.runner.vm.Using() - s.runner.lastApproved = &lastApproved - } - - return err -} From 5e1a38d87edb8764b30ee2d958ea07362609dbaf Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 15:22:22 -0700 Subject: [PATCH 21/59] agent: clean up unused fields, part 2 (scheduler info) --- pkg/agent/runner.go | 88 +++++++++------------------------------------ 1 file changed, 16 insertions(+), 72 deletions(-) diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index b04edffa1..fcdb98103 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -125,34 +125,6 @@ type Scheduler struct { // info holds the immutable information we use to connect to and describe the scheduler info schedwatch.SchedulerInfo - - // registered is true only once a call to this Scheduler's Register() method has been made - // - // All methods that make a request to the scheduler will first call Register() if registered is - // false. - // - // This field MUST NOT be updated without holding BOTH runner.requestLock AND runner.lock - // - // This field MAY be read while holding EITHER runner.requestLock OR runner.lock. - registered bool - - // fatalError is non-nil if an error occurred while communicating with the scheduler that we - // cannot recover from. - // - // Examples of fatal errors: - // - // * HTTP response status 4XX or 5XX - we don't know the plugin's state - // * Semantically invalid response - either a logic error occurred, or the plugin's state - // doesn't match ours - // - // This field MUST NOT be updated without holding BOTH runner.requestLock AND runner.lock. - // - // This field MAY be read while holding EITHER runner.requestLock OR runner.lock. - fatalError error - - // fatal is used for signalling that fatalError has been set (and so we should look for a new - // scheduler) - fatal util.SignalSender[struct{}] } // RunnerState is the serializable state of the Runner, extracted by its State method @@ -165,9 +137,7 @@ type RunnerState struct { // SchedulerState is the state of a Scheduler, constructed as part of a Runner's State Method type SchedulerState struct { - Info schedwatch.SchedulerInfo `json:"info"` - Registered bool `json:"registered"` - FatalError error `json:"fatalError"` + Info schedwatch.SchedulerInfo `json:"info"` } func (r *Runner) State(ctx context.Context) (*RunnerState, error) { @@ -179,9 +149,7 @@ func (r *Runner) State(ctx context.Context) (*RunnerState, error) { var scheduler *SchedulerState if sched := r.scheduler.Load(); sched != nil { scheduler = &SchedulerState{ - Info: sched.info, - Registered: sched.registered, - FatalError: sched.fatalError, + Info: sched.info, } } @@ -588,8 +556,6 @@ func (r *Runner) trackSchedulerLoop( minWait time.Duration = 5 * time.Second // minimum time we have to wait between scheduler starts okForNew <-chan time.Time // channel that sends when we've waited long enough for a new scheduler currentInfo schedwatch.SchedulerInfo - fatal util.SignalReceiver[struct{}] - failed bool ) if init == nil { @@ -603,50 +569,35 @@ startScheduler: lastStart = time.Now() okForNew = time.After(minWait) - failed = false // Set the current scheduler - fatal = func() util.SignalReceiver[struct{}] { - logger := logger.With(zap.Object("scheduler", currentInfo)) - + { verb := "Setting" if init == nil || init.UID != currentInfo.UID { verb = "Updating" } - sendFatal, recvFatal := util.NewSingleSignalPair[struct{}]() - sched := &Scheduler{ - runner: r, - info: currentInfo, - registered: false, - fatalError: nil, - fatal: sendFatal, + runner: r, + 
info: currentInfo, } - r.lock.Lock() - defer r.lock.Unlock() - - newScheduler(func() { - r.scheduler.Store(sched) - logger.Info(fmt.Sprintf("%s scheduler pod", verb)) - }) + func() { + r.lock.Lock() + defer r.lock.Unlock() - return recvFatal - }() + newScheduler(func() { + r.scheduler.Store(sched) + logger.Info(fmt.Sprintf("%s scheduler pod", verb), zap.Object("scheduler", currentInfo)) + }) + }() + } - // Start watching for the current scheduler to be deleted or have fatally errored + // Start watching for the current scheduler to be deleted for { select { case <-ctx.Done(): return - case <-fatal.Recv(): - logger.Info( - "Waiting for new scheduler because current fatally errored", - zap.Object("scheduler", currentInfo), - ) - failed = true - goto waitForNewScheduler case info := <-schedulerWatch.Deleted: matched := func() bool { r.lock.Lock() @@ -686,16 +637,9 @@ waitForNewScheduler: select { case <-okForNew: default: - var endingMode string - if failed { - endingMode = "failed" - } else { - endingMode = "ended" - } - // Not ready yet; let's log something about it: logger.Info( - fmt.Sprintf("Scheduler %s quickly. Respecting minimum delay before switching to a new one", endingMode), + "Scheduler ended quickly. Respecting minimum delay before switching to a new one", zap.Duration("activeTime", time.Since(lastStart)), zap.Duration("delay", minWait), ) select { From c4de84cb8057e4f6cc85ca05ee8d884caf71fd7a Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 16:23:45 -0700 Subject: [PATCH 22/59] move fakeClock to new testhelpers package --- pkg/agent/core/state_test.go | 123 ++++++++++++---------------- pkg/agent/core/testhelpers/clock.go | 41 ++++++++++ 2 files changed, 92 insertions(+), 72 deletions(-) create mode 100644 pkg/agent/core/testhelpers/clock.go diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index c397d6855..e0c4e06b4 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -13,6 +13,7 @@ import ( vmapi "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" "github.com/neondatabase/autoscaling/pkg/agent/core" + helpers "github.com/neondatabase/autoscaling/pkg/agent/core/testhelpers" "github.com/neondatabase/autoscaling/pkg/api" ) @@ -234,39 +235,17 @@ func createInitialState(opts ...initialStateOpt) *core.State { return core.NewState(vm, config) } -type fakeClock struct { - base time.Time - now time.Time -} - -func newFakeClock() *fakeClock { - base, err := time.Parse(time.RFC3339, "2000-01-01T00:00:00Z") // a nice round number, to make things easier - if err != nil { - panic(err) - } - - return &fakeClock{base: base, now: base} -} - -func (c *fakeClock) inc(duration time.Duration) { - c.now = c.now.Add(duration) -} - -func (c *fakeClock) elapsed() time.Duration { - return c.now.Sub(c.base) -} - func Test_NextActions(t *testing.T) { - simulateInitialSchedulerRequest := func(t *testing.T, state *core.State, clock *fakeClock, reqTime time.Duration) { + simulateInitialSchedulerRequest := func(t *testing.T, state *core.State, clock *helpers.FakeClock, reqTime time.Duration) { state.Plugin().NewScheduler() - actions := state.NextActions(clock.now) + actions := state.NextActions(clock.Now()) require.NotNil(t, actions.PluginRequest) action := actions.PluginRequest require.Nil(t, action.LastPermit) - state.Plugin().StartingRequest(clock.now, action.Target) - clock.inc(reqTime) - require.NoError(t, state.Plugin().RequestSuccessful(clock.now, api.PluginResponse{ + state.Plugin().StartingRequest(clock.Now(), action.Target) +
clock.Inc(reqTime) + require.NoError(t, state.Plugin().RequestSuccessful(clock.Now(), api.PluginResponse{ Permit: action.Target, Migrate: nil, ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, // TODO: make this configurable... somehow. @@ -276,7 +255,7 @@ func Test_NextActions(t *testing.T) { // Thorough checks of a relatively simple flow t.Run("BasicScaleupAndDownFlow", func(t *testing.T) { warnings := []string{} - clock := newFakeClock() + clock := helpers.NewFakeClock() state := createInitialState( withStoredWarnings(&warnings), ) @@ -290,22 +269,22 @@ func Test_NextActions(t *testing.T) { require.Equal(t, warnings, []string{"Can't determine desired resources because compute unit hasn't been set yet"}) warnings = nil // reset - clock.inc(hundredMillis) + clock.Inc(hundredMillis) metrics := api.Metrics{ LoadAverage1Min: 0.3, LoadAverage5Min: 0.0, // unused MemoryUsageBytes: 0.0, } state.UpdateMetrics(metrics) - require.Equal(t, clock.elapsed(), 2*hundredMillis) + require.Equal(t, clock.Elapsed(), 2*hundredMillis) // double-check that we agree about the desired resources - desiredResources, _ := state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.now) + desiredResources, _ := state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.Now()) require.Equal(t, api.Resources{VCPU: 500, Mem: 2}, desiredResources) require.Empty(t, warnings) // Now that the initial scheduler request is done, and we have metrics that indicate // scale-up would be a good idea, we should be contacting the scheduler to get approval. - actions := state.NextActions(clock.now) + actions := state.NextActions(clock.Now()) require.Equal(t, core.ActionSet{ Wait: nil, PluginRequest: &core.ActionPluginRequest{ @@ -320,8 +299,8 @@ func Test_NextActions(t *testing.T) { }, actions) require.Empty(t, warnings) // start the request: - state.Plugin().StartingRequest(clock.now, actions.PluginRequest.Target) - clock.inc(hundredMillis) + state.Plugin().StartingRequest(clock.Now(), actions.PluginRequest.Target) + clock.Inc(hundredMillis) // should have nothing more to do; waiting on plugin request to come back require.Equal(t, core.ActionSet{ Wait: nil, @@ -329,18 +308,18 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.now)) + }, state.NextActions(clock.Now())) require.Empty(t, warnings) - require.NoError(t, state.Plugin().RequestSuccessful(clock.now, api.PluginResponse{ + require.NoError(t, state.Plugin().RequestSuccessful(clock.Now(), api.PluginResponse{ Permit: api.Resources{VCPU: 500, Mem: 2}, Migrate: nil, ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, })) require.Empty(t, warnings) - require.Equal(t, clock.elapsed(), 3*hundredMillis) + require.Equal(t, clock.Elapsed(), 3*hundredMillis) // Scheduler approval is done, now we should be making the request to NeonVM - actions = state.NextActions(clock.now) + actions = state.NextActions(clock.Now()) require.Equal(t, core.ActionSet{ // expected to make a scheduler request every 5s; it's been 100ms since the last one, so // if the NeonVM request didn't come back in time, we'd need to get woken up to start @@ -358,8 +337,8 @@ func Test_NextActions(t *testing.T) { }, actions) require.Empty(t, warnings) // start the request: - state.NeonVM().StartingRequest(clock.now, actions.NeonVMRequest.Target) - clock.inc(hundredMillis) + state.NeonVM().StartingRequest(clock.Now(), actions.NeonVMRequest.Target) + clock.Inc(hundredMillis) // should have nothing more to do; waiting on NeonVM request to come back 
require.Equal(t, core.ActionSet{ Wait: &core.ActionWait{ @@ -369,14 +348,14 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.now)) + }, state.NextActions(clock.Now())) require.Empty(t, warnings) - state.NeonVM().RequestSuccessful(clock.now) + state.NeonVM().RequestSuccessful(clock.Now()) require.Empty(t, warnings) - require.Equal(t, clock.elapsed(), 4*hundredMillis) + require.Equal(t, clock.Elapsed(), 4*hundredMillis) // NeonVM change is done, now we should finish by notifying the vm-monitor - actions = state.NextActions(clock.now) + actions = state.NextActions(clock.Now()) require.Equal(t, core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 2*hundredMillis, // same as previous, clock hasn't changed @@ -391,8 +370,8 @@ func Test_NextActions(t *testing.T) { }, actions) require.Empty(t, warnings) // start the request: - state.Monitor().StartingUpscaleRequest(clock.now, actions.MonitorUpscale.Target) - clock.inc(hundredMillis) + state.Monitor().StartingUpscaleRequest(clock.Now(), actions.MonitorUpscale.Target) + clock.Inc(hundredMillis) // should have nothing more to do; waiting on vm-monitor request to come back require.Equal(t, core.ActionSet{ Wait: &core.ActionWait{ @@ -402,15 +381,15 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.now)) + }, state.NextActions(clock.Now())) require.Empty(t, warnings) - state.Monitor().UpscaleRequestSuccessful(clock.now) + state.Monitor().UpscaleRequestSuccessful(clock.Now()) require.Empty(t, warnings) - require.Equal(t, clock.elapsed(), 5*hundredMillis) + require.Equal(t, clock.Elapsed(), 5*hundredMillis) // And now, double-check that there's no sneaky follow-up actions before we change the // metrics - actions = state.NextActions(clock.now) + actions = state.NextActions(clock.Now()) require.Equal(t, core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 3*hundredMillis, // same as previous, clock hasn't changed @@ -424,8 +403,8 @@ func Test_NextActions(t *testing.T) { // ---- Scaledown !!! 
---- - clock.inc(hundredMillis) - require.Equal(t, clock.elapsed(), 6*hundredMillis) + clock.Inc(hundredMillis) + require.Equal(t, clock.Elapsed(), 6*hundredMillis) // Set metrics back so that desired resources should now be zero metrics = api.Metrics{ @@ -435,12 +414,12 @@ func Test_NextActions(t *testing.T) { } state.UpdateMetrics(metrics) // double-check that we agree about the new desired resources - desiredResources, _ = state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.now) + desiredResources, _ = state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.Now()) require.Equal(t, api.Resources{VCPU: 250, Mem: 1}, desiredResources) require.Empty(t, warnings) // First step in downscaling is getting approval from the vm-monitor: - actions = state.NextActions(clock.now) + actions = state.NextActions(clock.Now()) require.Equal(t, core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 4*hundredMillis, @@ -454,8 +433,8 @@ func Test_NextActions(t *testing.T) { MonitorUpscale: nil, }, actions) require.Empty(t, warnings) - state.Monitor().StartingDownscaleRequest(clock.now, actions.MonitorDownscale.Target) - clock.inc(hundredMillis) + state.Monitor().StartingDownscaleRequest(clock.Now(), actions.MonitorDownscale.Target) + clock.Inc(hundredMillis) // should have nothing more to do; waiting on vm-monitor request to come back require.Equal(t, core.ActionSet{ Wait: &core.ActionWait{ @@ -465,14 +444,14 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.now)) + }, state.NextActions(clock.Now())) require.Empty(t, warnings) - state.Monitor().DownscaleRequestAllowed(clock.now) + state.Monitor().DownscaleRequestAllowed(clock.Now()) require.Empty(t, warnings) - require.Equal(t, clock.elapsed(), 7*hundredMillis) + require.Equal(t, clock.Elapsed(), 7*hundredMillis) // After getting approval from the vm-monitor, we make the request to NeonVM to carry it out - actions = state.NextActions(clock.now) + actions = state.NextActions(clock.Now()) require.Equal(t, core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 5*hundredMillis, // same as previous, clock hasn't changed @@ -486,8 +465,8 @@ func Test_NextActions(t *testing.T) { MonitorUpscale: nil, }, actions) require.Empty(t, warnings) - state.NeonVM().StartingRequest(clock.now, actions.NeonVMRequest.Target) - clock.inc(hundredMillis) + state.NeonVM().StartingRequest(clock.Now(), actions.NeonVMRequest.Target) + clock.Inc(hundredMillis) // should have nothing more to do; waiting on NeonVM request to come back require.Equal(t, core.ActionSet{ Wait: &core.ActionWait{ @@ -497,14 +476,14 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.now)) + }, state.NextActions(clock.Now())) require.Empty(t, warnings) - state.NeonVM().RequestSuccessful(clock.now) + state.NeonVM().RequestSuccessful(clock.Now()) require.Empty(t, warnings) - require.Equal(t, clock.elapsed(), 8*hundredMillis) + require.Equal(t, clock.Elapsed(), 8*hundredMillis) // Request to NeonVM completed, it's time to inform the scheduler plugin: - actions = state.NextActions(clock.now) + actions = state.NextActions(clock.Now()) require.Equal(t, core.ActionSet{ Wait: nil, PluginRequest: &core.ActionPluginRequest{ @@ -518,8 +497,8 @@ func Test_NextActions(t *testing.T) { MonitorUpscale: nil, }, actions) require.Empty(t, warnings) - state.Plugin().StartingRequest(clock.now, actions.PluginRequest.Target) - 
clock.inc(hundredMillis) + state.Plugin().StartingRequest(clock.Now(), actions.PluginRequest.Target) + clock.Inc(hundredMillis) // should have nothing more to do; waiting on plugin request to come back require.Equal(t, core.ActionSet{ Wait: nil, // and don't need to wait, because plugin req is ongoing PluginRequest: nil, NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.now)) + }, state.NextActions(clock.Now())) require.Empty(t, warnings) - require.NoError(t, state.Plugin().RequestSuccessful(clock.now, api.PluginResponse{ + require.NoError(t, state.Plugin().RequestSuccessful(clock.Now(), api.PluginResponse{ Permit: api.Resources{VCPU: 250, Mem: 1}, Migrate: nil, ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, })) require.Empty(t, warnings) - require.Equal(t, clock.elapsed(), 9*hundredMillis) + require.Equal(t, clock.Elapsed(), 9*hundredMillis) // Finally, check there's no leftover actions: - actions = state.NextActions(clock.now) + actions = state.NextActions(clock.Now()) require.Equal(t, core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - hundredMillis, // request that just finished was started 100ms ago diff --git a/pkg/agent/core/testhelpers/clock.go b/pkg/agent/core/testhelpers/clock.go new file mode 100644 index 000000000..2e66cb126 --- /dev/null +++ b/pkg/agent/core/testhelpers/clock.go @@ -0,0 +1,41 @@ +package testhelpers + +import ( + "fmt" + "time" +) + +// FakeClock is a small facility that makes it easy to operate on durations since start with +// relative times. +type FakeClock struct { + base time.Time + now time.Time +} + +// NewFakeClock creates a new fake clock, with the initial time set to an unspecified, round number. +func NewFakeClock() *FakeClock { + base, err := time.Parse(time.RFC3339, "2000-01-01T00:00:00Z") // a nice round number, to make things easier + if err != nil { + panic(err) + } + + return &FakeClock{base: base, now: base} +} + +// Now returns the current time of the clock +func (c *FakeClock) Now() time.Time { + return c.now +} + +// Inc adds duration to the current time of the clock +func (c *FakeClock) Inc(duration time.Duration) { + if duration < 0 { + panic(fmt.Errorf("(*FakeClock).Inc() called with negative duration %s", duration)) + } + c.now = c.now.Add(duration) +} + +// Elapsed returns the total time added (via Inc) since the clock was started +func (c *FakeClock) Elapsed() time.Duration { + return c.now.Sub(c.base) +} From b6cd8c121c9141ea88280f6c335f6a2891f6f19d Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 16:33:57 -0700 Subject: [PATCH 23/59] move createInitialState to testhelpers --- pkg/agent/core/state_test.go | 78 +------------------ pkg/agent/core/testhelpers/construct.go | 99 +++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 76 deletions(-) create mode 100644 pkg/agent/core/testhelpers/construct.go diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index e0c4e06b4..9fa95f3d3 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -10,8 +10,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" - vmapi "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" - "github.com/neondatabase/autoscaling/pkg/agent/core" helpers "github.com/neondatabase/autoscaling/pkg/agent/core/testhelpers" "github.com/neondatabase/autoscaling/pkg/api" ) @@ -163,78 +161,6 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { } } -type
computeUnit api.Resources - minCU uint16 - maxCU uint16 -} - -type initialStateOpt struct { - preCreate func(*initialStateParams) - postCreate func(*api.VmInfo, *core.Config) -} - -func withStoredWarnings(warnings *[]string) (o initialStateOpt) { - o.postCreate = func(_ *api.VmInfo, config *core.Config) { - config.Warn = func(format string, args ...any) { - *warnings = append(*warnings, fmt.Sprintf(format, args...)) - } - } - return -} - -func createInitialState(opts ...initialStateOpt) *core.State { - pre := initialStateParams{ - computeUnit: api.Resources{VCPU: 250, Mem: 1}, - minCU: 1, - maxCU: 4, - } - for _, o := range opts { - if o.preCreate != nil { - o.preCreate(&pre) - } - } - - vm := api.VmInfo{ - Name: "test", - Namespace: "test", - Cpu: api.VmCpuInfo{ - Min: vmapi.MilliCPU(pre.minCU) * pre.computeUnit.VCPU, - Use: vmapi.MilliCPU(pre.minCU) * pre.computeUnit.VCPU, - Max: vmapi.MilliCPU(pre.maxCU) * pre.computeUnit.VCPU, - }, - Mem: api.VmMemInfo{ - SlotSize: resource.NewQuantity(1<<30 /* 1 Gi */, resource.BinarySI), - Min: pre.minCU * pre.computeUnit.Mem, - Use: pre.minCU * pre.computeUnit.Mem, - Max: pre.maxCU * pre.computeUnit.Mem, - }, - ScalingConfig: nil, - AlwaysMigrate: false, - ScalingEnabled: true, - } - - config := core.Config{ - DefaultScalingConfig: api.ScalingConfig{ - LoadAverageFractionTarget: 0.5, - MemoryUsageFractionTarget: 0.5, - }, - PluginRequestTick: 5 * time.Second, - PluginDeniedRetryWait: 2 * time.Second, - MonitorDeniedDownscaleCooldown: 5 * time.Second, - MonitorRetryWait: 3 * time.Second, - Warn: func(string, ...any) {}, - } - - for _, o := range opts { - if o.postCreate != nil { - o.postCreate(&vm, &config) - } - } - - return core.NewState(vm, config) -} - func Test_NextActions(t *testing.T) { simulateInitialSchedulerRequest := func(t *testing.T, state *core.State, clock *helpers.FakeClock, reqTime time.Duration) { state.Plugin().NewScheduler() @@ -256,8 +182,8 @@ func Test_NextActions(t *testing.T) { t.Run("BasicScaleupAndDownFlow", func(t *testing.T) { warnings := []string{} clock := helpers.NewFakeClock() - state := createInitialState( - withStoredWarnings(&warnings), + state := helpers.CreateInitialState( + helpers.WithStoredWarnings(&warnings), ) hundredMillis := 100 * time.Millisecond diff --git a/pkg/agent/core/testhelpers/construct.go b/pkg/agent/core/testhelpers/construct.go new file mode 100644 index 000000000..cdf3566b4 --- /dev/null +++ b/pkg/agent/core/testhelpers/construct.go @@ -0,0 +1,99 @@ +package testhelpers + +import ( + "fmt" + "time" + + "k8s.io/apimachinery/pkg/api/resource" + + vmapi "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" + + "github.com/neondatabase/autoscaling/pkg/agent/core" + "github.com/neondatabase/autoscaling/pkg/api" +) + +var DefaultConfig = core.Config{ + DefaultScalingConfig: api.ScalingConfig{ + LoadAverageFractionTarget: 0.5, + MemoryUsageFractionTarget: 0.5, + }, + PluginRequestTick: 5 * time.Second, + PluginDeniedRetryWait: 2 * time.Second, + MonitorDeniedDownscaleCooldown: 5 * time.Second, + MonitorRetryWait: 3 * time.Second, + Warn: func(string, ...any) {}, +} + +type InitialStateOpt struct { + preCreate func(*initialStateParams) + postCreate func(*api.VmInfo, *core.Config) +} + +type initialStateParams struct { + computeUnit api.Resources + minCU uint16 + maxCU uint16 +} + +func CreateInitialState(opts ...InitialStateOpt) *core.State { + pre := initialStateParams{ + computeUnit: api.Resources{VCPU: 250, Mem: 1}, + minCU: 1, + maxCU: 4, + } + for _, o := range opts { + if o.preCreate != 
nil { + o.preCreate(&pre) + } + } + + vm := api.VmInfo{ + Name: "test", + Namespace: "test", + Cpu: api.VmCpuInfo{ + Min: vmapi.MilliCPU(pre.minCU) * pre.computeUnit.VCPU, + Use: vmapi.MilliCPU(pre.minCU) * pre.computeUnit.VCPU, + Max: vmapi.MilliCPU(pre.maxCU) * pre.computeUnit.VCPU, + }, + Mem: api.VmMemInfo{ + SlotSize: resource.NewQuantity(1<<30 /* 1 Gi */, resource.BinarySI), + Min: pre.minCU * pre.computeUnit.Mem, + Use: pre.minCU * pre.computeUnit.Mem, + Max: pre.maxCU * pre.computeUnit.Mem, + }, + ScalingConfig: nil, + AlwaysMigrate: false, + ScalingEnabled: true, + } + + config := core.Config{ + DefaultScalingConfig: api.ScalingConfig{ + LoadAverageFractionTarget: 0.5, + MemoryUsageFractionTarget: 0.5, + }, + PluginRequestTick: 5 * time.Second, + PluginDeniedRetryWait: 2 * time.Second, + MonitorDeniedDownscaleCooldown: 5 * time.Second, + MonitorRetryWait: 3 * time.Second, + Warn: func(string, ...any) {}, + } + + for _, o := range opts { + if o.postCreate != nil { + o.postCreate(&vm, &config) + } + } + + return core.NewState(vm, config) +} + +func WithStoredWarnings(warnings *[]string) InitialStateOpt { + return InitialStateOpt{ + preCreate: nil, + postCreate: func(_ *api.VmInfo, config *core.Config) { + config.Warn = func(format string, args ...any) { + *warnings = append(*warnings, fmt.Sprintf(format, args...)) + } + }, + } +} From aa5af482243e1b7427a997c630fed1a71dd5a83a Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 17:05:12 -0700 Subject: [PATCH 24/59] touchup test config --- pkg/agent/core/state_test.go | 20 +++++++ pkg/agent/core/testhelpers/construct.go | 73 ++++++++----------------- 2 files changed, 43 insertions(+), 50 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 9fa95f3d3..aa7a001de 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -161,6 +161,25 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { } } +var DefaultInitialStateConfig = helpers.InitialStateConfig{ + ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, + MemorySlotSize: resource.MustParse("1Gi"), + + MinCU: 1, + MaxCU: 4, + Core: core.Config{ + DefaultScalingConfig: api.ScalingConfig{ + LoadAverageFractionTarget: 0.5, + MemoryUsageFractionTarget: 0.5, + }, + PluginRequestTick: 5 * time.Second, + PluginDeniedRetryWait: 2 * time.Second, + MonitorDeniedDownscaleCooldown: 5 * time.Second, + MonitorRetryWait: 3 * time.Second, + Warn: func(string, ...any) {}, + }, +} + func Test_NextActions(t *testing.T) { simulateInitialSchedulerRequest := func(t *testing.T, state *core.State, clock *helpers.FakeClock, reqTime time.Duration) { state.Plugin().NewScheduler() @@ -183,6 +202,7 @@ func Test_NextActions(t *testing.T) { warnings := []string{} clock := helpers.NewFakeClock() state := helpers.CreateInitialState( + DefaultInitialStateConfig, helpers.WithStoredWarnings(&warnings), ) diff --git a/pkg/agent/core/testhelpers/construct.go b/pkg/agent/core/testhelpers/construct.go index cdf3566b4..6fb852404 100644 --- a/pkg/agent/core/testhelpers/construct.go +++ b/pkg/agent/core/testhelpers/construct.go @@ -2,7 +2,6 @@ package testhelpers import ( "fmt" - "time" "k8s.io/apimachinery/pkg/api/resource" @@ -12,38 +11,25 @@ import ( "github.com/neondatabase/autoscaling/pkg/api" ) -var DefaultConfig = core.Config{ - DefaultScalingConfig: api.ScalingConfig{ - LoadAverageFractionTarget: 0.5, - MemoryUsageFractionTarget: 0.5, - }, - PluginRequestTick: 5 * time.Second, - PluginDeniedRetryWait: 2 * time.Second, - 
MonitorDeniedDownscaleCooldown: 5 * time.Second, - MonitorRetryWait: 3 * time.Second, - Warn: func(string, ...any) {}, -} +type InitialStateConfig struct { + ComputeUnit api.Resources + MemorySlotSize resource.Quantity -type InitialStateOpt struct { - preCreate func(*initialStateParams) - postCreate func(*api.VmInfo, *core.Config) + MinCU uint16 + MaxCU uint16 + + Core core.Config } -type initialStateParams struct { - computeUnit api.Resources - minCU uint16 - maxCU uint16 +type InitialStateOpt struct { + preCreate func(*InitialStateConfig) + postCreate func(*api.VmInfo) } -func CreateInitialState(opts ...InitialStateOpt) *core.State { - pre := initialStateParams{ - computeUnit: api.Resources{VCPU: 250, Mem: 1}, - minCU: 1, - maxCU: 4, - } +func CreateInitialState(config InitialStateConfig, opts ...InitialStateOpt) *core.State { for _, o := range opts { if o.preCreate != nil { - o.preCreate(&pre) + o.preCreate(&config) } } @@ -51,47 +37,34 @@ func CreateInitialState(opts ...InitialStateOpt) *core.State { Name: "test", Namespace: "test", Cpu: api.VmCpuInfo{ - Min: vmapi.MilliCPU(pre.minCU) * pre.computeUnit.VCPU, - Use: vmapi.MilliCPU(pre.minCU) * pre.computeUnit.VCPU, - Max: vmapi.MilliCPU(pre.maxCU) * pre.computeUnit.VCPU, + Min: vmapi.MilliCPU(config.MinCU) * config.ComputeUnit.VCPU, + Use: vmapi.MilliCPU(config.MinCU) * config.ComputeUnit.VCPU, + Max: vmapi.MilliCPU(config.MaxCU) * config.ComputeUnit.VCPU, }, Mem: api.VmMemInfo{ - SlotSize: resource.NewQuantity(1<<30 /* 1 Gi */, resource.BinarySI), - Min: pre.minCU * pre.computeUnit.Mem, - Use: pre.minCU * pre.computeUnit.Mem, - Max: pre.maxCU * pre.computeUnit.Mem, + SlotSize: &config.MemorySlotSize, + Min: config.MinCU * config.ComputeUnit.Mem, + Use: config.MinCU * config.ComputeUnit.Mem, + Max: config.MaxCU * config.ComputeUnit.Mem, }, ScalingConfig: nil, AlwaysMigrate: false, ScalingEnabled: true, } - config := core.Config{ - DefaultScalingConfig: api.ScalingConfig{ - LoadAverageFractionTarget: 0.5, - MemoryUsageFractionTarget: 0.5, - }, - PluginRequestTick: 5 * time.Second, - PluginDeniedRetryWait: 2 * time.Second, - MonitorDeniedDownscaleCooldown: 5 * time.Second, - MonitorRetryWait: 3 * time.Second, - Warn: func(string, ...any) {}, - } - for _, o := range opts { if o.postCreate != nil { - o.postCreate(&vm, &config) + o.postCreate(&vm) } } - return core.NewState(vm, config) + return core.NewState(vm, config.Core) } func WithStoredWarnings(warnings *[]string) InitialStateOpt { return InitialStateOpt{ - preCreate: nil, - postCreate: func(_ *api.VmInfo, config *core.Config) { - config.Warn = func(format string, args ...any) { + preCreate: func(c *InitialStateConfig) { + c.Core.Warn = func(format string, args ...any) { *warnings = append(*warnings, fmt.Sprintf(format, args...)) } }, From 63d134828ff9f3123b3941fa25081602dbef0b0c Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 17:09:02 -0700 Subject: [PATCH 25/59] fix missing field --- pkg/agent/core/testhelpers/construct.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/agent/core/testhelpers/construct.go b/pkg/agent/core/testhelpers/construct.go index 6fb852404..6eab8b58f 100644 --- a/pkg/agent/core/testhelpers/construct.go +++ b/pkg/agent/core/testhelpers/construct.go @@ -63,6 +63,7 @@ func CreateInitialState(config InitialStateConfig, opts ...InitialStateOpt) *cor func WithStoredWarnings(warnings *[]string) InitialStateOpt { return InitialStateOpt{ + postCreate: nil, preCreate: func(c *InitialStateConfig) { c.Core.Warn = func(format string, args ...any) { 
*warnings = append(*warnings, fmt.Sprintf(format, args...)) From c003477a72e94d7ff333654c3c441ef8b1efa26c Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 20:18:21 -0700 Subject: [PATCH 26/59] agent/core: make test assertions more streamlined --- pkg/agent/core/state_test.go | 204 +++++++++++---------------- pkg/agent/core/testhelpers/assert.go | 162 +++++++++++++++++++++ pkg/agent/core/testhelpers/clock.go | 26 +++- 3 files changed, 267 insertions(+), 125 deletions(-) create mode 100644 pkg/agent/core/testhelpers/assert.go diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index aa7a001de..a7050b693 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -5,7 +5,6 @@ import ( "testing" "time" - "github.com/stretchr/testify/require" "golang.org/x/exp/slices" "k8s.io/apimachinery/pkg/api/resource" @@ -181,29 +180,14 @@ var DefaultInitialStateConfig = helpers.InitialStateConfig{ } func Test_NextActions(t *testing.T) { - simulateInitialSchedulerRequest := func(t *testing.T, state *core.State, clock *helpers.FakeClock, reqTime time.Duration) { - state.Plugin().NewScheduler() - - actions := state.NextActions(clock.Now()) - require.NotNil(t, actions.PluginRequest) - action := actions.PluginRequest - require.Nil(t, action.LastPermit) - state.Plugin().StartingRequest(clock.Now(), action.Target) - clock.Inc(reqTime) - require.NoError(t, state.Plugin().RequestSuccessful(clock.Now(), api.PluginResponse{ - Permit: action.Target, - Migrate: nil, - ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, // TODO: make this configurable... somehow. - })) - } - // Thorough checks of a relatively simple flow t.Run("BasicScaleupAndDownFlow", func(t *testing.T) { - warnings := []string{} - clock := helpers.NewFakeClock() + a := helpers.NewAssert(t) + + clock := helpers.NewFakeClock(t) state := helpers.CreateInitialState( DefaultInitialStateConfig, - helpers.WithStoredWarnings(&warnings), + helpers.WithStoredWarnings(a.StoredWarnings()), ) hundredMillis := 100 * time.Millisecond @@ -211,62 +195,74 @@ func Test_NextActions(t *testing.T) { state.Plugin().NewScheduler() state.Monitor().Active(true) - simulateInitialSchedulerRequest(t, state, clock, hundredMillis) - require.Equal(t, warnings, []string{"Can't determine desired resources because compute unit hasn't been set yet"}) - warnings = nil // reset + // Send initial scheduler request: + actions := a. + WithWarnings("Can't determine desired resources because compute unit hasn't been set yet"). 
+ NextActions(state, clock.Now(), core.ActionSet{ + Wait: nil, + PluginRequest: &core.ActionPluginRequest{ + LastPermit: nil, + Target: api.Resources{VCPU: 250, Mem: 1}, + Metrics: nil, + }, + NeonVMRequest: nil, + MonitorDownscale: nil, + MonitorUpscale: nil, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) + clock.Inc(hundredMillis).AssertEquals(1 * hundredMillis) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: actions.PluginRequest.Target, + Migrate: nil, + ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, + }) clock.Inc(hundredMillis) - metrics := api.Metrics{ + lastMetrics := api.Metrics{ LoadAverage1Min: 0.3, LoadAverage5Min: 0.0, // unused MemoryUsageBytes: 0.0, } - state.UpdateMetrics(metrics) - require.Equal(t, clock.Elapsed(), 2*hundredMillis) + a.Do(state.UpdateMetrics, lastMetrics) + clock.Elapsed().AssertEquals(2 * hundredMillis) // double-check that we agree about the desired resources - desiredResources, _ := state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.Now()) - require.Equal(t, api.Resources{VCPU: 500, Mem: 2}, desiredResources) - require.Empty(t, warnings) + a.Call(state.DesiredResourcesFromMetricsOrRequestedUpscaling, clock.Now()). + Equals(api.Resources{VCPU: 500, Mem: 2}, helpers.Nil[*time.Duration]()) // Now that the initial scheduler request is done, and we have metrics that indicate // scale-up would be a good idea, we should be contacting the scheduler to get approval. - actions := state.NextActions(clock.Now()) - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ Wait: nil, PluginRequest: &core.ActionPluginRequest{ LastPermit: &api.Resources{VCPU: 250, Mem: 1}, Target: api.Resources{VCPU: 500, Mem: 2}, - Metrics: &metrics, + Metrics: &lastMetrics, }, // shouldn't have anything to say to the other components NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, actions) - require.Empty(t, warnings) + }) // start the request: - state.Plugin().StartingRequest(clock.Now(), actions.PluginRequest.Target) + a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) clock.Inc(hundredMillis) // should have nothing more to do; waiting on plugin request to come back - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ Wait: nil, PluginRequest: nil, NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.Now())) - require.Empty(t, warnings) - require.NoError(t, state.Plugin().RequestSuccessful(clock.Now(), api.PluginResponse{ + }) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ Permit: api.Resources{VCPU: 500, Mem: 2}, Migrate: nil, ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, - })) - require.Empty(t, warnings) - require.Equal(t, clock.Elapsed(), 3*hundredMillis) + }) + clock.Elapsed().AssertEquals(3 * hundredMillis) // Scheduler approval is done, now we should be making the request to NeonVM - actions = state.NextActions(clock.Now()) - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ // expected to make a scheduler request every 5s; it's been 100ms since the last one, so // if the NeonVM request didn't come back in time, we'd need to get woken up to start // the next scheduler request. 
@@ -280,13 +276,12 @@ func Test_NextActions(t *testing.T) { }, MonitorDownscale: nil, MonitorUpscale: nil, - }, actions) - require.Empty(t, warnings) + }) // start the request: - state.NeonVM().StartingRequest(clock.Now(), actions.NeonVMRequest.Target) - clock.Inc(hundredMillis) + a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) + clock.Inc(hundredMillis).AssertEquals(4 * hundredMillis) // should have nothing more to do; waiting on NeonVM request to come back - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 2*hundredMillis, }, @@ -294,15 +289,11 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.Now())) - require.Empty(t, warnings) - state.NeonVM().RequestSuccessful(clock.Now()) - require.Empty(t, warnings) - require.Equal(t, clock.Elapsed(), 4*hundredMillis) + }) + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) // NeonVM change is done, now we should finish by notifying the vm-monitor - actions = state.NextActions(clock.Now()) - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 2*hundredMillis, // same as previous, clock hasn't changed }, @@ -313,13 +304,12 @@ func Test_NextActions(t *testing.T) { Current: api.Resources{VCPU: 250, Mem: 1}, Target: api.Resources{VCPU: 500, Mem: 2}, }, - }, actions) - require.Empty(t, warnings) + }) // start the request: - state.Monitor().StartingUpscaleRequest(clock.Now(), actions.MonitorUpscale.Target) - clock.Inc(hundredMillis) + a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), actions.MonitorUpscale.Target) + clock.Inc(hundredMillis).AssertEquals(5 * hundredMillis) // should have nothing more to do; waiting on vm-monitor request to come back - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 3*hundredMillis, }, @@ -327,16 +317,12 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.Now())) - require.Empty(t, warnings) - state.Monitor().UpscaleRequestSuccessful(clock.Now()) - require.Empty(t, warnings) - require.Equal(t, clock.Elapsed(), 5*hundredMillis) + }) + a.Do(state.Monitor().UpscaleRequestSuccessful, clock.Now()) // And now, double-check that there's no sneaky follow-up actions before we change the // metrics - actions = state.NextActions(clock.Now()) - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 3*hundredMillis, // same as previous, clock hasn't changed }, @@ -344,29 +330,25 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, actions) - require.Empty(t, warnings) + }) // ---- Scaledown !!! 
---- - clock.Inc(hundredMillis) - require.Equal(t, clock.Elapsed(), 6*hundredMillis) + clock.Inc(hundredMillis).AssertEquals(6 * hundredMillis) // Set metrics back so that desired resources should now be zero - metrics = api.Metrics{ + lastMetrics = api.Metrics{ LoadAverage1Min: 0.0, LoadAverage5Min: 0.0, // unused MemoryUsageBytes: 0.0, } - state.UpdateMetrics(metrics) + a.Do(state.UpdateMetrics, lastMetrics) // double-check that we agree about the new desired resources - desiredResources, _ = state.DesiredResourcesFromMetricsOrRequestedUpscaling(clock.Now()) - require.Equal(t, api.Resources{VCPU: 250, Mem: 1}, desiredResources) - require.Empty(t, warnings) + a.Call(state.DesiredResourcesFromMetricsOrRequestedUpscaling, clock.Now()). + Equals(api.Resources{VCPU: 250, Mem: 1}, helpers.Nil[*time.Duration]()) // First step in downscaling is getting approval from the vm-monitor: - actions = state.NextActions(clock.Now()) - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 4*hundredMillis, }, @@ -377,12 +359,11 @@ func Test_NextActions(t *testing.T) { Target: api.Resources{VCPU: 250, Mem: 1}, }, MonitorUpscale: nil, - }, actions) - require.Empty(t, warnings) - state.Monitor().StartingDownscaleRequest(clock.Now(), actions.MonitorDownscale.Target) - clock.Inc(hundredMillis) + }) + a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), actions.MonitorDownscale.Target) + clock.Inc(hundredMillis).AssertEquals(7 * hundredMillis) // should have nothing more to do; waiting on vm-monitor request to come back - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 5*hundredMillis, }, @@ -390,15 +371,11 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.Now())) - require.Empty(t, warnings) - state.Monitor().DownscaleRequestAllowed(clock.Now()) - require.Empty(t, warnings) - require.Equal(t, clock.Elapsed(), 7*hundredMillis) + }) + a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) // After getting approval from the vm-monitor, we make the request to NeonVM to carry it out - actions = state.NextActions(clock.Now()) - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 5*hundredMillis, // same as previous, clock hasn't changed }, @@ -409,12 +386,11 @@ func Test_NextActions(t *testing.T) { }, MonitorDownscale: nil, MonitorUpscale: nil, - }, actions) - require.Empty(t, warnings) - state.NeonVM().StartingRequest(clock.Now(), actions.NeonVMRequest.Target) - clock.Inc(hundredMillis) + }) + a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) + clock.Inc(hundredMillis).AssertEquals(8 * hundredMillis) // should have nothing more to do; waiting on NeonVM request to come back - require.Equal(t, core.ActionSet{ + a.NextActions(state, clock.Now(), core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 6*hundredMillis, }, @@ -422,49 +398,40 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.Now())) - require.Empty(t, warnings) - state.NeonVM().RequestSuccessful(clock.Now()) - require.Empty(t, warnings) - require.Equal(t, clock.Elapsed(), 8*hundredMillis) + }) + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) // Request to 
NeonVM completed, it's time to inform the scheduler plugin: - actions = state.NextActions(clock.Now()) - require.Equal(t, core.ActionSet{ + actions = a.NextActions(state, clock.Now(), core.ActionSet{ Wait: nil, PluginRequest: &core.ActionPluginRequest{ LastPermit: &api.Resources{VCPU: 500, Mem: 2}, Target: api.Resources{VCPU: 250, Mem: 1}, - Metrics: &metrics, + Metrics: &lastMetrics, }, // shouldn't have anything to say to the other components NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, actions) - require.Empty(t, warnings) - state.Plugin().StartingRequest(clock.Now(), actions.PluginRequest.Target) - clock.Inc(hundredMillis) + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) + clock.Inc(hundredMillis).AssertEquals(9 * hundredMillis) // should have nothing more to do; waiting on plugin request to come back - require.Equal(t, core.ActionSet{ + a.NextActions(state, clock.Now(), core.ActionSet{ Wait: nil, // and don't need to wait, because plugin req is ongoing PluginRequest: nil, NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, state.NextActions(clock.Now())) - require.Empty(t, warnings) - require.NoError(t, state.Plugin().RequestSuccessful(clock.Now(), api.PluginResponse{ + }) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ Permit: api.Resources{VCPU: 250, Mem: 1}, Migrate: nil, ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, - })) - require.Empty(t, warnings) - require.Equal(t, clock.Elapsed(), 9*hundredMillis) + }) // Finally, check there's no leftover actions: - actions = state.NextActions(clock.Now()) - require.Equal(t, core.ActionSet{ + a.NextActions(state, clock.Now(), core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - hundredMillis, // request that just finished was started 100ms ago }, @@ -472,7 +439,6 @@ func Test_NextActions(t *testing.T) { NeonVMRequest: nil, MonitorDownscale: nil, MonitorUpscale: nil, - }, actions) - require.Empty(t, warnings) + }) }) } diff --git a/pkg/agent/core/testhelpers/assert.go b/pkg/agent/core/testhelpers/assert.go new file mode 100644 index 000000000..0e08bb3a7 --- /dev/null +++ b/pkg/agent/core/testhelpers/assert.go @@ -0,0 +1,162 @@ +package testhelpers + +import ( + "errors" + "fmt" + "reflect" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/neondatabase/autoscaling/pkg/agent/core" +) + +type Assert struct { + t *testing.T + storedWarnings *[]string + + tinfo transactionInfo +} + +type transactionInfo struct { + expectedWarnings []string +} + +// NewAssert creates a new Assert object wrapping the provided *testing.T +func NewAssert(t *testing.T) Assert { + return Assert{ + t: t, + storedWarnings: &[]string{}, + tinfo: transactionInfo{ + expectedWarnings: nil, + }, + } +} + +// StoredWarnings returns a reference to the warnings that will be checked, intended to be used with +// the InitialStateOpt constructor WithStoredWarnings +func (a Assert) StoredWarnings() *[]string { + return a.storedWarnings +} + +// WithWarnings returns an Assert that expects the given warnings to be emitted on each operation +func (a Assert) WithWarnings(warnings ...string) Assert { + a.tinfo.expectedWarnings = warnings + return a +} + +// NextActions calls core.NextActions() and checks that the value matches expected, returning it +func (a Assert) NextActions(state *core.State, now time.Time, expected core.ActionSet) core.ActionSet { + actions := state.NextActions(now) + assert.Equal(a.t, expected, actions) + 
assert.Equal(a.t, a.tinfo.expectedWarnings, *a.storedWarnings) + if a.t.Failed() { + a.t.FailNow() + } + *a.storedWarnings = nil + return actions +} + +// Nil returns a type-erased zero value of T, typically for use when a typed nil is necessary +func Nil[T any]() any { + var t T + return any(t) +} + +// Do calls the function with the provided arguments, checking that no unexpected warnings were +// generated +// +// This is only valid for functions that return nothing. +func (a Assert) Do(f any, args ...any) { + a.Call(f, args...).Equals( /* empty args list means no returns */ ) +} + +// NoError calls the function with the provided arguments, checking that the error it returns is +// nil, and that no unexpected warnings were generated. +func (a Assert) NoError(f any, args ...any) { + a.Call(f, args...).Equals(nil) +} + +// Call sets up a prepared function call, which will not be executed until one of its methods is +// actually called, which will perform all the relevant checks. +// +// Variadic functions are not supported. +func (a Assert) Call(f any, args ...any) PreparedFunctionCall { + fv := reflect.ValueOf(f) + fTy := fv.Type() + if fTy.Kind() != reflect.Func { + panic(errors.New("f must be a function")) + } else if fTy.IsVariadic() { + panic(errors.New("f is variadic")) + } + + var argValues []reflect.Value + for _, a := range args { + argValues = append(argValues, reflect.ValueOf(a)) + } + + return PreparedFunctionCall{a: a, f: fv, args: argValues} +} + +// PreparedFunctionCall is a function call that has been set up by (Assert).Call() but not executed +type PreparedFunctionCall struct { + a Assert + f reflect.Value + args []reflect.Value +} + +// Equals calls the prepared function, checking that all the return values are equal to what's +// expected, and that no unexpected warnings were generated. +func (f PreparedFunctionCall) Equals(expected ...any) { + fTy := f.f.Type() + + numOut := fTy.NumOut() + if len(expected) != numOut { + panic(fmt.Errorf( + "Mismatched number of out parameters from function: func has %d but expected len is %d", + numOut, + len(expected), + )) + } + + type unknownInterface any + + var actualReturnTypes []reflect.Type + var expectedReturnTypes []reflect.Type + for i := 0; i < numOut; i += 1 { + actual := fTy.Out(i) + actualReturnTypes = append(actualReturnTypes, actual) + + // Can't call reflect.Value.Type on nil, so if we're given a nil value, we have to be a + // little more permissive. + var expectedTy reflect.Type + if expected[i] != nil { + expectedTy = reflect.TypeOf(expected[i]) + } else if actual.Kind() == reflect.Interface { + // well, the actual value can be a nil interface too, so it's probably fine + expectedTy = actual + } else { + // but... 
if the actual value isn't an interface, there's a problem + expectedTy = reflect.TypeOf((*unknownInterface)(nil)).Elem() + } + expectedReturnTypes = append(expectedReturnTypes, expectedTy) + } + + if !reflect.DeepEqual(expectedReturnTypes, actualReturnTypes) { + panic(fmt.Errorf( + "provided return types not equal to the function's: function has %v, but expected has %v", + actualReturnTypes, + expectedReturnTypes, + )) + } + + returnValues := f.f.Call(f.args) + for i := range returnValues { + assert.Equal(f.a.t, expected[i], returnValues[i].Interface()) + } + assert.Equal(f.a.t, f.a.tinfo.expectedWarnings, *f.a.storedWarnings) + if f.a.t.Failed() { + f.a.t.FailNow() + } +} diff --git a/pkg/agent/core/testhelpers/clock.go b/pkg/agent/core/testhelpers/clock.go index 2e66cb126..14bed3a05 100644 --- a/pkg/agent/core/testhelpers/clock.go +++ b/pkg/agent/core/testhelpers/clock.go @@ -2,24 +2,28 @@ package testhelpers import ( "fmt" + "testing" "time" + + "github.com/stretchr/testify/require" ) // FakeClock is a small facility that makes it easy to operation on duration since start with // relative times. type FakeClock struct { + t *testing.T base time.Time now time.Time } // NewFakeClock creates a new fake clock, with the initial time set to an unspecified, round number. -func NewFakeClock() *FakeClock { +func NewFakeClock(t *testing.T) *FakeClock { base, err := time.Parse(time.RFC3339, "2000-01-01T00:00:00Z") // a nice round number, to make things easier if err != nil { panic(err) } - return &FakeClock{base: base, now: base} + return &FakeClock{t: t, base: base, now: base} } // Now returns the current time of the clock @@ -27,15 +31,25 @@ func (c *FakeClock) Now() time.Time { return c.now } +// Elapsed returns the total time added (via Inc) since the clock was started +func (c *FakeClock) Elapsed() Elapsed { + return Elapsed{c.t, c.now.Sub(c.base)} +} + // Inc adds duration to the current time of the clock -func (c *FakeClock) Inc(duration time.Duration) { +func (c *FakeClock) Inc(duration time.Duration) Elapsed { if duration < 0 { panic(fmt.Errorf("(*FakeClock).Inc() called with negative duration %s", duration)) } c.now = c.now.Add(duration) + return c.Elapsed() } -// Elapsed returns the total time added (via Inc) since the clock was started -func (c *FakeClock) Elapsed() time.Duration { - return c.now.Sub(c.base) +type Elapsed struct { + t *testing.T + time.Duration +} + +func (e Elapsed) AssertEquals(expected time.Duration) { + require.Equal(e.t, expected, e.Duration) } From 1dd4a7b11a54c6e6297c3c7f8b42c3211edc5bd5 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 20:25:15 -0700 Subject: [PATCH 27/59] simplify more --- pkg/agent/core/state_test.go | 40 ++++++++++++++++------------ pkg/agent/core/testhelpers/assert.go | 16 +---------- 2 files changed, 24 insertions(+), 32 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index a7050b693..b1e5c58de 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -190,15 +190,21 @@ func Test_NextActions(t *testing.T) { helpers.WithStoredWarnings(a.StoredWarnings()), ) + var actions core.ActionSet + updateActions := func() core.ActionSet { + actions = state.NextActions(clock.Now()) + return actions + } + hundredMillis := 100 * time.Millisecond state.Plugin().NewScheduler() state.Monitor().Active(true) // Send initial scheduler request: - actions := a. - WithWarnings("Can't determine desired resources because compute unit hasn't been set yet"). 
- NextActions(state, clock.Now(), core.ActionSet{ + a.WithWarnings("Can't determine desired resources because compute unit hasn't been set yet"). + Call(updateActions). + Equals(core.ActionSet{ Wait: nil, PluginRequest: &core.ActionPluginRequest{ LastPermit: nil, @@ -231,7 +237,7 @@ func Test_NextActions(t *testing.T) { // Now that the initial scheduler request is done, and we have metrics that indicate // scale-up would be a good idea, we should be contacting the scheduler to get approval. - actions = a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: nil, PluginRequest: &core.ActionPluginRequest{ LastPermit: &api.Resources{VCPU: 250, Mem: 1}, @@ -247,7 +253,7 @@ func Test_NextActions(t *testing.T) { a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) clock.Inc(hundredMillis) // should have nothing more to do; waiting on plugin request to come back - actions = a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: nil, PluginRequest: nil, NeonVMRequest: nil, @@ -262,7 +268,7 @@ func Test_NextActions(t *testing.T) { clock.Elapsed().AssertEquals(3 * hundredMillis) // Scheduler approval is done, now we should be making the request to NeonVM - actions = a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ // expected to make a scheduler request every 5s; it's been 100ms since the last one, so // if the NeonVM request didn't come back in time, we'd need to get woken up to start // the next scheduler request. @@ -281,7 +287,7 @@ func Test_NextActions(t *testing.T) { a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) clock.Inc(hundredMillis).AssertEquals(4 * hundredMillis) // should have nothing more to do; waiting on NeonVM request to come back - actions = a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 2*hundredMillis, }, @@ -293,7 +299,7 @@ func Test_NextActions(t *testing.T) { a.Do(state.NeonVM().RequestSuccessful, clock.Now()) // NeonVM change is done, now we should finish by notifying the vm-monitor - actions = a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 2*hundredMillis, // same as previous, clock hasn't changed }, @@ -309,7 +315,7 @@ func Test_NextActions(t *testing.T) { a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), actions.MonitorUpscale.Target) clock.Inc(hundredMillis).AssertEquals(5 * hundredMillis) // should have nothing more to do; waiting on vm-monitor request to come back - actions = a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 3*hundredMillis, }, @@ -322,7 +328,7 @@ func Test_NextActions(t *testing.T) { // And now, double-check that there's no sneaky follow-up actions before we change the // metrics - actions = a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 3*hundredMillis, // same as previous, clock hasn't changed }, @@ -348,7 +354,7 @@ func Test_NextActions(t *testing.T) { Equals(api.Resources{VCPU: 250, Mem: 1}, helpers.Nil[*time.Duration]()) // First step in downscaling is getting approval from the vm-monitor: - actions = a.NextActions(state, clock.Now(), core.ActionSet{ 
+ a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 4*hundredMillis, }, @@ -363,7 +369,7 @@ func Test_NextActions(t *testing.T) { a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), actions.MonitorDownscale.Target) clock.Inc(hundredMillis).AssertEquals(7 * hundredMillis) // should have nothing more to do; waiting on vm-monitor request to come back - actions = a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 5*hundredMillis, }, @@ -375,7 +381,7 @@ func Test_NextActions(t *testing.T) { a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) // After getting approval from the vm-monitor, we make the request to NeonVM to carry it out - actions = a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 5*hundredMillis, // same as previous, clock hasn't changed }, @@ -390,7 +396,7 @@ func Test_NextActions(t *testing.T) { a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) clock.Inc(hundredMillis).AssertEquals(8 * hundredMillis) // should have nothing more to do; waiting on NeonVM request to come back - a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - 6*hundredMillis, }, @@ -402,7 +408,7 @@ func Test_NextActions(t *testing.T) { a.Do(state.NeonVM().RequestSuccessful, clock.Now()) // Request to NeonVM completed, it's time to inform the scheduler plugin: - actions = a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: nil, PluginRequest: &core.ActionPluginRequest{ LastPermit: &api.Resources{VCPU: 500, Mem: 2}, @@ -417,7 +423,7 @@ func Test_NextActions(t *testing.T) { a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) clock.Inc(hundredMillis).AssertEquals(9 * hundredMillis) // should have nothing more to do; waiting on plugin request to come back - a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: nil, // and don't need to wait, because plugin req is ongoing PluginRequest: nil, NeonVMRequest: nil, @@ -431,7 +437,7 @@ func Test_NextActions(t *testing.T) { }) // Finally, check there's no leftover actions: - a.NextActions(state, clock.Now(), core.ActionSet{ + a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{ Duration: 5*time.Second - hundredMillis, // request that just finished was started 100ms ago }, diff --git a/pkg/agent/core/testhelpers/assert.go b/pkg/agent/core/testhelpers/assert.go index 0e08bb3a7..f14c4c1a9 100644 --- a/pkg/agent/core/testhelpers/assert.go +++ b/pkg/agent/core/testhelpers/assert.go @@ -5,11 +5,8 @@ import ( "fmt" "reflect" "testing" - "time" "github.com/stretchr/testify/assert" - - "github.com/neondatabase/autoscaling/pkg/agent/core" ) type Assert struct { @@ -46,18 +43,6 @@ func (a Assert) WithWarnings(warnings ...string) Assert { return a } -// NextActions calls core.NextActions() and checks that the value matches expected, returning it -func (a Assert) NextActions(state *core.State, now time.Time, expected core.ActionSet) core.ActionSet { - actions := state.NextActions(now) - assert.Equal(a.t, expected, actions) - assert.Equal(a.t, a.tinfo.expectedWarnings, *a.storedWarnings) - if a.t.Failed() { - a.t.FailNow() - } - *a.storedWarnings = nil - return actions -} - 
// Nil returns a type-erased zero value of T, typically for use when a typed nil is necessary func Nil[T any]() any { var t T @@ -159,4 +144,5 @@ func (f PreparedFunctionCall) Equals(expected ...any) { if f.a.t.Failed() { f.a.t.FailNow() } + *f.a.storedWarnings = nil } From 3e40d2ef7b3690405c52c2b834c838f2e42ade2a Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 20:29:08 -0700 Subject: [PATCH 28/59] allow missing fields in core.ActionSet, to make tests smaller --- .golangci.yml | 2 +- pkg/agent/core/state_test.go | 65 ++---------------------------------- 2 files changed, 3 insertions(+), 64 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 523d877c4..540aa3ad2 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -65,7 +65,7 @@ linters-settings: - '^github\.com/neondatabase/autoscaling/pkg/util/watch\.HandlerFuncs$' # vmapi.{VirtualMachine,VirtualMachineSpec,VirtualMachineMigration,VirtualMachineMigrationSpec} - '^github\.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1\.VirtualMachine(Migration)?(Spec)?$' - - '^github\.com/neondatabase/autoscaling/pkg/agent/core\.Action$' + - '^github\.com/neondatabase/autoscaling/pkg/agent/core\.ActionSet$' # see: gci: diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index b1e5c58de..bf924a3f2 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -205,15 +205,11 @@ func Test_NextActions(t *testing.T) { a.WithWarnings("Can't determine desired resources because compute unit hasn't been set yet"). Call(updateActions). Equals(core.ActionSet{ - Wait: nil, PluginRequest: &core.ActionPluginRequest{ LastPermit: nil, Target: api.Resources{VCPU: 250, Mem: 1}, Metrics: nil, }, - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, }) a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) clock.Inc(hundredMillis).AssertEquals(1 * hundredMillis) @@ -238,28 +234,17 @@ func Test_NextActions(t *testing.T) { // Now that the initial scheduler request is done, and we have metrics that indicate // scale-up would be a good idea, we should be contacting the scheduler to get approval. 
a.Call(updateActions).Equals(core.ActionSet{ - Wait: nil, PluginRequest: &core.ActionPluginRequest{ LastPermit: &api.Resources{VCPU: 250, Mem: 1}, Target: api.Resources{VCPU: 500, Mem: 2}, Metrics: &lastMetrics, }, - // shouldn't have anything to say to the other components - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, }) // start the request: a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) clock.Inc(hundredMillis) // should have nothing more to do; waiting on plugin request to come back - a.Call(updateActions).Equals(core.ActionSet{ - Wait: nil, - PluginRequest: nil, - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, - }) + a.Call(updateActions).Equals(core.ActionSet{}) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ Permit: api.Resources{VCPU: 500, Mem: 2}, Migrate: nil, @@ -275,13 +260,10 @@ func Test_NextActions(t *testing.T) { Wait: &core.ActionWait{ Duration: 5*time.Second - hundredMillis, }, - PluginRequest: nil, NeonVMRequest: &core.ActionNeonVMRequest{ Current: api.Resources{VCPU: 250, Mem: 1}, Target: api.Resources{VCPU: 500, Mem: 2}, }, - MonitorDownscale: nil, - MonitorUpscale: nil, }) // start the request: a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) @@ -291,10 +273,6 @@ func Test_NextActions(t *testing.T) { Wait: &core.ActionWait{ Duration: 5*time.Second - 2*hundredMillis, }, - PluginRequest: nil, - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, }) a.Do(state.NeonVM().RequestSuccessful, clock.Now()) @@ -303,9 +281,6 @@ func Test_NextActions(t *testing.T) { Wait: &core.ActionWait{ Duration: 5*time.Second - 2*hundredMillis, // same as previous, clock hasn't changed }, - PluginRequest: nil, - NeonVMRequest: nil, - MonitorDownscale: nil, MonitorUpscale: &core.ActionMonitorUpscale{ Current: api.Resources{VCPU: 250, Mem: 1}, Target: api.Resources{VCPU: 500, Mem: 2}, @@ -319,10 +294,6 @@ func Test_NextActions(t *testing.T) { Wait: &core.ActionWait{ Duration: 5*time.Second - 3*hundredMillis, }, - PluginRequest: nil, - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, }) a.Do(state.Monitor().UpscaleRequestSuccessful, clock.Now()) @@ -332,10 +303,6 @@ func Test_NextActions(t *testing.T) { Wait: &core.ActionWait{ Duration: 5*time.Second - 3*hundredMillis, // same as previous, clock hasn't changed }, - PluginRequest: nil, - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, }) // ---- Scaledown !!! 
---- @@ -358,13 +325,10 @@ func Test_NextActions(t *testing.T) { Wait: &core.ActionWait{ Duration: 5*time.Second - 4*hundredMillis, }, - PluginRequest: nil, - NeonVMRequest: nil, MonitorDownscale: &core.ActionMonitorDownscale{ Current: api.Resources{VCPU: 500, Mem: 2}, Target: api.Resources{VCPU: 250, Mem: 1}, }, - MonitorUpscale: nil, }) a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), actions.MonitorDownscale.Target) clock.Inc(hundredMillis).AssertEquals(7 * hundredMillis) @@ -373,10 +337,6 @@ func Test_NextActions(t *testing.T) { Wait: &core.ActionWait{ Duration: 5*time.Second - 5*hundredMillis, }, - PluginRequest: nil, - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, }) a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) @@ -385,13 +345,10 @@ func Test_NextActions(t *testing.T) { Wait: &core.ActionWait{ Duration: 5*time.Second - 5*hundredMillis, // same as previous, clock hasn't changed }, - PluginRequest: nil, NeonVMRequest: &core.ActionNeonVMRequest{ Current: api.Resources{VCPU: 500, Mem: 2}, Target: api.Resources{VCPU: 250, Mem: 1}, }, - MonitorDownscale: nil, - MonitorUpscale: nil, }) a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) clock.Inc(hundredMillis).AssertEquals(8 * hundredMillis) @@ -400,36 +357,22 @@ func Test_NextActions(t *testing.T) { Wait: &core.ActionWait{ Duration: 5*time.Second - 6*hundredMillis, }, - PluginRequest: nil, - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, }) a.Do(state.NeonVM().RequestSuccessful, clock.Now()) // Request to NeonVM completed, it's time to inform the scheduler plugin: a.Call(updateActions).Equals(core.ActionSet{ - Wait: nil, PluginRequest: &core.ActionPluginRequest{ LastPermit: &api.Resources{VCPU: 500, Mem: 2}, Target: api.Resources{VCPU: 250, Mem: 1}, Metrics: &lastMetrics, }, // shouldn't have anything to say to the other components - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, }) a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) clock.Inc(hundredMillis).AssertEquals(9 * hundredMillis) // should have nothing more to do; waiting on plugin request to come back - a.Call(updateActions).Equals(core.ActionSet{ - Wait: nil, // and don't need to wait, because plugin req is ongoing - PluginRequest: nil, - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, - }) + a.Call(updateActions).Equals(core.ActionSet{}) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ Permit: api.Resources{VCPU: 250, Mem: 1}, Migrate: nil, @@ -441,10 +384,6 @@ func Test_NextActions(t *testing.T) { Wait: &core.ActionWait{ Duration: 5*time.Second - hundredMillis, // request that just finished was started 100ms ago }, - PluginRequest: nil, - NeonVMRequest: nil, - MonitorDownscale: nil, - MonitorUpscale: nil, }) }) } From b910be44f750fa7c743086acb4d21b5b80e45bc1 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 20:46:03 -0700 Subject: [PATCH 29/59] make durations simpler --- pkg/agent/core/state_test.go | 73 +++++++++++++++--------------------- 1 file changed, 31 insertions(+), 42 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index bf924a3f2..935eb8980 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -179,6 +179,15 @@ var DefaultInitialStateConfig = helpers.InitialStateConfig{ }, } +// helper function to parse a duration +func duration(s string) time.Duration { + d, err := time.ParseDuration(s) + if err 
!= nil { + panic(fmt.Errorf("failed to parse duration: %w", err)) + } + return d +} + func Test_NextActions(t *testing.T) { // Thorough checks of a relatively simple flow t.Run("BasicScaleupAndDownFlow", func(t *testing.T) { @@ -196,7 +205,9 @@ func Test_NextActions(t *testing.T) { return actions } - hundredMillis := 100 * time.Millisecond + clockTick := func() helpers.Elapsed { + return clock.Inc(100 * time.Millisecond) + } state.Plugin().NewScheduler() state.Monitor().Active(true) @@ -212,21 +223,20 @@ func Test_NextActions(t *testing.T) { }, }) a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) - clock.Inc(hundredMillis).AssertEquals(1 * hundredMillis) + clockTick().AssertEquals(duration("0.1s")) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ Permit: actions.PluginRequest.Target, Migrate: nil, ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, }) - clock.Inc(hundredMillis) + clockTick().AssertEquals(duration("0.2s")) lastMetrics := api.Metrics{ LoadAverage1Min: 0.3, LoadAverage5Min: 0.0, // unused MemoryUsageBytes: 0.0, } a.Do(state.UpdateMetrics, lastMetrics) - clock.Elapsed().AssertEquals(2 * hundredMillis) // double-check that we agree about the desired resources a.Call(state.DesiredResourcesFromMetricsOrRequestedUpscaling, clock.Now()). Equals(api.Resources{VCPU: 500, Mem: 2}, helpers.Nil[*time.Duration]()) @@ -242,7 +252,7 @@ func Test_NextActions(t *testing.T) { }) // start the request: a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) - clock.Inc(hundredMillis) + clockTick().AssertEquals(duration("0.3s")) // should have nothing more to do; waiting on plugin request to come back a.Call(updateActions).Equals(core.ActionSet{}) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ @@ -250,16 +260,13 @@ func Test_NextActions(t *testing.T) { Migrate: nil, ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, }) - clock.Elapsed().AssertEquals(3 * hundredMillis) // Scheduler approval is done, now we should be making the request to NeonVM a.Call(updateActions).Equals(core.ActionSet{ // expected to make a scheduler request every 5s; it's been 100ms since the last one, so // if the NeonVM request didn't come back in time, we'd need to get woken up to start // the next scheduler request. 
- Wait: &core.ActionWait{ - Duration: 5*time.Second - hundredMillis, - }, + Wait: &core.ActionWait{Duration: duration("4.9s")}, NeonVMRequest: &core.ActionNeonVMRequest{ Current: api.Resources{VCPU: 250, Mem: 1}, Target: api.Resources{VCPU: 500, Mem: 2}, @@ -267,20 +274,16 @@ func Test_NextActions(t *testing.T) { }) // start the request: a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) - clock.Inc(hundredMillis).AssertEquals(4 * hundredMillis) + clockTick().AssertEquals(duration("0.4s")) // should have nothing more to do; waiting on NeonVM request to come back a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{ - Duration: 5*time.Second - 2*hundredMillis, - }, + Wait: &core.ActionWait{Duration: duration("4.8s")}, }) a.Do(state.NeonVM().RequestSuccessful, clock.Now()) // NeonVM change is done, now we should finish by notifying the vm-monitor a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{ - Duration: 5*time.Second - 2*hundredMillis, // same as previous, clock hasn't changed - }, + Wait: &core.ActionWait{Duration: duration("4.8s")}, // same as previous, clock hasn't changed MonitorUpscale: &core.ActionMonitorUpscale{ Current: api.Resources{VCPU: 250, Mem: 1}, Target: api.Resources{VCPU: 500, Mem: 2}, @@ -288,26 +291,22 @@ func Test_NextActions(t *testing.T) { }) // start the request: a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), actions.MonitorUpscale.Target) - clock.Inc(hundredMillis).AssertEquals(5 * hundredMillis) + clockTick().AssertEquals(duration("0.5s")) // should have nothing more to do; waiting on vm-monitor request to come back a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{ - Duration: 5*time.Second - 3*hundredMillis, - }, + Wait: &core.ActionWait{Duration: duration("4.7s")}, }) a.Do(state.Monitor().UpscaleRequestSuccessful, clock.Now()) // And now, double-check that there's no sneaky follow-up actions before we change the // metrics a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{ - Duration: 5*time.Second - 3*hundredMillis, // same as previous, clock hasn't changed - }, + Wait: &core.ActionWait{Duration: duration("4.7s")}, // same as previous, clock hasn't changed }) // ---- Scaledown !!! 
---- - clock.Inc(hundredMillis).AssertEquals(6 * hundredMillis) + clockTick().AssertEquals(duration("0.6s")) // Set metrics back so that desired resources should now be zero lastMetrics = api.Metrics{ @@ -322,41 +321,33 @@ func Test_NextActions(t *testing.T) { // First step in downscaling is getting approval from the vm-monitor: a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{ - Duration: 5*time.Second - 4*hundredMillis, - }, + Wait: &core.ActionWait{Duration: duration("4.6s")}, MonitorDownscale: &core.ActionMonitorDownscale{ Current: api.Resources{VCPU: 500, Mem: 2}, Target: api.Resources{VCPU: 250, Mem: 1}, }, }) a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), actions.MonitorDownscale.Target) - clock.Inc(hundredMillis).AssertEquals(7 * hundredMillis) + clockTick().AssertEquals(duration("0.7s")) // should have nothing more to do; waiting on vm-monitor request to come back a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{ - Duration: 5*time.Second - 5*hundredMillis, - }, + Wait: &core.ActionWait{Duration: duration("4.5s")}, }) a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) // After getting approval from the vm-monitor, we make the request to NeonVM to carry it out a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{ - Duration: 5*time.Second - 5*hundredMillis, // same as previous, clock hasn't changed - }, + Wait: &core.ActionWait{Duration: duration("4.5s")}, // same as previous, clock hasn't changed NeonVMRequest: &core.ActionNeonVMRequest{ Current: api.Resources{VCPU: 500, Mem: 2}, Target: api.Resources{VCPU: 250, Mem: 1}, }, }) a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) - clock.Inc(hundredMillis).AssertEquals(8 * hundredMillis) + clockTick().AssertEquals(duration("0.8s")) // should have nothing more to do; waiting on NeonVM request to come back a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{ - Duration: 5*time.Second - 6*hundredMillis, - }, + Wait: &core.ActionWait{Duration: duration("4.4s")}, }) a.Do(state.NeonVM().RequestSuccessful, clock.Now()) @@ -370,7 +361,7 @@ func Test_NextActions(t *testing.T) { // shouldn't have anything to say to the other components }) a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) - clock.Inc(hundredMillis).AssertEquals(9 * hundredMillis) + clockTick().AssertEquals(duration("0.9s")) // should have nothing more to do; waiting on plugin request to come back a.Call(updateActions).Equals(core.ActionSet{}) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ @@ -381,9 +372,7 @@ func Test_NextActions(t *testing.T) { // Finally, check there's no leftover actions: a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{ - Duration: 5*time.Second - hundredMillis, // request that just finished was started 100ms ago - }, + Wait: &core.ActionWait{Duration: duration("4.9s")}, // request that just finished was started 100ms ago }) }) } From 3f0cf946bfedea18a8da3e7265ce74c5de86f34b Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 30 Sep 2023 20:48:40 -0700 Subject: [PATCH 30/59] de-indent --- pkg/agent/core/state_test.go | 350 +++++++++++++++++------------------ 1 file changed, 174 insertions(+), 176 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 935eb8980..7bee34044 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -188,191 +188,189 @@ func duration(s string) time.Duration { 
return d } -func Test_NextActions(t *testing.T) { - // Thorough checks of a relatively simple flow - t.Run("BasicScaleupAndDownFlow", func(t *testing.T) { - a := helpers.NewAssert(t) - - clock := helpers.NewFakeClock(t) - state := helpers.CreateInitialState( - DefaultInitialStateConfig, - helpers.WithStoredWarnings(a.StoredWarnings()), - ) +// Thorough checks of a relatively simple flow +func TestBasicScaleUpAndDownFlow(t *testing.T) { + a := helpers.NewAssert(t) + + clock := helpers.NewFakeClock(t) + state := helpers.CreateInitialState( + DefaultInitialStateConfig, + helpers.WithStoredWarnings(a.StoredWarnings()), + ) + + var actions core.ActionSet + updateActions := func() core.ActionSet { + actions = state.NextActions(clock.Now()) + return actions + } - var actions core.ActionSet - updateActions := func() core.ActionSet { - actions = state.NextActions(clock.Now()) - return actions - } - - clockTick := func() helpers.Elapsed { - return clock.Inc(100 * time.Millisecond) - } - - state.Plugin().NewScheduler() - state.Monitor().Active(true) - - // Send initial scheduler request: - a.WithWarnings("Can't determine desired resources because compute unit hasn't been set yet"). - Call(updateActions). - Equals(core.ActionSet{ - PluginRequest: &core.ActionPluginRequest{ - LastPermit: nil, - Target: api.Resources{VCPU: 250, Mem: 1}, - Metrics: nil, - }, - }) - a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) - clockTick().AssertEquals(duration("0.1s")) - a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ - Permit: actions.PluginRequest.Target, - Migrate: nil, - ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, - }) + clockTick := func() helpers.Elapsed { + return clock.Inc(100 * time.Millisecond) + } + + state.Plugin().NewScheduler() + state.Monitor().Active(true) - clockTick().AssertEquals(duration("0.2s")) - lastMetrics := api.Metrics{ - LoadAverage1Min: 0.3, - LoadAverage5Min: 0.0, // unused - MemoryUsageBytes: 0.0, - } - a.Do(state.UpdateMetrics, lastMetrics) - // double-check that we agree about the desired resources - a.Call(state.DesiredResourcesFromMetricsOrRequestedUpscaling, clock.Now()). - Equals(api.Resources{VCPU: 500, Mem: 2}, helpers.Nil[*time.Duration]()) - - // Now that the initial scheduler request is done, and we have metrics that indicate - // scale-up would be a good idea, we should be contacting the scheduler to get approval. - a.Call(updateActions).Equals(core.ActionSet{ + // Send initial scheduler request: + a.WithWarnings("Can't determine desired resources because compute unit hasn't been set yet"). + Call(updateActions). 
+ Equals(core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ - LastPermit: &api.Resources{VCPU: 250, Mem: 1}, - Target: api.Resources{VCPU: 500, Mem: 2}, - Metrics: &lastMetrics, + LastPermit: nil, + Target: api.Resources{VCPU: 250, Mem: 1}, + Metrics: nil, }, }) - // start the request: - a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) - clockTick().AssertEquals(duration("0.3s")) - // should have nothing more to do; waiting on plugin request to come back - a.Call(updateActions).Equals(core.ActionSet{}) - a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ - Permit: api.Resources{VCPU: 500, Mem: 2}, - Migrate: nil, - ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, - }) + a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) + clockTick().AssertEquals(duration("0.1s")) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: actions.PluginRequest.Target, + Migrate: nil, + ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, + }) - // Scheduler approval is done, now we should be making the request to NeonVM - a.Call(updateActions).Equals(core.ActionSet{ - // expected to make a scheduler request every 5s; it's been 100ms since the last one, so - // if the NeonVM request didn't come back in time, we'd need to get woken up to start - // the next scheduler request. - Wait: &core.ActionWait{Duration: duration("4.9s")}, - NeonVMRequest: &core.ActionNeonVMRequest{ - Current: api.Resources{VCPU: 250, Mem: 1}, - Target: api.Resources{VCPU: 500, Mem: 2}, - }, - }) - // start the request: - a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) - clockTick().AssertEquals(duration("0.4s")) - // should have nothing more to do; waiting on NeonVM request to come back - a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{Duration: duration("4.8s")}, - }) - a.Do(state.NeonVM().RequestSuccessful, clock.Now()) - - // NeonVM change is done, now we should finish by notifying the vm-monitor - a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{Duration: duration("4.8s")}, // same as previous, clock hasn't changed - MonitorUpscale: &core.ActionMonitorUpscale{ - Current: api.Resources{VCPU: 250, Mem: 1}, - Target: api.Resources{VCPU: 500, Mem: 2}, - }, - }) - // start the request: - a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), actions.MonitorUpscale.Target) - clockTick().AssertEquals(duration("0.5s")) - // should have nothing more to do; waiting on vm-monitor request to come back - a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{Duration: duration("4.7s")}, - }) - a.Do(state.Monitor().UpscaleRequestSuccessful, clock.Now()) + clockTick().AssertEquals(duration("0.2s")) + lastMetrics := api.Metrics{ + LoadAverage1Min: 0.3, + LoadAverage5Min: 0.0, // unused + MemoryUsageBytes: 0.0, + } + a.Do(state.UpdateMetrics, lastMetrics) + // double-check that we agree about the desired resources + a.Call(state.DesiredResourcesFromMetricsOrRequestedUpscaling, clock.Now()). + Equals(api.Resources{VCPU: 500, Mem: 2}, helpers.Nil[*time.Duration]()) + + // Now that the initial scheduler request is done, and we have metrics that indicate + // scale-up would be a good idea, we should be contacting the scheduler to get approval. 
+ a.Call(updateActions).Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: &api.Resources{VCPU: 250, Mem: 1}, + Target: api.Resources{VCPU: 500, Mem: 2}, + Metrics: &lastMetrics, + }, + }) + // start the request: + a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) + clockTick().AssertEquals(duration("0.3s")) + // should have nothing more to do; waiting on plugin request to come back + a.Call(updateActions).Equals(core.ActionSet{}) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: api.Resources{VCPU: 500, Mem: 2}, + Migrate: nil, + ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, + }) - // And now, double-check that there's no sneaky follow-up actions before we change the - // metrics - a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{Duration: duration("4.7s")}, // same as previous, clock hasn't changed - }) + // Scheduler approval is done, now we should be making the request to NeonVM + a.Call(updateActions).Equals(core.ActionSet{ + // expected to make a scheduler request every 5s; it's been 100ms since the last one, so + // if the NeonVM request didn't come back in time, we'd need to get woken up to start + // the next scheduler request. + Wait: &core.ActionWait{Duration: duration("4.9s")}, + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: api.Resources{VCPU: 250, Mem: 1}, + Target: api.Resources{VCPU: 500, Mem: 2}, + }, + }) + // start the request: + a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) + clockTick().AssertEquals(duration("0.4s")) + // should have nothing more to do; waiting on NeonVM request to come back + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.8s")}, + }) + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) + + // NeonVM change is done, now we should finish by notifying the vm-monitor + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.8s")}, // same as previous, clock hasn't changed + MonitorUpscale: &core.ActionMonitorUpscale{ + Current: api.Resources{VCPU: 250, Mem: 1}, + Target: api.Resources{VCPU: 500, Mem: 2}, + }, + }) + // start the request: + a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), actions.MonitorUpscale.Target) + clockTick().AssertEquals(duration("0.5s")) + // should have nothing more to do; waiting on vm-monitor request to come back + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.7s")}, + }) + a.Do(state.Monitor().UpscaleRequestSuccessful, clock.Now()) - // ---- Scaledown !!! ---- - - clockTick().AssertEquals(duration("0.6s")) - - // Set metrics back so that desired resources should now be zero - lastMetrics = api.Metrics{ - LoadAverage1Min: 0.0, - LoadAverage5Min: 0.0, // unused - MemoryUsageBytes: 0.0, - } - a.Do(state.UpdateMetrics, lastMetrics) - // double-check that we agree about the new desired resources - a.Call(state.DesiredResourcesFromMetricsOrRequestedUpscaling, clock.Now()). 
- Equals(api.Resources{VCPU: 250, Mem: 1}, helpers.Nil[*time.Duration]()) - - // First step in downscaling is getting approval from the vm-monitor: - a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{Duration: duration("4.6s")}, - MonitorDownscale: &core.ActionMonitorDownscale{ - Current: api.Resources{VCPU: 500, Mem: 2}, - Target: api.Resources{VCPU: 250, Mem: 1}, - }, - }) - a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), actions.MonitorDownscale.Target) - clockTick().AssertEquals(duration("0.7s")) - // should have nothing more to do; waiting on vm-monitor request to come back - a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{Duration: duration("4.5s")}, - }) - a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) - - // After getting approval from the vm-monitor, we make the request to NeonVM to carry it out - a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{Duration: duration("4.5s")}, // same as previous, clock hasn't changed - NeonVMRequest: &core.ActionNeonVMRequest{ - Current: api.Resources{VCPU: 500, Mem: 2}, - Target: api.Resources{VCPU: 250, Mem: 1}, - }, - }) - a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) - clockTick().AssertEquals(duration("0.8s")) - // should have nothing more to do; waiting on NeonVM request to come back - a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{Duration: duration("4.4s")}, - }) - a.Do(state.NeonVM().RequestSuccessful, clock.Now()) + // And now, double-check that there's no sneaky follow-up actions before we change the + // metrics + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.7s")}, // same as previous, clock hasn't changed + }) - // Request to NeonVM completed, it's time to inform the scheduler plugin: - a.Call(updateActions).Equals(core.ActionSet{ - PluginRequest: &core.ActionPluginRequest{ - LastPermit: &api.Resources{VCPU: 500, Mem: 2}, - Target: api.Resources{VCPU: 250, Mem: 1}, - Metrics: &lastMetrics, - }, - // shouldn't have anything to say to the other components - }) - a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) - clockTick().AssertEquals(duration("0.9s")) - // should have nothing more to do; waiting on plugin request to come back - a.Call(updateActions).Equals(core.ActionSet{}) - a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ - Permit: api.Resources{VCPU: 250, Mem: 1}, - Migrate: nil, - ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, - }) + // ---- Scaledown !!! ---- - // Finally, check there's no leftover actions: - a.Call(updateActions).Equals(core.ActionSet{ - Wait: &core.ActionWait{Duration: duration("4.9s")}, // request that just finished was started 100ms ago - }) + clockTick().AssertEquals(duration("0.6s")) + + // Set metrics back so that desired resources should now be zero + lastMetrics = api.Metrics{ + LoadAverage1Min: 0.0, + LoadAverage5Min: 0.0, // unused + MemoryUsageBytes: 0.0, + } + a.Do(state.UpdateMetrics, lastMetrics) + // double-check that we agree about the new desired resources + a.Call(state.DesiredResourcesFromMetricsOrRequestedUpscaling, clock.Now()). 
+ Equals(api.Resources{VCPU: 250, Mem: 1}, helpers.Nil[*time.Duration]()) + + // First step in downscaling is getting approval from the vm-monitor: + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.6s")}, + MonitorDownscale: &core.ActionMonitorDownscale{ + Current: api.Resources{VCPU: 500, Mem: 2}, + Target: api.Resources{VCPU: 250, Mem: 1}, + }, + }) + a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), actions.MonitorDownscale.Target) + clockTick().AssertEquals(duration("0.7s")) + // should have nothing more to do; waiting on vm-monitor request to come back + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.5s")}, + }) + a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) + + // After getting approval from the vm-monitor, we make the request to NeonVM to carry it out + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.5s")}, // same as previous, clock hasn't changed + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: api.Resources{VCPU: 500, Mem: 2}, + Target: api.Resources{VCPU: 250, Mem: 1}, + }, + }) + a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) + clockTick().AssertEquals(duration("0.8s")) + // should have nothing more to do; waiting on NeonVM request to come back + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.4s")}, + }) + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) + + // Request to NeonVM completed, it's time to inform the scheduler plugin: + a.Call(updateActions).Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: &api.Resources{VCPU: 500, Mem: 2}, + Target: api.Resources{VCPU: 250, Mem: 1}, + Metrics: &lastMetrics, + }, + // shouldn't have anything to say to the other components + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) + clockTick().AssertEquals(duration("0.9s")) + // should have nothing more to do; waiting on plugin request to come back + a.Call(updateActions).Equals(core.ActionSet{}) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: api.Resources{VCPU: 250, Mem: 1}, + Migrate: nil, + ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, + }) + + // Finally, check there's no leftover actions: + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.9s")}, // request that just finished was started 100ms ago }) } From 9fbc4e08725ce3ff669a6cf0ed932be3fb10d291 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 1 Oct 2023 17:24:29 -0700 Subject: [PATCH 31/59] add more tests! 
(and fix the bugs they caught) --- pkg/agent/core/state.go | 95 +++-- pkg/agent/core/state_test.go | 441 +++++++++++++++++++++--- pkg/agent/core/testhelpers/assert.go | 4 +- pkg/agent/core/testhelpers/construct.go | 48 ++- 4 files changed, 502 insertions(+), 86 deletions(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index de3d367dc..b681d0ac3 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -200,7 +200,11 @@ func NewState(vm api.VmInfo, config Config) *State { func (s *State) NextActions(now time.Time) ActionSet { var actions ActionSet - desiredResources, desiredResourcesRequiredWait := s.DesiredResourcesFromMetricsOrRequestedUpscaling(now) + desiredResources, calcDesiredResourcesWait := s.DesiredResourcesFromMetricsOrRequestedUpscaling(now) + if calcDesiredResourcesWait == nil { + // our handling later on is easier if we can assume it's non-nil + calcDesiredResourcesWait = func(ActionSet) *time.Duration { return nil } + } // ---- // Requests to the scheduler plugin: @@ -245,7 +249,7 @@ func (s *State) NextActions(now time.Time) ActionSet { requiredWait := maximumDuration requiredWaits := []*time.Duration{ - desiredResourcesRequiredWait, + calcDesiredResourcesWait(actions), pluginRequiredWait, monitorUpscaleRequiredWait, monitorDownscaleRequiredWait, @@ -277,8 +281,8 @@ func (s *State) calculatePluginAction( requestResources := s.clampResources( s.vm.Using(), desiredResources, - s.vm.Using(), // don't decrease below VM using (decrease happens *before* telling the plugin) - desiredResources, // but any increase is ok + ptr(s.vm.Using()), // don't decrease below VM using (decrease happens *before* telling the plugin) + nil, // but any increase is ok ) // resources if we're just informing the plugin of current resource usage. 
currentResources := s.vm.Using() @@ -359,6 +363,8 @@ func (s *State) calculatePluginAction( } } +func ptr[T any](t T) *T { return &t } + func (s *State) calculateNeonVMAction( now time.Time, desiredResources api.Resources, @@ -367,10 +373,10 @@ func (s *State) calculateNeonVMAction( ) *ActionNeonVMRequest { // clamp desiredResources to what we're allowed to make a request for desiredResources = s.clampResources( - s.vm.Using(), // current: what we're using already - desiredResources, // target: desired resources - desiredResources.Max(s.monitorApprovedLowerBound()), // lower bound: downscaling that the monitor has approved - desiredResources.Min(s.pluginApprovedUpperBound()), // upper bound: upscaling that the plugin has approved + s.vm.Using(), // current: what we're using already + desiredResources, // target: desired resources + ptr(s.monitorApprovedLowerBound()), // lower bound: downscaling that the monitor has approved + ptr(s.pluginApprovedUpperBound()), // upper bound: upscaling that the plugin has approved ) // If we're already using the desired resources, then no need to make a request @@ -395,7 +401,7 @@ func (s *State) calculateNeonVMAction( } if len(reqs) != 0 { - s.config.Warn("Wanted to make a request to NeonVM API, but there's already %s ongoing", strings.Join(reqs, " and ")) + s.config.Warn("Wanted to make a request to NeonVM API, but there's already %s", strings.Join(reqs, " and ")) } return nil @@ -412,10 +418,10 @@ func (s *State) calculateMonitorUpscaleAction( } requestResources := s.clampResources( - *s.monitor.approved, // current: last resources we got the OK from the monitor on - s.vm.Using(), // target: what the VM is currently using - *s.monitor.approved, // don't decrease below what the monitor is currently set to (this is an *upscale* request) - desiredResources, // don't increase above desired resources + *s.monitor.approved, // current: last resources we got the OK from the monitor on + s.vm.Using(), // target: what the VM is currently using + ptr(*s.monitor.approved), // don't decrease below what the monitor is currently set to (this is an *upscale* request) + ptr(desiredResources.Max(*s.monitor.approved)), // don't increase above desired resources ) // Check validity of the request that we would send, before sending it @@ -477,10 +483,10 @@ func (s *State) calculateMonitorDownscaleAction( } requestResources := s.clampResources( - *s.monitor.approved, // current: what the monitor is already aware of - desiredResources, // target: what we'd like the VM to be using - desiredResources, // lower bound: any decrease is fine - *s.monitor.approved, // upper bound: don't increase (this is only downscaling!) + *s.monitor.approved, // current: what the monitor is already aware of + desiredResources, // target: what we'd like the VM to be using + nil, // lower bound: any decrease is fine + ptr(*s.monitor.approved), // upper bound: don't increase (this is only downscaling!) 
) // Check validity of the request that we would send, before sending it @@ -517,7 +523,7 @@ func (s *State) calculateMonitorDownscaleAction( } // Can't make another request if we failed too recently: - if s.monitor.upscaleFailureAt != nil { + if s.monitor.downscaleFailureAt != nil { timeUntilFailureBackoffExpires := now.Sub(*s.monitor.downscaleFailureAt) if timeUntilFailureBackoffExpires > 0 { s.config.Warn("Wanted to send vm-monitor downscale request but failed too recently") @@ -528,7 +534,7 @@ func (s *State) calculateMonitorDownscaleAction( // Can't make another request if a recent request for resources less than or equal to the // proposed request was denied. In general though, this should be handled by // DesiredResourcesFromMetricsOrRequestedUpscaling, so it's we're better off panicking here. - if s.monitor.deniedDownscale != nil && !s.monitor.deniedDownscale.requested.HasFieldLessThan(requestResources) { + if s.timeUntilDeniedDownscaleExpired(now) > 0 && !s.monitor.deniedDownscale.requested.HasFieldLessThan(requestResources) { panic(errors.New( "Wanted to send vm-monitor downscale request, but too soon after previously denied downscaling that should have been handled earlier", )) @@ -549,7 +555,7 @@ func (s *State) scalingConfig() api.ScalingConfig { } } -func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) (api.Resources, *time.Duration) { +func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) (api.Resources, func(ActionSet) *time.Duration) { // There's some annoying edge cases that this function has to be able to handle properly. For // the sake of completeness, they are: // @@ -667,12 +673,15 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) ( )) } - var waitTime *time.Duration - if deniedDownscaleAffectedResult { - waitTime = &timeUntilDeniedDownscaleExpired + calculateWaitTime := func(actions ActionSet) *time.Duration { + if deniedDownscaleAffectedResult && actions.MonitorDownscale == nil && s.monitor.ongoingRequest == nil { + return &timeUntilDeniedDownscaleExpired + } else { + return nil + } } - return result, waitTime + return result, calculateWaitTime } // NB: we could just use s.plugin.computeUnit, but that's sometimes nil. 
This way, it's clear that @@ -733,25 +742,36 @@ func (s *State) minRequiredResourcesForDeniedDownscale(computeUnit api.Resources func (s *State) clampResources( current api.Resources, desired api.Resources, - lowerBound api.Resources, - upperBound api.Resources, + lowerBound *api.Resources, + upperBound *api.Resources, ) api.Resources { - var cpu vmapi.MilliCPU - if desired.VCPU < current.VCPU { + // Internal validity checks: + if lowerBound != nil && lowerBound.HasFieldGreaterThan(current) { + panic(fmt.Errorf( + "clampResources called with invalid arguments: lowerBound=%+v has field greater than current=%+v", + lowerBound, + current, + )) + } else if upperBound != nil && upperBound.HasFieldLessThan(current) { + panic(fmt.Errorf( + "clampResources called with invalid arguments: upperBound=%+v has field less than current=%+v", + upperBound, + current, + )) + } + + cpu := desired.VCPU + if desired.VCPU < current.VCPU && lowerBound != nil { cpu = util.Max(desired.VCPU, lowerBound.VCPU) - } else if desired.VCPU > current.VCPU { + } else if desired.VCPU > current.VCPU && upperBound != nil { cpu = util.Min(desired.VCPU, upperBound.VCPU) - } else { - cpu = current.VCPU } - var mem uint16 - if desired.Mem < current.Mem { + mem := desired.Mem + if desired.Mem < current.Mem && lowerBound != nil { mem = util.Max(desired.Mem, lowerBound.Mem) - } else if desired.Mem > current.Mem { + } else if desired.Mem > current.Mem && upperBound != nil { mem = util.Min(desired.Mem, upperBound.Mem) - } else { - mem = current.Mem } return api.Resources{VCPU: cpu, Mem: mem} @@ -896,7 +916,7 @@ func (h MonitorHandle) Active(active bool) { func (h MonitorHandle) UpscaleRequested(now time.Time, resources api.MoreResources) { h.s.monitor.requestedUpscale = &requestedUpscale{ at: now, - base: h.s.vm.Using(), // TODO: this is racy (maybe the resources were different when the monitor originally made the request) + base: *h.s.monitor.approved, requested: resources, } } @@ -930,7 +950,6 @@ func (h MonitorHandle) StartingDownscaleRequest(now time.Time, resources api.Res func (h MonitorHandle) DownscaleRequestAllowed(now time.Time) { h.s.monitor.approved = &h.s.monitor.ongoingRequest.requested h.s.monitor.ongoingRequest = nil - h.s.monitor.deniedDownscale = nil } // Downscale request was successful but the monitor denied our request. diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 7bee34044..39a8afbbb 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -160,8 +160,10 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { } } +var DefaultComputeUnit = api.Resources{VCPU: 250, Mem: 1} + var DefaultInitialStateConfig = helpers.InitialStateConfig{ - ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, + ComputeUnit: DefaultComputeUnit, MemorySlotSize: resource.MustParse("1Gi"), MinCU: 1, @@ -179,6 +181,38 @@ var DefaultInitialStateConfig = helpers.InitialStateConfig{ }, } +func getDesiredResources(state *core.State, now time.Time) api.Resources { + res, _ := state.DesiredResourcesFromMetricsOrRequestedUpscaling(now) + return res +} + +func doInitialPluginRequest( + a helpers.Assert, + state *core.State, + clock *helpers.FakeClock, + requestTime time.Duration, + computeUnit api.Resources, + metrics *api.Metrics, + resources api.Resources, +) { + a.WithWarnings("Can't determine desired resources because compute unit hasn't been set yet"). + Call(state.NextActions, clock.Now()). 
+ Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: nil, + Target: resources, + Metrics: metrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resources) + clock.Inc(requestTime) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resources, + Migrate: nil, + ComputeUnit: computeUnit, + }) +} + // helper function to parse a duration func duration(s string) time.Duration { d, err := time.ParseDuration(s) @@ -188,47 +222,39 @@ func duration(s string) time.Duration { return d } -// Thorough checks of a relatively simple flow +func ptr[T any](t T) *T { + return &t +} + +// Thorough checks of a relatively simple flow - scaling from 1 CU to 2 CU and back down. func TestBasicScaleUpAndDownFlow(t *testing.T) { a := helpers.NewAssert(t) - clock := helpers.NewFakeClock(t) + clockTick := func() helpers.Elapsed { + return clock.Inc(100 * time.Millisecond) + } + resForCU := func(cu uint16) api.Resources { + return DefaultComputeUnit.Mul(cu) + } + state := helpers.CreateInitialState( DefaultInitialStateConfig, helpers.WithStoredWarnings(a.StoredWarnings()), + helpers.WithTestingLogfWarnings(t), ) - var actions core.ActionSet updateActions := func() core.ActionSet { actions = state.NextActions(clock.Now()) return actions } - clockTick := func() helpers.Elapsed { - return clock.Inc(100 * time.Millisecond) - } - state.Plugin().NewScheduler() state.Monitor().Active(true) // Send initial scheduler request: - a.WithWarnings("Can't determine desired resources because compute unit hasn't been set yet"). - Call(updateActions). - Equals(core.ActionSet{ - PluginRequest: &core.ActionPluginRequest{ - LastPermit: nil, - Target: api.Resources{VCPU: 250, Mem: 1}, - Metrics: nil, - }, - }) - a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) - clockTick().AssertEquals(duration("0.1s")) - a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ - Permit: actions.PluginRequest.Target, - Migrate: nil, - ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, - }) + doInitialPluginRequest(a, state, clock, duration("0.1s"), DefaultComputeUnit, nil, resForCU(1)) + // Set metrics clockTick().AssertEquals(duration("0.2s")) lastMetrics := api.Metrics{ LoadAverage1Min: 0.3, @@ -237,15 +263,15 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { } a.Do(state.UpdateMetrics, lastMetrics) // double-check that we agree about the desired resources - a.Call(state.DesiredResourcesFromMetricsOrRequestedUpscaling, clock.Now()). - Equals(api.Resources{VCPU: 500, Mem: 2}, helpers.Nil[*time.Duration]()) + a.Call(getDesiredResources, state, clock.Now()). + Equals(resForCU(2)) // Now that the initial scheduler request is done, and we have metrics that indicate // scale-up would be a good idea, we should be contacting the scheduler to get approval. 
a.Call(updateActions).Equals(core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ - LastPermit: &api.Resources{VCPU: 250, Mem: 1}, - Target: api.Resources{VCPU: 500, Mem: 2}, + LastPermit: ptr(resForCU(1)), + Target: resForCU(2), Metrics: &lastMetrics, }, }) @@ -255,9 +281,9 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { // should have nothing more to do; waiting on plugin request to come back a.Call(updateActions).Equals(core.ActionSet{}) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ - Permit: api.Resources{VCPU: 500, Mem: 2}, + Permit: resForCU(2), Migrate: nil, - ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, + ComputeUnit: DefaultComputeUnit, }) // Scheduler approval is done, now we should be making the request to NeonVM @@ -267,8 +293,8 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { // the next scheduler request. Wait: &core.ActionWait{Duration: duration("4.9s")}, NeonVMRequest: &core.ActionNeonVMRequest{ - Current: api.Resources{VCPU: 250, Mem: 1}, - Target: api.Resources{VCPU: 500, Mem: 2}, + Current: resForCU(1), + Target: resForCU(2), }, }) // start the request: @@ -284,8 +310,8 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.8s")}, // same as previous, clock hasn't changed MonitorUpscale: &core.ActionMonitorUpscale{ - Current: api.Resources{VCPU: 250, Mem: 1}, - Target: api.Resources{VCPU: 500, Mem: 2}, + Current: resForCU(1), + Target: resForCU(2), }, }) // start the request: @@ -315,15 +341,15 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { } a.Do(state.UpdateMetrics, lastMetrics) // double-check that we agree about the new desired resources - a.Call(state.DesiredResourcesFromMetricsOrRequestedUpscaling, clock.Now()). - Equals(api.Resources{VCPU: 250, Mem: 1}, helpers.Nil[*time.Duration]()) + a.Call(getDesiredResources, state, clock.Now()). 
+ Equals(resForCU(1)) // First step in downscaling is getting approval from the vm-monitor: a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.6s")}, MonitorDownscale: &core.ActionMonitorDownscale{ - Current: api.Resources{VCPU: 500, Mem: 2}, - Target: api.Resources{VCPU: 250, Mem: 1}, + Current: resForCU(2), + Target: resForCU(1), }, }) a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), actions.MonitorDownscale.Target) @@ -338,8 +364,8 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { a.Call(updateActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.5s")}, // same as previous, clock hasn't changed NeonVMRequest: &core.ActionNeonVMRequest{ - Current: api.Resources{VCPU: 500, Mem: 2}, - Target: api.Resources{VCPU: 250, Mem: 1}, + Current: resForCU(2), + Target: resForCU(1), }, }) a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) @@ -353,8 +379,8 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { // Request to NeonVM completed, it's time to inform the scheduler plugin: a.Call(updateActions).Equals(core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ - LastPermit: &api.Resources{VCPU: 500, Mem: 2}, - Target: api.Resources{VCPU: 250, Mem: 1}, + LastPermit: ptr(resForCU(2)), + Target: resForCU(1), Metrics: &lastMetrics, }, // shouldn't have anything to say to the other components @@ -364,9 +390,9 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { // should have nothing more to do; waiting on plugin request to come back a.Call(updateActions).Equals(core.ActionSet{}) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ - Permit: api.Resources{VCPU: 250, Mem: 1}, + Permit: resForCU(1), Migrate: nil, - ComputeUnit: api.Resources{VCPU: 250, Mem: 1}, + ComputeUnit: DefaultComputeUnit, }) // Finally, check there's no leftover actions: @@ -374,3 +400,330 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { Wait: &core.ActionWait{Duration: duration("4.9s")}, // request that just finished was started 100ms ago }) } + +// Test that in a stable state, requests to the plugin happen exactly every Config.PluginRequestTick +func TestPeriodicPluginRequest(t *testing.T) { + a := helpers.NewAssert(t) + clock := helpers.NewFakeClock(t) + + state := helpers.CreateInitialState( + DefaultInitialStateConfig, + helpers.WithStoredWarnings(a.StoredWarnings()), + ) + + state.Plugin().NewScheduler() + state.Monitor().Active(true) + + metrics := api.Metrics{ + LoadAverage1Min: 0.0, + LoadAverage5Min: 0.0, + MemoryUsageBytes: 0.0, + } + resources := DefaultComputeUnit + + a.Do(state.UpdateMetrics, metrics) + + base := duration("0s") + clock.Elapsed().AssertEquals(base) + + clockTick := duration("100ms") + reqDuration := duration("50ms") + reqEvery := DefaultInitialStateConfig.Core.PluginRequestTick + endTime := duration("20s") + + doInitialPluginRequest(a, state, clock, clockTick, DefaultComputeUnit, &metrics, resources) + + for clock.Elapsed().Duration < endTime { + timeSinceScheduledRequest := (clock.Elapsed().Duration - base) % reqEvery + + if timeSinceScheduledRequest != 0 { + timeUntilNextRequest := reqEvery - timeSinceScheduledRequest + a.Call(state.NextActions, clock.Now()).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: timeUntilNextRequest}, + }) + clock.Inc(clockTick) + } else { + a.Call(state.NextActions, clock.Now()).Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: &resources, + Target: resources, + Metrics: &metrics, + }, + }) + 
a.Do(state.Plugin().StartingRequest, clock.Now(), resources) + a.Call(state.NextActions, clock.Now()).Equals(core.ActionSet{}) + clock.Inc(reqDuration) + a.Call(state.NextActions, clock.Now()).Equals(core.ActionSet{}) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resources, + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + clock.Inc(clockTick - reqDuration) + } + } +} + +// Checks that when downscaling is denied, we both (a) try again with higher resources, or (b) wait +// to retry if there aren't higher resources to try with. +func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { + a := helpers.NewAssert(t) + clock := helpers.NewFakeClock(t) + clockTickDuration := duration("0.1s") + clockTick := func() helpers.Elapsed { + return clock.Inc(clockTickDuration) + } + resForCU := func(cu uint16) api.Resources { + return DefaultComputeUnit.Mul(cu) + } + + state := helpers.CreateInitialState( + DefaultInitialStateConfig, + helpers.WithStoredWarnings(a.StoredWarnings()), + helpers.WithMinMaxCU(1, 8), + helpers.WithInitialCU(6), // NOTE: Start at 6 CU, so we're trying to scale down immediately. + helpers.WithConfigSetting(func(c *core.Config) { + // values close to the default, so request timing works out a little better. + c.PluginRequestTick = duration("6s") + c.MonitorDeniedDownscaleCooldown = duration("4s") + }), + ) + + var actions core.ActionSet + updateActions := func() core.ActionSet { + actions = state.NextActions(clock.Now()) + return actions + } + + state.Plugin().NewScheduler() + state.Monitor().Active(true) + + doInitialPluginRequest(a, state, clock, duration("0.1s"), DefaultComputeUnit, nil, resForCU(6)) + + // Set metrics + clockTick().AssertEquals(duration("0.2s")) + metrics := api.Metrics{ + LoadAverage1Min: 0.0, + LoadAverage5Min: 0.0, + MemoryUsageBytes: 0.0, + } + a.Do(state.UpdateMetrics, metrics) + // double-check that we agree about the desired resources + a.Call(getDesiredResources, state, clock.Now()). + Equals(resForCU(1)) + + // Broadly the idea here is that we should be trying to request downscaling from the vm-monitor, + // and retrying with progressively higher values until either we get approved, or we run out of + // options, at which point we should wait until later to re-request downscaling. + // + // This behavior results in linear retry passes. + // + // For this test, we: + // 1. Deny all requests in the first pass + // 2. Approve only down to 3 CU on the second pass + // a. triggers NeonVM request + // b. triggers plugin request + // 3. Deny all requests in the third pass (i.e. stay at 3 CU) + // 4. Approve down to 1 CU on the fourth pass + // a. triggers NeonVM request + // b. triggers plugin request + // + // ---- + // + // First pass: deny all requests. Should try from 1 to 5 CU. 
+ clock.Elapsed().AssertEquals(duration("0.2s")) + currentPluginWait := duration("5.8s") + for cu := uint16(1); cu <= 5; cu += 1 { + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: currentPluginWait}, + MonitorDownscale: &core.ActionMonitorDownscale{ + Current: resForCU(6), + Target: resForCU(cu), + }, + }) + // Do the request: + a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(cu)) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: currentPluginWait}, + }) + clockTick() + currentPluginWait -= clockTickDuration + a.Do(state.Monitor().DownscaleRequestDenied, clock.Now()) + } + // At the end, we should be waiting to retry downscaling: + a.Call(updateActions).Equals(core.ActionSet{ + // Taken from DefaultInitialStateConfig.Core.MonitorDeniedDownscaleCooldown + Wait: &core.ActionWait{Duration: duration("4s")}, + }) + + clock.Inc(duration("4s")).AssertEquals(duration("4.7s")) + currentPluginWait = duration("1.3s") + + // Second pass: Approve only down to 3 CU, then NeonVM & plugin requests. + for cu := uint16(1); cu <= 3; cu += 1 { + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: currentPluginWait}, + MonitorDownscale: &core.ActionMonitorDownscale{ + Current: resForCU(6), + Target: resForCU(cu), + }, + }) + a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(cu)) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: currentPluginWait}, + }) + clockTick() + currentPluginWait -= clockTickDuration + if cu < 3 /* deny up to 3 */ { + a.Do(state.Monitor().DownscaleRequestDenied, clock.Now()) + } else { + a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) + } + } + // At this point, waiting 3.9s for next attempt to downscale below 3 CU (last request was + // successful, but the one before it wasn't), and 1s for plugin tick. + // Also, because downscaling was approved, we should want to make a NeonVM request to do that. + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("1s")}, + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: resForCU(6), + Target: resForCU(3), + }, + }) + // Make the request: + a.Do(state.NeonVM().StartingRequest, time.Now(), resForCU(3)) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("1s")}, + }) + clockTick().AssertEquals(duration("5.1s")) + a.Do(state.NeonVM().RequestSuccessful, time.Now()) + // Successfully scaled down, so we should now inform the plugin. But also, we'll want to retry + // the downscale request to vm-monitor once the retry is up: + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("3.8s")}, + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(6)), + Target: resForCU(3), + Metrics: &metrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(3)) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("3.8s")}, + }) + clockTick().AssertEquals(duration("5.2s")) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resForCU(3), + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + // ... 
And *now* there's nothing left to do but wait until downscale wait expires: + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("3.7s")}, + }) + + // so, wait for that: + clock.Inc(duration("3.7s")).AssertEquals(duration("8.9s")) + + // Third pass: deny all requests. + currentPluginWait = duration("2.2s") + for cu := uint16(1); cu < 3; cu += 1 { + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: currentPluginWait}, + MonitorDownscale: &core.ActionMonitorDownscale{ + Current: resForCU(3), + Target: resForCU(cu), + }, + }) + a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(cu)) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: currentPluginWait}, + }) + clockTick() + currentPluginWait -= clockTickDuration + a.Do(state.Monitor().DownscaleRequestDenied, clock.Now()) + } + clock.Elapsed().AssertEquals(duration("9.1s")) + // At the end, we should be waiting to retry downscaling (but actually, the regular plugin + // request is coming up sooner). + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("2s")}, + }) + // ... so, wait for that plugin request/response, and then wait to retry downscaling: + clock.Inc(duration("2s")).AssertEquals(duration("11.1s")) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("2s")}, // still want to retry vm-monitor downscaling + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(3)), + Target: resForCU(3), + Metrics: &metrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(3)) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("2s")}, // still waiting on retrying vm-monitor downscaling + }) + clockTick().AssertEquals(duration("11.2s")) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resForCU(3), + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("1.9s")}, // yep, still waiting on retrying vm-monitor downscaling + }) + + clock.Inc(duration("2s")).AssertEquals(duration("13.2s")) + + // Fourth pass: approve down to 1 CU + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("3.9s")}, // waiting for plugin request tick + MonitorDownscale: &core.ActionMonitorDownscale{ + Current: resForCU(3), + Target: resForCU(1), + }, + }) + a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(1)) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("3.9s")}, // still waiting on plugin + }) + clockTick().AssertEquals(duration("13.3s")) + a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) + // Still waiting on plugin request tick, but we can make a NeonVM request to enact the + // downscaling right away ! + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("3.8s")}, + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: resForCU(3), + Target: resForCU(1), + }, + }) + a.Do(state.NeonVM().StartingRequest, time.Now(), resForCU(1)) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("3.8s")}, // yep, still waiting on the plugin + }) + clockTick().AssertEquals(duration("13.4s")) + a.Do(state.NeonVM().RequestSuccessful, time.Now()) + // Successfully downscaled, so now we should inform the plugin. 
Not waiting on any retries. + a.Call(updateActions).Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(3)), + Target: resForCU(1), + Metrics: &metrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(1)) + a.Call(updateActions).Equals(core.ActionSet{ + // not waiting on anything! + }) + clockTick().AssertEquals(duration("13.5s")) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resForCU(1), + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + // And now there's truly nothing left to do. Back to waiting on plugin request tick :) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("5.9s")}, + }) +} diff --git a/pkg/agent/core/testhelpers/assert.go b/pkg/agent/core/testhelpers/assert.go index f14c4c1a9..bb57a823e 100644 --- a/pkg/agent/core/testhelpers/assert.go +++ b/pkg/agent/core/testhelpers/assert.go @@ -26,7 +26,7 @@ func NewAssert(t *testing.T) Assert { t: t, storedWarnings: &[]string{}, tinfo: transactionInfo{ - expectedWarnings: nil, + expectedWarnings: []string{}, }, } } @@ -144,5 +144,5 @@ func (f PreparedFunctionCall) Equals(expected ...any) { if f.a.t.Failed() { f.a.t.FailNow() } - *f.a.storedWarnings = nil + *f.a.storedWarnings = []string{} } diff --git a/pkg/agent/core/testhelpers/construct.go b/pkg/agent/core/testhelpers/construct.go index 6eab8b58f..4c66f822e 100644 --- a/pkg/agent/core/testhelpers/construct.go +++ b/pkg/agent/core/testhelpers/construct.go @@ -2,6 +2,7 @@ package testhelpers import ( "fmt" + "testing" "k8s.io/apimachinery/pkg/api/resource" @@ -23,7 +24,7 @@ type InitialStateConfig struct { type InitialStateOpt struct { preCreate func(*InitialStateConfig) - postCreate func(*api.VmInfo) + postCreate func(InitialStateConfig, *api.VmInfo) } func CreateInitialState(config InitialStateConfig, opts ...InitialStateOpt) *core.State { @@ -54,7 +55,7 @@ func CreateInitialState(config InitialStateConfig, opts ...InitialStateOpt) *cor for _, o := range opts { if o.postCreate != nil { - o.postCreate(&vm) + o.postCreate(config, &vm) } } @@ -65,9 +66,52 @@ func WithStoredWarnings(warnings *[]string) InitialStateOpt { return InitialStateOpt{ postCreate: nil, preCreate: func(c *InitialStateConfig) { + warn := c.Core.Warn c.Core.Warn = func(format string, args ...any) { *warnings = append(*warnings, fmt.Sprintf(format, args...)) + warn(format, args...) } }, } } + +func WithTestingLogfWarnings(t *testing.T) InitialStateOpt { + return InitialStateOpt{ + postCreate: nil, + preCreate: func(c *InitialStateConfig) { + warn := c.Core.Warn + c.Core.Warn = func(format string, args ...any) { + t.Logf(format, args...) + warn(format, args...) 
+ } + }, + } +} + +func WithMinMaxCU(minCU, maxCU uint16) InitialStateOpt { + return InitialStateOpt{ + preCreate: func(c *InitialStateConfig) { + c.MinCU = minCU + c.MaxCU = maxCU + }, + postCreate: nil, + } +} + +func WithInitialCU(cu uint16) InitialStateOpt { + return InitialStateOpt{ + preCreate: nil, + postCreate: func(c InitialStateConfig, vm *api.VmInfo) { + vm.SetUsing(c.ComputeUnit.Mul(cu)) + }, + } +} + +func WithConfigSetting(f func(*core.Config)) InitialStateOpt { + return InitialStateOpt{ + preCreate: func(c *InitialStateConfig) { + f(&c.Core) + }, + postCreate: nil, + } +} From 1d04b29e6faf2b580a0ecf3c23d72f5782577f71 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 1 Oct 2023 17:43:36 -0700 Subject: [PATCH 32/59] fix lints --- pkg/agent/core/state_test.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 39a8afbbb..5b6206c78 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -233,9 +233,7 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { clockTick := func() helpers.Elapsed { return clock.Inc(100 * time.Millisecond) } - resForCU := func(cu uint16) api.Resources { - return DefaultComputeUnit.Mul(cu) - } + resForCU := DefaultComputeUnit.Mul state := helpers.CreateInitialState( DefaultInitialStateConfig, @@ -473,9 +471,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { clockTick := func() helpers.Elapsed { return clock.Inc(clockTickDuration) } - resForCU := func(cu uint16) api.Resources { - return DefaultComputeUnit.Mul(cu) - } + resForCU := DefaultComputeUnit.Mul state := helpers.CreateInitialState( DefaultInitialStateConfig, From e0463d1255136d0af6765594ac9e3150a827a78c Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 1 Oct 2023 18:56:53 -0700 Subject: [PATCH 33/59] agent/executor: fix TOCTOU issues with new updateIfActionsUnchanged --- pkg/agent/executor/core.go | 43 +++++++++++++---- pkg/agent/executor/exec_monitor.go | 74 ++++++++++++------------------ pkg/agent/executor/exec_neonvm.go | 27 ++++------- pkg/agent/executor/exec_plugin.go | 32 +++++-------- 4 files changed, 87 insertions(+), 89 deletions(-) diff --git a/pkg/agent/executor/core.go b/pkg/agent/executor/core.go index 8b60dcee7..93cf5512b 100644 --- a/pkg/agent/executor/core.go +++ b/pkg/agent/executor/core.go @@ -24,8 +24,10 @@ type ExecutorCore struct { stateLogger *zap.Logger - core *core.State - actions *timedActions + core *core.State + + actions *timedActions + lastActionsID timedActionsID updates *util.Broadcaster } @@ -38,11 +40,12 @@ type ClientSet struct { func NewExecutorCore(stateLogger *zap.Logger, vm api.VmInfo, config core.Config) *ExecutorCore { return &ExecutorCore{ - mu: sync.Mutex{}, - stateLogger: stateLogger, - core: core.NewState(vm, config), - actions: nil, // (*ExecutorCore).getActions() checks if this is nil - updates: util.NewBroadcaster(), + mu: sync.Mutex{}, + stateLogger: stateLogger, + core: core.NewState(vm, config), + actions: nil, // (*ExecutorCore).getActions() checks if this is nil + lastActionsID: -1, + updates: util.NewBroadcaster(), } } @@ -59,7 +62,10 @@ func (c *ExecutorCore) WithClients(clients ClientSet) ExecutorCoreWithClients { } } +type timedActionsID int64 + type timedActions struct { + id timedActionsID calculatedAt time.Time actions core.ActionSet } @@ -69,10 +75,13 @@ func (c *ExecutorCore) getActions() timedActions { defer c.mu.Unlock() if c.actions == nil { + id := c.lastActionsID + 1 + // NOTE: Even though we cache the 
actions generated using time.Now(), it's *generally* ok. now := time.Now() c.stateLogger.Info("Recalculating ActionSet", zap.Time("now", now), zap.Any("state", c.core.Dump())) - c.actions = &timedActions{calculatedAt: now, actions: c.core.NextActions(now)} + c.actions = &timedActions{id: id, calculatedAt: now, actions: c.core.NextActions(now)} + c.lastActionsID = id c.stateLogger.Info("New ActionSet", zap.Time("now", now), zap.Any("actions", c.actions.actions)) } @@ -90,6 +99,24 @@ func (c *ExecutorCore) update(with func(*core.State)) { with(c.core) } +// updateIfActionsUnchanged is like update, but if the actions have been changed, then the function +// is not called and this returns false. +// +// Otherwise, if the actions are up-to-date, then this is equivalent to c.update(with), and returns true. +func (c *ExecutorCore) updateIfActionsUnchanged(actions timedActions, with func(*core.State)) (updated bool) { + c.mu.Lock() + defer c.mu.Unlock() + + if actions.id != c.lastActionsID { + return false + } + + c.updates.Broadcast() + c.actions = nil + with(c.core) + return true +} + // may change in the future type StateDump = core.StateDump diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 3ddf3c3fe..34c4b59f1 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -38,51 +38,44 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge } } - last := c.getActions() for { - // Always receive an update if there is one. This helps with reliability (better guarantees - // about not missing updates) and means that the switch statements can be simpler. + // Wait until the state's changed, or we're done. select { + case <-ctx.Done(): + return case <-updates.Wait(): - updates.Awake() - last = c.getActions() - default: } - // Wait until we're supposed to make a request. + last := c.getActions() if last.actions.MonitorDownscale == nil { - select { - case <-ctx.Done(): - return - case <-updates.Wait(): - // NB: don't .Awake(); allow that to be handled at the top of the loop. - continue - } + continue // nothing to do; wait until the state changes. } + var startTime time.Time + var monitorIface MonitorHandle action := *last.actions.MonitorDownscale - monitor := c.clients.Monitor.GetHandle() - - var startTime time.Time - c.update(func(state *core.State) { + if updated := c.updateIfActionsUnchanged(last, func(state *core.State) { logger.Info("Starting vm-monitor downscale request", zap.Any("action", action)) startTime = time.Now() + monitorIface = c.clients.Monitor.GetHandle() state.Monitor().StartingDownscaleRequest(startTime, action.Target) - }) + }); !updated { + continue // state has changed, retry. + } var result *api.DownscaleResult var err error - if monitor != nil { - result, err = monitor.Downscale(ctx, ifaceLogger, action.Current, action.Target) + if monitorIface != nil { + result, err = monitorIface.Downscale(ctx, ifaceLogger, action.Current, action.Target) } else { err = errors.New("No currently active vm-monitor connection") } endTime := time.Now() c.update(func(state *core.State) { - unchanged := idUnchanged(monitor.ID()) + unchanged := idUnchanged(monitorIface.ID()) logFields := []zap.Field{ zap.Any("action", action), zap.Duration("duration", endTime.Sub(startTime)), @@ -129,49 +122,42 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger } } - last := c.getActions() for { - // Always receive an update if there is one. 
This helps with reliability (better guarantees - // about not missing updates) and means that the switch statements can be simpler. + // Wait until the state's changed, or we're done. select { + case <-ctx.Done(): + return case <-updates.Wait(): - updates.Awake() - last = c.getActions() - default: } - // Wait until we're supposed to make a request. + last := c.getActions() if last.actions.MonitorUpscale == nil { - select { - case <-ctx.Done(): - return - case <-updates.Wait(): - // NB: don't .Awake(); allow that to be handled at the top of the loop. - continue - } + continue // nothing to do; wait until the state changes. } + var startTime time.Time + var monitorIface MonitorHandle action := *last.actions.MonitorUpscale - monitor := c.clients.Monitor.GetHandle() - - var startTime time.Time - c.update(func(state *core.State) { + if updated := c.updateIfActionsUnchanged(last, func(state *core.State) { logger.Info("Starting vm-monitor upscale request", zap.Any("action", action)) startTime = time.Now() + monitorIface = c.clients.Monitor.GetHandle() state.Monitor().StartingUpscaleRequest(startTime, action.Target) - }) + }); !updated { + continue // state has changed, retry. + } var err error - if monitor != nil { - err = monitor.Upscale(ctx, ifaceLogger, action.Current, action.Target) + if monitorIface != nil { + err = monitorIface.Upscale(ctx, ifaceLogger, action.Current, action.Target) } else { err = errors.New("No currently active vm-monitor connection") } endTime := time.Now() c.update(func(state *core.State) { - unchanged := idUnchanged(monitor.ID()) + unchanged := idUnchanged(monitorIface.ID()) logFields := []zap.Field{ zap.Any("action", action), zap.Duration("duration", endTime.Sub(startTime)), diff --git a/pkg/agent/executor/exec_neonvm.go b/pkg/agent/executor/exec_neonvm.go index 2479b8f34..da19fa2f1 100644 --- a/pkg/agent/executor/exec_neonvm.go +++ b/pkg/agent/executor/exec_neonvm.go @@ -21,36 +21,29 @@ func (c *ExecutorCoreWithClients) DoNeonVMRequests(ctx context.Context, logger * ifaceLogger *zap.Logger = logger.Named("client") ) - last := c.getActions() for { - // Always receive an update if there is one. This helps with reliability (better guarantees - // about not missing updates) and means that the switch statements can be simpler. + // Wait until the state's changed, or we're done. select { + case <-ctx.Done(): + return case <-updates.Wait(): - updates.Awake() - last = c.getActions() - default: } - // Wait until we're supposed to make a request. + last := c.getActions() if last.actions.NeonVMRequest == nil { - select { - case <-ctx.Done(): - return - case <-updates.Wait(): - // NB: don't .Awake(); allow that to be handled at the top of the loop. - continue - } + continue // nothing to do; wait until the state changes. } + var startTime time.Time action := *last.actions.NeonVMRequest - var startTime time.Time - c.update(func(state *core.State) { + if updated := c.updateIfActionsUnchanged(last, func(state *core.State) { logger.Info("Starting NeonVM request", zap.Any("action", action)) startTime = time.Now() state.NeonVM().StartingRequest(startTime, action.Target) - }) + }); !updated { + continue // state has changed, retry. 
+ } err := c.clients.NeonVM.Request(ctx, ifaceLogger, action.Current, action.Target) endTime := time.Now() diff --git a/pkg/agent/executor/exec_plugin.go b/pkg/agent/executor/exec_plugin.go index 735b9b235..82d3cd198 100644 --- a/pkg/agent/executor/exec_plugin.go +++ b/pkg/agent/executor/exec_plugin.go @@ -36,39 +36,31 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger * } } - last := c.getActions() for { - // Always receive an update if there is one. This helps with reliability (better guarantees - // about not missing updates) and means that the switch statements can be simpler. + // Wait until the state's changed, or we're done. select { + case <-ctx.Done(): + return case <-updates.Wait(): - updates.Awake() - last = c.getActions() - default: } - // Wait until we're supposed to make a request. + last := c.getActions() if last.actions.PluginRequest == nil { - select { - case <-ctx.Done(): - return - case <-updates.Wait(): - // NB: don't .Awake(); allow that to be handled at the top of the loop. - continue - } + continue // nothing to do; wait until the state changes. } + var startTime time.Time + var pluginIface PluginHandle action := *last.actions.PluginRequest - pluginIface := c.clients.Plugin.GetHandle() - - // update the state to indicate that the request is starting. - var startTime time.Time - c.update(func(state *core.State) { + if updated := c.updateIfActionsUnchanged(last, func(state *core.State) { logger.Info("Starting plugin request", zap.Any("action", action)) startTime = time.Now() + pluginIface = c.clients.Plugin.GetHandle() state.Plugin().StartingRequest(startTime, action.Target) - }) + }); !updated { + continue // state has changed, retry. + } var resp *api.PluginResponse var err error From 7c1aa8104366a48b2d215d57f7f0a6c1b9d80248 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 1 Oct 2023 19:01:38 -0700 Subject: [PATCH 34/59] revert Makefile changes --- Makefile | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 6c66f2e12..9e51582dc 100644 --- a/Makefile +++ b/Makefile @@ -96,12 +96,15 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust fmt: ## Run go fmt against code. go fmt ./... -.PHONY: lint -lint: ## Run golangci-lint against code. - golangci-lint run +.PHONY: vet +vet: ## Run go vet against code. + # `go vet` requires gcc + # ref https://github.com/golang/go/issues/56755 + CGO_ENABLED=0 go vet ./... + .PHONY: test -test: fmt envtest ## Run tests. +test: fmt vet envtest ## Run tests. # chmodding KUBEBUILDER_ASSETS dir to make it deletable by owner, # otherwise it fails with actions/checkout on self-hosted GitHub runners # ref: https://github.com/kubernetes-sigs/controller-runtime/pull/2245 @@ -113,7 +116,7 @@ test: fmt envtest ## Run tests. ##@ Build .PHONY: build -build: fmt bin/vm-builder bin/vm-builder-generic ## Build all neonvm binaries. +build: fmt vet bin/vm-builder bin/vm-builder-generic ## Build all neonvm binaries. go build -o bin/controller neonvm/main.go go build -o bin/vxlan-controller neonvm/tools/vxlan/controller/main.go go build -o bin/runner neonvm/runner/main.go @@ -127,7 +130,7 @@ bin/vm-builder-generic: ## Build vm-builder-generic binary. CGO_ENABLED=0 go build -o bin/vm-builder-generic neonvm/tools/vm-builder-generic/main.go .PHONY: run -run: fmt ## Run a controller from your host. +run: fmt vet ## Run a controller from your host. 
go run ./neonvm/main.go .PHONY: vm-monitor From 0c6133a434616d2f4ee9f1ecf3110fa58ef06167 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 1 Oct 2023 19:18:19 -0700 Subject: [PATCH 35/59] remove an unnecessary change --- pkg/agent/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/agent/config.go b/pkg/agent/config.go index 1a36e2d75..e6ade9189 100644 --- a/pkg/agent/config.go +++ b/pkg/agent/config.go @@ -12,12 +12,12 @@ import ( ) type Config struct { - DumpState *DumpStateConfig `json:"dumpState"` Scaling ScalingConfig `json:"scaling"` Metrics MetricsConfig `json:"metrics"` Scheduler SchedulerConfig `json:"scheduler"` Monitor MonitorConfig `json:"monitor"` Billing *billing.Config `json:"billing,omitempty"` + DumpState *DumpStateConfig `json:"dumpState"` } type MonitorConfig struct { From 4098f264094002915715792788bf2bbc2f0145b3 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 6 Oct 2023 14:34:39 -0700 Subject: [PATCH 36/59] add pkg/agent/ARCHITECTURE.md --- ARCHITECTURE.md | 2 + pkg/agent/ARCHITECTURE.md | 156 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 pkg/agent/ARCHITECTURE.md diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 337c5a2e5..e8e73d0a6 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -25,6 +25,8 @@ This document should be up-to-date. If it isn't, that's a mistake (open an issue This isn't the only architecture document. You may also want to look at: +* [`pkg/agent/ARCHITECTURE.md`](pkg/agent/ARCHITECTURE.md) — detail on the implementation of the + autoscaler-agent * [`pkg/plugin/ARCHITECTURE.md`](pkg/plugin/ARCHITECTURE.md) — detail on the implementation of the scheduler plugin * [`neondatabase/vm-monitor`](https://github.com/neondatabase/vm-monitor) - diff --git a/pkg/agent/ARCHITECTURE.md b/pkg/agent/ARCHITECTURE.md new file mode 100644 index 000000000..c0d8e18fb --- /dev/null +++ b/pkg/agent/ARCHITECTURE.md @@ -0,0 +1,156 @@ +# autoscaler-agent: Architecture + +The purpose of this document is to provide some context about _what_ the autoscaler-agent does, +_why_ it does that, and _how_ that's implemented internally. + +This document is expected to remain up-to-date. If it isn't, that's a mistake (open an issue!). + +**Table of contents:** + +* [What](#what) +* [Why](#why) +* [How](#how) + * [`agent.Runner`](#agentrunner) + * [Scaling, end-to-end](#scaling-end-to-end) + +## What + +The autoscaler-agent is a k8s node daemon that is responsible for: + +1. Fetching current load metrics from VMs +2. Communicating with vm-monitor running inside each VM to: + 1. Request permission for downscaling + 2. Notify about recent upscaling + 3. _Receive_ requests for "immediate upscaling" from the vm-monitor +3. Communicating with the scheduler plugin to: + 1. Request permission for upscaling + 2. Notify about recent downscaling +4. Modifying the NeonVM VirtualMachine object according to the desired scaling, based on load + metrics and requested upscaling, if any. +5. Generating and sending billing data representing the current CPU usage from each VM. + +So, in general, the idea is that the autoscaler-agent uses metrics from the VM to figure out what +the "desired" resources should be, and then: + +- if scaling up, contacts: scheduler plugin → NeonVM → vm-monitor +- if scaling down, contacts: vm-monitor → NeonVM → scheduler plugin + +See also: the root-level [`ARCHITECTURE.md`](../../ARCHITECTURE.md). + +## Why + +This section provides some historical context. 
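As an aside before the history: the scale-up/scale-down ordering listed in the "What" section above can be summarized in a short sketch. This is illustrative only (the names `Component` and `scalingOrder` are assumptions for the example); the agent's real flow lives in `pkg/agent/core` and `pkg/agent/executor`.

```go
package example

// Component stands in for the three systems the agent talks to when scaling.
type Component int

const (
	SchedulerPlugin Component = iota // hands out permits for upscaling
	NeonVM                           // actually changes the VM's resources
	VMMonitor                        // approves downscaling from inside the VM
)

// scalingOrder returns the order in which the components are contacted:
// when scaling up, permission is obtained *before* resources increase;
// when scaling down, the VM gives up resources *before* they're released upstream.
func scalingOrder(scalingUp bool) []Component {
	if scalingUp {
		return []Component{SchedulerPlugin, NeonVM, VMMonitor}
	}
	return []Component{VMMonitor, NeonVM, SchedulerPlugin}
}
```

The point of the asymmetry is that upstream capacity is reserved before the VM grows, and only released after the VM has actually shrunk.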
+ +Originally, the autoscaler-agent was a sidecar container per-VM that was directly responsible for +the above communications, and only acted on behalf of that single VM. + +We eventually moved the autoscaler-agent from a per-VM to per-node (i.e. k8s node), mostly wrapping +our own lifecycle handling around the existing architecture, using k8s watch events (via +`pkg/util/watch`) to get notified when VMs were started or stopped, in [#4]. + +[#4]: https://github.com/neondatabase/autoscaling/pull/4 + +Then, we needed to generate billing events based on the resource usage from these VMs. +Our options were to implement that as a new component, as part of the NeonVM controller, or within +the autoscaler-agent. We included it in the autoscaler-agent because we already had the objects +store (`pkg/util/watch.Store`) from the existing watch events, and already had it deployed. That was +implemented in `pkg/agent/billing/` in [#49]. + +[#49]: https://github.com/neondatabase/autoscaling/pull/49 + +--- + +Another significant change came when we switched to the vm-monitor from its predecessor, +vm-informant, which came with moving from a bi-directional REST API (with a server for each VM in +the autoscaler-agent) to a websocket connection from the autoscaler-agent to the vm-monitor. +Support for the vm-informant was removed with #506. + +[#506]: https://github.com/neondatabase/autoscaling/pull/506 + +## How + +This final section discusses the _actual_ architecture of the autoscaler-agent - what its internal +components are, and how they interact with each other. + +--- + +At a high level, these are the components of the autoscaler-agent: + +- Initial setup and main loop (`entrypoint.go`, `config.go`, and `args.go`), called by + `cmd/autoscaler-agent/main.go`. + - Receives events from the VM event watcher (`watch.go`) + - Updates the "global state" (`globalstate.go`) +- Per-VM communication and scaling logic (`runner.go`) + - Tracks the current scheduler to communicate with (`schedwatch/trackcurrent.go`) + - Communication with vm-monitor managed by (`dispatcher.go`) + - Pure scaling logic state machine implemented in `core/` + - "Execution" of the state machine's recommendations in `executor/` + - Implementations of the executor's interfaces in `execbridge.go` +- Billing events collection (`billing/billing.go`) and sending (`billing/send.go`), + using the VM watcher. +- Prometheus metrics on port 9100 (`prommetrics.go` and `billing/prommetrics.go`) +- Internal state dump server on port 10300 (`dumpstate.go`) + +### `agent.Runner` + +The most complex piece of the autoscaler-agent is the implementation of the per-VM communication and +scaling logic. + +At a high level, for the lifetime of a single VM's pod[^vm-pod], all the while it has autoscaling +enabled[^autoscaling-enabled] and is not currently migrating[^migrating], there's an `agent.Runner` +responsible for interacting both _with_ the VM, and _on behalf of_ the VM. + +Internally, the `Runner` spawns a handful of goroutines using the `spawnBackgroundWorker` method, so +that a panic in any individual worker causes the `Runner` (and all its threads) to restart, without +taking down the whole autoscaler-agent. Restarts must happen some minimum duration after the +`Runner` was originally started, to mitigate the impact of any crashloops. 
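To make the restart behaviour described above concrete, here is a minimal sketch of the pattern, not the actual `runner.go` code: a panic in any worker is recovered and turned into a signal that the `Runner` should be torn down and recreated, with a minimum-runtime check to dampen crashloops. The names `exampleRunner`, `backgroundPanic`, and `waitBeforeRestart` are illustrative assumptions.

```go
package agent

import (
	"context"
	"fmt"
	"time"
)

// exampleRunner is a stand-in for the real Runner; only the pieces needed for this
// sketch are included. backgroundPanic is watched by the Runner's owner so that a
// panicking worker tears down (and later restarts) the whole Runner.
type exampleRunner struct {
	backgroundPanic chan error
}

// spawnBackgroundWorker runs f in its own goroutine, converting a panic in f into a
// restart signal instead of crashing the whole autoscaler-agent.
func (r *exampleRunner) spawnBackgroundWorker(ctx context.Context, name string, f func(context.Context)) {
	go func() {
		defer func() {
			if v := recover(); v != nil {
				// non-blocking send: one panic is enough to trigger a restart
				select {
				case r.backgroundPanic <- fmt.Errorf("background worker %q panicked: %v", name, v):
				default:
				}
			}
		}()
		f(ctx)
	}()
}

// waitBeforeRestart enforces the "minimum duration between restarts" rule: if the
// Runner died too soon after it started, sleep out the remainder first.
func waitBeforeRestart(startedAt time.Time, minRuntime time.Duration) {
	if elapsed := time.Since(startedAt); elapsed < minRuntime {
		time.Sleep(minRuntime - elapsed)
	}
}
```

The important property is that a panic in any one worker restarts the whole `Runner`, so the per-VM state is rebuilt from scratch rather than left half-updated.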
+ +Threads created by & for the `Runner` are responsible for, among other things: + +- Maintaining/tracking connections to vm-monitor and scheduler plugin +- Individual executor threads for requests to vm-monitor, scheduler plugin, NeonVM k8s API + +[^vm-pod]: Reminder: The VM is just an object in Kubernetes. The NeonVM controller ensures that + there's a "runner pod" executing that VM. When there's a migration + +[^autoscaling-enabled]: Autoscaling is off by default, and requires the + `autoscaling.neon.tech/enabled` label on the VM object to be set to `"true"`. If a VM is + modified so that changes, then it's handled in the same way as if the VM started or stopped. + +[^migrating]: Scaling while migrating is not supported by QEMU, but in the future, we may still + maintain the `Runner` while the VM is migrating. + +### Scaling, end-to-end + +```mermaid +sequenceDiagram + + participant runner.go + participant execbridge.go + participant executor/ + participant core/ + + loop StateUpdates + runner.go->>executor/: State updated + executor/->>core/: State updated + executor/->>core/: Calculate new actions + executor/->>execbridge.go: Call interfaces to execute actions + execbridge.go->>runner.go: Make individaul requests + end +``` + +At a high level, we have an abstract state machine defined in package [`core/`](./core) that exposes +individual methods for updating the state and a single pure method to determine what to do: +`(*core.State).NextActions()`. + +This `State` object is not thread safe, and only _says_ what to do, without actually doing anything. +So all actual changes to the state go through package [`executor/`](./executor), which internally +provides locked access to the `State`, caching of the desired actions (because `NextActions` is +pure!), and notifications that the state was updated (and so, the actions may have changed). The +`executor` package also defines interfaces for each external system we may need to communicate with +(i.e. the scheduler plugin, NeonVM API, and vm-monitor), and exposes "executor functions" that +repeatedly listen for changes to the state and make the necessary requests, if there are any. + +One level up, `execbridge.go` gives the implementation of the `executor`'s request interfaces. These +interfaces _typically_ just act as the "bridge" between that API and the actual definitions of the +request functions, most of which are in `runner.go` and require access to the underlying `Runner`. From 7e69f74c74efdbc5460dc7970c7b8526902286d3 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 6 Oct 2023 15:36:21 -0700 Subject: [PATCH 37/59] add some comments --- pkg/agent/core/state.go | 7 +++++- pkg/agent/execbridge.go | 3 ++- pkg/agent/executor/core.go | 47 ++++++++++++++++++++++++++++---------- 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index b681d0ac3..e2d848b11 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -123,8 +123,13 @@ type monitorState struct { // request, or (b) a successful upscale notification. approved *api.Resources + // downscaleFailureAt, if not nil, stores the time at which a downscale request most recently + // failed (where "failed" means that some unexpected error occurred, not that it was merely + // denied). 
downscaleFailureAt *time.Time - upscaleFailureAt *time.Time + // upscaleFailureAt, if not nil, stores the time at which an upscale request most recently + // failed + upscaleFailureAt *time.Time } func (ms *monitorState) active() bool { diff --git a/pkg/agent/execbridge.go b/pkg/agent/execbridge.go index bdc6a8d3e..a420df2b8 100644 --- a/pkg/agent/execbridge.go +++ b/pkg/agent/execbridge.go @@ -2,7 +2,8 @@ package agent // Implementations of the interfaces used by & defined in pkg/agent/executor // -// This file is essentially the bridge between 'runner.go' and 'executor/' +// This file is essentially the bridge between 'runner.go' and 'executor/', +// connecting the latter to the actual implementations in the former. import ( "context" diff --git a/pkg/agent/executor/core.go b/pkg/agent/executor/core.go index 93cf5512b..0fdbde12c 100644 --- a/pkg/agent/executor/core.go +++ b/pkg/agent/executor/core.go @@ -3,8 +3,14 @@ package executor // Consumers of pkg/agent/core, implementing the "executors" for each type of action. These are // wrapped up into a single ExecutorCore type, which exposes some methods for the various executors. // -// The executors use various abstract interfaces for the scheudler / NeonVM / informant. The -// implementations of those interfaces are defiend in ifaces.go +// The executors use various abstract interfaces for the scheduler plugin / NeonVM / vm-monitor, and +// are defined in exec_*.go. The implementations of those interfaces are defined in execbridge.go. +// +// Each of the methods to modify ExecutorCore take 'withLock' as a callback that runs while the lock +// is held. In general, this is used for logging, so that the log output strictly matches the +// ordering of the changes to the underlying core.State, which should help with debugging. +// +// For more, see pkg/agent/ARCHITECTURE.md. import ( "sync" @@ -49,6 +55,7 @@ func NewExecutorCore(stateLogger *zap.Logger, vm api.VmInfo, config core.Config) } } +// ExecutorCoreWithClients wraps ExecutorCore with the various type ExecutorCoreWithClients struct { *ExecutorCore @@ -62,14 +69,20 @@ func (c *ExecutorCore) WithClients(clients ClientSet) ExecutorCoreWithClients { } } -type timedActionsID int64 - +// timedActions stores the core.ActionSet in ExecutorCore alongside a unique ID type timedActions struct { - id timedActionsID - calculatedAt time.Time - actions core.ActionSet + // id stores a unique ID associated with the cached actions, so that we can use optimistic + // locking to make sure we're never taking an action that is not the *current* recommendation, + // because otherwise guaranteeing correctness of core.State is really difficult. + // + // id is exclusively used by (*ExecutorCore).updateIfActionsUnchanged(). + id timedActionsID + actions core.ActionSet } +type timedActionsID int64 + +// fetch the currently cached actions, or recalculate if they've since been invalidated func (c *ExecutorCore) getActions() timedActions { c.mu.Lock() defer c.mu.Unlock() @@ -80,7 +93,7 @@ func (c *ExecutorCore) getActions() timedActions { // NOTE: Even though we cache the actions generated using time.Now(), it's *generally* ok. 
now := time.Now() c.stateLogger.Info("Recalculating ActionSet", zap.Time("now", now), zap.Any("state", c.core.Dump())) - c.actions = &timedActions{id: id, calculatedAt: now, actions: c.core.NextActions(now)} + c.actions = &timedActions{id: id, actions: c.core.NextActions(now)} c.lastActionsID = id c.stateLogger.Info("New ActionSet", zap.Time("now", now), zap.Any("actions", c.actions.actions)) } @@ -92,8 +105,6 @@ func (c *ExecutorCore) update(with func(*core.State)) { c.mu.Lock() defer c.mu.Unlock() - // NB: We broadcast the update *before* calling with() because this gets us nicer ordering - // guarantees in some cases. c.updates.Broadcast() c.actions = nil with(c.core) @@ -138,6 +149,8 @@ type ExecutorCoreUpdater struct { core *ExecutorCore } +// UpdateMetrics calls (*core.State).UpdateMetrics() on the inner core.State and runs withLock while +// holding the lock. func (c ExecutorCoreUpdater) UpdateMetrics(metrics api.Metrics, withLock func()) { c.core.update(func(state *core.State) { state.UpdateMetrics(metrics) @@ -145,6 +158,8 @@ func (c ExecutorCoreUpdater) UpdateMetrics(metrics api.Metrics, withLock func()) }) } +// UpdatedVM calls (*core.State).UpdatedVM() on the inner core.State and runs withLock while +// holding the lock. func (c ExecutorCoreUpdater) UpdatedVM(vm api.VmInfo, withLock func()) { c.core.update(func(state *core.State) { state.UpdatedVM(vm) @@ -152,7 +167,8 @@ func (c ExecutorCoreUpdater) UpdatedVM(vm api.VmInfo, withLock func()) { }) } -// NewScheduler updates the inner state, calling (*core.State).Plugin().NewScheduler() +// NewScheduler calls (*core.State).Plugin().NewScheduler() on the inner core.State and runs +// withLock while holding the lock. func (c ExecutorCoreUpdater) NewScheduler(withLock func()) { c.core.update(func(state *core.State) { state.Plugin().NewScheduler() @@ -160,7 +176,8 @@ func (c ExecutorCoreUpdater) NewScheduler(withLock func()) { }) } -// SchedulerGone updates the inner state, calling (*core.State).Plugin().SchedulerGone() +// SchedulerGone calls (*core.State).Plugin().SchedulerGone() on the inner core.State and runs +// withLock while holding the lock. func (c ExecutorCoreUpdater) SchedulerGone(withLock func()) { c.core.update(func(state *core.State) { state.Plugin().SchedulerGone() @@ -168,6 +185,8 @@ func (c ExecutorCoreUpdater) SchedulerGone(withLock func()) { }) } +// ResetMonitor calls (*core.State).Monitor().Reset() on the inner core.State and runs withLock +// while holding the lock. func (c ExecutorCoreUpdater) ResetMonitor(withLock func()) { c.core.update(func(state *core.State) { state.Monitor().Reset() @@ -175,6 +194,8 @@ func (c ExecutorCoreUpdater) ResetMonitor(withLock func()) { }) } +// UpscaleRequested calls (*core.State).Monitor().UpscaleRequested(...) on the inner core.State and +// runs withLock while holding the lock. func (c ExecutorCoreUpdater) UpscaleRequested(resources api.MoreResources, withLock func()) { c.core.update(func(state *core.State) { state.Monitor().UpscaleRequested(time.Now(), resources) @@ -182,6 +203,8 @@ func (c ExecutorCoreUpdater) UpscaleRequested(resources api.MoreResources, withL }) } +// MonitorActive calls (*core.State).Monitor().Active(...) on the inner core.State and runs withLock +// while holding the lock. 
func (c ExecutorCoreUpdater) MonitorActive(active bool, withLock func()) { c.core.update(func(state *core.State) { state.Monitor().Active(active) From 336e67b9aca2e53a2c49effd92389ac530fde1ad Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 6 Oct 2023 15:46:22 -0700 Subject: [PATCH 38/59] fix lints --- go.mod | 2 +- pkg/agent/ARCHITECTURE.md | 2 +- pkg/agent/core/state.go | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 4584164de..9ed852478 100644 --- a/go.mod +++ b/go.mod @@ -49,6 +49,7 @@ require ( github.com/digitalocean/go-qemu v0.0.0-20220826173844-d5f5e3ceed89 github.com/docker/docker v20.10.24+incompatible github.com/docker/libnetwork v0.8.0-dev.2.0.20210525090646-64b7a4574d14 + github.com/google/uuid v1.3.0 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 github.com/k8snetworkplumbingwg/whereabouts v0.6.1 github.com/kdomanski/iso9660 v0.3.3 @@ -118,7 +119,6 @@ require ( github.com/google/gnostic v0.6.9 // indirect github.com/google/go-cmp v0.5.9 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/uuid v1.3.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect github.com/imdario/mergo v0.3.12 // indirect diff --git a/pkg/agent/ARCHITECTURE.md b/pkg/agent/ARCHITECTURE.md index c0d8e18fb..ba2ba5f99 100644 --- a/pkg/agent/ARCHITECTURE.md +++ b/pkg/agent/ARCHITECTURE.md @@ -135,7 +135,7 @@ sequenceDiagram executor/->>core/: State updated executor/->>core/: Calculate new actions executor/->>execbridge.go: Call interfaces to execute actions - execbridge.go->>runner.go: Make individaul requests + execbridge.go->>runner.go: Make individual requests end ``` diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index e2d848b11..2edcd9e87 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -13,7 +13,7 @@ package core // // That said, there's still some tricky semantics we want to maintain. Internally, the // autoscaler-agent must be designed around eventual consistency, but the API we expose to the -// vm-monitor is strictly synchonous. As such, there's some subtle logic to make sure that we're +// vm-monitor is strictly synchronous. As such, there's some subtle logic to make sure that we're // not violating our own guarantees. // // --- @@ -116,7 +116,7 @@ type monitorState struct { // vm-monitor, along with the time at which it occurred. requestedUpscale *requestedUpscale - // deniedDownscale, if not nil, stores the result of the lastest denied /downscale request. + // deniedDownscale, if not nil, stores the result of the latest denied /downscale request. 
deniedDownscale *deniedDownscale // approved stores the most recent Resources associated with either (a) an accepted downscale @@ -743,7 +743,7 @@ func (s *State) minRequiredResourcesForDeniedDownscale(computeUnit api.Resources } // clampResources uses the directionality of the difference between s.vm.Using() and desired to -// clamp the desired resources with the uppper *or* lower bound +// clamp the desired resources with the upper *or* lower bound func (s *State) clampResources( current api.Resources, desired api.Resources, From 01b35f773c2a078abbe8e2aa74d2606a691a4d86 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 6 Oct 2023 16:52:00 -0700 Subject: [PATCH 39/59] more comments fixes --- pkg/agent/core/dumpstate.go | 4 ++-- pkg/agent/runner.go | 44 +++++-------------------------------- 2 files changed, 8 insertions(+), 40 deletions(-) diff --git a/pkg/agent/core/dumpstate.go b/pkg/agent/core/dumpstate.go index f4c521d40..8236b2ec9 100644 --- a/pkg/agent/core/dumpstate.go +++ b/pkg/agent/core/dumpstate.go @@ -1,6 +1,6 @@ package core -// Implementation of (*UpdateState).Dump() +// Implementation of (*State).Dump() import ( "time" @@ -27,7 +27,7 @@ type StateDump struct { Metrics *api.Metrics `json:"metrics"` } -// Dump produces a JSON-serializable representation of the State +// Dump produces a JSON-serializable copy of the State func (s *State) Dump() StateDump { return StateDump{ Config: s.config, diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index fcdb98103..e2d3ec541 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -5,43 +5,13 @@ package agent // The primary object in this file is the Runner. We create a new Runner for each VM, and the Runner // spawns a handful of long-running tasks that share state via the Runner object itself. // -// # General paradigm +// Each of these tasks is created by (*Runner).spawnBackgroundWorker(), which gracefully handles +// panics so that it terminates (and restarts) the Runner itself, instead of e.g. taking down the +// entire autoscaler-agent. // -// At a high level, we're trying to balance a few goals that are in tension with each other: +// The main entrypoint is (*Runner).Spawn(), which in turn calls (*Runner).Run(), etc. // -// 1. It should be OK to panic, if an error is truly unrecoverable -// 2. A single Runner's panic shouldn't bring down the entire autoscaler-agent¹ -// 3. We want to expose a State() method to view (almost) all internal state -// 4. Some high-level actions (e.g., call to vm-monitor; update VM to desired state) require -// that we have *at most* one such action running at a time. -// -// There are a number of possible solutions to this set of goals. All reasonable solutions require -// multiple goroutines. Here's what we do: -// -// * Runner acts as a global (per-VM) shared object with its fields guarded by Runner.lock. The -// lock is held for as short a duration as possible. -// * "Background" threads are responsible for relatively high-level tasks - like: -// * "track scheduler" -// * "get metrics" -// * "handle VM resources" - using metrics, calculates target resources level and contacts -// scheduler, vm-monitor, and NeonVM -- the "scaling" part of "autoscaling". -// * Each thread makes *synchronous* HTTP requests while holding the necessary lock to prevent any other -// thread from making HTTP requests to the same entity. 
For example: -// * All requests to NeonVM and the scheduler plugin are guarded by Runner.requestLock, which -// guarantees that we aren't simultaneously telling the scheduler one thing and changing it at -// the same time. -// * Each "background" thread is spawned by (*Runner).spawnBackgroundWorker(), which appropriately -// catches panics and signals the Runner so that the main thread from (*Runner).Run() cleanly -// shuts everything down. -// * Every significant lock has an associated "deadlock checker" background thread that panics if -// it takes too long to acquire the lock. -// -// spawnBackgroundWorker guarantees (1) and (2); Runner.lock makes (3) possible; and -// Runner.requestLock guarantees (4). -// -// --- -// ¹ If we allowed a single Runner to take down the whole autoscaler-agent, it would open up the -// possibility of crash-looping due to unusual cluster state (e.g., weird values in a NeonVM object) +// For more information, refer to ARCHITECTURE.md. import ( "bytes" @@ -386,9 +356,7 @@ func (r *Runner) spawnBackgroundWorker(ctx context.Context, logger *zap.Logger, // getMetricsLoop repeatedly attempts to fetch metrics from the VM // -// Every time metrics are successfully fetched, the value of r.lastMetrics is updated and newMetrics -// is signalled. The update to r.lastMetrics and signal on newMetrics occur without releasing r.lock -// in between. +// Every time metrics are successfully fetched, the value is recorded with newMetrics. func (r *Runner) getMetricsLoop( ctx context.Context, logger *zap.Logger, From 80b24d2807f21abc37df6371d2651413cd2b8497 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Fri, 6 Oct 2023 18:41:43 -0700 Subject: [PATCH 40/59] improve core.State logging situation --- pkg/agent/core/state.go | 55 +++++++++++++++++++------ pkg/agent/core/state_test.go | 13 ++++-- pkg/agent/core/testhelpers/construct.go | 23 +++++++---- pkg/agent/runner.go | 6 ++- 4 files changed, 70 insertions(+), 27 deletions(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 2edcd9e87..29cc63ee3 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -27,6 +27,8 @@ import ( "strings" "time" + "go.uber.org/zap" + vmapi "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" "github.com/neondatabase/autoscaling/pkg/api" @@ -54,9 +56,18 @@ type Config struct { // MonitorRetryWait gives the amount of time to wait to retry after a *failed* request. MonitorRetryWait time.Duration - // Warn provides an outlet for (*State).Next() to give warnings about conditions that are - // impeding its ability to execute. (e.g. "wanted to do X but couldn't because of Y") - Warn func(string, ...any) `json:"-"` + // Log provides an outlet for (*State).NextActions() to give informative messages or warnings + // about conditions that are impeding its ability to execute. + Log LogConfig `json:"-"` +} + +type LogConfig struct { + // Info, if not nil, will be called to log consistent informative information. + Info func(string, ...zap.Field) + // Warn, if not nil, will be called to log conditions that are impeding the ability to move the + // current resources to what's desired. + // A typical warning may be something like "wanted to do X but couldn't because of Y". 
+ Warn func(string, ...zap.Field) } // State holds all of the necessary internal state for a VM in order to make scaling @@ -200,6 +211,22 @@ func NewState(vm api.VmInfo, config Config) *State { } } +func (s *State) info(msg string, fields ...zap.Field) { + if s.config.Log.Info != nil { + s.config.Log.Info(msg, fields...) + } +} + +func (s *State) warn(msg string, fields ...zap.Field) { + if s.config.Log.Warn != nil { + s.config.Log.Warn(msg, fields...) + } +} + +func (s *State) warnf(msg string, args ...any) { + s.warn(fmt.Sprintf(msg, args...)) +} + // NextActions is used to implement the state machine. It's a pure function that *just* indicates // what the executor should do. func (s *State) NextActions(now time.Time) ActionSet { @@ -278,7 +305,7 @@ func (s *State) calculatePluginAction( desiredResources api.Resources, ) (*ActionPluginRequest, *time.Duration) { logFailureReason := func(reason string) { - s.config.Warn("Wanted to make a request to the scheduler plugin, but %s", reason) + s.warnf("Wanted to make a request to the scheduler plugin, but %s", reason) } // additional resources we want to request OR previous downscaling we need to inform the plugin of @@ -406,7 +433,7 @@ func (s *State) calculateNeonVMAction( } if len(reqs) != 0 { - s.config.Warn("Wanted to make a request to NeonVM API, but there's already %s", strings.Join(reqs, " and ")) + s.warnf("Wanted to make a request to NeonVM API, but there's already %s", strings.Join(reqs, " and ")) } return nil @@ -453,7 +480,7 @@ func (s *State) calculateMonitorUpscaleAction( } if requestDescription != "" { - s.config.Warn("Wanted to send vm-monitor upscale request, but waiting on ongoing %s", requestDescription) + s.warnf("Wanted to send vm-monitor upscale request, but waiting on ongoing %s", requestDescription) } return nil, nil } @@ -462,7 +489,7 @@ func (s *State) calculateMonitorUpscaleAction( if s.monitor.upscaleFailureAt != nil { timeUntilFailureBackoffExpires := s.monitor.upscaleFailureAt.Add(s.config.MonitorRetryWait).Sub(now) if timeUntilFailureBackoffExpires > 0 { - s.config.Warn("Wanted to send vm-monitor upscale request, but failed too recently") + s.warn("Wanted to send vm-monitor upscale request, but failed too recently") return nil, &timeUntilFailureBackoffExpires } } @@ -482,7 +509,7 @@ func (s *State) calculateMonitorDownscaleAction( // can't do anything if we don't have an active connection to the vm-monitor if !s.monitor.active() { if desiredResources.HasFieldLessThan(s.vm.Using()) { - s.config.Warn("Wanted to send vm-monitor downscale request, but there's no active connection") + s.warn("Wanted to send vm-monitor downscale request, but there's no active connection") } return nil, nil } @@ -511,7 +538,7 @@ func (s *State) calculateMonitorDownscaleAction( // Can't make another request if there's already one ongoing (or if an upscaling request is // planned) if plannedUpscaleRequest { - s.config.Warn("Wanted to send vm-monitor downscale request, but waiting on other planned upscale request") + s.warn("Wanted to send vm-monitor downscale request, but waiting on other planned upscale request") return nil, nil } else if s.monitor.ongoingRequest != nil { var requestDescription string @@ -522,7 +549,7 @@ func (s *State) calculateMonitorDownscaleAction( } if requestDescription != "" { - s.config.Warn("Wanted to send vm-monitor downscale request, but waiting on other ongoing %s", requestDescription) + s.warnf("Wanted to send vm-monitor downscale request, but waiting on other ongoing %s", requestDescription) } return nil, 
nil } @@ -531,7 +558,7 @@ func (s *State) calculateMonitorDownscaleAction( if s.monitor.downscaleFailureAt != nil { timeUntilFailureBackoffExpires := now.Sub(*s.monitor.downscaleFailureAt) if timeUntilFailureBackoffExpires > 0 { - s.config.Warn("Wanted to send vm-monitor downscale request but failed too recently") + s.warn("Wanted to send vm-monitor downscale request but failed too recently") return nil, &timeUntilFailureBackoffExpires } } @@ -587,7 +614,7 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) ( // if we don't know what the compute unit is, don't do anything. if s.plugin.computeUnit == nil { - s.config.Warn("Can't determine desired resources because compute unit hasn't been set yet") + s.warn("Can't determine desired resources because compute unit hasn't been set yet") return s.vm.Using(), nil } @@ -655,7 +682,7 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) ( if !result.HasFieldGreaterThan(s.monitor.deniedDownscale.requested) { // This can only happen if s.vm.Max() is less than goalResources, because otherwise this // would have been factored into goalCU, affecting goalResources. Hence, the warning. - s.config.Warn("Can't decrease desired resources to within VM maximum because of vm-monitor previously denied downscale request") + s.warn("Can't decrease desired resources to within VM maximum because of vm-monitor previously denied downscale request") } preMaxResult := result result = result.Max(s.minRequiredResourcesForDeniedDownscale(*s.plugin.computeUnit, *s.monitor.deniedDownscale)) @@ -686,6 +713,8 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) ( } } + s.info("Calculated desired resources", zap.Object("target", result)) + return result, calculateWaitTime } diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 5b6206c78..a6b79761b 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + "go.uber.org/zap" "golang.org/x/exp/slices" "k8s.io/apimachinery/pkg/api/resource" @@ -113,8 +114,11 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { PluginDeniedRetryWait: time.Second, MonitorDeniedDownscaleCooldown: time.Second, MonitorRetryWait: time.Second, - Warn: func(format string, args ...any) { - warnings = append(warnings, fmt.Sprintf(format, args...)) + Log: core.LogConfig{ + Info: nil, + Warn: func(msg string, fields ...zap.Field) { + warnings = append(warnings, msg) + }, }, }, ) @@ -177,7 +181,10 @@ var DefaultInitialStateConfig = helpers.InitialStateConfig{ PluginDeniedRetryWait: 2 * time.Second, MonitorDeniedDownscaleCooldown: 5 * time.Second, MonitorRetryWait: 3 * time.Second, - Warn: func(string, ...any) {}, + Log: core.LogConfig{ + Info: nil, + Warn: nil, + }, }, } diff --git a/pkg/agent/core/testhelpers/construct.go b/pkg/agent/core/testhelpers/construct.go index 4c66f822e..f7315eb4a 100644 --- a/pkg/agent/core/testhelpers/construct.go +++ b/pkg/agent/core/testhelpers/construct.go @@ -1,9 +1,10 @@ package testhelpers import ( - "fmt" "testing" + "go.uber.org/zap" + "k8s.io/apimachinery/pkg/api/resource" vmapi "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" @@ -66,10 +67,12 @@ func WithStoredWarnings(warnings *[]string) InitialStateOpt { return InitialStateOpt{ postCreate: nil, preCreate: func(c *InitialStateConfig) { - warn := c.Core.Warn - c.Core.Warn = func(format string, args ...any) { - *warnings = append(*warnings, fmt.Sprintf(format, 
args...)) - warn(format, args...) + warn := c.Core.Log.Warn + c.Core.Log.Warn = func(msg string, fields ...zap.Field) { + *warnings = append(*warnings, msg) + if warn != nil { + warn(msg, fields...) + } } }, } @@ -79,10 +82,12 @@ func WithTestingLogfWarnings(t *testing.T) InitialStateOpt { return InitialStateOpt{ postCreate: nil, preCreate: func(c *InitialStateConfig) { - warn := c.Core.Warn - c.Core.Warn = func(format string, args ...any) { - t.Logf(format, args...) - warn(format, args...) + warn := c.Core.Log.Warn + c.Core.Log.Warn = func(msg string, fields ...zap.Field) { + t.Log(msg) + if warn != nil { + warn(msg, fields...) + } } }, } diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index e2d3ec541..c01fc8624 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -31,6 +31,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ktypes "k8s.io/apimachinery/pkg/types" + "github.com/neondatabase/autoscaling/pkg/agent/core" "github.com/neondatabase/autoscaling/pkg/agent/executor" "github.com/neondatabase/autoscaling/pkg/agent/schedwatch" "github.com/neondatabase/autoscaling/pkg/api" @@ -216,8 +217,9 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util PluginDeniedRetryWait: time.Second * time.Duration(r.global.config.Scheduler.RetryDeniedUpscaleSeconds), MonitorDeniedDownscaleCooldown: time.Second * time.Duration(r.global.config.Monitor.RetryDeniedDownscaleSeconds), MonitorRetryWait: time.Second * time.Duration(r.global.config.Monitor.RetryFailedRequestSeconds), - Warn: func(msg string, args ...any) { - coreExecLogger.Warn(fmt.Sprintf(msg, args...)) + Log: core.LogConfig{ + Info: coreExecLogger.Info, + Warn: coreExecLogger.Warn, }, }) From 1b4a9cda9a1350aea53ef98c1efb4439a7b3a278 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 7 Oct 2023 15:05:35 -0700 Subject: [PATCH 41/59] more tests and fixes, add timeout to upscale requests --- deploy/agent/config_map.yaml | 1 + pkg/agent/config.go | 4 + pkg/agent/core/state.go | 65 +++++-- pkg/agent/core/state_test.go | 351 ++++++++++++++++++++++++++++++++++- pkg/agent/runner.go | 11 +- 5 files changed, 407 insertions(+), 25 deletions(-) diff --git a/deploy/agent/config_map.yaml b/deploy/agent/config_map.yaml index 40cea39fa..7f57eacd3 100644 --- a/deploy/agent/config_map.yaml +++ b/deploy/agent/config_map.yaml @@ -22,6 +22,7 @@ data: "unhealthyStartupGracePeriodSeconds": 20, "maxHealthCheckSequentialFailuresSeconds": 30, "retryDeniedDownscaleSeconds": 5, + "requestedUpscaleValidSeconds": 10, "retryFailedRequestSeconds": 3 }, "metrics": { diff --git a/pkg/agent/config.go b/pkg/agent/config.go index e6ade9189..d10fb0290 100644 --- a/pkg/agent/config.go +++ b/pkg/agent/config.go @@ -47,6 +47,9 @@ type MonitorConfig struct { // RetryDeniedDownscaleSeconds gives the duration, in seconds, that we must wait before retrying // a downscale request that was previously denied RetryDeniedDownscaleSeconds uint `json:"retryDeniedDownscaleSeconds"` + // RequestedUpscaleValidSeconds gives the duration, in seconds, that requested upscaling should + // be respected for, before allowing re-downscaling. 
+ RequestedUpscaleValidSeconds uint `json:"requestedUpscaleValidSeconds"` } // DumpStateConfig configures the endpoint to dump all internal state @@ -152,6 +155,7 @@ func (c *Config) validate() error { erc.Whenf(ec, c.Monitor.MaxHealthCheckSequentialFailuresSeconds == 0, zeroTmpl, ".monitor.maxHealthCheckSequentialFailuresSeconds") erc.Whenf(ec, c.Monitor.RetryFailedRequestSeconds == 0, zeroTmpl, ".monitor.retryFailedRequestSeconds") erc.Whenf(ec, c.Monitor.RetryDeniedDownscaleSeconds == 0, zeroTmpl, ".monitor.retryDeniedDownscaleSeconds") + erc.Whenf(ec, c.Monitor.RequestedUpscaleValidSeconds == 0, zeroTmpl, ".monitor.requestedUpscaleValidSeconds") // add all errors if there are any: https://github.com/neondatabase/autoscaling/pull/195#discussion_r1170893494 ec.Add(c.Scaling.DefaultConfig.Validate()) erc.Whenf(ec, c.Scheduler.RequestPort == 0, zeroTmpl, ".scheduler.requestPort") diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index 29cc63ee3..ebafe769b 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -53,6 +53,10 @@ type Config struct { // downscale requests to the vm-monitor where the previous failed. MonitorDeniedDownscaleCooldown time.Duration + // MonitorRequestedUpscaleValidPeriod gives the duration for which requested upscaling from the + // vm-monitor must be respected. + MonitorRequestedUpscaleValidPeriod time.Duration + // MonitorRetryWait gives the amount of time to wait to retry after a *failed* request. MonitorRetryWait time.Duration @@ -640,18 +644,37 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) ( goalCU = util.Max(cpuGoalCU, memGoalCU) } - // Update goalCU based on any requested upscaling or downscaling that was previously denied - goalCU = util.Max(goalCU, s.requiredCUForRequestedUpscaling(*s.plugin.computeUnit)) + // Copy the initial value of the goal CU so that we can accurately track whether either + // requested upscaling or denied downscaling affected the outcome. + // Otherwise as written, it'd be possible to update goalCU from requested upscaling and + // incorrectly miss that denied downscaling could have had the same effect. + initialGoalCU := goalCU + + var requestedUpscalingAffectedResult bool + + // Update goalCU based on any explicitly requested upscaling + timeUntilRequestedUpscalingExpired := s.timeUntilRequestedUpscalingExpired(now) + requestedUpscalingInEffect := timeUntilRequestedUpscalingExpired > 0 + if requestedUpscalingInEffect { + reqCU := s.requiredCUForRequestedUpscaling(*s.plugin.computeUnit, *s.monitor.requestedUpscale) + if reqCU > initialGoalCU { + // FIXME: this isn't quite correct, because if initialGoalCU is already equal to the + // maximum goal CU we *could* have, this won't actually have an effect. 
+ requestedUpscalingAffectedResult = true + goalCU = util.Max(goalCU, reqCU) + } + } var deniedDownscaleAffectedResult bool + // Update goalCU based on any previously denied downscaling timeUntilDeniedDownscaleExpired := s.timeUntilDeniedDownscaleExpired(now) deniedDownscaleInEffect := timeUntilDeniedDownscaleExpired > 0 if deniedDownscaleInEffect { reqCU := s.requiredCUForDeniedDownscale(*s.plugin.computeUnit, s.monitor.deniedDownscale.requested) - if reqCU > goalCU { + if reqCU > initialGoalCU { deniedDownscaleAffectedResult = true - goalCU = reqCU + goalCU = util.Max(goalCU, reqCU) } } @@ -706,8 +729,20 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) ( } calculateWaitTime := func(actions ActionSet) *time.Duration { + var waiting bool + waitTime := time.Duration(int64(1<<63 - 1)) // time.Duration is an int64. As an "unset" value, use the maximum. + if deniedDownscaleAffectedResult && actions.MonitorDownscale == nil && s.monitor.ongoingRequest == nil { - return &timeUntilDeniedDownscaleExpired + waitTime = util.Min(waitTime, timeUntilDeniedDownscaleExpired) + waiting = true + } + if requestedUpscalingAffectedResult { + waitTime = util.Min(waitTime, timeUntilRequestedUpscalingExpired) + waiting = true + } + + if waiting { + return &waitTime } else { return nil } @@ -718,23 +753,29 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) ( return result, calculateWaitTime } -// NB: we could just use s.plugin.computeUnit, but that's sometimes nil. This way, it's clear that -// it's the caller's responsibility to ensure that s.plugin.computeUnit != nil. -func (s *State) requiredCUForRequestedUpscaling(computeUnit api.Resources) uint32 { - if s.monitor.requestedUpscale == nil { +func (s *State) timeUntilRequestedUpscalingExpired(now time.Time) time.Duration { + if s.monitor.requestedUpscale != nil { + return s.monitor.requestedUpscale.at.Add(s.config.MonitorRequestedUpscaleValidPeriod).Sub(now) + } else { return 0 } +} +// NB: we could just use s.plugin.computeUnit or s.monitor.requestedUpscale from inside the +// function, but those are sometimes nil. This way, it's clear that it's the caller's responsibility +// to ensure that the values are non-nil. +func (s *State) requiredCUForRequestedUpscaling(computeUnit api.Resources, requestedUpscale requestedUpscale) uint32 { var required uint32 - requested := s.monitor.requestedUpscale.requested + requested := requestedUpscale.requested + base := requestedUpscale.base // note: 1 + floor(x / M) gives the minimum integer value greater than x / M. 
if requested.Cpu { - required = util.Max(required, 1+uint32(s.vm.Cpu.Use/computeUnit.VCPU)) + required = util.Max(required, 1+uint32(base.VCPU/computeUnit.VCPU)) } if requested.Memory { - required = util.Max(required, 1+uint32(s.vm.Mem.Use/computeUnit.Mem)) + required = util.Max(required, 1+uint32(base.Mem/computeUnit.Mem)) } return required diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index a6b79761b..f239e29cc 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -110,10 +110,11 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { MemoryUsageFractionTarget: 0.5, }, // these don't really matter, because we're not using (*State).NextActions() - PluginRequestTick: time.Second, - PluginDeniedRetryWait: time.Second, - MonitorDeniedDownscaleCooldown: time.Second, - MonitorRetryWait: time.Second, + PluginRequestTick: time.Second, + PluginDeniedRetryWait: time.Second, + MonitorDeniedDownscaleCooldown: time.Second, + MonitorRequestedUpscaleValidPeriod: time.Second, + MonitorRetryWait: time.Second, Log: core.LogConfig{ Info: nil, Warn: func(msg string, fields ...zap.Field) { @@ -177,10 +178,11 @@ var DefaultInitialStateConfig = helpers.InitialStateConfig{ LoadAverageFractionTarget: 0.5, MemoryUsageFractionTarget: 0.5, }, - PluginRequestTick: 5 * time.Second, - PluginDeniedRetryWait: 2 * time.Second, - MonitorDeniedDownscaleCooldown: 5 * time.Second, - MonitorRetryWait: 3 * time.Second, + PluginRequestTick: 5 * time.Second, + PluginDeniedRetryWait: 2 * time.Second, + MonitorDeniedDownscaleCooldown: 5 * time.Second, + MonitorRequestedUpscaleValidPeriod: 10 * time.Second, + MonitorRetryWait: 3 * time.Second, Log: core.LogConfig{ Info: nil, Warn: nil, @@ -730,3 +732,336 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { Wait: &core.ActionWait{Duration: duration("5.9s")}, }) } + +// Checks that we scale up in a timely manner when the vm-monitor requests it, and don't request +// downscaling until the time expires. 
+func TestRequestedUpscale(t *testing.T) { + a := helpers.NewAssert(t) + clock := helpers.NewFakeClock(t) + clockTick := func() helpers.Elapsed { + return clock.Inc(100 * time.Millisecond) + } + resForCU := DefaultComputeUnit.Mul + + state := helpers.CreateInitialState( + DefaultInitialStateConfig, + helpers.WithStoredWarnings(a.StoredWarnings()), + helpers.WithConfigSetting(func(c *core.Config) { + c.MonitorRequestedUpscaleValidPeriod = duration("6s") // Override this for consistency + }), + ) + var actions core.ActionSet + updateActions := func() core.ActionSet { + actions = state.NextActions(clock.Now()) + return actions + } + + state.Plugin().NewScheduler() + state.Monitor().Active(true) + + // Send initial scheduler request: + doInitialPluginRequest(a, state, clock, duration("0.1s"), DefaultComputeUnit, nil, resForCU(1)) + + // Set metrics + clockTick() + lastMetrics := api.Metrics{ + LoadAverage1Min: 0.0, + LoadAverage5Min: 0.0, // unused + MemoryUsageBytes: 0.0, + } + a.Do(state.UpdateMetrics, lastMetrics) + + // Check we're not supposed to do anything + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.8s")}, + }) + + // Have the vm-monitor request upscaling: + a.Do(state.Monitor().UpscaleRequested, clock.Now(), api.MoreResources{Cpu: false, Memory: true}) + // First need to check with the scheduler plugin to get approval for upscaling: + state.Debug(true) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("6s")}, // if nothing else happens, requested upscale expires. + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(1)), + Target: resForCU(2), + Metrics: &lastMetrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(2)) + clockTick() + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("5.9s")}, // same waiting for requested upscale expiring + }) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resForCU(2), + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + + // After approval from the scheduler plugin, now need to make NeonVM request: + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.9s")}, // plugin tick wait is earlier than requested upscale expiration + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: resForCU(1), + Target: resForCU(2), + }, + }) + a.Do(state.NeonVM().StartingRequest, clock.Now(), resForCU(2)) + clockTick() + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) + + // Finally, tell the vm-monitor that it got upscaled: + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.8s")}, // still waiting on plugin tick + MonitorUpscale: &core.ActionMonitorUpscale{ + Current: resForCU(1), + Target: resForCU(2), + }, + }) + a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), resForCU(2)) + clockTick() + a.Do(state.Monitor().UpscaleRequestSuccessful, clock.Now()) + + // After everything, we should be waiting on both: + // (a) scheduler plugin tick (4.7s remaining), and + // (b) vm-monitor requested upscaling expiring (5.7s remaining) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.7s")}, + }) + + // Do the routine scheduler plugin request. 
Still waiting 1s for vm-monitor request expiration + clock.Inc(duration("4.7s")) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("1s")}, + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(2)), + Target: resForCU(2), + Metrics: &lastMetrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(2)) + clockTick() + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("0.9s")}, // waiting for requested upscale expiring + }) + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resForCU(2), + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + + // Still should just be waiting on vm-monitor upscale expiring + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("0.9s")}, + }) + clock.Inc(duration("0.9s")) + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4s")}, // now, waiting on plugin request tick + MonitorDownscale: &core.ActionMonitorDownscale{ + Current: resForCU(2), + Target: resForCU(1), + }, + }) +} + +// Checks that if we get new metrics partway through downscaling, then we pivot back to upscaling +// without further requests in furtherance of downscaling. +// +// For example, if we pivot during the NeonVM request to do the downscaling, then the request to to +// the scheduler plugin should never be made, because we decided against downscaling. +func TestDownscalePivotBack(t *testing.T) { + a := helpers.NewAssert(t) + var clock *helpers.FakeClock + + clockTickDuration := duration("0.1s") + clockTick := func() helpers.Elapsed { + return clock.Inc(clockTickDuration) + } + halfClockTick := func() helpers.Elapsed { + return clock.Inc(clockTickDuration / 2) + } + resForCU := DefaultComputeUnit.Mul + + var state *core.State + var actions core.ActionSet + updateActions := func() core.ActionSet { + actions = state.NextActions(clock.Now()) + return actions + } + + initialMetrics := api.Metrics{ + LoadAverage1Min: 0.0, + LoadAverage5Min: 0.0, + MemoryUsageBytes: 0.0, + } + newMetrics := api.Metrics{ + LoadAverage1Min: 0.3, + LoadAverage5Min: 0.0, + MemoryUsageBytes: 0.0, + } + + steps := []struct { + pre func(pluginWait *time.Duration, midRequest func()) + post func(pluginWait *time.Duration) + }{ + // vm-monitor requests: + { + pre: func(pluginWait *time.Duration, midRequest func()) { + t.Log(" > start vm-monitor downscale") + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: *pluginWait}, + MonitorDownscale: &core.ActionMonitorDownscale{ + Current: resForCU(2), + Target: resForCU(1), + }, + }) + a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(1)) + halfClockTick() + midRequest() + halfClockTick() + *pluginWait -= clockTickDuration + t.Log(" > finish vm-monitor downscale") + a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) + }, + post: func(pluginWait *time.Duration) { + t.Log(" > start vm-monitor upscale") + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: *pluginWait}, + MonitorUpscale: &core.ActionMonitorUpscale{ + Current: resForCU(1), + Target: resForCU(2), + }, + }) + a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), resForCU(2)) + clockTick() + *pluginWait -= clockTickDuration + t.Log(" > finish vm-monitor upscale") + a.Do(state.Monitor().UpscaleRequestSuccessful, clock.Now()) + }, + }, + // NeonVM requests + { + pre: func(pluginWait *time.Duration, midRequest 
func()) { + t.Log(" > start NeonVM downscale") + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: *pluginWait}, + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: resForCU(2), + Target: resForCU(1), + }, + }) + a.Do(state.NeonVM().StartingRequest, clock.Now(), resForCU(1)) + halfClockTick() + midRequest() + halfClockTick() + *pluginWait -= clockTickDuration + t.Log(" > finish NeonVM downscale") + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) + }, + post: func(pluginWait *time.Duration) { + t.Log(" > start NeonVM upscale") + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: *pluginWait}, + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: resForCU(1), + Target: resForCU(2), + }, + }) + a.Do(state.NeonVM().StartingRequest, clock.Now(), resForCU(2)) + clockTick() + *pluginWait -= clockTickDuration + t.Log(" > finish NeonVM upscale") + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) + }, + }, + // plugin requests + { + pre: func(pluginWait *time.Duration, midRequest func()) { + t.Log(" > start plugin downscale") + a.Call(updateActions).Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(2)), + Target: resForCU(1), + Metrics: &initialMetrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(1)) + halfClockTick() + midRequest() + halfClockTick() + *pluginWait = duration("4.9s") // reset because we just made a request + t.Log(" > finish plugin downscale") + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resForCU(1), + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + }, + post: func(pluginWait *time.Duration) { + t.Log(" > start plugin upscale") + a.Call(updateActions).Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(1)), + Target: resForCU(2), + Metrics: &newMetrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(2)) + clockTick() + *pluginWait = duration("4.9s") // reset because we just made a request + t.Log(" > finish plugin upscale") + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resForCU(2), + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + }, + }, + } + + for i := 0; i < len(steps); i++ { + t.Logf("iter(%d)", i) + + // Initial setup + clock = helpers.NewFakeClock(t) + state = helpers.CreateInitialState( + DefaultInitialStateConfig, + helpers.WithStoredWarnings(a.StoredWarnings()), + helpers.WithMinMaxCU(1, 3), + helpers.WithInitialCU(2), + ) + + state.Plugin().NewScheduler() + state.Monitor().Active(true) + + doInitialPluginRequest(a, state, clock, duration("0.1s"), DefaultComputeUnit, nil, resForCU(2)) + + clockTick().AssertEquals(duration("0.2s")) + pluginWait := duration("4.8s") + + a.Do(state.UpdateMetrics, initialMetrics) + // double-check that we agree about the desired resources + a.Call(getDesiredResources, state, clock.Now()). + Equals(resForCU(1)) + + for j := 0; j <= i; j++ { + midRequest := func() {} + if j == i { + // at the midpoint, start backtracking by setting the metrics + midRequest = func() { + t.Log(" > > updating metrics mid-request") + a.Do(state.UpdateMetrics, newMetrics) + a.Call(getDesiredResources, state, clock.Now()). 
+ Equals(resForCU(2)) + } + } + + steps[j].pre(&pluginWait, midRequest) + } + + for j := i; j >= 0; j-- { + steps[j].post(&pluginWait) + } + } +} diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index c01fc8624..fda9c7efa 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -212,11 +212,12 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util coreExecLogger := execLogger.Named("core") executorCore := executor.NewExecutorCore(coreExecLogger, getVmInfo(), executor.Config{ - DefaultScalingConfig: r.global.config.Scaling.DefaultConfig, - PluginRequestTick: time.Second * time.Duration(r.global.config.Scheduler.RequestAtLeastEverySeconds), - PluginDeniedRetryWait: time.Second * time.Duration(r.global.config.Scheduler.RetryDeniedUpscaleSeconds), - MonitorDeniedDownscaleCooldown: time.Second * time.Duration(r.global.config.Monitor.RetryDeniedDownscaleSeconds), - MonitorRetryWait: time.Second * time.Duration(r.global.config.Monitor.RetryFailedRequestSeconds), + DefaultScalingConfig: r.global.config.Scaling.DefaultConfig, + PluginRequestTick: time.Second * time.Duration(r.global.config.Scheduler.RequestAtLeastEverySeconds), + PluginDeniedRetryWait: time.Second * time.Duration(r.global.config.Scheduler.RetryDeniedUpscaleSeconds), + MonitorDeniedDownscaleCooldown: time.Second * time.Duration(r.global.config.Monitor.RetryDeniedDownscaleSeconds), + MonitorRequestedUpscaleValidPeriod: time.Second * time.Duration(r.global.config.Monitor.RequestedUpscaleValidSeconds), + MonitorRetryWait: time.Second * time.Duration(r.global.config.Monitor.RetryFailedRequestSeconds), Log: core.LogConfig{ Info: coreExecLogger.Info, Warn: coreExecLogger.Warn, From 04b09617cae88a9d1525fe1f26086c7a7a898ebb Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 7 Oct 2023 16:51:52 -0700 Subject: [PATCH 42/59] add 'downscale-then-upscale without plugin' test --- pkg/agent/core/state.go | 4 +- pkg/agent/core/state_test.go | 159 +++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 2 deletions(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index ebafe769b..bfe12b95b 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -337,7 +337,7 @@ func (s *State) calculatePluginAction( timeUntilNextRequestTick = s.config.PluginRequestTick - now.Sub(s.plugin.lastRequest.at) } - timeForRequest := timeUntilNextRequestTick <= 0 + timeForRequest := timeUntilNextRequestTick <= 0 && s.plugin.alive var timeUntilRetryBackoffExpires time.Duration requestPreviouslyDenied := !s.plugin.ongoingRequest && @@ -901,7 +901,7 @@ func (h PluginHandle) NewScheduler() { h.s.plugin = pluginState{ alive: true, ongoingRequest: false, - computeUnit: nil, + computeUnit: h.s.plugin.computeUnit, // Keep the previous scheduler's CU unless told otherwise lastRequest: nil, permit: h.s.plugin.permit, // Keep this; trust the previous scheduler. } diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index f239e29cc..5245c896c 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -1065,3 +1065,162 @@ func TestDownscalePivotBack(t *testing.T) { } } } + +// Checks that if we're disconnected from the scheduler plugin, we're able to downscale and +// re-upscale back to the last allocation before disconnection, but not beyond that. +// Also checks that when we reconnect, we first *inform* the scheduler plugin of the current +// resource allocation, and *then* send a follow-up request asking for additional resources. 
+func TestSchedulerDownscaleReupscale(t *testing.T) { + a := helpers.NewAssert(t) + clock := helpers.NewFakeClock(t) + clockTick := func() helpers.Elapsed { + return clock.Inc(100 * time.Millisecond) + } + resForCU := DefaultComputeUnit.Mul + + state := helpers.CreateInitialState( + DefaultInitialStateConfig, + helpers.WithStoredWarnings(a.StoredWarnings()), + helpers.WithMinMaxCU(1, 3), + helpers.WithInitialCU(2), + ) + var actions core.ActionSet + updateActions := func() core.ActionSet { + actions = state.NextActions(clock.Now()) + return actions + } + + state.Plugin().NewScheduler() + state.Monitor().Active(true) + + // Send initial scheduler request: + doInitialPluginRequest(a, state, clock, duration("0.1s"), DefaultComputeUnit, nil, resForCU(2)) + + clockTick() + + // Set metrics + a.Do(state.UpdateMetrics, api.Metrics{ + LoadAverage1Min: 0.3, // <- means desired scale = 2 + LoadAverage5Min: 0.0, // unused + MemoryUsageBytes: 0.0, + }) + // Check we're not supposed to do anything + a.Call(updateActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.8s")}, + }) + + clockTick() + + // Record the scheduler as disconnected + state.Plugin().SchedulerGone() + // ... and check that there's nothing we can do: + state.Debug(true) + a.Call(updateActions).Equals(core.ActionSet{}) + + clockTick() + + // First: + // 1. Change the metrics so we want to downscale to 1 CU + // 2. Request downscaling from the vm-monitor + // 3. Do the NeonVM request + // 4. But *don't* do the request to the scheduler plugin (because it's not there) + a.Do(state.UpdateMetrics, api.Metrics{ + LoadAverage1Min: 0.0, + LoadAverage5Min: 0.0, + MemoryUsageBytes: 0.0, + }) + // Check that we agree about desired resources + a.Call(getDesiredResources, state, clock.Now()). + Equals(resForCU(1)) + // Do vm-monitor request: + a.Call(updateActions).Equals(core.ActionSet{ + MonitorDownscale: &core.ActionMonitorDownscale{ + Current: resForCU(2), + Target: resForCU(1), + }, + }) + a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(1)) + clockTick() + a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) + // Do the NeonVM request + a.Call(updateActions).Equals(core.ActionSet{ + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: resForCU(2), + Target: resForCU(1), + }, + }) + a.Do(state.NeonVM().StartingRequest, clock.Now(), resForCU(1)) + clockTick() + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) + + // Now the current state reflects the desired state, so there shouldn't be anything else we need + // to do or wait on. + a.Call(updateActions).Equals(core.ActionSet{}) + + // Next: + // 1. Change the metrics so we want to upscale to 3 CU + // 2. Can't do the scheduler plugin request (not active), but we previously got a permit for 2 CU + // 3. Do the NeonVM request for 2 CU (3 isn't approved) + // 4. Do vm-monitor upscale request for 2 CU + lastMetrics := api.Metrics{ + LoadAverage1Min: 0.5, + LoadAverage5Min: 0.0, + MemoryUsageBytes: 0.0, + } + a.Do(state.UpdateMetrics, lastMetrics) + a.Call(getDesiredResources, state, clock.Now()). 
+ Equals(resForCU(3)) + // Do NeonVM request + a.Call(updateActions).Equals(core.ActionSet{ + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: resForCU(1), + Target: resForCU(2), + }, + }) + a.Do(state.NeonVM().StartingRequest, clock.Now(), resForCU(2)) + clockTick() + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) + // Do vm-monitor request + a.Call(updateActions).Equals(core.ActionSet{ + MonitorUpscale: &core.ActionMonitorUpscale{ + Current: resForCU(1), + Target: resForCU(2), + }, + }) + a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), resForCU(2)) + clockTick() + a.Do(state.Monitor().UpscaleRequestSuccessful, clock.Now()) + + // Nothing left to do in the meantime, because again, the current state reflects the desired + // state (at least, given that the we can't request anything from the scheduler plugin) + + // Finally: + // 1. Update the state so we can now communicate with the scheduler plugin + // 2. Make an initial request to the plugin to inform it of *current* resources + // 3. Make another request to the plugin to request up to 3 CU + // We could test after that too, but this should be enough. + a.Do(state.Plugin().NewScheduler) + // Initial request: informative about current usage + a.Call(updateActions).Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(2)), + Target: resForCU(2), + Metrics: &lastMetrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(2)) + clockTick() + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resForCU(2), + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + // Follow-up request: request additional resources + a.Call(updateActions).Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(2)), + Target: resForCU(3), + Metrics: &lastMetrics, + }, + }) +} From 24ed6ece95ea6483761e516898b9b89f78cd4e47 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 7 Oct 2023 17:27:39 -0700 Subject: [PATCH 43/59] s/updateActions/nextActions/g --- pkg/agent/core/state_test.go | 166 ++++++++++++++++------------------- 1 file changed, 78 insertions(+), 88 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 5245c896c..38dd0bcd8 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -249,10 +249,8 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { helpers.WithStoredWarnings(a.StoredWarnings()), helpers.WithTestingLogfWarnings(t), ) - var actions core.ActionSet - updateActions := func() core.ActionSet { - actions = state.NextActions(clock.Now()) - return actions + nextActions := func() core.ActionSet { + return state.NextActions(clock.Now()) } state.Plugin().NewScheduler() @@ -275,7 +273,7 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { // Now that the initial scheduler request is done, and we have metrics that indicate // scale-up would be a good idea, we should be contacting the scheduler to get approval. 
- a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(1)), Target: resForCU(2), @@ -283,10 +281,10 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { }, }) // start the request: - a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(2)) clockTick().AssertEquals(duration("0.3s")) // should have nothing more to do; waiting on plugin request to come back - a.Call(updateActions).Equals(core.ActionSet{}) + a.Call(nextActions).Equals(core.ActionSet{}) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ Permit: resForCU(2), Migrate: nil, @@ -294,7 +292,7 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { }) // Scheduler approval is done, now we should be making the request to NeonVM - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ // expected to make a scheduler request every 5s; it's been 100ms since the last one, so // if the NeonVM request didn't come back in time, we'd need to get woken up to start // the next scheduler request. @@ -305,16 +303,16 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { }, }) // start the request: - a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) + a.Do(state.NeonVM().StartingRequest, clock.Now(), resForCU(2)) clockTick().AssertEquals(duration("0.4s")) // should have nothing more to do; waiting on NeonVM request to come back - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.8s")}, }) a.Do(state.NeonVM().RequestSuccessful, clock.Now()) // NeonVM change is done, now we should finish by notifying the vm-monitor - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.8s")}, // same as previous, clock hasn't changed MonitorUpscale: &core.ActionMonitorUpscale{ Current: resForCU(1), @@ -322,17 +320,17 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { }, }) // start the request: - a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), actions.MonitorUpscale.Target) + a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), resForCU(2)) clockTick().AssertEquals(duration("0.5s")) // should have nothing more to do; waiting on vm-monitor request to come back - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.7s")}, }) a.Do(state.Monitor().UpscaleRequestSuccessful, clock.Now()) // And now, double-check that there's no sneaky follow-up actions before we change the // metrics - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.7s")}, // same as previous, clock hasn't changed }) @@ -352,39 +350,39 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { Equals(resForCU(1)) // First step in downscaling is getting approval from the vm-monitor: - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.6s")}, MonitorDownscale: &core.ActionMonitorDownscale{ Current: resForCU(2), Target: resForCU(1), }, }) - a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), actions.MonitorDownscale.Target) + a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(1)) 
clockTick().AssertEquals(duration("0.7s")) // should have nothing more to do; waiting on vm-monitor request to come back - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.5s")}, }) a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) // After getting approval from the vm-monitor, we make the request to NeonVM to carry it out - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.5s")}, // same as previous, clock hasn't changed NeonVMRequest: &core.ActionNeonVMRequest{ Current: resForCU(2), Target: resForCU(1), }, }) - a.Do(state.NeonVM().StartingRequest, clock.Now(), actions.NeonVMRequest.Target) + a.Do(state.NeonVM().StartingRequest, clock.Now(), resForCU(1)) clockTick().AssertEquals(duration("0.8s")) // should have nothing more to do; waiting on NeonVM request to come back - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.4s")}, }) a.Do(state.NeonVM().RequestSuccessful, clock.Now()) // Request to NeonVM completed, it's time to inform the scheduler plugin: - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(2)), Target: resForCU(1), @@ -392,10 +390,10 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { }, // shouldn't have anything to say to the other components }) - a.Do(state.Plugin().StartingRequest, clock.Now(), actions.PluginRequest.Target) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(1)) clockTick().AssertEquals(duration("0.9s")) // should have nothing more to do; waiting on plugin request to come back - a.Call(updateActions).Equals(core.ActionSet{}) + a.Call(nextActions).Equals(core.ActionSet{}) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ Permit: resForCU(1), Migrate: nil, @@ -403,7 +401,7 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) { }) // Finally, check there's no leftover actions: - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.9s")}, // request that just finished was started 100ms ago }) } @@ -494,10 +492,8 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { }), ) - var actions core.ActionSet - updateActions := func() core.ActionSet { - actions = state.NextActions(clock.Now()) - return actions + nextActions := func() core.ActionSet { + return state.NextActions(clock.Now()) } state.Plugin().NewScheduler() @@ -539,7 +535,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { clock.Elapsed().AssertEquals(duration("0.2s")) currentPluginWait := duration("5.8s") for cu := uint16(1); cu <= 5; cu += 1 { - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: currentPluginWait}, MonitorDownscale: &core.ActionMonitorDownscale{ Current: resForCU(6), @@ -548,7 +544,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { }) // Do the request: a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(cu)) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: currentPluginWait}, }) clockTick() @@ -556,7 +552,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { a.Do(state.Monitor().DownscaleRequestDenied, 
clock.Now()) } // At the end, we should be waiting to retry downscaling: - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ // Taken from DefaultInitialStateConfig.Core.MonitorDeniedDownscaleCooldown Wait: &core.ActionWait{Duration: duration("4s")}, }) @@ -566,7 +562,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { // Second pass: Approve only down to 3 CU, then NeonVM & plugin requests. for cu := uint16(1); cu <= 3; cu += 1 { - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: currentPluginWait}, MonitorDownscale: &core.ActionMonitorDownscale{ Current: resForCU(6), @@ -574,7 +570,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { }, }) a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(cu)) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: currentPluginWait}, }) clockTick() @@ -588,7 +584,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { // At this point, waiting 3.9s for next attempt to downscale below 3 CU (last request was // successful, but the one before it wasn't), and 1s for plugin tick. // Also, because downscaling was approved, we should want to make a NeonVM request to do that. - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("1s")}, NeonVMRequest: &core.ActionNeonVMRequest{ Current: resForCU(6), @@ -597,14 +593,14 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { }) // Make the request: a.Do(state.NeonVM().StartingRequest, time.Now(), resForCU(3)) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("1s")}, }) clockTick().AssertEquals(duration("5.1s")) a.Do(state.NeonVM().RequestSuccessful, time.Now()) // Successfully scaled down, so we should now inform the plugin. But also, we'll want to retry // the downscale request to vm-monitor once the retry is up: - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("3.8s")}, PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(6)), @@ -613,7 +609,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { }, }) a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(3)) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("3.8s")}, }) clockTick().AssertEquals(duration("5.2s")) @@ -623,7 +619,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { ComputeUnit: DefaultComputeUnit, }) // ... And *now* there's nothing left to do but wait until downscale wait expires: - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("3.7s")}, }) @@ -633,7 +629,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { // Third pass: deny all requests. 
currentPluginWait = duration("2.2s") for cu := uint16(1); cu < 3; cu += 1 { - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: currentPluginWait}, MonitorDownscale: &core.ActionMonitorDownscale{ Current: resForCU(3), @@ -641,7 +637,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { }, }) a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(cu)) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: currentPluginWait}, }) clockTick() @@ -651,12 +647,12 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { clock.Elapsed().AssertEquals(duration("9.1s")) // At the end, we should be waiting to retry downscaling (but actually, the regular plugin // request is coming up sooner). - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("2s")}, }) // ... so, wait for that plugin request/response, and then wait to retry downscaling: clock.Inc(duration("2s")).AssertEquals(duration("11.1s")) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("2s")}, // still want to retry vm-monitor downscaling PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(3)), @@ -665,7 +661,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { }, }) a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(3)) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("2s")}, // still waiting on retrying vm-monitor downscaling }) clockTick().AssertEquals(duration("11.2s")) @@ -674,14 +670,14 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { Migrate: nil, ComputeUnit: DefaultComputeUnit, }) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("1.9s")}, // yep, still waiting on retrying vm-monitor downscaling }) clock.Inc(duration("2s")).AssertEquals(duration("13.2s")) // Fourth pass: approve down to 1 CU - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("3.9s")}, // waiting for plugin request tick MonitorDownscale: &core.ActionMonitorDownscale{ Current: resForCU(3), @@ -689,14 +685,14 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { }, }) a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(1)) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("3.9s")}, // still waiting on plugin }) clockTick().AssertEquals(duration("13.3s")) a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) // Still waiting on plugin request tick, but we can make a NeonVM request to enact the // downscaling right away ! 
- a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("3.8s")}, NeonVMRequest: &core.ActionNeonVMRequest{ Current: resForCU(3), @@ -704,13 +700,13 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { }, }) a.Do(state.NeonVM().StartingRequest, time.Now(), resForCU(1)) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("3.8s")}, // yep, still waiting on the plugin }) clockTick().AssertEquals(duration("13.4s")) a.Do(state.NeonVM().RequestSuccessful, time.Now()) // Successfully downscaled, so now we should inform the plugin. Not waiting on any retries. - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(3)), Target: resForCU(1), @@ -718,7 +714,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { }, }) a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(1)) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ // not waiting on anything! }) clockTick().AssertEquals(duration("13.5s")) @@ -728,7 +724,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { ComputeUnit: DefaultComputeUnit, }) // And now there's truly nothing left to do. Back to waiting on plugin request tick :) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("5.9s")}, }) } @@ -750,10 +746,8 @@ func TestRequestedUpscale(t *testing.T) { c.MonitorRequestedUpscaleValidPeriod = duration("6s") // Override this for consistency }), ) - var actions core.ActionSet - updateActions := func() core.ActionSet { - actions = state.NextActions(clock.Now()) - return actions + nextActions := func() core.ActionSet { + return state.NextActions(clock.Now()) } state.Plugin().NewScheduler() @@ -772,7 +766,7 @@ func TestRequestedUpscale(t *testing.T) { a.Do(state.UpdateMetrics, lastMetrics) // Check we're not supposed to do anything - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.8s")}, }) @@ -780,7 +774,7 @@ func TestRequestedUpscale(t *testing.T) { a.Do(state.Monitor().UpscaleRequested, clock.Now(), api.MoreResources{Cpu: false, Memory: true}) // First need to check with the scheduler plugin to get approval for upscaling: state.Debug(true) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("6s")}, // if nothing else happens, requested upscale expires. 
PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(1)), @@ -790,7 +784,7 @@ func TestRequestedUpscale(t *testing.T) { }) a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(2)) clockTick() - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("5.9s")}, // same waiting for requested upscale expiring }) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ @@ -800,7 +794,7 @@ func TestRequestedUpscale(t *testing.T) { }) // After approval from the scheduler plugin, now need to make NeonVM request: - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.9s")}, // plugin tick wait is earlier than requested upscale expiration NeonVMRequest: &core.ActionNeonVMRequest{ Current: resForCU(1), @@ -812,7 +806,7 @@ func TestRequestedUpscale(t *testing.T) { a.Do(state.NeonVM().RequestSuccessful, clock.Now()) // Finally, tell the vm-monitor that it got upscaled: - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.8s")}, // still waiting on plugin tick MonitorUpscale: &core.ActionMonitorUpscale{ Current: resForCU(1), @@ -826,13 +820,13 @@ func TestRequestedUpscale(t *testing.T) { // After everything, we should be waiting on both: // (a) scheduler plugin tick (4.7s remaining), and // (b) vm-monitor requested upscaling expiring (5.7s remaining) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.7s")}, }) // Do the routine scheduler plugin request. Still waiting 1s for vm-monitor request expiration clock.Inc(duration("4.7s")) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("1s")}, PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(2)), @@ -842,7 +836,7 @@ func TestRequestedUpscale(t *testing.T) { }) a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(2)) clockTick() - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("0.9s")}, // waiting for requested upscale expiring }) a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ @@ -852,11 +846,11 @@ func TestRequestedUpscale(t *testing.T) { }) // Still should just be waiting on vm-monitor upscale expiring - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("0.9s")}, }) clock.Inc(duration("0.9s")) - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4s")}, // now, waiting on plugin request tick MonitorDownscale: &core.ActionMonitorDownscale{ Current: resForCU(2), @@ -884,10 +878,8 @@ func TestDownscalePivotBack(t *testing.T) { resForCU := DefaultComputeUnit.Mul var state *core.State - var actions core.ActionSet - updateActions := func() core.ActionSet { - actions = state.NextActions(clock.Now()) - return actions + nextActions := func() core.ActionSet { + return state.NextActions(clock.Now()) } initialMetrics := api.Metrics{ @@ -909,7 +901,7 @@ func TestDownscalePivotBack(t *testing.T) { { pre: func(pluginWait *time.Duration, midRequest func()) { t.Log(" > start vm-monitor downscale") - 
a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: *pluginWait}, MonitorDownscale: &core.ActionMonitorDownscale{ Current: resForCU(2), @@ -926,7 +918,7 @@ func TestDownscalePivotBack(t *testing.T) { }, post: func(pluginWait *time.Duration) { t.Log(" > start vm-monitor upscale") - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: *pluginWait}, MonitorUpscale: &core.ActionMonitorUpscale{ Current: resForCU(1), @@ -944,7 +936,7 @@ func TestDownscalePivotBack(t *testing.T) { { pre: func(pluginWait *time.Duration, midRequest func()) { t.Log(" > start NeonVM downscale") - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: *pluginWait}, NeonVMRequest: &core.ActionNeonVMRequest{ Current: resForCU(2), @@ -961,7 +953,7 @@ func TestDownscalePivotBack(t *testing.T) { }, post: func(pluginWait *time.Duration) { t.Log(" > start NeonVM upscale") - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: *pluginWait}, NeonVMRequest: &core.ActionNeonVMRequest{ Current: resForCU(1), @@ -979,7 +971,7 @@ func TestDownscalePivotBack(t *testing.T) { { pre: func(pluginWait *time.Duration, midRequest func()) { t.Log(" > start plugin downscale") - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(2)), Target: resForCU(1), @@ -1000,7 +992,7 @@ func TestDownscalePivotBack(t *testing.T) { }, post: func(pluginWait *time.Duration) { t.Log(" > start plugin upscale") - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(1)), Target: resForCU(2), @@ -1084,10 +1076,8 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { helpers.WithMinMaxCU(1, 3), helpers.WithInitialCU(2), ) - var actions core.ActionSet - updateActions := func() core.ActionSet { - actions = state.NextActions(clock.Now()) - return actions + nextActions := func() core.ActionSet { + return state.NextActions(clock.Now()) } state.Plugin().NewScheduler() @@ -1105,7 +1095,7 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { MemoryUsageBytes: 0.0, }) // Check we're not supposed to do anything - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("4.8s")}, }) @@ -1115,7 +1105,7 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { state.Plugin().SchedulerGone() // ... and check that there's nothing we can do: state.Debug(true) - a.Call(updateActions).Equals(core.ActionSet{}) + a.Call(nextActions).Equals(core.ActionSet{}) clockTick() @@ -1133,7 +1123,7 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { a.Call(getDesiredResources, state, clock.Now()). 
Equals(resForCU(1)) // Do vm-monitor request: - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ MonitorDownscale: &core.ActionMonitorDownscale{ Current: resForCU(2), Target: resForCU(1), @@ -1143,7 +1133,7 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { clockTick() a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) // Do the NeonVM request - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ NeonVMRequest: &core.ActionNeonVMRequest{ Current: resForCU(2), Target: resForCU(1), @@ -1155,7 +1145,7 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { // Now the current state reflects the desired state, so there shouldn't be anything else we need // to do or wait on. - a.Call(updateActions).Equals(core.ActionSet{}) + a.Call(nextActions).Equals(core.ActionSet{}) // Next: // 1. Change the metrics so we want to upscale to 3 CU @@ -1171,7 +1161,7 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { a.Call(getDesiredResources, state, clock.Now()). Equals(resForCU(3)) // Do NeonVM request - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ NeonVMRequest: &core.ActionNeonVMRequest{ Current: resForCU(1), Target: resForCU(2), @@ -1181,7 +1171,7 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { clockTick() a.Do(state.NeonVM().RequestSuccessful, clock.Now()) // Do vm-monitor request - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ MonitorUpscale: &core.ActionMonitorUpscale{ Current: resForCU(1), Target: resForCU(2), @@ -1201,7 +1191,7 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { // We could test after that too, but this should be enough. a.Do(state.Plugin().NewScheduler) // Initial request: informative about current usage - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(2)), Target: resForCU(2), @@ -1216,7 +1206,7 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { ComputeUnit: DefaultComputeUnit, }) // Follow-up request: request additional resources - a.Call(updateActions).Equals(core.ActionSet{ + a.Call(nextActions).Equals(core.ActionSet{ PluginRequest: &core.ActionPluginRequest{ LastPermit: ptr(resForCU(2)), Target: resForCU(3), From e1ff8f3b0f0612b9d66382199cd7bc7788e784bf Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sat, 7 Oct 2023 19:12:55 -0700 Subject: [PATCH 44/59] testhelpers: allow separate VmInfo construction --- pkg/agent/core/state_test.go | 10 +- pkg/agent/core/testhelpers/construct.go | 137 ++++++++++++++---------- 2 files changed, 89 insertions(+), 58 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 38dd0bcd8..2f6c0559a 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -168,11 +168,13 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) { var DefaultComputeUnit = api.Resources{VCPU: 250, Mem: 1} var DefaultInitialStateConfig = helpers.InitialStateConfig{ - ComputeUnit: DefaultComputeUnit, - MemorySlotSize: resource.MustParse("1Gi"), + VM: helpers.InitialVmInfoConfig{ + ComputeUnit: DefaultComputeUnit, + MemorySlotSize: resource.MustParse("1Gi"), - MinCU: 1, - MaxCU: 4, + MinCU: 1, + MaxCU: 4, + }, Core: core.Config{ DefaultScalingConfig: api.ScalingConfig{ LoadAverageFractionTarget: 0.5, diff --git a/pkg/agent/core/testhelpers/construct.go b/pkg/agent/core/testhelpers/construct.go 
index f7315eb4a..1bbd5fed5 100644 --- a/pkg/agent/core/testhelpers/construct.go +++ b/pkg/agent/core/testhelpers/construct.go @@ -13,28 +13,53 @@ import ( "github.com/neondatabase/autoscaling/pkg/api" ) -type InitialStateConfig struct { +type InitialVmInfoConfig struct { ComputeUnit api.Resources MemorySlotSize resource.Quantity MinCU uint16 MaxCU uint16 +} + +type InitialStateConfig struct { + VM InitialVmInfoConfig Core core.Config } -type InitialStateOpt struct { - preCreate func(*InitialStateConfig) - postCreate func(InitialStateConfig, *api.VmInfo) +type InitialStateOpt interface { + modifyStateConfig(*core.Config) +} + +type InitialVmInfoOpt interface { + InitialStateOpt + + modifyVmInfoConfig(*InitialVmInfoConfig) + modifyVmInfoWithConfig(InitialVmInfoConfig, *api.VmInfo) } func CreateInitialState(config InitialStateConfig, opts ...InitialStateOpt) *core.State { + vmOpts := []InitialVmInfoOpt{} for _, o := range opts { - if o.preCreate != nil { - o.preCreate(&config) + if vo, ok := o.(InitialVmInfoOpt); ok { + vmOpts = append(vmOpts, vo) } } + vm := CreateInitialVmInfo(config.VM, vmOpts...) + + for _, o := range opts { + o.modifyStateConfig(&config.Core) + } + + return core.NewState(vm, config.Core) +} + +func CreateInitialVmInfo(config InitialVmInfoConfig, opts ...InitialVmInfoOpt) api.VmInfo { + for _, o := range opts { + o.modifyVmInfoConfig(&config) + } + vm := api.VmInfo{ Name: "test", Namespace: "test", @@ -55,68 +80,72 @@ func CreateInitialState(config InitialStateConfig, opts ...InitialStateOpt) *cor } for _, o := range opts { - if o.postCreate != nil { - o.postCreate(config, &vm) - } + o.modifyVmInfoWithConfig(config, &vm) } - return core.NewState(vm, config.Core) + return vm +} + +type coreConfigModifier func(*core.Config) +type vmInfoConfigModifier func(*InitialVmInfoConfig) +type vmInfoModifier func(InitialVmInfoConfig, *api.VmInfo) + +var ( + _ InitialVmInfoOpt = vmInfoConfigModifier(nil) + _ InitialVmInfoOpt = vmInfoModifier(nil) +) + +func (m coreConfigModifier) modifyStateConfig(c *core.Config) { (func(*core.Config))(m)(c) } +func (m vmInfoConfigModifier) modifyStateConfig(*core.Config) {} +func (m vmInfoModifier) modifyStateConfig(*core.Config) {} + +func (m vmInfoModifier) modifyVmInfoConfig(*InitialVmInfoConfig) {} +func (m vmInfoConfigModifier) modifyVmInfoConfig(c *InitialVmInfoConfig) { + (func(*InitialVmInfoConfig))(m)(c) +} + +func (m vmInfoConfigModifier) modifyVmInfoWithConfig(InitialVmInfoConfig, *api.VmInfo) {} +func (m vmInfoModifier) modifyVmInfoWithConfig(c InitialVmInfoConfig, vm *api.VmInfo) { + (func(InitialVmInfoConfig, *api.VmInfo))(m)(c, vm) +} + +func WithConfigSetting(f func(*core.Config)) InitialStateOpt { + return coreConfigModifier(f) } func WithStoredWarnings(warnings *[]string) InitialStateOpt { - return InitialStateOpt{ - postCreate: nil, - preCreate: func(c *InitialStateConfig) { - warn := c.Core.Log.Warn - c.Core.Log.Warn = func(msg string, fields ...zap.Field) { - *warnings = append(*warnings, msg) - if warn != nil { - warn(msg, fields...) - } + return WithConfigSetting(func(c *core.Config) { + warn := c.Log.Warn + c.Log.Warn = func(msg string, fields ...zap.Field) { + *warnings = append(*warnings, msg) + if warn != nil { + warn(msg, fields...) 
} - }, - } + } + }) } func WithTestingLogfWarnings(t *testing.T) InitialStateOpt { - return InitialStateOpt{ - postCreate: nil, - preCreate: func(c *InitialStateConfig) { - warn := c.Core.Log.Warn - c.Core.Log.Warn = func(msg string, fields ...zap.Field) { - t.Log(msg) - if warn != nil { - warn(msg, fields...) - } + return WithConfigSetting(func(c *core.Config) { + warn := c.Log.Warn + c.Log.Warn = func(msg string, fields ...zap.Field) { + t.Log(msg) + if warn != nil { + warn(msg, fields...) } - }, - } + } + }) } func WithMinMaxCU(minCU, maxCU uint16) InitialStateOpt { - return InitialStateOpt{ - preCreate: func(c *InitialStateConfig) { - c.MinCU = minCU - c.MaxCU = maxCU - }, - postCreate: nil, - } + return vmInfoConfigModifier(func(c *InitialVmInfoConfig) { + c.MinCU = minCU + c.MaxCU = maxCU + }) } func WithInitialCU(cu uint16) InitialStateOpt { - return InitialStateOpt{ - preCreate: nil, - postCreate: func(c InitialStateConfig, vm *api.VmInfo) { - vm.SetUsing(c.ComputeUnit.Mul(cu)) - }, - } -} - -func WithConfigSetting(f func(*core.Config)) InitialStateOpt { - return InitialStateOpt{ - preCreate: func(c *InitialStateConfig) { - f(&c.Core) - }, - postCreate: nil, - } + return vmInfoModifier(func(c InitialVmInfoConfig, vm *api.VmInfo) { + vm.SetUsing(c.ComputeUnit.Mul(cu)) + }) } From bf4db09e801c0aeaf01c8b6d99b9c24e0d2171ed Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 09:11:57 -0700 Subject: [PATCH 45/59] rename some testhelpers bits --- pkg/agent/core/state_test.go | 6 +++--- pkg/agent/core/testhelpers/construct.go | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 2f6c0559a..6115a0fbf 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -486,7 +486,7 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) { DefaultInitialStateConfig, helpers.WithStoredWarnings(a.StoredWarnings()), helpers.WithMinMaxCU(1, 8), - helpers.WithInitialCU(6), // NOTE: Start at 6 CU, so we're trying to scale down immediately. + helpers.WithCurrentCU(6), // NOTE: Start at 6 CU, so we're trying to scale down immediately. helpers.WithConfigSetting(func(c *core.Config) { // values close to the default, so request timing works out a little better. 
c.PluginRequestTick = duration("6s") @@ -1023,7 +1023,7 @@ func TestDownscalePivotBack(t *testing.T) { DefaultInitialStateConfig, helpers.WithStoredWarnings(a.StoredWarnings()), helpers.WithMinMaxCU(1, 3), - helpers.WithInitialCU(2), + helpers.WithCurrentCU(2), ) state.Plugin().NewScheduler() @@ -1076,7 +1076,7 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { DefaultInitialStateConfig, helpers.WithStoredWarnings(a.StoredWarnings()), helpers.WithMinMaxCU(1, 3), - helpers.WithInitialCU(2), + helpers.WithCurrentCU(2), ) nextActions := func() core.ActionSet { return state.NextActions(clock.Now()) diff --git a/pkg/agent/core/testhelpers/construct.go b/pkg/agent/core/testhelpers/construct.go index 1bbd5fed5..6ae73229a 100644 --- a/pkg/agent/core/testhelpers/construct.go +++ b/pkg/agent/core/testhelpers/construct.go @@ -31,7 +31,7 @@ type InitialStateOpt interface { modifyStateConfig(*core.Config) } -type InitialVmInfoOpt interface { +type VmInfoOpt interface { InitialStateOpt modifyVmInfoConfig(*InitialVmInfoConfig) @@ -39,14 +39,14 @@ type InitialVmInfoOpt interface { } func CreateInitialState(config InitialStateConfig, opts ...InitialStateOpt) *core.State { - vmOpts := []InitialVmInfoOpt{} + vmOpts := []VmInfoOpt{} for _, o := range opts { - if vo, ok := o.(InitialVmInfoOpt); ok { + if vo, ok := o.(VmInfoOpt); ok { vmOpts = append(vmOpts, vo) } } - vm := CreateInitialVmInfo(config.VM, vmOpts...) + vm := CreateVmInfo(config.VM, vmOpts...) for _, o := range opts { o.modifyStateConfig(&config.Core) @@ -55,7 +55,7 @@ func CreateInitialState(config InitialStateConfig, opts ...InitialStateOpt) *cor return core.NewState(vm, config.Core) } -func CreateInitialVmInfo(config InitialVmInfoConfig, opts ...InitialVmInfoOpt) api.VmInfo { +func CreateVmInfo(config InitialVmInfoConfig, opts ...VmInfoOpt) api.VmInfo { for _, o := range opts { o.modifyVmInfoConfig(&config) } @@ -91,8 +91,8 @@ type vmInfoConfigModifier func(*InitialVmInfoConfig) type vmInfoModifier func(InitialVmInfoConfig, *api.VmInfo) var ( - _ InitialVmInfoOpt = vmInfoConfigModifier(nil) - _ InitialVmInfoOpt = vmInfoModifier(nil) + _ VmInfoOpt = vmInfoConfigModifier(nil) + _ VmInfoOpt = vmInfoModifier(nil) ) func (m coreConfigModifier) modifyStateConfig(c *core.Config) { (func(*core.Config))(m)(c) } @@ -137,14 +137,14 @@ func WithTestingLogfWarnings(t *testing.T) InitialStateOpt { }) } -func WithMinMaxCU(minCU, maxCU uint16) InitialStateOpt { +func WithMinMaxCU(minCU, maxCU uint16) VmInfoOpt { return vmInfoConfigModifier(func(c *InitialVmInfoConfig) { c.MinCU = minCU c.MaxCU = maxCU }) } -func WithInitialCU(cu uint16) InitialStateOpt { +func WithCurrentCU(cu uint16) VmInfoOpt { return vmInfoModifier(func(c InitialVmInfoConfig, vm *api.VmInfo) { vm.SetUsing(c.ComputeUnit.Mul(cu)) }) From b6c4d4590decb3002788aeb728c56584df0a1b19 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 09:12:35 -0700 Subject: [PATCH 46/59] state_test: remove calls to (*State).Debug() --- pkg/agent/core/state_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 6115a0fbf..453aa3fc9 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -775,7 +775,6 @@ func TestRequestedUpscale(t *testing.T) { // Have the vm-monitor request upscaling: a.Do(state.Monitor().UpscaleRequested, clock.Now(), api.MoreResources{Cpu: false, Memory: true}) // First need to check with the scheduler plugin to get approval for upscaling: - state.Debug(true) 
a.Call(nextActions).Equals(core.ActionSet{ Wait: &core.ActionWait{Duration: duration("6s")}, // if nothing else happens, requested upscale expires. PluginRequest: &core.ActionPluginRequest{ @@ -1106,7 +1105,6 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { // Record the scheduler as disconnected state.Plugin().SchedulerGone() // ... and check that there's nothing we can do: - state.Debug(true) a.Call(nextActions).Equals(core.ActionSet{}) clockTick() From 227ea8377b5c504e2ca2405d9bf0fe85ccee82d4 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 09:12:59 -0700 Subject: [PATCH 47/59] state_test: add tests that VM bounds changes are respected --- pkg/agent/core/state_test.go | 192 +++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/pkg/agent/core/state_test.go b/pkg/agent/core/state_test.go index 453aa3fc9..308c53704 100644 --- a/pkg/agent/core/state_test.go +++ b/pkg/agent/core/state_test.go @@ -1214,3 +1214,195 @@ func TestSchedulerDownscaleReupscale(t *testing.T) { }, }) } + +// Checks that if the VM's min/max bounds change so that the maximum is below the current and +// desired usage, we try to downscale +func TestBoundsChangeRequiresDownsale(t *testing.T) { + a := helpers.NewAssert(t) + clock := helpers.NewFakeClock(t) + clockTick := func() helpers.Elapsed { + return clock.Inc(100 * time.Millisecond) + } + resForCU := DefaultComputeUnit.Mul + + state := helpers.CreateInitialState( + DefaultInitialStateConfig, + helpers.WithStoredWarnings(a.StoredWarnings()), + helpers.WithMinMaxCU(1, 3), + helpers.WithCurrentCU(2), + ) + nextActions := func() core.ActionSet { + return state.NextActions(clock.Now()) + } + + state.Plugin().NewScheduler() + state.Monitor().Active(true) + + // Send initial scheduler request: + doInitialPluginRequest(a, state, clock, duration("0.1s"), DefaultComputeUnit, nil, resForCU(2)) + + clockTick() + + // Set metrics so the desired resources are still 2 CU + metrics := api.Metrics{ + LoadAverage1Min: 0.3, + LoadAverage5Min: 0.0, + MemoryUsageBytes: 0.0, + } + a.Do(state.UpdateMetrics, metrics) + // Check that we agree about desired resources + a.Call(getDesiredResources, state, clock.Now()). + Equals(resForCU(2)) + // Check we've got nothing to do yet + a.Call(nextActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.8s")}, + }) + + clockTick() + + // Update the VM to set min=max=1 CU + a.Do(state.UpdatedVM, helpers.CreateVmInfo( + DefaultInitialStateConfig.VM, + helpers.WithCurrentCU(2), + helpers.WithMinMaxCU(1, 1), + )) + + // We should be making a vm-monitor downscaling request + // TODO: In the future, we should have a "force-downscale" alternative so the vm-monitor doesn't + // get to deny the downscaling. 
+ a.Call(nextActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.7s")}, + MonitorDownscale: &core.ActionMonitorDownscale{ + Current: resForCU(2), + Target: resForCU(1), + }, + }) + a.Do(state.Monitor().StartingDownscaleRequest, clock.Now(), resForCU(1)) + clockTick() + a.Do(state.Monitor().DownscaleRequestAllowed, clock.Now()) + // Do NeonVM request for that downscaling + a.Call(nextActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.6s")}, + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: resForCU(2), + Target: resForCU(1), + }, + }) + a.Do(state.NeonVM().StartingRequest, clock.Now(), resForCU(1)) + clockTick() + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) + // Do plugin request for that downscaling: + a.Call(nextActions).Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(2)), + Target: resForCU(1), + Metrics: &metrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(1)) + clockTick() + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resForCU(1), + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + // And then, we shouldn't need to do anything else: + a.Call(nextActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.9s")}, + }) +} + +// Checks that if the VM's min/max bounds change so that the minimum is above the current and +// desired usage, we try to upscale +func TestBoundsChangeRequiresUpscale(t *testing.T) { + a := helpers.NewAssert(t) + clock := helpers.NewFakeClock(t) + clockTick := func() helpers.Elapsed { + return clock.Inc(100 * time.Millisecond) + } + resForCU := DefaultComputeUnit.Mul + + state := helpers.CreateInitialState( + DefaultInitialStateConfig, + helpers.WithStoredWarnings(a.StoredWarnings()), + helpers.WithMinMaxCU(1, 3), + helpers.WithCurrentCU(2), + ) + nextActions := func() core.ActionSet { + return state.NextActions(clock.Now()) + } + + state.Plugin().NewScheduler() + state.Monitor().Active(true) + + // Send initial scheduler request: + doInitialPluginRequest(a, state, clock, duration("0.1s"), DefaultComputeUnit, nil, resForCU(2)) + + clockTick() + + // Set metrics so the desired resources are still 2 CU + metrics := api.Metrics{ + LoadAverage1Min: 0.3, + LoadAverage5Min: 0.0, + MemoryUsageBytes: 0.0, + } + a.Do(state.UpdateMetrics, metrics) + // Check that we agree about desired resources + a.Call(getDesiredResources, state, clock.Now()). 
+ Equals(resForCU(2)) + // Check we've got nothing to do yet + a.Call(nextActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.8s")}, + }) + + clockTick() + + // Update the VM to set min=max=3 CU + a.Do(state.UpdatedVM, helpers.CreateVmInfo( + DefaultInitialStateConfig.VM, + helpers.WithCurrentCU(2), + helpers.WithMinMaxCU(3, 3), + )) + + // We should be making a plugin request to get upscaling: + a.Call(nextActions).Equals(core.ActionSet{ + PluginRequest: &core.ActionPluginRequest{ + LastPermit: ptr(resForCU(2)), + Target: resForCU(3), + Metrics: &metrics, + }, + }) + a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(3)) + clockTick() + a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{ + Permit: resForCU(3), + Migrate: nil, + ComputeUnit: DefaultComputeUnit, + }) + // Do NeonVM request for the upscaling + a.Call(nextActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.9s")}, + NeonVMRequest: &core.ActionNeonVMRequest{ + Current: resForCU(2), + Target: resForCU(3), + }, + }) + a.Do(state.NeonVM().StartingRequest, clock.Now(), resForCU(3)) + clockTick() + a.Do(state.NeonVM().RequestSuccessful, clock.Now()) + // Do vm-monitor upscale request + a.Call(nextActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.8s")}, + MonitorUpscale: &core.ActionMonitorUpscale{ + Current: resForCU(2), + Target: resForCU(3), + }, + }) + a.Do(state.Monitor().StartingUpscaleRequest, clock.Now(), resForCU(3)) + clockTick() + a.Do(state.Monitor().UpscaleRequestSuccessful, clock.Now()) + // And then, we shouldn't need to do anything else: + a.Call(nextActions).Equals(core.ActionSet{ + Wait: &core.ActionWait{Duration: duration("4.7s")}, + }) +} From 290750d2dde19f54b28426ec2e49f300d77d693a Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 10:09:03 -0700 Subject: [PATCH 48/59] add metric for number of calls to (*core.State).NextActions() --- pkg/agent/executor/core.go | 17 ++++++++++++++--- pkg/agent/prommetrics.go | 7 +++++++ pkg/agent/runner.go | 21 ++++++++++++--------- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/pkg/agent/executor/core.go b/pkg/agent/executor/core.go index 0fdbde12c..b5ae5bae3 100644 --- a/pkg/agent/executor/core.go +++ b/pkg/agent/executor/core.go @@ -23,7 +23,15 @@ import ( "github.com/neondatabase/autoscaling/pkg/util" ) -type Config = core.Config +type Config struct { + // OnNextActions is called each time the ExecutorCore calls (*core.State).NextActions() on the + // inner state object. + // + // In practice, this value is set to a callback that increments a metric. 
+ OnNextActions func() + + Core core.Config +} type ExecutorCore struct { mu sync.Mutex @@ -34,6 +42,7 @@ type ExecutorCore struct { actions *timedActions lastActionsID timedActionsID + onNextActions func() updates *util.Broadcaster } @@ -44,13 +53,14 @@ type ClientSet struct { Monitor MonitorInterface } -func NewExecutorCore(stateLogger *zap.Logger, vm api.VmInfo, config core.Config) *ExecutorCore { +func NewExecutorCore(stateLogger *zap.Logger, vm api.VmInfo, config Config) *ExecutorCore { return &ExecutorCore{ mu: sync.Mutex{}, stateLogger: stateLogger, - core: core.NewState(vm, config), + core: core.NewState(vm, config.Core), actions: nil, // (*ExecutorCore).getActions() checks if this is nil lastActionsID: -1, + onNextActions: config.OnNextActions, updates: util.NewBroadcaster(), } } @@ -89,6 +99,7 @@ func (c *ExecutorCore) getActions() timedActions { if c.actions == nil { id := c.lastActionsID + 1 + c.onNextActions() // NOTE: Even though we cache the actions generated using time.Now(), it's *generally* ok. now := time.Now() diff --git a/pkg/agent/prommetrics.go b/pkg/agent/prommetrics.go index cf9fdd09c..cbcdca115 100644 --- a/pkg/agent/prommetrics.go +++ b/pkg/agent/prommetrics.go @@ -25,6 +25,7 @@ type PromMetrics struct { runnerThreadPanics prometheus.Counter runnerStarts prometheus.Counter runnerRestarts prometheus.Counter + runnerNextActions prometheus.Counter } type resourceChangePair struct { @@ -210,6 +211,12 @@ func makePrometheusParts(globalstate *agentState) (PromMetrics, *prometheus.Regi Help: "Number of existing per-VM Runners restarted due to failure", }, )), + runnerNextActions: util.RegisterMetric(reg, prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "autosclaing_agent_runner_next_actions_total", + Help: "Number of times (*core.State).NextActions() has been called", + }, + )), } // Some of of the metrics should have default keys set to zero. 
Otherwise, these won't be filled diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index 861478f66..175bfe4f7 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -212,15 +212,18 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util coreExecLogger := execLogger.Named("core") executorCore := executor.NewExecutorCore(coreExecLogger, getVmInfo(), executor.Config{ - DefaultScalingConfig: r.global.config.Scaling.DefaultConfig, - PluginRequestTick: time.Second * time.Duration(r.global.config.Scheduler.RequestAtLeastEverySeconds), - PluginDeniedRetryWait: time.Second * time.Duration(r.global.config.Scheduler.RetryDeniedUpscaleSeconds), - MonitorDeniedDownscaleCooldown: time.Second * time.Duration(r.global.config.Monitor.RetryDeniedDownscaleSeconds), - MonitorRequestedUpscaleValidPeriod: time.Second * time.Duration(r.global.config.Monitor.RequestedUpscaleValidSeconds), - MonitorRetryWait: time.Second * time.Duration(r.global.config.Monitor.RetryFailedRequestSeconds), - Log: core.LogConfig{ - Info: coreExecLogger.Info, - Warn: coreExecLogger.Warn, + OnNextActions: r.global.metrics.runnerNextActions.Inc, + Core: core.Config{ + DefaultScalingConfig: r.global.config.Scaling.DefaultConfig, + PluginRequestTick: time.Second * time.Duration(r.global.config.Scheduler.RequestAtLeastEverySeconds), + PluginDeniedRetryWait: time.Second * time.Duration(r.global.config.Scheduler.RetryDeniedUpscaleSeconds), + MonitorDeniedDownscaleCooldown: time.Second * time.Duration(r.global.config.Monitor.RetryDeniedDownscaleSeconds), + MonitorRequestedUpscaleValidPeriod: time.Second * time.Duration(r.global.config.Monitor.RequestedUpscaleValidSeconds), + MonitorRetryWait: time.Second * time.Duration(r.global.config.Monitor.RetryFailedRequestSeconds), + Log: core.LogConfig{ + Info: coreExecLogger.Info, + Warn: coreExecLogger.Warn, + }, }, }) From 07fd321a348c905507f09dbd2830969bc07c8ee6 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 12:13:51 -0700 Subject: [PATCH 49/59] update comments/docs --- pkg/agent/core/state.go | 16 ++++++++++++---- pkg/agent/core/testhelpers/clock.go | 4 ++-- pkg/util/broadcast.go | 22 +++++++++++++++++++++- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/pkg/agent/core/state.go b/pkg/agent/core/state.go index bfe12b95b..f3fd73764 100644 --- a/pkg/agent/core/state.go +++ b/pkg/agent/core/state.go @@ -66,7 +66,8 @@ type Config struct { } type LogConfig struct { - // Info, if not nil, will be called to log consistent informative information. + // Info, if not nil, will be called to provide information during normal functioning. + // For example, we log the calculated desired resources on every call to NextActions. Info func(string, ...zap.Field) // Warn, if not nil, will be called to log conditions that are impeding the ability to move the // current resources to what's desired. @@ -748,7 +749,7 @@ func (s *State) DesiredResourcesFromMetricsOrRequestedUpscaling(now time.Time) ( } } - s.info("Calculated desired resources", zap.Object("target", result)) + s.info("Calculated desired resources", zap.Object("current", s.vm.Using()), zap.Object("target", result)) return result, calculateWaitTime } @@ -805,7 +806,6 @@ func (s *State) minRequiredResourcesForDeniedDownscale(computeUnit api.Resources // // phrasing it like this cleanly handles some subtle edge cases when denied.current isn't a // multiple of the compute unit. 
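+	// For example: with a 250m-CPU compute unit, if a downscale from 1000m to 600m CPU was
+	// denied, this gives min(1000m, 250m * (1 + 600/250)) = min(1000m, 750m) = 750m.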
- // FIXME: add test return api.Resources{ VCPU: util.Min(denied.current.VCPU, computeUnit.VCPU*vmapi.MilliCPU(1+uint32(denied.requested.VCPU/computeUnit.VCPU))), Mem: util.Min(denied.current.Mem, computeUnit.Mem*(1+uint16(denied.requested.Mem/computeUnit.Mem))), @@ -864,7 +864,7 @@ func (s *State) pluginApprovedUpperBound() api.Resources { if s.plugin.permit != nil { return *s.plugin.permit } else { - return s.vm.Using() // FIXME: this isn't quite correct; this wouldn't allow down-then-upscale without the scheduler. + return s.vm.Using() } } @@ -872,6 +872,8 @@ func (s *State) pluginApprovedUpperBound() api.Resources { // PUBLIC FUNCTIONS TO UPDATE THE STATE // ////////////////////////////////////////// +// Debug sets s.debug = enabled. This method is exclusively meant to be used in tests, to make it +// easier to enable print debugging only for a single call to NextActions, via s.warn() or otherwise. func (s *State) Debug(enabled bool) { s.debug = enabled } @@ -880,6 +882,11 @@ func (s *State) UpdatedVM(vm api.VmInfo) { // FIXME: overriding this is required right now because we trust that a successful request to // NeonVM means the VM was already updated, which... isn't true, and otherwise we could run into // sync issues. + // A first-pass solution is possible by reading the values of VirtualMachine.Spec, but the + // "proper" solution would read from VirtualMachine.Status, which (at time of writing) isn't + // sound. For more, see: + // - https://github.com/neondatabase/autoscaling/pull/371#issuecomment-1752110131 + // - https://github.com/neondatabase/autoscaling/issues/462 vm.SetUsing(s.vm.Using()) s.vm = vm } @@ -1065,6 +1072,7 @@ func (h NeonVMHandle) RequestSuccessful(now time.Time) { // FIXME: This is actually incorrect; we shouldn't trust that the VM has already been updated // just because the request completed. It takes longer for the reconcile cycle(s) to make the // necessary changes. + // See the comments in (*State).UpdatedVM() for more info. h.s.vm.Cpu.Use = resources.VCPU h.s.vm.Mem.Use = resources.Mem diff --git a/pkg/agent/core/testhelpers/clock.go b/pkg/agent/core/testhelpers/clock.go index 14bed3a05..8ba827b89 100644 --- a/pkg/agent/core/testhelpers/clock.go +++ b/pkg/agent/core/testhelpers/clock.go @@ -8,8 +8,8 @@ import ( "github.com/stretchr/testify/require" ) -// FakeClock is a small facility that makes it easy to operation on duration since start with -// relative times. +// FakeClock is a small facility that makes it easy to operate on duration since start with +// relative times, rather than absolute times. type FakeClock struct { t *testing.T base time.Time diff --git a/pkg/util/broadcast.go b/pkg/util/broadcast.go index a11d8e03a..a6a5a7225 100644 --- a/pkg/util/broadcast.go +++ b/pkg/util/broadcast.go @@ -1,7 +1,7 @@ package util // A channel-based sync.Cond-like interface, with support for broadcast operations (but some -// additional restrictions) +// additional restrictions). Refer to the documentation of Wait for detailed usage. import ( "sync" @@ -28,6 +28,7 @@ type BroadcastReceiver struct { viewed uint64 } +// Broadcast sends a signal to all receivers func (b *Broadcaster) Broadcast() { b.mu.Lock() defer b.mu.Unlock() @@ -37,6 +38,10 @@ func (b *Broadcaster) Broadcast() { b.sent += 1 } +// NewReceiver creates a new BroadcastReceiver that will receive only future broadcasted events. 
+// +// It's generally not recommended to call (*BroadcastReceiver).Wait() on a single BroadcastReceiver +// from more than one thread at a time, although it *is* thread-safe. func (b *Broadcaster) NewReceiver() BroadcastReceiver { b.mu.Lock() defer b.mu.Unlock() @@ -53,6 +58,19 @@ var closedChannel = func() <-chan struct{} { return ch }() +// Wait returns a channel that will be closed once there has been an event broadcasted since +// the BroadcastReceiver was created, or the last call to Awake(). +// +// Typical usage of Wait will involve selecting on the channel returned and calling Awake +// immediately in the branch handling the event, for example: +// +// select { +// case <-ctx.Done(): +// return +// case <-receiver.Wait(): +// receiver.Awake() +// ... +// } func (r *BroadcastReceiver) Wait() <-chan struct{} { r.b.mu.Lock() defer r.b.mu.Unlock() @@ -64,6 +82,8 @@ func (r *BroadcastReceiver) Wait() <-chan struct{} { } } +// Awake marks the most recent broadcast event as received, so that the next call to Wait returns a +// channel that will only be closed once there's been a new event after this call to Awake. func (r *BroadcastReceiver) Awake() { r.b.mu.Lock() defer r.b.mu.Unlock() From 0098d344778bad50f2a51096ecb6cbb04499c657 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 12:14:58 -0700 Subject: [PATCH 50/59] testhelpers: panic if Call() is not resolved Ran into this a couple times while writing tests - it's easy to accidentally write Call() where you meant Do(), and then the function call would just never be run, which is hard to debug. --- pkg/agent/core/testhelpers/assert.go | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/pkg/agent/core/testhelpers/assert.go b/pkg/agent/core/testhelpers/assert.go index bb57a823e..222d8e76d 100644 --- a/pkg/agent/core/testhelpers/assert.go +++ b/pkg/agent/core/testhelpers/assert.go @@ -10,8 +10,9 @@ import ( ) type Assert struct { - t *testing.T - storedWarnings *[]string + t *testing.T + storedWarnings *[]string + waitingOnPreparedCall *bool tinfo transactionInfo } @@ -23,8 +24,9 @@ type transactionInfo struct { // NewAssert creates a new Assert object wrapping the provided *testing.T func NewAssert(t *testing.T) Assert { return Assert{ - t: t, - storedWarnings: &[]string{}, + t: t, + storedWarnings: &[]string{}, + waitingOnPreparedCall: &[]bool{false}[0], // take address of false tinfo: transactionInfo{ expectedWarnings: []string{}, }, @@ -43,12 +45,6 @@ func (a Assert) WithWarnings(warnings ...string) Assert { return a } -// Nil returns a type-erased zero value of T, typically for use when a typed nil is necessary -func Nil[T any]() any { - var t T - return any(t) -} - // Do calls the function with the provided arguments, checking that no unexpected warnings were // generated // @@ -68,6 +64,10 @@ func (a Assert) NoError(f any, args ...any) { // // Variadic functions are not supported. 
func (a Assert) Call(f any, args ...any) PreparedFunctionCall { + if *a.waitingOnPreparedCall { + panic(errors.New("previous Call() constructed but not executed (must use `Do()`, `NoError()`, or `Call().Equals()`)")) + } + fv := reflect.ValueOf(f) fTy := fv.Type() if fTy.Kind() != reflect.Func { @@ -81,6 +81,8 @@ func (a Assert) Call(f any, args ...any) PreparedFunctionCall { argValues = append(argValues, reflect.ValueOf(a)) } + *a.waitingOnPreparedCall = true + return PreparedFunctionCall{a: a, f: fv, args: argValues} } @@ -94,6 +96,8 @@ type PreparedFunctionCall struct { // Equals calls the prepared function, checking that all the return values are equal to what's // expected, and that no unexpected warnings were generated. func (f PreparedFunctionCall) Equals(expected ...any) { + *f.a.waitingOnPreparedCall = false + fTy := f.f.Type() numOut := fTy.NumOut() From 57f71cb4bcfe9846cf3bd42602e5017115b5bdfe Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 12:34:26 -0700 Subject: [PATCH 51/59] add util.Broadcaster tests, fix usage in executor --- pkg/agent/executor/exec_monitor.go | 1 + pkg/agent/executor/exec_neonvm.go | 1 + pkg/agent/executor/exec_plugin.go | 1 + pkg/util/broadcast.go | 4 +- pkg/util/broadcast_test.go | 59 ++++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 pkg/util/broadcast_test.go diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 34c4b59f1..3047b28b0 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -44,6 +44,7 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge case <-ctx.Done(): return case <-updates.Wait(): + updates.Awake() } last := c.getActions() diff --git a/pkg/agent/executor/exec_neonvm.go b/pkg/agent/executor/exec_neonvm.go index da19fa2f1..5f431473c 100644 --- a/pkg/agent/executor/exec_neonvm.go +++ b/pkg/agent/executor/exec_neonvm.go @@ -27,6 +27,7 @@ func (c *ExecutorCoreWithClients) DoNeonVMRequests(ctx context.Context, logger * case <-ctx.Done(): return case <-updates.Wait(): + updates.Awake() } last := c.getActions() diff --git a/pkg/agent/executor/exec_plugin.go b/pkg/agent/executor/exec_plugin.go index 82d3cd198..3c13e1d43 100644 --- a/pkg/agent/executor/exec_plugin.go +++ b/pkg/agent/executor/exec_plugin.go @@ -42,6 +42,7 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger * case <-ctx.Done(): return case <-updates.Wait(): + updates.Awake() } last := c.getActions() diff --git a/pkg/util/broadcast.go b/pkg/util/broadcast.go index a6a5a7225..75d6cea54 100644 --- a/pkg/util/broadcast.go +++ b/pkg/util/broadcast.go @@ -10,7 +10,7 @@ import ( func NewBroadcaster() *Broadcaster { return &Broadcaster{ mu: sync.Mutex{}, - ch: make(chan struct{}, 1), + ch: make(chan struct{}), sent: 0, } } @@ -34,7 +34,7 @@ func (b *Broadcaster) Broadcast() { defer b.mu.Unlock() close(b.ch) - b.ch = make(chan struct{}, 1) + b.ch = make(chan struct{}) b.sent += 1 } diff --git a/pkg/util/broadcast_test.go b/pkg/util/broadcast_test.go new file mode 100644 index 000000000..0e745e403 --- /dev/null +++ b/pkg/util/broadcast_test.go @@ -0,0 +1,59 @@ +package util_test + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/neondatabase/autoscaling/pkg/util" +) + +func closed(ch <-chan struct{}) bool { + select { + case <-ch: + return true + default: + return false + } +} + +func TestBroadcast(t *testing.T) { + broadcast := util.NewBroadcaster() + + 
receiver := broadcast.NewReceiver() + waitCh := receiver.Wait() + + // Not yet closed, no events yet + require.False(t, closed(waitCh)) + + // Send event, now should be closed, and continue to be closed on subsequent calls to Wait + broadcast.Broadcast() + require.True(t, closed(waitCh)) + require.True(t, closed(receiver.Wait())) + + // After we mark the event as received, we should go back to waiting + receiver.Awake() + + waitCh = receiver.Wait() + require.False(t, closed(waitCh)) + + // Multiple events should get collapsed into one: + broadcast.Broadcast() + broadcast.Broadcast() + require.True(t, closed(waitCh)) + receiver.Awake() + require.False(t, closed(receiver.Wait())) + + // If we first call Wait() after the unreceived event has already happened, then it should + // already be closed + broadcast.Broadcast() + require.True(t, closed(receiver.Wait())) + + // Creating a receiver after there's already been some events should behave like normal: + receiver = broadcast.NewReceiver() + require.False(t, closed(receiver.Wait())) + broadcast.Broadcast() + require.True(t, closed(receiver.Wait())) + receiver.Awake() + require.False(t, closed(receiver.Wait())) +} From 9b59a015944ef04ff51714679fa352aca8c11f47 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 12:40:04 -0700 Subject: [PATCH 52/59] refactor exec_sleeper to match other executor threads --- pkg/agent/executor/exec_sleeper.go | 59 ++++++++++++++---------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/pkg/agent/executor/exec_sleeper.go b/pkg/agent/executor/exec_sleeper.go index 4208491df..9fffa377d 100644 --- a/pkg/agent/executor/exec_sleeper.go +++ b/pkg/agent/executor/exec_sleeper.go @@ -17,51 +17,48 @@ func (c *ExecutorCore) DoSleeper(ctx context.Context, logger *zap.Logger) { timer := time.NewTimer(0) defer timer.Stop() - last := c.getActions() for { // Ensure the timer is cleared at the top of the loop if !timer.Stop() { <-timer.C } - // If NOT waiting for a particular duration: + // Wait until the state's changed or we're done + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + updates.Awake() + } + + last := c.getActions() if last.actions.Wait == nil { - select { - case <-ctx.Done(): - return - case <-updates.Wait(): - updates.Awake() - last = c.getActions() - } + continue // nothing to do; wait until the state changes } - // If YES waiting for a particular duration - if last.actions.Wait != nil { - // NB: It's possible for last.calculatedAt to be somewhat out of date. It's *probably* - // fine, because we'll be given a notification any time the state has changed, so we - // should wake from a select soon enough to get here - timer.Reset(last.actions.Wait.Duration) + // NB: It's possible for last.calculatedAt to be somewhat out of date. It's *probably* + // fine, because we'll be given a notification any time the state has changed, so we + // should wake from a select soon enough to get here + timer.Reset(last.actions.Wait.Duration) + select { + case <-ctx.Done(): + return + case <-updates.Wait(): + // Don't consume the event here. Rely on the event to remain at the top of the loop + continue + case <-timer.C: select { - case <-ctx.Done(): - return + // If there's also an update, then let that take preference: case <-updates.Wait(): + // Same thing as above - don't consume the event here. 
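+				// (The still-pending event means updates.Wait() at the top of the loop returns
+				// immediately, so the new state gets picked up there.)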
+ continue + // Otherwise, trigger cache invalidation because we've waited for the requested + // amount of time: + default: + c.update(func(*core.State) {}) updates.Awake() - last = c.getActions() - case <-timer.C: - select { - // If there's also an update, then let that take preference: - case <-updates.Wait(): - updates.Awake() - last = c.getActions() - // Otherwise, trigger cache invalidation because we've waited for the requested - // amount of time: - default: - c.update(func(*core.State) {}) - updates.Awake() - last = c.getActions() - } } } } From 240325dc2e86ec9b9bc7211b4c043839f3ab7822 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 13:35:11 -0700 Subject: [PATCH 53/59] one more executor broadcaster usage fix --- pkg/agent/executor/exec_monitor.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 3047b28b0..2cb298a8d 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -129,6 +129,7 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger case <-ctx.Done(): return case <-updates.Wait(): + updates.Awake() } last := c.getActions() From eb06b09a00e2477e36cad55239288bb159ac723d Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 13:38:40 -0700 Subject: [PATCH 54/59] switch from IDs to generation numbers (+ schedulerGone) With IDs, it was theoretically possible for us to reconnect to the same scheduler instance after disconnecting, which would have the same IDs. We could have mitigated this by including the scheduler's resourceVersion in the ID, but IDs are a little hard to grok anyways and tend to require spooky action at a distance, so generation numbers it is! --- also forwards scheduler deletion in trackSchedulerLoop into SchedulerGone calls in the executor. 
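For reference, a rough, self-contained sketch of how the generation numbers
end up being used (the names below are made up for illustration; the real
types are the StoredGenerationNumber / GenerationNumber pair added in
pkg/agent/executor/generation.go in this patch):

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    // generation is an opaque token; two values compare equal only if the
    // connection wasn't reset or replaced between the increments that
    // produced them.
    type generation struct{ value int64 }

    // storedGeneration is the shared counter, bumped on every connect,
    // disconnect, or replacement of the scheduler / vm-monitor.
    type storedGeneration struct{ value atomic.Int64 }

    func (s *storedGeneration) inc() generation { return generation{s.value.Add(1)} }
    func (s *storedGeneration) get() generation { return generation{s.value.Load()} }

    func main() {
        stored := &storedGeneration{}

        // executor thread: snapshot the generation of the handle that's
        // about to serve the request
        before := stored.inc() // scheduler connected -> generation 1

        // meanwhile: that scheduler pod is deleted and a new one shows up
        stored.inc() // gone -> generation 2
        stored.inc() // new scheduler -> generation 3

        // when the response comes back, the state update is skipped,
        // because the handle that served the request is no longer current:
        fmt.Println("unchanged:", before == stored.get()) // unchanged: false
    }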
--- go.mod | 2 +- pkg/agent/dispatcher.go | 10 ----- pkg/agent/execbridge.go | 62 +++++++++++++++++------------- pkg/agent/executor/exec_monitor.go | 28 +++++--------- pkg/agent/executor/exec_plugin.go | 15 +++----- pkg/agent/executor/generation.go | 29 ++++++++++++++ pkg/agent/globalstate.go | 2 +- pkg/agent/runner.go | 59 +++++++++++++++++++--------- 8 files changed, 123 insertions(+), 84 deletions(-) create mode 100644 pkg/agent/executor/generation.go diff --git a/go.mod b/go.mod index 9ed852478..4584164de 100644 --- a/go.mod +++ b/go.mod @@ -49,7 +49,6 @@ require ( github.com/digitalocean/go-qemu v0.0.0-20220826173844-d5f5e3ceed89 github.com/docker/docker v20.10.24+incompatible github.com/docker/libnetwork v0.8.0-dev.2.0.20210525090646-64b7a4574d14 - github.com/google/uuid v1.3.0 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 github.com/k8snetworkplumbingwg/whereabouts v0.6.1 github.com/kdomanski/iso9660 v0.3.3 @@ -119,6 +118,7 @@ require ( github.com/google/gnostic v0.6.9 // indirect github.com/google/go-cmp v0.5.9 // indirect github.com/google/gofuzz v1.2.0 // indirect + github.com/google/uuid v1.3.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect github.com/imdario/mergo v0.3.12 // indirect diff --git a/pkg/agent/dispatcher.go b/pkg/agent/dispatcher.go index c903c7e6d..091ed7a27 100644 --- a/pkg/agent/dispatcher.go +++ b/pkg/agent/dispatcher.go @@ -12,7 +12,6 @@ import ( "sync/atomic" "time" - "github.com/google/uuid" "go.uber.org/zap" "nhooyr.io/websocket" "nhooyr.io/websocket/wsjson" @@ -40,8 +39,6 @@ type MonitorResult struct { // The Dispatcher is the main object managing the websocket connection to the // monitor. For more information on the protocol, see pkg/api/types.go type Dispatcher struct { - uniqueID string - // The underlying connection we are managing conn *websocket.Conn @@ -105,7 +102,6 @@ func NewDispatcher( } disp := &Dispatcher{ - uniqueID: uuid.NewString(), conn: conn, waiters: make(map[uint64]util.SignalSender[waiterResult]), runner: runner, @@ -260,12 +256,6 @@ func connectToMonitor( return c, &resp.Version, nil } -// UniqueID returns the unique ID assigned to this Dispatcher -// (it's a UUID) -func (disp *Dispatcher) UniqueID() string { - return disp.uniqueID -} - // ExitSignal returns a channel that is closed when the Dispatcher is no longer running func (disp *Dispatcher) ExitSignal() <-chan struct{} { return disp.exitSignal diff --git a/pkg/agent/execbridge.go b/pkg/agent/execbridge.go index a420df2b8..d7d0afa09 100644 --- a/pkg/agent/execbridge.go +++ b/pkg/agent/execbridge.go @@ -26,17 +26,21 @@ var ( ///////////////////////////////////////////////////////////// type execPluginInterface struct { - runner *Runner - core *executor.ExecutorCore + runner *Runner + core *executor.ExecutorCore + generation *executor.StoredGenerationNumber } -func makePluginInterface(r *Runner, core *executor.ExecutorCore) *execPluginInterface { - return &execPluginInterface{runner: r, core: core} +func makePluginInterface( + r *Runner, + core *executor.ExecutorCore, + generation *executor.StoredGenerationNumber, +) *execPluginInterface { + return &execPluginInterface{runner: r, core: core, generation: generation} } -// EmptyID implements executor.PluginInterface -func (iface *execPluginInterface) EmptyID() string { - return "" +func (iface *execPluginInterface) CurrentGeneration() executor.GenerationNumber { + return iface.generation.Get() } // GetHandle 
implements executor.PluginInterface @@ -58,9 +62,9 @@ type execPluginHandle struct { scheduler *Scheduler } -// ID implements executor.PluginHandle -func (h *execPluginHandle) ID() string { - return string(h.scheduler.info.UID) +// Generation implements executor.PluginHandle +func (h *execPluginHandle) Generation() executor.GenerationNumber { + return h.scheduler.generation } // Request implements executor.PluginHandle @@ -113,39 +117,43 @@ func (iface *execNeonVMInterface) Request(ctx context.Context, logger *zap.Logge //////////////////////////////////////////////////// type execMonitorInterface struct { - runner *Runner - core *executor.ExecutorCore + runner *Runner + core *executor.ExecutorCore + generation *executor.StoredGenerationNumber } -func makeMonitorInterface(r *Runner, core *executor.ExecutorCore) *execMonitorInterface { - return &execMonitorInterface{runner: r, core: core} +func makeMonitorInterface( + r *Runner, + core *executor.ExecutorCore, + generation *executor.StoredGenerationNumber, +) *execMonitorInterface { + return &execMonitorInterface{runner: r, core: core, generation: generation} } -// EmptyID implements executor.MonitorInterface -func (iface *execMonitorInterface) EmptyID() string { - return "" +func (iface *execMonitorInterface) CurrentGeneration() executor.GenerationNumber { + return iface.generation.Get() } func (iface *execMonitorInterface) GetHandle() executor.MonitorHandle { - dispatcher := iface.runner.monitor.Load() + monitor := iface.runner.monitor.Load() - if dispatcher == nil || dispatcher.Exited() { + if monitor == nil || monitor.dispatcher.Exited() { return nil } return &execMonitorHandle{ - runner: iface.runner, - dispatcher: dispatcher, + runner: iface.runner, + monitor: monitor, } } type execMonitorHandle struct { - runner *Runner - dispatcher *Dispatcher + runner *Runner + monitor *monitorInfo } -func (h *execMonitorHandle) ID() string { - return h.dispatcher.UniqueID() +func (h *execMonitorHandle) Generation() executor.GenerationNumber { + return h.monitor.generation } func (h *execMonitorHandle) Downscale( @@ -162,7 +170,7 @@ func (h *execMonitorHandle) Downscale( h.runner.recordResourceChange(current, target, h.runner.global.metrics.monitorRequestedChange) - result, err := doMonitorDownscale(ctx, logger, h.dispatcher, target) + result, err := doMonitorDownscale(ctx, logger, h.monitor.dispatcher, target) if err != nil && result.Ok { h.runner.recordResourceChange(current, target, h.runner.global.metrics.monitorApprovedChange) @@ -180,7 +188,7 @@ func (h *execMonitorHandle) Upscale(ctx context.Context, logger *zap.Logger, cur h.runner.recordResourceChange(current, target, h.runner.global.metrics.monitorRequestedChange) - err := doMonitorUpscale(ctx, logger, h.dispatcher, target) + err := doMonitorUpscale(ctx, logger, h.monitor.dispatcher, target) if err != nil { h.runner.recordResourceChange(current, target, h.runner.global.metrics.monitorApprovedChange) diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 2cb298a8d..6c4f34db6 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -13,12 +13,12 @@ import ( ) type MonitorInterface interface { - EmptyID() string + CurrentGeneration() GenerationNumber GetHandle() MonitorHandle } type MonitorHandle interface { - ID() string + Generation() GenerationNumber Downscale(_ context.Context, _ *zap.Logger, current, target api.Resources) (*api.DownscaleResult, error) Upscale(_ context.Context, _ *zap.Logger, current, target 
api.Resources) error
 }
 
@@ -29,13 +29,9 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge
 		ifaceLogger *zap.Logger = logger.Named("client")
 	)
 
-	// meant to be called while holding c's lock
-	idUnchanged := func(current string) bool {
-		if h := c.clients.Monitor.GetHandle(); h != nil {
-			return current == h.ID()
-		} else {
-			return current == c.clients.Monitor.EmptyID()
-		}
+	// must be called while holding c's lock
+	generationUnchanged := func(since MonitorHandle) bool {
+		return since.Generation() == c.clients.Monitor.CurrentGeneration()
 	}
 
 	for {
@@ -76,7 +72,7 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge
 			endTime := time.Now()
 
 			c.update(func(state *core.State) {
-				unchanged := idUnchanged(monitorIface.ID())
+				unchanged := generationUnchanged(monitorIface)
 				logFields := []zap.Field{
 					zap.Any("action", action),
 					zap.Duration("duration", endTime.Sub(startTime)),
@@ -114,13 +110,9 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger
 		ifaceLogger *zap.Logger = logger.Named("client")
 	)
 
-	// meant to be called while holding c's lock
-	idUnchanged := func(current string) bool {
-		if h := c.clients.Monitor.GetHandle(); h != nil {
-			return current == h.ID()
-		} else {
-			return current == c.clients.Monitor.EmptyID()
-		}
+	// must be called while holding c's lock
+	generationUnchanged := func(since MonitorHandle) bool {
+		return since.Generation() == c.clients.Monitor.CurrentGeneration()
 	}
 
 	for {
@@ -159,7 +151,7 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger
 			endTime := time.Now()
 
 			c.update(func(state *core.State) {
-				unchanged := idUnchanged(monitorIface.ID())
+				unchanged := generationUnchanged(monitorIface)
 				logFields := []zap.Field{
 					zap.Any("action", action),
 					zap.Duration("duration", endTime.Sub(startTime)),
diff --git a/pkg/agent/executor/exec_plugin.go b/pkg/agent/executor/exec_plugin.go
index 3c13e1d43..7068fa27c 100644
--- a/pkg/agent/executor/exec_plugin.go
+++ b/pkg/agent/executor/exec_plugin.go
@@ -13,12 +13,12 @@ import (
 )
 
 type PluginInterface interface {
-	EmptyID() string
+	CurrentGeneration() GenerationNumber
 	GetHandle() PluginHandle
 }
 
 type PluginHandle interface {
-	ID() string
+	Generation() GenerationNumber
 	Request(_ context.Context, _ *zap.Logger, lastPermit *api.Resources, target api.Resources, _ *api.Metrics) (*api.PluginResponse, error)
 }
 
@@ -28,12 +28,9 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger *
 		ifaceLogger *zap.Logger = logger.Named("client")
 	)
 
-	idUnchanged := func(current string) bool {
-		if h := c.clients.Plugin.GetHandle(); h != nil {
-			return current == h.ID()
-		} else {
-			return current == c.clients.Plugin.EmptyID()
-		}
+	// must be called while holding c's lock
+	generationUnchanged := func(since PluginHandle) bool {
+		return since.Generation() == c.clients.Plugin.CurrentGeneration()
 	}
 
 	for {
@@ -74,7 +71,7 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger *
 			endTime := time.Now()
 
 			c.update(func(state *core.State) {
-				unchanged := idUnchanged(pluginIface.ID())
+				unchanged := generationUnchanged(pluginIface)
 				logFields := []zap.Field{
 					zap.Any("action", action),
 					zap.Duration("duration", endTime.Sub(startTime)),
diff --git a/pkg/agent/executor/generation.go b/pkg/agent/executor/generation.go
new file mode 100644
index 000000000..5a8d50738
--- /dev/null
+++ b/pkg/agent/executor/generation.go
@@ -0,0 +1,29 @@
+package executor
+
+// Generation numbers, for use by implementers of
the various interfaces (i.e. pkg/agent/execbridge.go) + +import ( + "sync/atomic" +) + +type StoredGenerationNumber struct { + value atomic.Int64 +} + +type GenerationNumber struct { + value int64 +} + +func NewStoredGenerationNumber() *StoredGenerationNumber { + return &StoredGenerationNumber{value: atomic.Int64{}} +} + +// Inc increments the stored GenerationNumber, returning the new value +func (n *StoredGenerationNumber) Inc() GenerationNumber { + return GenerationNumber{value: n.value.Add(1)} +} + +// Get fetches the current value of the stored GenerationNumber +func (n *StoredGenerationNumber) Get() GenerationNumber { + return GenerationNumber{value: n.value.Load()} +} diff --git a/pkg/agent/globalstate.go b/pkg/agent/globalstate.go index 415a03ac5..b6ce9fb36 100644 --- a/pkg/agent/globalstate.go +++ b/pkg/agent/globalstate.go @@ -358,7 +358,7 @@ func (s *agentState) newRunner(vmInfo api.VmInfo, podName util.NamespacedName, p executorStateDump: nil, // set by (*Runner).Run scheduler: atomic.Pointer[Scheduler]{}, - monitor: atomic.Pointer[Dispatcher]{}, + monitor: atomic.Pointer[monitorInfo]{}, backgroundWorkerCount: atomic.Int64{}, backgroundPanic: make(chan error), diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index 175bfe4f7..c0ebc1235 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -79,7 +79,7 @@ type Runner struct { scheduler atomic.Pointer[Scheduler] // monitor, if non nil, stores the current Dispatcher in use for communicating with the // vm-monitor - monitor atomic.Pointer[Dispatcher] + monitor atomic.Pointer[monitorInfo] // backgroundWorkerCount tracks the current number of background workers. It is exclusively // updated by r.spawnBackgroundWorker @@ -96,6 +96,8 @@ type Scheduler struct { // info holds the immutable information we use to connect to and describe the scheduler info schedwatch.SchedulerInfo + + generation executor.GenerationNumber } // RunnerState is the serializable state of the Runner, extracted by its State method @@ -229,9 +231,12 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util r.executorStateDump = executorCore.StateDump - pluginIface := makePluginInterface(r, executorCore) + pluginGeneration := executor.NewStoredGenerationNumber() + monitorGeneration := executor.NewStoredGenerationNumber() + + pluginIface := makePluginInterface(r, executorCore, pluginGeneration) neonvmIface := makeNeonVMInterface(r) - monitorIface := makeMonitorInterface(r, executorCore) + monitorIface := makeMonitorInterface(r, executorCore, monitorGeneration) // "ecwc" stands for "ExecutorCoreWithClients" ecwc := executorCore.WithClients(executor.ClientSet{ @@ -263,9 +268,13 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util } }) r.spawnBackgroundWorker(ctx, logger, "track scheduler", func(c context.Context, l *zap.Logger) { - r.trackSchedulerLoop(c, l, scheduler, schedulerWatch, func(withLock func()) { + newScheduler := func(withLock func()) { ecwc.Updater().NewScheduler(withLock) - }) + } + schedulerGone := func(withLock func()) { + ecwc.Updater().SchedulerGone(withLock) + } + r.trackSchedulerLoop(c, l, scheduler, schedulerWatch, pluginGeneration, newScheduler, schedulerGone) }) r.spawnBackgroundWorker(ctx, logger, "get metrics", func(c context.Context, l *zap.Logger) { r.getMetricsLoop(c, l, func(metrics api.Metrics, withLock func()) { @@ -273,7 +282,7 @@ func (r *Runner) Run(ctx context.Context, logger *zap.Logger, vmInfoUpdated util }) }) r.spawnBackgroundWorker(ctx, 
logger.Named("vm-monitor"), "vm-monitor reconnection loop", func(c context.Context, l *zap.Logger) { - r.connectToMonitorLoop(c, l, monitorStateCallbacks{ + r.connectToMonitorLoop(c, l, monitorGeneration, monitorStateCallbacks{ reset: func(withLock func()) { ecwc.Updater().ResetMonitor(withLock) }, @@ -393,6 +402,11 @@ func (r *Runner) getMetricsLoop( } } +type monitorInfo struct { + generation executor.GenerationNumber + dispatcher *Dispatcher +} + type monitorStateCallbacks struct { reset func(withLock func()) upscaleRequested func(request api.MoreResources, withLock func()) @@ -403,6 +417,7 @@ type monitorStateCallbacks struct { func (r *Runner) connectToMonitorLoop( ctx context.Context, logger *zap.Logger, + generation *executor.StoredGenerationNumber, callbacks monitorStateCallbacks, ) { addr := fmt.Sprintf("ws://%s:%d/monitor", r.podIP, r.global.config.Monitor.ServerPort) @@ -417,6 +432,7 @@ func (r *Runner) connectToMonitorLoop( r.lock.Lock() defer r.lock.Unlock() callbacks.reset(func() { + generation.Inc() r.monitor.Store(nil) logger.Info("Reset previous vm-monitor connection") }) @@ -488,7 +504,10 @@ func (r *Runner) connectToMonitorLoop( r.lock.Lock() defer r.lock.Unlock() callbacks.setActive(true, func() { - r.monitor.Store(dispatcher) + r.monitor.Store(&monitorInfo{ + generation: generation.Inc(), + dispatcher: dispatcher, + }) logger.Info("Connected to vm-monitor") }) }() @@ -510,7 +529,9 @@ func (r *Runner) trackSchedulerLoop( logger *zap.Logger, init *schedwatch.SchedulerInfo, schedulerWatch schedwatch.SchedulerWatch, + generation *executor.StoredGenerationNumber, newScheduler func(withLock func()), + schedulerGone func(withLock func()), ) { // pre-declare a bunch of variables because we have some gotos here. var ( @@ -539,18 +560,17 @@ startScheduler: verb = "Updating" } - sched := &Scheduler{ - runner: r, - info: currentInfo, - } - func() { r.lock.Lock() defer r.lock.Unlock() newScheduler(func() { - r.scheduler.Store(sched) logger.Info(fmt.Sprintf("%s scheduler pod", verb), zap.Object("scheduler", currentInfo)) + r.scheduler.Store(&Scheduler{ + runner: r, + info: currentInfo, + generation: generation.Inc(), + }) }) }() } @@ -575,12 +595,15 @@ startScheduler: return false } - logger.Info( - "Scheduler pod was deleted. Aborting further communication", - zap.Object("scheduler", scheduler.info), - ) + schedulerGone(func() { + logger.Info( + "Scheduler pod was deleted. 
Aborting further communication", + zap.Object("scheduler", scheduler.info), + ) - r.scheduler.Store(nil) + generation.Inc() + r.scheduler.Store(nil) + }) return true }() From 59dc242045c5b37f75200623c0737cfebee8f775 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 15:14:46 -0700 Subject: [PATCH 55/59] fix typo --- pkg/agent/prommetrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/agent/prommetrics.go b/pkg/agent/prommetrics.go index cbcdca115..237f12c09 100644 --- a/pkg/agent/prommetrics.go +++ b/pkg/agent/prommetrics.go @@ -213,7 +213,7 @@ func makePrometheusParts(globalstate *agentState) (PromMetrics, *prometheus.Regi )), runnerNextActions: util.RegisterMetric(reg, prometheus.NewCounter( prometheus.CounterOpts{ - Name: "autosclaing_agent_runner_next_actions_total", + Name: "autoscaling_agent_runner_next_actions_total", Help: "Number of times (*core.State).NextActions() has been called", }, )), From e570cdd6c0284d59e8f1a66b566b56f187714ee9 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 15:33:30 -0700 Subject: [PATCH 56/59] executor: add warnings when skipping state update --- pkg/agent/executor/exec_monitor.go | 18 ++++++++++++++++++ pkg/agent/executor/exec_plugin.go | 8 ++++++++ 2 files changed, 26 insertions(+) diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 6c4f34db6..386cb1076 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -79,10 +79,16 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge zap.Bool("unchanged", unchanged), } + warnSkipBecauseChanged := func() { + logger.Warn("Skipping state update after vm-monitor downscale request because MonitorHandle changed") + } + if err != nil { logger.Error("vm-monitor downscale request failed", append(logFields, zap.Error(err))...) if unchanged { state.Monitor().DownscaleRequestFailed(endTime) + } else { + warnSkipBecauseChanged() } return } @@ -93,11 +99,15 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge logger.Warn("vm-monitor denied downscale", logFields...) if unchanged { state.Monitor().DownscaleRequestDenied(endTime) + } else { + warnSkipBecauseChanged() } } else { logger.Info("vm-monitor approved downscale", logFields...) if unchanged { state.Monitor().DownscaleRequestAllowed(endTime) + } else { + warnSkipBecauseChanged() } } }) @@ -158,10 +168,16 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger zap.Bool("unchanged", unchanged), } + warnSkipBecauseChanged := func() { + logger.Warn("Skipping state update after vm-monitor upscale request because MonitorHandle changed") + } + if err != nil { logger.Error("vm-monitor upscale request failed", append(logFields, zap.Error(err))...) if unchanged { state.Monitor().UpscaleRequestFailed(endTime) + } else { + warnSkipBecauseChanged() } return } @@ -169,6 +185,8 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger logger.Info("vm-monitor upscale request successful", logFields...) 
if unchanged { state.Monitor().UpscaleRequestSuccessful(endTime) + } else { + warnSkipBecauseChanged() } }) } diff --git a/pkg/agent/executor/exec_plugin.go b/pkg/agent/executor/exec_plugin.go index 7068fa27c..d7013840c 100644 --- a/pkg/agent/executor/exec_plugin.go +++ b/pkg/agent/executor/exec_plugin.go @@ -78,10 +78,16 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger * zap.Bool("unchanged", unchanged), } + warnSkipBecauseChanged := func() { + logger.Warn("Skipping state update after plugin request because PluginHandle changed") + } + if err != nil { logger.Error("Plugin request failed", append(logFields, zap.Error(err))...) if unchanged { state.Plugin().RequestFailed(endTime) + } else { + warnSkipBecauseChanged() } } else { logFields = append(logFields, zap.Any("response", resp)) @@ -90,6 +96,8 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger * if err := state.Plugin().RequestSuccessful(endTime, *resp); err != nil { logger.Error("Plugin response validation failed", append(logFields, zap.Error(err))...) } + } else { + warnSkipBecauseChanged() } } }) From f39cf32875070fe67c3875ea9557b2400127d431 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 15:44:31 -0700 Subject: [PATCH 57/59] executor: require plugin/monitor interfaces are non-nil during request Essentially, locked state updates should *always* guarantee that a handle to the plugin or vm-monitor (via reading the field of the Runner) will be consistent with the current state of the ExecutorCore. --- pkg/agent/executor/exec_monitor.go | 28 ++++++++++++++-------------- pkg/agent/executor/exec_plugin.go | 15 +++++++-------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 386cb1076..8d96cd2c8 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -57,18 +57,17 @@ func (c *ExecutorCoreWithClients) DoMonitorDownscales(ctx context.Context, logge startTime = time.Now() monitorIface = c.clients.Monitor.GetHandle() state.Monitor().StartingDownscaleRequest(startTime, action.Target) + + if monitorIface == nil { + panic(errors.New( + "core.State asked for vm-monitor downscale request, but Monitor.GetHandle() is nil, so it should be disabled", + )) + } }); !updated { continue // state has changed, retry. } - var result *api.DownscaleResult - var err error - - if monitorIface != nil { - result, err = monitorIface.Downscale(ctx, ifaceLogger, action.Current, action.Target) - } else { - err = errors.New("No currently active vm-monitor connection") - } + result, err := monitorIface.Downscale(ctx, ifaceLogger, action.Current, action.Target) endTime := time.Now() c.update(func(state *core.State) { @@ -148,16 +147,17 @@ func (c *ExecutorCoreWithClients) DoMonitorUpscales(ctx context.Context, logger startTime = time.Now() monitorIface = c.clients.Monitor.GetHandle() state.Monitor().StartingUpscaleRequest(startTime, action.Target) + + if monitorIface == nil { + panic(errors.New( + "core.State asked for vm-monitor upscale request, but Monitor.GetHandle() is nil, so it should be disabled", + )) + } }); !updated { continue // state has changed, retry. 
 		}
 
-		var err error
-		if monitorIface != nil {
-			err = monitorIface.Upscale(ctx, ifaceLogger, action.Current, action.Target)
-		} else {
-			err = errors.New("No currently active vm-monitor connection")
-		}
+		err := monitorIface.Upscale(ctx, ifaceLogger, action.Current, action.Target)
 
 		endTime := time.Now()
 
 		c.update(func(state *core.State) {
diff --git a/pkg/agent/executor/exec_plugin.go b/pkg/agent/executor/exec_plugin.go
index d7013840c..2f55a4712 100644
--- a/pkg/agent/executor/exec_plugin.go
+++ b/pkg/agent/executor/exec_plugin.go
@@ -56,18 +56,17 @@ func (c *ExecutorCoreWithClients) DoPluginRequests(ctx context.Context, logger *
 			startTime = time.Now()
 			pluginIface = c.clients.Plugin.GetHandle()
 			state.Plugin().StartingRequest(startTime, action.Target)
+
+			if pluginIface == nil {
+				panic(errors.New(
+					"core.State asked for plugin request, but Plugin.GetHandle() is nil, so it should be disabled",
+				))
+			}
 		}); !updated {
 			continue // state has changed, retry.
 		}
 
-		var resp *api.PluginResponse
-		var err error
-
-		if pluginIface != nil {
-			resp, err = pluginIface.Request(ctx, ifaceLogger, action.LastPermit, action.Target, action.Metrics)
-		} else {
-			err = errors.New("No currently enabled plugin handle")
-		}
+		resp, err := pluginIface.Request(ctx, ifaceLogger, action.LastPermit, action.Target, action.Metrics)
 
 		endTime := time.Now()
 
 		c.update(func(state *core.State) {

From 3592ff1baa322616f30f47ad70e5d6bd79a585ae Mon Sep 17 00:00:00 2001
From: Em Sharnoff
Date: Sun, 8 Oct 2023 16:03:06 -0700
Subject: [PATCH 58/59] remove unnecessary atomics for Runner.{scheduler,monitor}

Added comments re: synchronization to explain why this is ok.

tl;dr: updating the field requires holding both Runner.lock and the executor's
lock, which means that reading it while holding either one is ok.
---
 pkg/agent/execbridge.go            | 15 ++++++++++---
 pkg/agent/executor/exec_monitor.go |  2 ++
 pkg/agent/executor/exec_plugin.go  |  2 ++
 pkg/agent/globalstate.go           |  4 ++--
 pkg/agent/runner.go                | 36 +++++++++++++++++-------------
 5 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/pkg/agent/execbridge.go b/pkg/agent/execbridge.go
index d7d0afa09..04f300a71 100644
--- a/pkg/agent/execbridge.go
+++ b/pkg/agent/execbridge.go
@@ -43,9 +43,13 @@ func (iface *execPluginInterface) CurrentGeneration() executor.GenerationNumber
 	return iface.generation.Get()
 }
 
-// GetHandle implements executor.PluginInterface
+// GetHandle implements executor.PluginInterface, and MUST only be called while holding the
+// executor's lock.
+//
+// The locking requirement is why we're able to get away with an "unsynchronized" read of the value
+// in the runner. For more, see the documentation on Runner.scheduler.
 func (iface *execPluginInterface) GetHandle() executor.PluginHandle {
-	scheduler := iface.runner.scheduler.Load()
+	scheduler := iface.runner.scheduler
 
 	if scheduler == nil {
 		return nil
@@ -134,8 +138,13 @@ func (iface *execMonitorInterface) CurrentGeneration() executor.GenerationNumber
 	return iface.generation.Get()
 }
 
+// GetHandle implements executor.MonitorInterface, and MUST only be called while holding the
+// executor's lock.
+//
+// The locking requirement is why we're able to get away with an "unsynchronized" read of the value
+// in the runner. For more, see the documentation on Runner.monitor.
func (iface *execMonitorInterface) GetHandle() executor.MonitorHandle { - monitor := iface.runner.monitor.Load() + monitor := iface.runner.monitor if monitor == nil || monitor.dispatcher.Exited() { return nil diff --git a/pkg/agent/executor/exec_monitor.go b/pkg/agent/executor/exec_monitor.go index 8d96cd2c8..01c85e3e3 100644 --- a/pkg/agent/executor/exec_monitor.go +++ b/pkg/agent/executor/exec_monitor.go @@ -14,6 +14,8 @@ import ( type MonitorInterface interface { CurrentGeneration() GenerationNumber + // GetHandle fetches a stable handle for the current monitor, or nil if there is not one. + // This method MUST NOT be called unless holding the executor's lock. GetHandle() MonitorHandle } diff --git a/pkg/agent/executor/exec_plugin.go b/pkg/agent/executor/exec_plugin.go index 2f55a4712..062571a23 100644 --- a/pkg/agent/executor/exec_plugin.go +++ b/pkg/agent/executor/exec_plugin.go @@ -14,6 +14,8 @@ import ( type PluginInterface interface { CurrentGeneration() GenerationNumber + // GetHandle fetches a stable handle for the current scheduler, or nil if there is not one. + // This method MUST NOT be called unless holding the executor's lock. GetHandle() PluginHandle } diff --git a/pkg/agent/globalstate.go b/pkg/agent/globalstate.go index b6ce9fb36..3727b025b 100644 --- a/pkg/agent/globalstate.go +++ b/pkg/agent/globalstate.go @@ -357,8 +357,8 @@ func (s *agentState) newRunner(vmInfo api.VmInfo, podName util.NamespacedName, p executorStateDump: nil, // set by (*Runner).Run - scheduler: atomic.Pointer[Scheduler]{}, - monitor: atomic.Pointer[monitorInfo]{}, + scheduler: nil, + monitor: nil, backgroundWorkerCount: atomic.Int64{}, backgroundPanic: make(chan error), diff --git a/pkg/agent/runner.go b/pkg/agent/runner.go index c0ebc1235..5f106f455 100644 --- a/pkg/agent/runner.go +++ b/pkg/agent/runner.go @@ -76,10 +76,16 @@ type Runner struct { // scheduler is the current scheduler that we're communicating with, or nil if there isn't one. // Each scheduler's info field is immutable. When a scheduler is replaced, only the pointer // value here is updated; the original Scheduler remains unchanged. - scheduler atomic.Pointer[Scheduler] + // + // Additionally, this field MAY ONLY be updated while holding both lock AND the executor's lock, + // which means that it may be read when EITHER holding lock OR the executor's lock. + scheduler *Scheduler // monitor, if non nil, stores the current Dispatcher in use for communicating with the - // vm-monitor - monitor atomic.Pointer[monitorInfo] + // vm-monitor, alongside a generation number. + // + // Additionally, this field MAY ONLY be updated while holding both lock AND the executor's lock, + // which means that it may be read when EITHER holding lock OR the executor's lock. + monitor *monitorInfo // backgroundWorkerCount tracks the current number of background workers. 
It is exclusively // updated by r.spawnBackgroundWorker @@ -120,9 +126,9 @@ func (r *Runner) State(ctx context.Context) (*RunnerState, error) { defer r.lock.Unlock() var scheduler *SchedulerState - if sched := r.scheduler.Load(); sched != nil { + if r.scheduler != nil { scheduler = &SchedulerState{ - Info: sched.info, + Info: r.scheduler.info, } } @@ -433,7 +439,7 @@ func (r *Runner) connectToMonitorLoop( defer r.lock.Unlock() callbacks.reset(func() { generation.Inc() - r.monitor.Store(nil) + r.monitor = nil logger.Info("Reset previous vm-monitor connection") }) }() @@ -504,10 +510,10 @@ func (r *Runner) connectToMonitorLoop( r.lock.Lock() defer r.lock.Unlock() callbacks.setActive(true, func() { - r.monitor.Store(&monitorInfo{ + r.monitor = &monitorInfo{ generation: generation.Inc(), dispatcher: dispatcher, - }) + } logger.Info("Connected to vm-monitor") }) }() @@ -566,11 +572,11 @@ startScheduler: newScheduler(func() { logger.Info(fmt.Sprintf("%s scheduler pod", verb), zap.Object("scheduler", currentInfo)) - r.scheduler.Store(&Scheduler{ + r.scheduler = &Scheduler{ runner: r, info: currentInfo, generation: generation.Inc(), - }) + } }) }() } @@ -585,12 +591,10 @@ startScheduler: r.lock.Lock() defer r.lock.Unlock() - scheduler := r.scheduler.Load() - - if scheduler.info.UID != info.UID { + if r.scheduler.info.UID != info.UID { logger.Info( "Scheduler candidate pod was deleted, but we aren't using it yet", - zap.Object("scheduler", scheduler.info), zap.Object("candidate", info), + zap.Object("scheduler", r.scheduler.info), zap.Object("candidate", info), ) return false } @@ -598,11 +602,11 @@ startScheduler: schedulerGone(func() { logger.Info( "Scheduler pod was deleted. Aborting further communication", - zap.Object("scheduler", scheduler.info), + zap.Object("scheduler", r.scheduler.info), ) generation.Inc() - r.scheduler.Store(nil) + r.scheduler = nil }) return true }() From 60e8e19cb562263c54988dcd2f1face12eb7c60e Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Sun, 8 Oct 2023 16:32:33 -0700 Subject: [PATCH 59/59] reduce executor state logs to debug level --- cmd/autoscaler-agent/main.go | 3 ++- pkg/agent/executor/core.go | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cmd/autoscaler-agent/main.go b/cmd/autoscaler-agent/main.go index c6292877a..41e114d49 100644 --- a/cmd/autoscaler-agent/main.go +++ b/cmd/autoscaler-agent/main.go @@ -22,7 +22,8 @@ import ( func main() { logConfig := zap.NewProductionConfig() - logConfig.Sampling = nil // Disable sampling, which the production config enables by default. + logConfig.Sampling = nil // Disable sampling, which the production config enables by default. + logConfig.Level.SetLevel(zap.DebugLevel) // Allow debug logs logger := zap.Must(logConfig.Build()).Named("autoscaler-agent") defer logger.Sync() //nolint:errcheck // what are we gonna do, log something about it? diff --git a/pkg/agent/executor/core.go b/pkg/agent/executor/core.go index b5ae5bae3..3178a38e0 100644 --- a/pkg/agent/executor/core.go +++ b/pkg/agent/executor/core.go @@ -103,10 +103,10 @@ func (c *ExecutorCore) getActions() timedActions { // NOTE: Even though we cache the actions generated using time.Now(), it's *generally* ok. 
now := time.Now() - c.stateLogger.Info("Recalculating ActionSet", zap.Time("now", now), zap.Any("state", c.core.Dump())) + c.stateLogger.Debug("Recalculating ActionSet", zap.Time("now", now), zap.Any("state", c.core.Dump())) c.actions = &timedActions{id: id, actions: c.core.NextActions(now)} c.lastActionsID = id - c.stateLogger.Info("New ActionSet", zap.Time("now", now), zap.Any("actions", c.actions.actions)) + c.stateLogger.Debug("New ActionSet", zap.Time("now", now), zap.Any("actions", c.actions.actions)) } return *c.actions