From a6fcbba54173434e5ee964dd0d491fec69148049 Mon Sep 17 00:00:00 2001
From: Misha Sakhnov
Date: Tue, 27 Aug 2024 15:12:57 +0300
Subject: [PATCH] plugin: add warning log message for pods with an assigned scheduler (#1037)

plugin: add warning log message for pods with an assigned scheduler,
nodeName, or nodeSelector

closes #93

Signed-off-by: Misha Sakhnov
---
 pkg/plugin/plugin.go |  3 ++-
 pkg/plugin/state.go  | 18 ++++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/pkg/plugin/plugin.go b/pkg/plugin/plugin.go
index cf8aa3250..95c425a01 100644
--- a/pkg/plugin/plugin.go
+++ b/pkg/plugin/plugin.go
@@ -153,7 +153,7 @@ func makeAutoscaleEnforcerPlugin(
 				incEventCount()
 			}
 			pushToQueue(logger, pod.Name, func() {
-				p.handleStarted(hlogger, pod)
+				p.handleStarted(hlogger, pod, preexisting)
 				if preexisting {
 					initEvents.dec()
 				}
@@ -822,6 +822,7 @@ func (e *AutoscaleEnforcer) Reserve(
 		// don't include buffer because we know that future changes by the autoscaler-agent must go
 		// through us.
 		includeBuffer: false,
+		preexisting:   false,
 	})
 	if err != nil {
 		return framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())
diff --git a/pkg/plugin/state.go b/pkg/plugin/state.go
index ce134e94e..68eea4db1 100644
--- a/pkg/plugin/state.go
+++ b/pkg/plugin/state.go
@@ -572,7 +572,7 @@ func (e *AutoscaleEnforcer) handleNodeDeletion(logger *zap.Logger, nodeName stri
 // otherwise, we might (a) ignore resources from pods that weren't scheduled here, or (b) fail to
 // include pods that *were* scheduled here, but had spurious Unreserves.
 // (for more, see: https://github.com/neondatabase/autoscaling/pull/435)
-func (e *AutoscaleEnforcer) handleStarted(logger *zap.Logger, pod *corev1.Pod) {
+func (e *AutoscaleEnforcer) handleStarted(logger *zap.Logger, pod *corev1.Pod, preexisting bool) {
 	nodeName := pod.Spec.NodeName

 	logger = logger.With(
@@ -592,12 +592,14 @@ func (e *AutoscaleEnforcer) handleStarted(logger *zap.Logger, pod *corev1.Pod)
 		// this may be a preexisting VM. If so, we should include it in "buffer" as long it's
 		// supposed to be handled by us (otherwise, the "buffer" will never be resolved)
 		includeBuffer: pod.Spec.SchedulerName == e.state.conf.SchedulerName,
+		preexisting:   preexisting,
 	})
 }

 type reserveOptions struct {
 	allowDeny     bool
 	includeBuffer bool
+	preexisting   bool
 }

 // reserveResources attempts to set aside resources on the node for the pod.
@@ -630,13 +632,25 @@ func (e *AutoscaleEnforcer) reserveResources(
 	e.state.lock.Lock()
 	defer e.state.lock.Unlock()

+	podName := util.GetNamespacedName(pod)
 	// If the pod already exists, nothing to do
-	if _, ok := e.state.pods[util.GetNamespacedName(pod)]; ok {
+	_, isPodInState := e.state.pods[podName]
+	if isPodInState {
 		logger.Info("Pod already exists in global state")
 		return true, &verdictSet{cpu: "", mem: ""}, nil
 	}

+	// If all of the following conditions are met, the pod has bypassed the neon scheduler, which
+	// might be a sign of a bug or misbehavior:
+	// - the pod is assigned to the autoscaler scheduler
+	// - the pod is not in the state
+	// - the pod is not a preexisting pod
+	// - the pod has a node name
+	if !isPodInState && !opts.preexisting && pod.Spec.SchedulerName == e.state.conf.SchedulerName && pod.Spec.NodeName != "" {
+		logger.Warn("Pod has bypassed neon scheduler")
+	}
+
 	// Get information about the node
 	node, err := e.state.getOrFetchNodeState(ctx, logger, e.metrics, e.nodeStore, nodeName)
 	if err != nil {
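
Note: below is a minimal, self-contained sketch of the bypass check this patch adds, for illustration only. bypassedScheduler, schedulerName, and the inState/preexisting flags are hypothetical stand-ins for the plugin's config (e.state.conf.SchedulerName) and global state (e.state.pods), and "autoscale-scheduler" is a placeholder scheduler name.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// bypassedScheduler reports whether a pod assigned to our scheduler arrived
// with a node already chosen, i.e. it never went through our Reserve step.
// inState and preexisting stand in for the plugin's global state and startup
// tracking; schedulerName stands in for e.state.conf.SchedulerName.
func bypassedScheduler(pod *corev1.Pod, schedulerName string, inState, preexisting bool) bool {
	return !inState &&
		!preexisting &&
		pod.Spec.SchedulerName == schedulerName &&
		pod.Spec.NodeName != ""
}

func main() {
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "vm-runner-abc", Namespace: "default"},
		Spec: corev1.PodSpec{
			SchedulerName: "autoscale-scheduler",
			NodeName:      "node-1", // already bound to a node, so Reserve never ran for it
		},
	}
	if bypassedScheduler(pod, "autoscale-scheduler", false, false) {
		fmt.Println("WARN: Pod has bypassed neon scheduler")
	}
}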