neonvm-controller: recreate the pod for VM even if the older pod still exists (#1055)

We previously assumed that running multiple pods for one VM is
dangerous.

We now believe it should be fine, even if the old pod still tries to
keep operating: the new pod would take over the safekeepers quorum,
leaving the old one to disconnect and shut down.

Signed-off-by: Oleg Vasilev <[email protected]>
Omrigan committed Oct 14, 2024
1 parent fb1728c commit ca74cc3
Showing 5 changed files with 15 additions and 23 deletions.
5 changes: 5 additions & 0 deletions neonvm-controller/cmd/main.go
@@ -100,6 +100,7 @@ func main() {
 var memhpAutoMovableRatio string
 var failurePendingPeriod time.Duration
 var failingRefreshInterval time.Duration
+var atMostOnePod bool
 flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
 flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
 flag.BoolVar(&enableLeaderElection, "leader-elect", false,
@@ -140,6 +141,9 @@ func main() {
 "the period for the propagation of reconciliation failures to the observability instruments")
 flag.DurationVar(&failingRefreshInterval, "failing-refresh-interval", 1*time.Minute,
 "the interval between consecutive updates of metrics and logs, related to failing reconciliations")
+flag.BoolVar(&atMostOnePod, "at-most-one-pod", false,
+"If true, the controller will ensure that at most one pod is running at a time. "+
+"Otherwise, the outdated pod might be left to terminate, while the new one is already running.")
 flag.Parse()
 
 if defaultMemoryProvider == "" {
@@ -195,6 +199,7 @@ func main() {
 MemhpAutoMovableRatio: memhpAutoMovableRatio,
 FailurePendingPeriod: failurePendingPeriod,
 FailingRefreshInterval: failingRefreshInterval,
+AtMostOnePod: atMostOnePod,
 }
 
 vmReconciler := &controllers.VMReconciler{
3 changes: 3 additions & 0 deletions pkg/neonvm/controllers/config.go
@@ -50,4 +50,7 @@ type ReconcilerConfig struct {
 // FailingRefreshInterval is the interval between consecutive
 // updates of metrics and logs, related to failing reconciliations
 FailingRefreshInterval time.Duration
+
+// AtMostOnePod is the flag that indicates whether we should only have one pod per VM.
+AtMostOnePod bool
 }
1 change: 1 addition & 0 deletions pkg/neonvm/controllers/functests/vm_controller_test.go
@@ -115,6 +115,7 @@ var _ = Describe("VirtualMachine controller", func() {
 MemhpAutoMovableRatio: "301",
 FailurePendingPeriod: 1 * time.Minute,
 FailingRefreshInterval: 1 * time.Minute,
+AtMostOnePod: false,
 },
 }
 
28 changes: 5 additions & 23 deletions pkg/neonvm/controllers/vm_controller.go
@@ -772,13 +772,12 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
 return err
 }
 
-// We must keep the VM status the same until we know the neonvm-runner container has been
-// terminated, otherwise we could end up starting a new runner pod while the VM in the old
-// one is still running.
+// By default, we cleanup the VM, even if previous pod still exists. This behavior is for the case
+// where the pod is stuck deleting, and we want to progress without waiting for it.
 //
-// Note that this is required because 'VmSucceeded' and 'VmFailed' are true if *at least
-// one* container inside the runner pod has finished; the VM itself may still be running.
-if apierrors.IsNotFound(err) || runnerContainerStopped(vmRunner) {
+// However, this opens up a possibility for cascading failures where the pods would be constantly
+// recreated, and then stuck deleting. That's why we have AtMostOnePod.
+if !r.Config.AtMostOnePod || apierrors.IsNotFound(err) {
 // NB: Cleanup() leaves status .Phase and .RestartCount (+ some others) but unsets other fields.
 vm.Cleanup()
 
@@ -957,23 +956,6 @@ func runnerStatus(pod *corev1.Pod) runnerStatusKind {
 }
 }
 
-// runnerContainerStopped returns true iff the neonvm-runner container has exited.
-//
-// The guarantee is simple: It is only safe to start a new runner pod for a VM if
-// runnerContainerStopped returns true (otherwise, we may end up with >1 instance of the same VM).
-func runnerContainerStopped(pod *corev1.Pod) bool {
-if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
-return true
-}
-
-for _, stat := range pod.Status.ContainerStatuses {
-if stat.Name == "neonvm-runner" {
-return stat.State.Terminated != nil
-}
-}
-return false
-}
-
 // deleteRunnerPodIfEnabled deletes the runner pod if buildtag.NeverDeleteRunnerPods is false, and
 // then emits an event and log line about what it did, whether it actually deleted the runner pod.
 func (r *VMReconciler) deleteRunnerPodIfEnabled(
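After this change, the cleanup decision in doReconcile reduces to a single predicate. Below is a minimal sketch of that guard, assuming only the apierrors package already imported in the diff; the helper name shouldCleanupVM and its signature are hypothetical and not part of the commit.

package controllers

import (
	apierrors "k8s.io/apimachinery/pkg/api/errors"
)

// shouldCleanupVM is a hypothetical helper mirroring the committed condition.
// getPodErr is the error returned when fetching the VM's runner pod.
func shouldCleanupVM(atMostOnePod bool, getPodErr error) bool {
	// Default behavior: clean up (and allow a new runner pod) even if the
	// previous pod still exists, e.g. when it is stuck deleting.
	if !atMostOnePod {
		return true
	}
	// With -at-most-one-pod set, only proceed once the old pod is truly gone.
	return apierrors.IsNotFound(getPodErr)
}

Calling shouldCleanupVM(r.Config.AtMostOnePod, err) at the site above would reproduce the committed condition !r.Config.AtMostOnePod || apierrors.IsNotFound(err).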
1 change: 1 addition & 0 deletions pkg/neonvm/controllers/vm_controller_unit_test.go
@@ -123,6 +123,7 @@ func newTestParams(t *testing.T) *testParams {
 MemhpAutoMovableRatio: "301",
 FailurePendingPeriod: time.Minute,
 FailingRefreshInterval: time.Minute,
+AtMostOnePod: false,
 },
 Metrics: reconcilerMetrics,
 }
