diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index dbd95b480..fbc11c723 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -175,7 +175,7 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp } } if machine.Spec.ProviderID == "" || machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff { - return c.triggerCreationFlow( + retry, err = c.triggerCreationFlow( ctx, &driver.CreateMachineRequest{ Machine: machine, @@ -183,6 +183,7 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp Secret: &corev1.Secret{Data: secretData}, }, ) + return c.adjustCreateRetryRequired(machine, retry), err } return machineutils.LongRetry, nil @@ -354,7 +355,10 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque // If node label is not present klog.V(2).Infof("Creating a VM for machine %q, please wait!", machine.Name) klog.V(2).Infof("The machine creation is triggered with timeout of %s", c.getEffectiveCreationTimeout(createMachineRequest.Machine).Duration) - createMachineResponse, err := c.driver.CreateMachine(ctx, createMachineRequest) + createMachineCtx, cancelFunc := c.getCreationContext(ctx, machine) + defer cancelFunc() +// TODO: Adapt all mcm-providers driver method implementations to use the passed context + createMachineResponse, err := c.driver.CreateMachine(createMachineCtx, createMachineRequest) if err != nil { // Create call returned an error. klog.Errorf("Error while creating machine %s: %s", machine.Name, err.Error()) diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 22be3ff6f..ee02d06de 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -476,7 +476,7 @@ func SyncMachineTaints( return toBeUpdated } -// machineCreateErrorHandler TODO +// machineCreateErrorHandler handles errors when machine creation does not succeed func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1alpha1.Machine, createMachineResponse *driver.CreateMachineResponse, err error) (machineutils.RetryPeriod, error) { var ( retryRequired = machineutils.MediumRetry @@ -494,7 +494,7 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a lastKnownState = createMachineResponse.LastKnownState } - c.machineStatusUpdate( + statusUpdErr := c.machineStatusUpdate( ctx, machine, v1alpha1.LastOperation{ @@ -510,8 +510,23 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a }, lastKnownState, ) + return retryRequired, statusUpdErr +} + +// adjustCreateRetryRequired adjusts the retry period if needed so that we don't have a case where the machine is reconciled long (<=ShortRetry) after machineCreationTimeout +// if the retry period is too large, it is adjusted to cause a reconcile at the machineCreationTimeout +// if the machineCreationTimeout has already passed, return `ShortRetry` so that the machine is immediately reconciled +func (c *controller) adjustCreateRetryRequired(machine *v1alpha1.Machine, retryRequired machineutils.RetryPeriod) machineutils.RetryPeriod { + adjustedRetry := retryRequired + machineCreationDeadline := machine.CreationTimestamp.Time.Add(c.getEffectiveCreationTimeout(machine).Duration) + if time.Now().After(machineCreationDeadline) { + adjustedRetry = machineutils.ShortRetry + } else if time.Now().Add(time.Duration(retryRequired)).After(machineCreationDeadline) { + // Machine will reconcile after create deadline, so adapt RetryPeriod to reconcile machine at deadline + adjustedRetry = machineutils.RetryPeriod(machineCreationDeadline.Sub(time.Now())) + } - return retryRequired, nil + return adjustedRetry } func (c *controller) machineStatusUpdate( @@ -977,7 +992,7 @@ func isConditionEmpty(condition v1.NodeCondition) bool { return condition == v1.NodeCondition{} } -// initializes err and description with the passed string message +// printLogInitError initializes err and description with the passed string message func printLogInitError(s string, err *error, description *string, machine *v1alpha1.Machine) { klog.Warningf(s+" machine: %q ", machine.Name) *err = fmt.Errorf(s+" %s", machineutils.InitiateVMDeletion) @@ -1372,7 +1387,7 @@ func (c *controller) getEffectiveHealthTimeout(machine *v1alpha1.Machine) *metav return effectiveHealthTimeout } -// getEffectiveHealthTimeout returns the creationTimeout set on the machine-object, otherwise returns the timeout set using the global-flag. +// getEffectiveCreationTimeout returns the creationTimeout set on the machine-object, otherwise returns the timeout set using the global-flag. func (c *controller) getEffectiveCreationTimeout(machine *v1alpha1.Machine) *metav1.Duration { var effectiveCreationTimeout *metav1.Duration if machine.Spec.MachineConfiguration != nil && machine.Spec.MachineConfiguration.MachineCreationTimeout != nil { @@ -1394,6 +1409,11 @@ func (c *controller) getEffectiveNodeConditions(machine *v1alpha1.Machine) *stri return effectiveNodeConditions } +func (c *controller) getCreationContext(parentCtx context.Context, machine *v1alpha1.Machine) (context.Context, context.CancelFunc) { + timeOutDuration := c.getEffectiveCreationTimeout(machine).Duration + return context.WithDeadline(parentCtx, machine.CreationTimestamp.Time.Add(timeOutDuration)) +} + // UpdateNodeTerminationCondition updates termination condition on the node object func (c *controller) UpdateNodeTerminationCondition(ctx context.Context, machine *v1alpha1.Machine) error { if machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff { diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index d5bdbe5af..dcee66e39 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -2725,4 +2725,72 @@ var _ = Describe("machine_util", func() { }), ) }) + + Describe("#Adjust Machine Create Retry Period if near Timeout", func() { + type setup struct { + machine *machinev1.Machine + mcCreateTimeout metav1.Duration + createOpRetry machineutils.RetryPeriod + } + + type expect struct { + retryAdjusted bool + } + + type data struct { + setup setup + expect expect + } + + DescribeTable("##table", + func(data *data) { + stop := make(chan struct{}) + defer close(stop) + + c, trackers := createController(stop, testNamespace, nil, nil, nil, nil) + defer trackers.Stop() + + c.safetyOptions.MachineCreationTimeout = data.setup.mcCreateTimeout + + adjustedRetry := c.adjustCreateRetryRequired(data.setup.machine, machineutils.RetryPeriod(data.setup.createOpRetry)) + + if data.expect.retryAdjusted { + // Considering time.Now() = 0 + Expect(adjustedRetry).Should(BeNumerically("<=", machineutils.RetryPeriod(c.getEffectiveCreationTimeout(data.setup.machine).Duration))) + } else { + Expect(adjustedRetry).To(Equal(data.setup.createOpRetry)) + } + }, + Entry("Should not adjust machine create retry period if retry is set to occur before machine create deadline", &data{ + setup: setup{ + createOpRetry: machineutils.RetryPeriod(3 * time.Minute), + mcCreateTimeout: metav1.Duration{Duration: 20 * time.Minute}, + machine: newMachine( + &machinev1.MachineTemplateSpec{ + Spec: machinev1.MachineSpec{}, + }, + &machinev1.MachineStatus{}, + nil, nil, map[string]string{v1alpha1.NodeLabelKey: "test-node-0"}, true, metav1.Now()), + }, + expect: expect{ + retryAdjusted: false, + }, + }), + Entry("Should adjust machine create retry period to retry at machine create deadline if retry is set to occur after machine create deadline", &data{ + setup: setup{ + createOpRetry: machineutils.RetryPeriod(4 * time.Minute), + mcCreateTimeout: metav1.Duration{Duration: 3 * time.Minute}, + machine: newMachine( + &machinev1.MachineTemplateSpec{ + Spec: machinev1.MachineSpec{}, + }, + &machinev1.MachineStatus{}, + nil, nil, map[string]string{v1alpha1.NodeLabelKey: "test-node-0"}, true, metav1.Now()), + }, + expect: expect{ + retryAdjusted: true, + }, + }), + ) + }) })