Agents stop updating managedcluster status when clock is out of sync. (#770)

Signed-off-by: xuezhaojun <[email protected]>
xuezhaojun authored Dec 23, 2024
1 parent b568123 commit 8737358
Showing 4 changed files with 31 additions and 9 deletions.
11 changes: 8 additions & 3 deletions pkg/registration/hub/lease/clocksynccontroller.go
@@ -103,10 +103,15 @@ func (c *clockSyncController) sync(ctx context.Context, syncCtx factory.SyncCont
return err
}
// When the agent's lease gets renewed, the "now" on the hub should be close to the RenewTime on the agent.
// If the two times are not close (over 1 lease duration), we assume the clock is out of sync.
oneLeaseDuration := time.Duration(LeaseDurationSeconds) * time.Second
// If the two times are not close (within the same duration used in the lease controller), we assume the clock is out of sync.
// If the clock is out of sync, the agent will not be able to update the status of the managed cluster.
leaseDuration := time.Duration(leaseDurationTimes*cluster.Spec.LeaseDurationSeconds) * time.Second
if leaseDuration == 0 {
leaseDuration = time.Duration(LeaseDurationSeconds*leaseDurationTimes) * time.Second
}

if err := c.updateClusterStatusClockSynced(ctx, cluster,
now.Sub(observedLease.Spec.RenewTime.Time) < oneLeaseDuration && observedLease.Spec.RenewTime.Time.Sub(now) < oneLeaseDuration); err != nil {
now.Sub(observedLease.Spec.RenewTime.Time) < leaseDuration && observedLease.Spec.RenewTime.Time.Sub(now) < leaseDuration); err != nil {
return err
}
return nil
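For reference, here is a minimal standalone sketch of the skew check this hunk implements. The default lease duration of 60s comes from the test comments below; the leaseDurationTimes multiplier of 5 is an assumption inferred from the "5 * leaseDuration" comments in the integration test, since neither constant's value appears in this diff.

package main

import (
	"fmt"
	"time"
)

// Assumed values, inferred from the tests in this commit (301s just exceeds 5*60s).
const (
	defaultLeaseDurationSeconds = 60
	leaseDurationTimes          = 5
)

// clockSynced reports whether the hub's "now" and the agent's lease RenewTime
// are within one effective lease duration of each other, in either direction.
func clockSynced(now, renewTime time.Time, clusterLeaseDurationSeconds int32) bool {
	leaseDuration := time.Duration(leaseDurationTimes*clusterLeaseDurationSeconds) * time.Second
	if leaseDuration == 0 {
		// Fall back to the default when the cluster spec does not set a duration.
		leaseDuration = time.Duration(defaultLeaseDurationSeconds*leaseDurationTimes) * time.Second
	}
	return now.Sub(renewTime) < leaseDuration && renewTime.Sub(now) < leaseDuration
}

func main() {
	now := time.Now()
	fmt.Println(clockSynced(now, now.Add(-120*time.Second), 0)) // true: 120s is inside the 300s window
	fmt.Println(clockSynced(now, now.Add(-301*time.Second), 0)) // false: 301s is outside the 300s window
}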
2 changes: 1 addition & 1 deletion pkg/registration/hub/lease/clocksynccontroller_test.go
@@ -63,7 +63,7 @@ func TestClockSyncController(t *testing.T) {
testinghelpers.NewManagedCluster(),
},
leases: []runtime.Object{
testinghelpers.NewManagedClusterLease("managed-cluster-lease", now.Add(61*time.Second)),
testinghelpers.NewManagedClusterLease("managed-cluster-lease", now.Add(301*time.Second)),
},
validateActions: func(t *testing.T, leaseActions, clusterActions []clienttesting.Action) {
expected := metav1.Condition{
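The arithmetic behind the new fixture value: with the default 60s lease duration and the assumed leaseDurationTimes multiplier of 5, the allowed skew window is 5 * 60s = 300s, so a RenewTime 301s away from the hub's "now" lands just outside it. The old 61s value was likewise just past the previous window of a single 60s lease duration.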
9 changes: 9 additions & 0 deletions pkg/registration/spoke/managedcluster/status_controller.go
@@ -31,6 +31,7 @@ type managedClusterStatusController struct {
patcher patcher.Patcher[*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus]
hubClusterLister clusterv1listers.ManagedClusterLister
hubEventRecorder kevents.EventRecorder
recorder events.Recorder
}

type statusReconcile interface {
@@ -97,6 +98,7 @@ func newManagedClusterStatusController(
},
hubClusterLister: hubClusterInformer.Lister(),
hubEventRecorder: hubEventRecorder,
recorder: recorder,
}
}

@@ -121,6 +123,13 @@ func (c *managedClusterStatusController) sync(ctx context.Context, syncCtx facto
}
}

// Check if the managed cluster's clock is out of sync; if so, the agent will not be able to update the status of the managed cluster.
outOfSync := meta.IsStatusConditionFalse(newCluster.Status.Conditions, clusterv1.ManagedClusterConditionClockSynced)
if outOfSync {
c.recorder.Eventf("ClockOutOfSync", "The managed cluster's clock is out of sync; the agent will not be able to update the status of the managed cluster.")
return fmt.Errorf("the managed cluster's clock is out of sync, the agent will not be able to update the status of the managed cluster")
}

changed, err := c.patcher.PatchStatus(ctx, newCluster, newCluster.Status, cluster.Status)
if err != nil {
errs = append(errs, err)
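The gate uses meta.IsStatusConditionFalse from k8s.io/apimachinery, which returns true only when the condition is present and explicitly False; an absent or Unknown ClockSynced condition does not block status updates. A small illustration, with the condition type string being an assumption (the real code uses the clusterv1.ManagedClusterConditionClockSynced constant):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// Hypothetical condition list; the type string mirrors the constant name
	// used in the diff, but its actual value is an assumption here.
	conditions := []metav1.Condition{
		{Type: "ManagedClusterConditionClockSynced", Status: metav1.ConditionFalse},
	}

	// Present-and-False blocks the status patch; absent or Unknown does not.
	if meta.IsStatusConditionFalse(conditions, "ManagedClusterConditionClockSynced") {
		fmt.Println("clock out of sync: skip patching managed cluster status")
	}
}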
18 changes: 13 additions & 5 deletions test/integration/registration/managedcluster_lease_test.go
@@ -186,23 +186,31 @@ var _ = ginkgo.Describe("Cluster Lease Update", func() {
// stop the agent in case the agent updates the lease.
stop()

// update the managed cluster lease renew time
// update the managed cluster lease renew time, check if conditions are updated
now := time.Now()
gomega.Eventually(func() error {
lease, err := util.GetManagedClusterLease(kubeClient, managedClusterName)
if err != nil {
return err
}
// The default lease duration is 60s.
// The renewTime is 2 leaseDuration before the hub's now, so the clock should be out of sync.
// The renewTime + 5 * leaseDuration > now, so the available condition should be true
lease.Spec.RenewTime = &metav1.MicroTime{Time: now.Add(-120 * time.Second)}
// The renewTime + 5 * leaseDuration < now, so:
// * the clock should be out of sync
// * the available condition should be unknown
lease.Spec.RenewTime = &metav1.MicroTime{Time: now.Add(-301 * time.Second)}
_, err = kubeClient.CoordinationV1().Leases(managedClusterName).Update(context.TODO(), lease, metav1.UpdateOptions{})
return err
}, eventuallyInterval, eventuallyTimeout).ShouldNot(gomega.HaveOccurred())

assertAvailableCondition(managedClusterName, metav1.ConditionTrue, 0)
assertAvailableCondition(managedClusterName, metav1.ConditionUnknown, 0)
assertCloclSyncedCondition(managedClusterName, metav1.ConditionFalse, 0)

// run the agent again, check if the conditions are updated to True
stop = runAgent(managedClusterName, agentOptions, commOptions, spokeCfg)
defer stop()

assertAvailableCondition(managedClusterName, metav1.ConditionTrue, 0)
assertCloclSyncedCondition(managedClusterName, metav1.ConditionTrue, 0)
})
})

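The restart is the point of the second half of the test: the ClockSynced condition is written by the hub's clock-sync controller from the lease RenewTime, so once the agent runs again and renews the lease, the observed skew falls back inside the window, the hub flips ManagedClusterConditionClockSynced to True, and the agent can resume patching status. That is why the final assertions expect both conditions to be True.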
