From 87373589195253005a0cf548815a1b90f6d01178 Mon Sep 17 00:00:00 2001
From: xuezhao
Date: Mon, 23 Dec 2024 09:34:33 +0800
Subject: [PATCH] Agents stop updating managedcluster status when clock is
 out of sync. (#770)

When the hub's clock and the agent's clock drift apart by more than the
lease grace period, the hub sets the ManagedClusterConditionClockSynced
condition to False. The agent now checks this condition before patching the
managed cluster status: if the clock is out of sync, it records a
ClockOutOfSync event and returns an error instead of updating the status.

Signed-off-by: xuezhaojun
---
 .../hub/lease/clocksynccontroller.go          | 11 ++++++++---
 .../hub/lease/clocksynccontroller_test.go     |  2 +-
 .../spoke/managedcluster/status_controller.go |  9 +++++++++
 .../registration/managedcluster_lease_test.go | 18 +++++++++++++-----
 4 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/pkg/registration/hub/lease/clocksynccontroller.go b/pkg/registration/hub/lease/clocksynccontroller.go
index 080c40727..8a932e947 100644
--- a/pkg/registration/hub/lease/clocksynccontroller.go
+++ b/pkg/registration/hub/lease/clocksynccontroller.go
@@ -103,10 +103,15 @@ func (c *clockSyncController) sync(ctx context.Context, syncCtx factory.SyncContext) error {
 		return err
 	}
 	// When the agent's lease get renewed, the "now" on hub should close to the RenewTime on agent.
-	// If the two time are not close(over 1 lease duration), we assume the clock is out of sync.
-	oneLeaseDuration := time.Duration(LeaseDurationSeconds) * time.Second
+	// If the two times are not close (within the same lease duration used by the lease controller), we assume the clock is out of sync.
+	// When the clock is out of sync, the agent will not be able to update the status of the managed cluster.
+	leaseDuration := time.Duration(leaseDurationTimes*cluster.Spec.LeaseDurationSeconds) * time.Second
+	if leaseDuration == 0 {
+		leaseDuration = time.Duration(LeaseDurationSeconds*leaseDurationTimes) * time.Second
+	}
+
 	if err := c.updateClusterStatusClockSynced(ctx, cluster,
-		now.Sub(observedLease.Spec.RenewTime.Time) < oneLeaseDuration && observedLease.Spec.RenewTime.Time.Sub(now) < oneLeaseDuration); err != nil {
+		now.Sub(observedLease.Spec.RenewTime.Time) < leaseDuration && observedLease.Spec.RenewTime.Time.Sub(now) < leaseDuration); err != nil {
 		return err
 	}
 	return nil
diff --git a/pkg/registration/hub/lease/clocksynccontroller_test.go b/pkg/registration/hub/lease/clocksynccontroller_test.go
index 022d79406..61a2f74ff 100644
--- a/pkg/registration/hub/lease/clocksynccontroller_test.go
+++ b/pkg/registration/hub/lease/clocksynccontroller_test.go
@@ -63,7 +63,7 @@ func TestClockSyncController(t *testing.T) {
 				testinghelpers.NewManagedCluster(),
 			},
 			leases: []runtime.Object{
-				testinghelpers.NewManagedClusterLease("managed-cluster-lease", now.Add(61*time.Second)),
+				testinghelpers.NewManagedClusterLease("managed-cluster-lease", now.Add(301*time.Second)),
 			},
 			validateActions: func(t *testing.T, leaseActions, clusterActions []clienttesting.Action) {
 				expected := metav1.Condition{
diff --git a/pkg/registration/spoke/managedcluster/status_controller.go b/pkg/registration/spoke/managedcluster/status_controller.go
index 0e9508f0d..352aadb79 100644
--- a/pkg/registration/spoke/managedcluster/status_controller.go
+++ b/pkg/registration/spoke/managedcluster/status_controller.go
@@ -31,6 +31,7 @@ type managedClusterStatusController struct {
 	patcher          patcher.Patcher[*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus]
 	hubClusterLister clusterv1listers.ManagedClusterLister
 	hubEventRecorder kevents.EventRecorder
+	recorder         events.Recorder
 }
 
 type statusReconcile interface {
@@ -97,6 +98,7 @@ func newManagedClusterStatusController(
 		},
 		hubClusterLister: hubClusterInformer.Lister(),
 		hubEventRecorder: hubEventRecorder,
+		recorder:         recorder,
 	}
 }
 
@@ -121,6 +123,13 @@ func (c *managedClusterStatusController) sync(ctx context.Context, syncCtx factory.SyncContext) error {
 		}
 	}
 
+	// Check if the managed cluster's clock is out of sync; if so, the agent will not update the managed cluster's status.
+	outOfSynced := meta.IsStatusConditionFalse(newCluster.Status.Conditions, clusterv1.ManagedClusterConditionClockSynced)
+	if outOfSynced {
+		c.recorder.Eventf("ClockOutOfSync", "The managed cluster's clock is out of sync, the agent will not be able to update the status of managed cluster.")
+		return fmt.Errorf("the managed cluster's clock is out of sync, the agent will not be able to update the status of managed cluster.")
+	}
+
 	changed, err := c.patcher.PatchStatus(ctx, newCluster, newCluster.Status, cluster.Status)
 	if err != nil {
 		errs = append(errs, err)
diff --git a/test/integration/registration/managedcluster_lease_test.go b/test/integration/registration/managedcluster_lease_test.go
index e8c11e46e..17709d366 100644
--- a/test/integration/registration/managedcluster_lease_test.go
+++ b/test/integration/registration/managedcluster_lease_test.go
@@ -186,7 +186,7 @@ var _ = ginkgo.Describe("Cluster Lease Update", func() {
 		// stop the agent in case agent update the lease.
 		stop()
 
-		// update the managed cluster lease renew time
+		// update the managed cluster lease renew time and check that the conditions are updated
 		now := time.Now()
 		gomega.Eventually(func() error {
 			lease, err := util.GetManagedClusterLease(kubeClient, managedClusterName)
@@ -194,15 +194,23 @@
 				return err
 			}
 			// The default lease duration is 60s.
-			// The renewTime is 2 leaseDuration before the hub's now, so the clock should be out of sync.
-			// The renewTime + 5 * leaseDuration > now, so the available condition should be true
-			lease.Spec.RenewTime = &metav1.MicroTime{Time: now.Add(-120 * time.Second)}
+			// The renewTime + 5 * leaseDuration < now, so:
+			// * the clock should be out of sync
+			// * the available condition should be unknown
+			lease.Spec.RenewTime = &metav1.MicroTime{Time: now.Add(-301 * time.Second)}
 			_, err = kubeClient.CoordinationV1().Leases(managedClusterName).Update(context.TODO(), lease, metav1.UpdateOptions{})
 			return err
 		}, eventuallyInterval, eventuallyTimeout).ShouldNot(gomega.HaveOccurred())
 
-		assertAvailableCondition(managedClusterName, metav1.ConditionTrue, 0)
+		assertAvailableCondition(managedClusterName, metav1.ConditionUnknown, 0)
 		assertCloclSyncedCondition(managedClusterName, metav1.ConditionFalse, 0)
+
+		// run the agent again and check that the conditions are updated back to True
+		stop = runAgent(managedClusterName, agentOptions, commOptions, spokeCfg)
+		defer stop()
+
+		assertAvailableCondition(managedClusterName, metav1.ConditionTrue, 0)
+		assertCloclSyncedCondition(managedClusterName, metav1.ConditionTrue, 0)
 
 	})
 })
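
The hub-side check above reduces to comparing the hub's "now" against the RenewTime the agent wrote into its lease, allowing the same grace window in either direction. A minimal standalone sketch of that comparison follows; the helper name clockSynced and the constant values are assumptions for illustration, while the real logic lives in clocksynccontroller.go as patched above.

package main

import (
	"fmt"
	"time"
)

// Assumed defaults mirroring the registration controllers: the grace window is
// leaseDurationTimes * the cluster's LeaseDurationSeconds, falling back to 60s
// when the ManagedCluster spec does not set a lease duration.
const (
	leaseDurationTimes          = 5
	defaultLeaseDurationSeconds = 60
)

// clockSynced reports whether now and renewTime are within one grace window of
// each other, in either direction, which is the symmetric comparison used by
// the clock sync controller above.
func clockSynced(now, renewTime time.Time, leaseDurationSeconds int32) bool {
	leaseDuration := time.Duration(leaseDurationTimes*leaseDurationSeconds) * time.Second
	if leaseDuration == 0 {
		leaseDuration = time.Duration(defaultLeaseDurationSeconds*leaseDurationTimes) * time.Second
	}
	return now.Sub(renewTime) < leaseDuration && renewTime.Sub(now) < leaseDuration
}

func main() {
	now := time.Now()
	fmt.Println(clockSynced(now, now.Add(-299*time.Second), 60)) // true: renew time is within 5 * 60s of the hub's clock
	fmt.Println(clockSynced(now, now.Add(-301*time.Second), 60)) // false: the skew the integration test injects above
	fmt.Println(clockSynced(now, now.Add(301*time.Second), 60))  // false: an agent clock running ahead is rejected as well
}

On the spoke side the gate is simpler: while ManagedClusterConditionClockSynced is False, the status controller records a ClockOutOfSync event and returns an error instead of patching the ManagedCluster status, so a skewed agent cannot overwrite the hub-observed conditions.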