Skip to content

Commit

Permalink
Use the real expire time to check if the token should refresh (open-c…
Browse files Browse the repository at this point in the history
  • Loading branch information
zhujian7 authored Oct 21, 2024
1 parent 97ceb83 commit e4ad803
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 61 deletions.
3 changes: 1 addition & 2 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ import (
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
cliflag "k8s.io/component-base/cli/flag"
"k8s.io/klog/v2"
"k8s.io/klog/v2/klogr"
"open-cluster-management.io/addon-framework/pkg/addonfactory"
"open-cluster-management.io/addon-framework/pkg/addonmanager"
"open-cluster-management.io/addon-framework/pkg/agent"
Expand Down Expand Up @@ -73,7 +72,7 @@ func main() {
var imagePullSecretName string
var featureGatesFlags map[string]bool

logger := klogr.New()
logger := klog.Background()
klog.SetOutput(os.Stdout)
klog.InitFlags(flag.CommandLine)
flag.StringVar(&metricsAddr, "metrics-bind-address", ":38080", "The address the metric endpoint binds to.")
Expand Down
99 changes: 56 additions & 43 deletions pkg/addon/agent/controller/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ func (r *TokenReconciler) Reconcile(ctx context.Context, request reconcile.Reque

if err := r.Cache.Get(ctx, request.NamespacedName, msa); err != nil {
if !apierrors.IsNotFound(err) {
//fail to get managed-serviceaccount, requeue
// fail to get managed-serviceaccount, requeue
return reconcile.Result{}, errors.Wrapf(err, "fail to get managed serviceaccount")
}

sai := r.SpokeNativeClient.CoreV1().ServiceAccounts(r.SpokeNamespace)
if err := sai.Delete(ctx, request.Name, metav1.DeleteOptions{}); err != nil {
if !apierrors.IsNotFound(err) {
//fail to delete related serviceaccount, requeue
// fail to delete related serviceaccount, requeue
return reconcile.Result{}, errors.Wrapf(err, "fail to delete related serviceaccount")
}
}
Expand Down Expand Up @@ -107,67 +107,73 @@ func (r *TokenReconciler) Reconcile(ctx context.Context, request reconcile.Reque
}

now := metav1.Now()
tokenRefreshTime := now

var requeueAfter time.Duration = 0
if expiring == nil {
logger.Info("Skipped creating token")
// token is not refreshed, keep the previous expiration timestamp
expiring = msa.Status.ExpirationTimestamp
if msa.Status.TokenSecretRef != nil {
tokenRefreshTime = msa.Status.TokenSecretRef.LastRefreshTimestamp
// token is not expiried, no need to refresh, just calculate the requeue time
if msa.Status.TokenSecretRef == nil || msa.Status.ExpirationTimestamp == nil {
return reconcile.Result{}, errors.New("token secret ref or expiration time is nil but token not refreshed")
}

setManagedServiceAccountSuccessStatus(msaCopy, msa.Status.ExpirationTimestamp,
now, msa.Status.TokenSecretRef.LastRefreshTimestamp)

// Requeue even if the token is not refreshed, otherwise if the agent restarts
// at the time that the token is not expried, no chance to trigger the expiration
// check again
requeueAfter = checkTokenRefreshAfter(now,
*msa.Status.ExpirationTimestamp, msa.Status.TokenSecretRef.LastRefreshTimestamp)

} else {
// after sync func succeeds, the secret must exist, add the conditions if not exist
setManagedServiceAccountSuccessStatus(msaCopy, expiring, now, now)
}

if !reflect.DeepEqual(msa.Status, msaCopy.Status) {
if err := r.HubClient.Status().Update(context.TODO(), msaCopy); err != nil {
return reconcile.Result{}, errors.Wrapf(err, "failed to update status")
}
}

return reconcile.Result{RequeueAfter: requeueAfter}, nil
}

func setManagedServiceAccountSuccessStatus(msaCopy *authv1beta1.ManagedServiceAccount,
expiring *metav1.Time, lastTransitionTime, lastRreshTimestamp metav1.Time) {

// after sync func succeeds, the secret must exist, add the conditions if not exist
meta.SetStatusCondition(&msaCopy.Status.Conditions, metav1.Condition{
Type: authv1beta1.ConditionTypeSecretCreated,
Status: metav1.ConditionTrue,
Reason: "SecretCreated",
LastTransitionTime: now,
LastTransitionTime: lastTransitionTime,
})

meta.SetStatusCondition(&msaCopy.Status.Conditions, metav1.Condition{
Type: authv1beta1.ConditionTypeTokenReported,
Status: metav1.ConditionTrue,
Reason: "TokenReported",
LastTransitionTime: now,
LastTransitionTime: lastTransitionTime,
})

msaCopy.Status.ExpirationTimestamp = expiring
msaCopy.Status.TokenSecretRef = &authv1beta1.SecretRef{
Name: msa.Name,
LastRefreshTimestamp: tokenRefreshTime,
}

if !reflect.DeepEqual(msa.Status, msaCopy.Status) {
if err := r.HubClient.Status().Update(context.TODO(), msaCopy); err != nil {
return reconcile.Result{}, errors.Wrapf(err, "failed to update status")
}
logger.Info("Token refreshed")
return reconcile.Result{}, nil
Name: msaCopy.Name,
LastRefreshTimestamp: lastRreshTimestamp,
}

return reconcile.Result{
// Requeue even if the token is not refreshed, otherwise if the agent restarts
// at the time that the token is not expried, no chance to trigger the expiration
// check again
RequeueAfter: checkTokenRefreshAfter(now, expiring, msa.Spec.Rotation.Validity.Duration),
}, nil
}

func checkTokenRefreshAfter(now metav1.Time, expiring *metav1.Time, validityDuration time.Duration) time.Duration {
refreshThreshold := validityDuration / 5 * 1
lifetime := expiring.Sub(now.Time)
if (lifetime - refreshThreshold) > 0 {
return lifetime - refreshThreshold + time.Duration(5*time.Second)
} else {
func checkTokenRefreshAfter(now metav1.Time, expiring metav1.Time, lastRefreshTimestamp metav1.Time) time.Duration {
exceed, threshold := exceedThreshold(now, expiring, lastRefreshTimestamp)
if exceed {
return time.Duration(5 * time.Second)
}
return threshold.Sub(now.Time) + time.Duration(5*time.Second)
}

// sync is the main logic of token rotation, it returns the expiration time of the token if the token is created/updated
func (r *TokenReconciler) sync(ctx context.Context,
managed *authv1beta1.ManagedServiceAccount) (*metav1.Time, error) {
logger := log.FromContext(ctx)
secretExists := true
currentTokenSecret := &corev1.Secret{}
if err := r.HubClient.Get(ctx, types.NamespacedName{
Expand Down Expand Up @@ -213,6 +219,7 @@ func (r *TokenReconciler) sync(ctx context.Context,
}
}

logger.Info("Token refreshed", "expirationTimestamp", expiring)
return &expiring, nil
}

Expand Down Expand Up @@ -314,25 +321,21 @@ func buildSecret(managed *authv1beta1.ManagedServiceAccount, caData, tokenData [
}
}

func (r *TokenReconciler) isSoonExpiring(managed *authv1beta1.ManagedServiceAccount, tokenSecret *corev1.Secret) (bool, error) {
if managed.Status.TokenSecretRef == nil || tokenSecret == nil {
func (r *TokenReconciler) isSoonExpiring(msa *authv1beta1.ManagedServiceAccount, secret *corev1.Secret) (bool, error) {
if msa.Status.TokenSecretRef == nil || msa.Status.ExpirationTimestamp == nil || secret == nil {
return true, nil
}

// check if the token should be refreshed
// the token will not be rotated unless its remaining lifetime is less
// than 20% of its rotation validity
now := metav1.Now()
refreshThreshold := managed.Spec.Rotation.Validity.Duration / 5 * 1
lifetime := managed.Status.ExpirationTimestamp.Sub(now.Time)
if lifetime < refreshThreshold {
if exceed, _ := exceedThreshold(now, *msa.Status.ExpirationTimestamp,
msa.Status.TokenSecretRef.LastRefreshTimestamp); exceed {
return true, nil
}

// check if the token is valid or not
tokenReview := &authv1.TokenReview{
Spec: authv1.TokenReviewSpec{
Token: string(tokenSecret.Data[corev1.ServiceAccountTokenKey]),
Token: string(secret.Data[corev1.ServiceAccountTokenKey]),
},
}
tr, err := r.SpokeNativeClient.AuthenticationV1().TokenReviews().Create(
Expand All @@ -343,3 +346,13 @@ func (r *TokenReconciler) isSoonExpiring(managed *authv1beta1.ManagedServiceAcco

return !tr.Status.Authenticated, nil
}

func exceedThreshold(now metav1.Time, expiring metav1.Time, lastRefreshTimestamp metav1.Time) (bool, time.Time) {
// Check if the token should be refreshed, the token will not be rotated unless its remaining lifetime is
// less than 20% of its rotation validity
// Some kubernetes distribution may have a maximum token lifetime, for example, eks will shorten the token lifetime
// to 1 day, so here we use the real expiration time and last refresh time, instead of the requested expiration time
// in the managedserviceaccount.spec.rotation.validity, to calculate the refresh threshold
threshold := lastRefreshTimestamp.Add(expiring.Sub(lastRefreshTimestamp.Time) / 5 * 4)
return now.Time.After(threshold), threshold
}
40 changes: 24 additions & 16 deletions pkg/addon/agent/controller/token_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ import (
fakekube "k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/rest"
clienttesting "k8s.io/client-go/testing"
"k8s.io/klog/v2"
authv1beta1 "open-cluster-management.io/managed-serviceaccount/apis/authentication/v1beta1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
Expand All @@ -32,7 +34,10 @@ func TestReconcile(t *testing.T) {
token2 := "token2"
ca1 := "ca1"
ca2 := "ca2"
logger := klog.Background()
ctrl.SetLogger(logger)

now := time.Now()
cases := []struct {
name string
msa *authv1beta1.ManagedServiceAccount
Expand Down Expand Up @@ -106,7 +111,7 @@ func TestReconcile(t *testing.T) {
secret: newSecret(clusterName, msaName, token1, ca1),
msa: newManagedServiceAccount(clusterName, msaName).
withRotationValidity(500*time.Second).
withTokenSecretRef(msaName, time.Now().Add(300*time.Second)).
withTokenSecretRef(msaName, now.Add(300*time.Second), now).
build(),
newToken: token1,
validateFunc: func(t *testing.T, hubClient client.Client, actions []clienttesting.Action) {
Expand Down Expand Up @@ -157,7 +162,7 @@ func TestReconcile(t *testing.T) {
secret: newSecret(clusterName, msaName, token1, ca1),
msa: newManagedServiceAccount(clusterName, msaName).
withRotationValidity(500*time.Second).
withTokenSecretRef(msaName, time.Now().Add(80*time.Second)).
withTokenSecretRef(msaName, now.Add(10*time.Second), now.Add(-100*time.Second)).
build(),
newToken: token2,
validateFunc: func(t *testing.T, hubClient client.Client, actions []clienttesting.Action) {
Expand All @@ -179,14 +184,14 @@ func TestReconcile(t *testing.T) {
secret: newSecret(clusterName, msaName, token2, ca2),
msa: newManagedServiceAccount(clusterName, msaName).
withRotationValidity(500*time.Second).
withTokenSecretRef(msaName, time.Now().Add(300*time.Second)).
withTokenSecretRef(msaName, now.Add(300*time.Second), now).
build(),
newToken: token1,
isExistingTokenInvalid: true,
validateFunc: func(t *testing.T, hubClient client.Client, actions []clienttesting.Action) {
assertActions(t, actions, "create", // create serviceaccount
"create", // create tokenreview
"create", // create tokenreview
"create", // create token
)
assertToken(t, hubClient, clusterName, msaName, token1, ca1)
assertMSAConditions(t, hubClient, clusterName, msaName, []metav1.Condition{
Expand All @@ -206,7 +211,7 @@ func TestReconcile(t *testing.T) {
}),
msa: newManagedServiceAccount(clusterName, msaName).
withRotationValidity(500*time.Second).
withTokenSecretRef(msaName, time.Now().Add(300*time.Second)).
withTokenSecretRef(msaName, now.Add(300*time.Second), now).
build(),
newToken: token1,
isExistingTokenInvalid: true,
Expand All @@ -223,6 +228,7 @@ func TestReconcile(t *testing.T) {
},
}
for _, c := range cases {

t.Run(c.name, func(t *testing.T) {
// create fake kube client of the managed cluster
objs := []runtime.Object{}
Expand Down Expand Up @@ -394,11 +400,13 @@ func (b *managedServiceAccountBuilder) withRotationValidity(duration time.Durati
return b
}

func (b *managedServiceAccountBuilder) withTokenSecretRef(secretName string, expirationTimestamp time.Time) *managedServiceAccountBuilder {
func (b *managedServiceAccountBuilder) withTokenSecretRef(secretName string,
expiration, lastRefresh time.Time) *managedServiceAccountBuilder {
b.msa.Status.TokenSecretRef = &authv1beta1.SecretRef{
Name: secretName,
Name: secretName,
LastRefreshTimestamp: metav1.NewTime(lastRefresh),
}
timestamp := metav1.NewTime(expirationTimestamp)
timestamp := metav1.NewTime(expiration)
b.msa.Status.ExpirationTimestamp = &timestamp
return b
}
Expand Down Expand Up @@ -651,30 +659,30 @@ func TestReconcileCreateTokenByDefaultSecret(t *testing.T) {
}

func TestCheckTokenRefreshAfter(t *testing.T) {
now := metav1.Time{Time: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)}
now := metav1.Time{Time: time.Date(2024, 1, 1, 1, 0, 0, 0, time.UTC)}
cases := []struct {
name string
expiring *metav1.Time
validityDuration time.Duration
expiring metav1.Time
lastRefreshTimestamp metav1.Time
expectedRequeueAfter time.Duration
}{
{
name: "expired",
expiring: &metav1.Time{Time: time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)},
validityDuration: 10 * time.Hour,
expiring: metav1.Time{Time: time.Date(2024, 1, 1, 0, 0, 10, 0, time.UTC)},
lastRefreshTimestamp: metav1.Time{Time: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)},
expectedRequeueAfter: 5 * time.Second,
},
{
name: "not expired",
expiring: &metav1.Time{Time: time.Date(2024, 1, 1, 9, 0, 0, 0, time.UTC)},
validityDuration: 10 * time.Hour,
expiring: metav1.Time{Time: time.Date(2024, 1, 1, 10, 0, 0, 0, time.UTC)},
lastRefreshTimestamp: metav1.Time{Time: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)},
expectedRequeueAfter: 7*time.Hour + 5*time.Second,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {

ra := checkTokenRefreshAfter(now, c.expiring, c.validityDuration)
ra := checkTokenRefreshAfter(now, c.expiring, c.lastRefreshTimestamp)
if ra != c.expectedRequeueAfter {
t.Errorf("expected %v but got %v", c.expectedRequeueAfter, ra)
}
Expand Down

0 comments on commit e4ad803

Please sign in to comment.