From 0961981158472b5764a9ba43f008ee38223b53b3 Mon Sep 17 00:00:00 2001 From: Renan Campos Date: Fri, 13 Sep 2024 14:36:41 -0400 Subject: [PATCH] fix: tolerate required delay between service account creation and configuration It was discovered through testing that service accounts created on GCP need a duration of time between creation and being referenced, otherwise a BadRequest error occurs. A delayed retry logic is introduced to ensure the service account is available before making additional configuration calls. --- cmd/ocm/gcp/gcp-client-shim.go | 23 ++++++++++++++++++----- pkg/gcp/client.go | 13 ++++++++----- pkg/utils/helper.go | 13 +++++++++++++ 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/cmd/ocm/gcp/gcp-client-shim.go b/cmd/ocm/gcp/gcp-client-shim.go index 2bee4652..fc5998be 100644 --- a/cmd/ocm/gcp/gcp-client-shim.go +++ b/cmd/ocm/gcp/gcp-client-shim.go @@ -5,6 +5,7 @@ import ( "fmt" "log" "strings" + "time" "cloud.google.com/go/iam/admin/apiv1/adminpb" "github.com/googleapis/gax-go/v2/apierror" @@ -16,6 +17,12 @@ import ( "google.golang.org/grpc/codes" "github.com/openshift-online/ocm-cli/pkg/gcp" + "github.com/openshift-online/ocm-cli/pkg/utils" +) + +const ( + maxRetries = 10 + retryDelayMs = 500 ) type GcpClientWifConfigShim interface { @@ -287,11 +294,17 @@ func (c *shim) bindRolesToServiceAccount( serviceAccountId := serviceAccount.ServiceAccountId() roles := serviceAccount.Roles() - return c.bindRolesToPrincipal( - ctx, - fmt.Sprintf("serviceAccount:%s@%s.iam.gserviceaccount.com", serviceAccountId, c.wifConfig.Gcp().ProjectId()), - roles, - ) + // It was found that there is a window of time between when a service + // account creation call is made that the service account is not available + // in adjacent API calls. The call is therefore wrapped in retry logic to + // be robust to these types of synchronization issues. + return utils.DelayedRetry(func() error { + return c.bindRolesToPrincipal( + ctx, + fmt.Sprintf("serviceAccount:%s@%s.iam.gserviceaccount.com", serviceAccountId, c.wifConfig.Gcp().ProjectId()), + roles, + ) + }, maxRetries, retryDelayMs*time.Millisecond) } func (c *shim) bindRolesToGroup( diff --git a/pkg/gcp/client.go b/pkg/gcp/client.go index 371734e5..dd227e44 100644 --- a/pkg/gcp/client.go +++ b/pkg/gcp/client.go @@ -17,11 +17,6 @@ import ( ) type GcpClient interface { - /* - ListServiceAccounts(ctx context.Context, project string, filter func(s string) bool) ([]string, error) //nolint:lll - DeleteRole(context.Context, *adminpb.DeleteRoleRequest) (*adminpb.Role, error) - ListRoles(context.Context, *adminpb.ListRolesRequest) (*adminpb.ListRolesResponse, error) - */ AttachImpersonator(ctx context.Context, saId, projectId, impersonatorResourceId string) error AttachWorkloadIdentityPool(ctx context.Context, sa *cmv1.WifServiceAccount, poolId, projectId string) error CreateRole(context.Context, *adminpb.CreateRoleRequest) (*adminpb.Role, error) @@ -32,6 +27,7 @@ type GcpClient interface { DeleteWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.Operation, error) //nolint:lll GetProjectIamPolicy(ctx context.Context, projectName string, request *cloudresourcemanager.GetIamPolicyRequest) (*cloudresourcemanager.Policy, error) //nolint:lll GetRole(context.Context, *adminpb.GetRoleRequest) (*adminpb.Role, error) + GetServiceAccount(ctx context.Context, request *adminpb.GetServiceAccountRequest) (*adminpb.ServiceAccount, error) GetWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPool, error) //nolint:lll GetWorkloadIdentityProvider(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPoolProvider, error) //nolint:lll ProjectNumberFromId(ctx context.Context, projectId string) (int64, error) @@ -195,6 +191,13 @@ func (c *gcpClient) GetRole(ctx context.Context, request *adminpb.GetRoleRequest return c.iamClient.GetRole(ctx, request) } +func (c *gcpClient) GetServiceAccount( + ctx context.Context, + request *adminpb.GetServiceAccountRequest, +) (*adminpb.ServiceAccount, error) { + return c.iamClient.GetServiceAccount(ctx, request) +} + //nolint:lll func (c *gcpClient) GetWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPool, error) { return c.oldIamClient.Projects.Locations.WorkloadIdentityPools.Get(resource).Context(ctx).Do() diff --git a/pkg/utils/helper.go b/pkg/utils/helper.go index 77a42ba1..070f975e 100644 --- a/pkg/utils/helper.go +++ b/pkg/utils/helper.go @@ -6,6 +6,7 @@ import ( "net/url" "os" "regexp" + "time" ) // the following regex defines four different patterns: @@ -90,3 +91,15 @@ func HasDuplicates(valSlice []string) (string, bool) { } return "", false } + +func DelayedRetry(f func() error, maxRetries int, delay time.Duration) error { + var err error + for i := 0; i < maxRetries; i++ { + err = f() + if err == nil { + return nil + } + time.Sleep(delay) + } + return fmt.Errorf("Reached max retries. Last error: %s", err.Error()) +}