From 351773d346711b1c7f849fcedadfc7d184add5b5 Mon Sep 17 00:00:00 2001 From: Renan Campos Date: Fri, 13 Sep 2024 14:36:41 -0400 Subject: [PATCH] fix: tolerate required delay between service account creation and configuration It was discovered through testing that service accounts created on GCP need a duration of time between creation and being referenced, otherwise a BadRequest error occurs. A delayed retry logic is introduced to ensure the service account is available before making additional configuration calls. --- cmd/ocm/gcp/gcp_client_shim.go | 27 +++++++++++++++++++++------ pkg/gcp/client.go | 13 ++++++++----- pkg/utils/helper.go | 13 +++++++++++++ 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/cmd/ocm/gcp/gcp_client_shim.go b/cmd/ocm/gcp/gcp_client_shim.go index f5454297..dc163db4 100644 --- a/cmd/ocm/gcp/gcp_client_shim.go +++ b/cmd/ocm/gcp/gcp_client_shim.go @@ -6,6 +6,7 @@ import ( "log" "reflect" "strings" + "time" "cloud.google.com/go/iam/admin/apiv1/adminpb" "github.com/googleapis/gax-go/v2/apierror" @@ -17,6 +18,7 @@ import ( "google.golang.org/grpc/codes" "github.com/openshift-online/ocm-cli/pkg/gcp" + "github.com/openshift-online/ocm-cli/pkg/utils" ) type GcpClientWifConfigShim interface { @@ -142,7 +144,7 @@ func (c *shim) CreateServiceAccounts( log *log.Logger, ) error { for _, serviceAccount := range c.wifConfig.Gcp().ServiceAccounts() { - if err := c.createServiceAccount(ctx, log, serviceAccount, c.wifConfig); err != nil { + if err := c.createServiceAccount(ctx, log, serviceAccount); err != nil { return err } if err := c.createOrUpdateRoles(ctx, log, serviceAccount.Roles()); err != nil { @@ -177,11 +179,10 @@ func (c *shim) createServiceAccount( ctx context.Context, log *log.Logger, serviceAccount *cmv1.WifServiceAccount, - wifConfig *cmv1.WifConfig, ) error { serviceAccountId := serviceAccount.ServiceAccountId() - serviceAccountName := wifConfig.DisplayName() + "-" + serviceAccountId - serviceAccountDesc := poolDescription + " for WIF config " + wifConfig.DisplayName() + serviceAccountName := c.wifConfig.DisplayName() + "-" + serviceAccountId + serviceAccountDesc := poolDescription + " for WIF config " + c.wifConfig.DisplayName() request := &adminpb.CreateServiceAccountRequest{ Name: fmt.Sprintf("projects/%s", c.wifConfig.Gcp().ProjectId()), @@ -191,7 +192,7 @@ func (c *shim) createServiceAccount( Description: serviceAccountDesc, }, } - _, err := c.gcpClient.CreateServiceAccount(ctx, request) + createdServiceAccount, err := c.gcpClient.CreateServiceAccount(ctx, request) if err != nil { pApiError, ok := err.(*apierror.APIError) if ok { @@ -204,7 +205,21 @@ func (c *shim) createServiceAccount( return errors.Wrap(err, "Failed to create IAM service account") } log.Printf("IAM service account %s created", serviceAccountId) - return nil + + // It was found that there is a window of time between when a service + // account creation call is made that the service account is not + // available in API calls. This call waits until the service account is + // available before contiuing. + getRequest := &adminpb.GetServiceAccountRequest{ + Name: fmt.Sprintf("projects/%s/serviceAccounts/%s", + c.wifConfig.Gcp().ProjectId(), + createdServiceAccount.UniqueId, + ), + } + return utils.DelayedRetry(func() error { + _, err := c.gcpClient.GetServiceAccount(ctx, getRequest) + return err + }, 10, 500*time.Millisecond) } func (c *shim) createOrUpdateRoles(ctx context.Context, log *log.Logger, roles []*cmv1.WifRole) error { diff --git a/pkg/gcp/client.go b/pkg/gcp/client.go index 371734e5..dd227e44 100644 --- a/pkg/gcp/client.go +++ b/pkg/gcp/client.go @@ -17,11 +17,6 @@ import ( ) type GcpClient interface { - /* - ListServiceAccounts(ctx context.Context, project string, filter func(s string) bool) ([]string, error) //nolint:lll - DeleteRole(context.Context, *adminpb.DeleteRoleRequest) (*adminpb.Role, error) - ListRoles(context.Context, *adminpb.ListRolesRequest) (*adminpb.ListRolesResponse, error) - */ AttachImpersonator(ctx context.Context, saId, projectId, impersonatorResourceId string) error AttachWorkloadIdentityPool(ctx context.Context, sa *cmv1.WifServiceAccount, poolId, projectId string) error CreateRole(context.Context, *adminpb.CreateRoleRequest) (*adminpb.Role, error) @@ -32,6 +27,7 @@ type GcpClient interface { DeleteWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.Operation, error) //nolint:lll GetProjectIamPolicy(ctx context.Context, projectName string, request *cloudresourcemanager.GetIamPolicyRequest) (*cloudresourcemanager.Policy, error) //nolint:lll GetRole(context.Context, *adminpb.GetRoleRequest) (*adminpb.Role, error) + GetServiceAccount(ctx context.Context, request *adminpb.GetServiceAccountRequest) (*adminpb.ServiceAccount, error) GetWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPool, error) //nolint:lll GetWorkloadIdentityProvider(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPoolProvider, error) //nolint:lll ProjectNumberFromId(ctx context.Context, projectId string) (int64, error) @@ -195,6 +191,13 @@ func (c *gcpClient) GetRole(ctx context.Context, request *adminpb.GetRoleRequest return c.iamClient.GetRole(ctx, request) } +func (c *gcpClient) GetServiceAccount( + ctx context.Context, + request *adminpb.GetServiceAccountRequest, +) (*adminpb.ServiceAccount, error) { + return c.iamClient.GetServiceAccount(ctx, request) +} + //nolint:lll func (c *gcpClient) GetWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPool, error) { return c.oldIamClient.Projects.Locations.WorkloadIdentityPools.Get(resource).Context(ctx).Do() diff --git a/pkg/utils/helper.go b/pkg/utils/helper.go index 77a42ba1..070f975e 100644 --- a/pkg/utils/helper.go +++ b/pkg/utils/helper.go @@ -6,6 +6,7 @@ import ( "net/url" "os" "regexp" + "time" ) // the following regex defines four different patterns: @@ -90,3 +91,15 @@ func HasDuplicates(valSlice []string) (string, bool) { } return "", false } + +func DelayedRetry(f func() error, maxRetries int, delay time.Duration) error { + var err error + for i := 0; i < maxRetries; i++ { + err = f() + if err == nil { + return nil + } + time.Sleep(delay) + } + return fmt.Errorf("Reached max retries. Last error: %s", err.Error()) +}