diff --git a/Makefile b/Makefile index 2668bdcf10f..e8308a06ad7 100644 --- a/Makefile +++ b/Makefile @@ -97,6 +97,11 @@ test-cleanup-azure-resources: az group delete --name $$rgname --no-wait --yes; \ done + for rgname in `az group list --query "[*].[name]" -o table | grep 'rg-prime$$' `; do \ + echo "$$rgname will be deleted"; \ + az group delete --name $$rgname --no-wait --yes; \ + done + # Build the docker image docker-build: docker build . -t ${IMG} ${ARGS} diff --git a/controllers/azuresql_combined_test.go b/controllers/azuresql_combined_test.go index 79643f945f6..f957de77125 100644 --- a/controllers/azuresql_combined_test.go +++ b/controllers/azuresql_combined_test.go @@ -7,6 +7,7 @@ package controllers import ( "context" + "fmt" "strings" "testing" @@ -112,7 +113,7 @@ func TestAzureSqlServerCombinedHappyPath(t *testing.T) { EnsureInstance(ctx, t, tc, sqlDatabaseInstance1) }) - t.Run("set up second database in primary server using sku with maxsizebytes", func(t *testing.T) { + t.Run("set up second database in primary server using sku with maxsizebytes, then update it to use a different SKU", func(t *testing.T) { t.Parallel() maxSize := resource.MustParse("500Mi") @@ -135,6 +136,48 @@ func TestAzureSqlServerCombinedHappyPath(t *testing.T) { } EnsureInstance(ctx, t, tc, sqlDatabaseInstance2) + + namespacedName := types.NamespacedName{Name: sqlDatabaseName2, Namespace: "default"} + err = tc.k8sClient.Get(ctx, namespacedName, sqlDatabaseInstance2) + assert.Equal(nil, err, "get sql database in k8s") + + originalHash := sqlDatabaseInstance2.Status.SpecHash + + sqlDatabaseInstance2.Spec.Sku = &v1beta1.SqlDatabaseSku{ + Name: "Basic", + Tier: "Basic", + } + maxSizeMb := 100 + maxSize = resource.MustParse(fmt.Sprintf("%dMi", maxSizeMb)) + sqlDatabaseInstance2.Spec.MaxSize = &maxSize + + err = tc.k8sClient.Update(ctx, sqlDatabaseInstance2) + assert.Equal(nil, err, "updating sql database in k8s") + + assert.Eventually(func() bool { + db := &v1beta1.AzureSqlDatabase{} + err = tc.k8sClient.Get(ctx, namespacedName, db) + assert.Equal(nil, err, "err getting DB from k8s") + return originalHash != db.Status.SpecHash + }, tc.timeout, tc.retry, "wait for sql database to be updated in k8s") + + assert.Eventually(func() bool { + db, err := tc.sqlDbManager.GetDB(ctx, rgName, sqlServerName, sqlDatabaseName2) + assert.Equal(nil, err, "err getting DB fromAzure") + return db.Sku.Name != nil && *db.Sku.Name == "Basic" + }, tc.timeout, tc.retry, "wait for sql database Sku.Name to be updated in azure") + + assert.Eventually(func() bool { + db, err := tc.sqlDbManager.GetDB(ctx, rgName, sqlServerName, sqlDatabaseName2) + assert.Equal(nil, err, "err getting DB fromAzure") + return db.Sku.Tier != nil && *db.Sku.Tier == "Basic" + }, tc.timeout, tc.retry, "wait for sql database Sku.Tier to be updated in azure") + + assert.Eventually(func() bool { + db, err := tc.sqlDbManager.GetDB(ctx, rgName, sqlServerName, sqlDatabaseName2) + assert.Equal(nil, err, "err getting DB fromAzure") + return db.MaxSizeBytes != nil && *db.MaxSizeBytes == int64(maxSizeMb)*int64(1024)*int64(1024) + }, tc.timeout, tc.retry, "wait for sql database MaxSizeBytes to be updated in azure") }) // Create FirewallRules --------------------------------------- diff --git a/controllers/suite_test.go b/controllers/suite_test.go index acafff6f731..6191c07087e 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -6,6 +6,7 @@ package controllers import ( "context" "fmt" + resourcemanagersqldb "github.com/Azure/azure-service-operator/pkg/resourcemanager/azuresql/azuresqldb" "log" "net/http" "os" @@ -24,7 +25,6 @@ import ( resourcemanagerapimgmt "github.com/Azure/azure-service-operator/pkg/resourcemanager/apim/apimgmt" resourcemanagerappinsights "github.com/Azure/azure-service-operator/pkg/resourcemanager/appinsights" resourcemanagersqlaction "github.com/Azure/azure-service-operator/pkg/resourcemanager/azuresql/azuresqlaction" - resourcemanagersqldb "github.com/Azure/azure-service-operator/pkg/resourcemanager/azuresql/azuresqldb" resourcemanagersqlfailovergroup "github.com/Azure/azure-service-operator/pkg/resourcemanager/azuresql/azuresqlfailovergroup" resourcemanagersqlfirewallrule "github.com/Azure/azure-service-operator/pkg/resourcemanager/azuresql/azuresqlfirewallrule" resourcemanagersqlmanageduser "github.com/Azure/azure-service-operator/pkg/resourcemanager/azuresql/azuresqlmanageduser" @@ -151,6 +151,7 @@ func setup() error { keyVaultManager := resourcemanagerkeyvaults.NewAzureKeyVaultManager(k8sManager.GetScheme()) eventhubClient := resourcemanagereventhub.NewEventhubClient(secretClient, scheme.Scheme) consumerGroupClient := resourcemanagereventhub.NewConsumerGroupClient() + azureSqlDatabaseManager := resourcemanagersqldb.NewAzureSqlDbManager() timeout = time.Second * 780 @@ -398,7 +399,7 @@ func setup() error { err = (&AzureSqlDatabaseReconciler{ Reconciler: &AsyncReconciler{ Client: k8sManager.GetClient(), - AzureClient: resourcemanagersqldb.NewAzureSqlDbManager(), + AzureClient: azureSqlDatabaseManager, Telemetry: telemetry.InitializeTelemetryDefault( "AzureSqlDb", ctrl.Log.WithName("controllers").WithName("AzureSqlDb"), @@ -856,6 +857,7 @@ func setup() error { eventhubClient: eventhubClient, resourceGroupManager: resourceGroupManager, keyVaultManager: keyVaultManager, + sqlDbManager: azureSqlDatabaseManager, timeout: timeout, timeoutFast: time.Minute * 3, retry: time.Second * 3, diff --git a/pkg/errhelp/errors.go b/pkg/errhelp/errors.go index b369e6d0ab5..63c00ddca26 100644 --- a/pkg/errhelp/errors.go +++ b/pkg/errhelp/errors.go @@ -58,6 +58,7 @@ const ( NameNotAvailable = "NameNotAvailable" PublicIPIdleTimeoutIsOutOfRange = "PublicIPIdleTimeoutIsOutOfRange" InvalidRequestContent = "InvalidRequestContent" + InvalidMaxSizeTierCombination = "InvalidMaxSizeTierCombination" InternalServerError = "InternalServerError" NetworkAclsValidationFailure = "NetworkAclsValidationFailure" SubnetHasServiceEndpointWithInvalidServiceName = "SubnetHasServiceEndpointWithInvalidServiceName" diff --git a/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb.go b/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb.go index 91d59024658..39625e0dd4c 100644 --- a/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb.go +++ b/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb.go @@ -18,6 +18,9 @@ import ( type AzureSqlDbManager struct { } +// Ensure we implement the interface we expect +var _ SqlDbManager = &AzureSqlDbManager{} + func NewAzureSqlDbManager() *AzureSqlDbManager { return &AzureSqlDbManager{} } @@ -52,30 +55,30 @@ func (_ *AzureSqlDbManager) GetDB(ctx context.Context, resourceGroupName string, } // DeleteDB deletes a DB -func (sdk *AzureSqlDbManager) DeleteDB(ctx context.Context, resourceGroupName string, serverName string, databaseName string) (result *http.Response, err error) { - // TODO: Probably shouldn't return a response at all in the err case here (all through this function) - result = &http.Response{ - StatusCode: 200, - } +func (sdk *AzureSqlDbManager) DeleteDB( + ctx context.Context, + resourceGroupName string, + serverName string, + databaseName string) (future *sql.DatabasesDeleteFuture, err error) { // check to see if the server exists, if it doesn't then short-circuit server, err := sdk.GetServer(ctx, resourceGroupName, serverName) if err != nil || *server.State != "Ready" { - return result, nil + return nil, nil } // check to see if the db exists, if it doesn't then short-circuit _, err = sdk.GetDB(ctx, resourceGroupName, serverName, databaseName) if err != nil { - return result, nil + return nil, nil } dbClient, err := azuresqlshared.GetGoDbClient() if err != nil { - return result, err + return nil, err } - future, err := dbClient.Delete( + result, err := dbClient.Delete( ctx, resourceGroupName, serverName, @@ -83,23 +86,24 @@ func (sdk *AzureSqlDbManager) DeleteDB(ctx context.Context, resourceGroupName st ) if err != nil { - return result, err + return nil, err } - return future.Response(), err + return &result, err } // CreateOrUpdateDB creates or updates a DB in Azure -func (_ *AzureSqlDbManager) CreateOrUpdateDB(ctx context.Context, resourceGroupName string, location string, serverName string, tags map[string]*string, properties azuresqlshared.SQLDatabaseProperties) (*http.Response, error) { - - // TODO: Probably shouldn't return a response at all in the err case here (all through this function) - result := &http.Response{ - StatusCode: 0, - } +func (_ *AzureSqlDbManager) CreateOrUpdateDB( + ctx context.Context, + resourceGroupName string, + location string, + serverName string, + tags map[string]*string, + properties azuresqlshared.SQLDatabaseProperties) (string, *sql.Database, error) { dbClient, err := azuresqlshared.GetGoDbClient() if err != nil { - return result, err + return "", nil, err } dbProp := azuresqlshared.SQLDatabasePropertiesToDatabase(properties) @@ -118,10 +122,12 @@ func (_ *AzureSqlDbManager) CreateOrUpdateDB(ctx context.Context, resourceGroupN }) if err != nil { - return result, err + return "", nil, err } - return future.Response(), err + result, err := future.Result(dbClient) + + return future.PollingURL(), &result, err } // AddLongTermRetention enables / disables long term retention diff --git a/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb_manager.go b/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb_manager.go index bd9e660b0fa..6aa078072bd 100644 --- a/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb_manager.go +++ b/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb_manager.go @@ -11,7 +11,6 @@ import ( azuresqlshared "github.com/Azure/azure-service-operator/pkg/resourcemanager/azuresql/azuresqlshared" "github.com/Azure/azure-service-operator/pkg/resourcemanager" - "github.com/Azure/go-autorest/autorest" ) // SqlDbManager is the client for the resource manager for SQL databases @@ -21,12 +20,12 @@ type SqlDbManager interface { location string, serverName string, tags map[string]*string, - properties azuresqlshared.SQLDatabaseProperties) (*http.Response, error) + properties azuresqlshared.SQLDatabaseProperties) (pollingUrl string, db *sql.Database, err error) DeleteDB(ctx context.Context, resourceGroupName string, serverName string, - databaseName string) (result autorest.Response, err error) + databaseName string) (future *sql.DatabasesDeleteFuture, err error) GetDB(ctx context.Context, resourceGroupName string, diff --git a/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb_reconcile.go b/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb_reconcile.go index 174165466bc..4c5a8c7da4f 100644 --- a/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb_reconcile.go +++ b/pkg/resourcemanager/azuresql/azuresqldb/azuresqldb_reconcile.go @@ -6,6 +6,7 @@ package azuresqldb import ( "context" "fmt" + "github.com/Azure/azure-service-operator/pkg/resourcemanager/pollclient" "net/http" "strings" @@ -34,10 +35,6 @@ func (db *AzureSqlDbManager) Ensure(ctx context.Context, obj runtime.Object, opt return true, nil } - if instance.Status.SpecHash == "" { - instance.Status.SpecHash = hash - } - location := instance.Spec.Location groupName := instance.Spec.ResourceGroup server := instance.Spec.Server @@ -69,55 +66,96 @@ func (db *AzureSqlDbManager) Ensure(ctx context.Context, obj runtime.Object, opt instance.Status.Provisioning = true instance.Status.Provisioned = false - dbGet, err := db.GetDB(ctx, groupName, server, dbName) - if err == nil { - - // optionally set the long term retention policy - _, err = db.AddLongTermRetention(ctx, - groupName, - server, - dbName, - instance.Spec.WeeklyRetention, - instance.Spec.MonthlyRetention, - instance.Spec.YearlyRetention, - instance.Spec.WeekOfYear) + // Before we attempt to issue a new update, check if there is a previously ongoing update + if instance.Status.PollingURL != "" { + pClient := pollclient.NewPollClient() + res, err := pClient.Get(ctx, instance.Status.PollingURL) if err != nil { - failureErrors := []string{ - errhelp.LongTermRetentionPolicyInvalid, - } - instance.Status.Message = fmt.Sprintf("Azure DB long-term retention policy error: %s", errhelp.StripErrorIDs(err)) - azerr := errhelp.NewAzureErrorAzureError(err) - if helpers.ContainsString(failureErrors, azerr.Type) { - instance.Status.Provisioning = false - instance.Status.Provisioned = false + return false, err + } + + if res.Status == pollclient.LongRunningOperationPollStatusFailed { + instance.Status.Message = res.Error.Error() + + // TODO: Unfortunate that this is duplicated below... this stems from a race condition where + // TODO: depending on when the LRO status is updated, we either notice it below or right here + if res.Error.Code == errhelp.InvalidMaxSizeTierCombination { instance.Status.FailedProvisioning = true + instance.Status.Provisioning = false return true, nil - } else { - return false, err } + + // There can be intermediate errors and various other things that cause requests to fail, so we need to try again + return false, nil } - // db exists, we have successfully provisioned everything - instance.Status.Provisioning = false - instance.Status.Provisioned = true - instance.Status.FailedProvisioning = false - instance.Status.State = string(dbGet.Status) - instance.Status.Message = resourcemanager.SuccessMsg - instance.Status.ResourceId = *dbGet.ID - return true, nil - } - instance.Status.Message = fmt.Sprintf("AzureSqlDb Get error %s", err.Error()) - azerr := errhelp.NewAzureErrorAzureError(err) - requeuErrors := []string{ - errhelp.ParentNotFoundErrorCode, - errhelp.ResourceGroupNotFoundErrorCode, + if res.Status == "InProgress" { + // We're waiting for an async op... keep waiting + return false, nil + } + + // Previous operation was a success + if res.Status == pollclient.LongRunningOperationPollStatusSucceeded { + instance.Status.Provisioning = false + instance.Status.SpecHash = hash + instance.Status.PollingURL = "" + } } - if helpers.ContainsString(requeuErrors, azerr.Type) { - instance.Status.Provisioning = false - return false, nil + + // No point in checking the status of the DB if our spec hashes don't match, + // just seek to new target and check later + if hash == instance.Status.SpecHash { + dbGet, err := db.GetDB(ctx, groupName, server, dbName) + if err == nil { + // optionally set the long term retention policy + _, err = db.AddLongTermRetention(ctx, + groupName, + server, + dbName, + instance.Spec.WeeklyRetention, + instance.Spec.MonthlyRetention, + instance.Spec.YearlyRetention, + instance.Spec.WeekOfYear) + if err != nil { + failureErrors := []string{ + errhelp.LongTermRetentionPolicyInvalid, + } + instance.Status.Message = fmt.Sprintf("Azure DB long-term retention policy error: %s", errhelp.StripErrorIDs(err)) + azerr := errhelp.NewAzureErrorAzureError(err) + if helpers.ContainsString(failureErrors, azerr.Type) { + instance.Status.Provisioning = false + instance.Status.Provisioned = false + instance.Status.FailedProvisioning = true + return true, nil + } else { + return false, err + } + } + + // db exists, we have successfully provisioned everything + instance.Status.Provisioning = false + instance.Status.Provisioned = true + instance.Status.FailedProvisioning = false + instance.Status.State = string(dbGet.Status) + instance.Status.Message = resourcemanager.SuccessMsg + instance.Status.ResourceId = *dbGet.ID + return true, nil + } else { + instance.Status.Message = fmt.Sprintf("AzureSqlDb Get error %s", err.Error()) + azerr := errhelp.NewAzureErrorAzureError(err) + requeuErrors := []string{ + errhelp.ParentNotFoundErrorCode, + errhelp.ResourceGroupNotFoundErrorCode, + } + if helpers.ContainsString(requeuErrors, azerr.Type) { + instance.Status.Provisioning = false + return false, nil + } + } } - resp, err := db.CreateOrUpdateDB(ctx, groupName, location, server, labels, azureSQLDatabaseProperties) + pollingUrl, _, err := db.CreateOrUpdateDB(ctx, groupName, location, server, labels, azureSQLDatabaseProperties) + if err != nil { instance.Status.Message = err.Error() azerr := errhelp.NewAzureErrorAzureError(err) @@ -125,10 +163,18 @@ func (db *AzureSqlDbManager) Ensure(ctx context.Context, obj runtime.Object, opt // handle errors // resource request has been sent to ARM if azerr.Type == errhelp.AsyncOpIncompleteError { + instance.Status.Message = "Resource request successfully submitted to Azure" instance.Status.Provisioning = true + instance.Status.PollingURL = pollingUrl return false, nil } + if azerr.Type == errhelp.InvalidMaxSizeTierCombination { + instance.Status.FailedProvisioning = true + instance.Status.Provisioning = false + return true, nil + } + // the errors that can arise during reconcilliation where we simply requeue catch := []string{ errhelp.ResourceGroupNotFoundErrorCode, @@ -146,19 +192,13 @@ func (db *AzureSqlDbManager) Ensure(ctx context.Context, obj runtime.Object, opt return false, nil } - // assertion that a 404 error implies that the Azure SQL server hasn't been provisioned yet - if resp != nil && resp.StatusCode == 404 { - instance.Status.Message = fmt.Sprintf("Waiting for SQL Server %s to provision", server) - instance.Status.Provisioning = false - return false, nil - } - switch azerr.Code { case http.StatusBadRequest: instance.Status.FailedProvisioning = true instance.Status.Provisioning = false return true, nil case http.StatusNotFound: + instance.Status.Message = fmt.Sprintf("Waiting for SQL DB %s to provision", dbName) instance.Status.Provisioning = false return false, nil } diff --git a/pkg/resourcemanager/azuresql/azuresqlserver/azuresqlserver_reconcile.go b/pkg/resourcemanager/azuresql/azuresqlserver/azuresqlserver_reconcile.go index c5f62d1d358..454e1f24a87 100644 --- a/pkg/resourcemanager/azuresql/azuresqlserver/azuresqlserver_reconcile.go +++ b/pkg/resourcemanager/azuresql/azuresqlserver/azuresqlserver_reconcile.go @@ -134,7 +134,7 @@ func (s *AzureSqlServerManager) Ensure(ctx context.Context, obj runtime.Object, return false, err } - if res.Status == "Failed" { + if res.Status == pollclient.LongRunningOperationPollStatusFailed { instance.Status.Message = res.Error.Error() instance.Status.Provisioning = false return true, nil diff --git a/pkg/resourcemanager/cosmosdbs/cosmosdb_reconcile.go b/pkg/resourcemanager/cosmosdbs/cosmosdb_reconcile.go index 3cc5cc77e3b..8675e305df8 100644 --- a/pkg/resourcemanager/cosmosdbs/cosmosdb_reconcile.go +++ b/pkg/resourcemanager/cosmosdbs/cosmosdb_reconcile.go @@ -58,7 +58,7 @@ func (m *AzureCosmosDBManager) Ensure(ctx context.Context, obj runtime.Object, o instance.Status.PollingURL = "" - if pollResponse.Status == "Failed" { + if pollResponse.Status == pollclient.LongRunningOperationPollStatusFailed { instance.Status.Provisioning = false instance.Status.Message = pollResponse.Error.Error() return true, nil diff --git a/pkg/resourcemanager/mysql/server/reconcile.go b/pkg/resourcemanager/mysql/server/reconcile.go index 3ec84275e01..627b9232986 100644 --- a/pkg/resourcemanager/mysql/server/reconcile.go +++ b/pkg/resourcemanager/mysql/server/reconcile.go @@ -89,7 +89,7 @@ func (m *MySQLServerClient) Ensure(ctx context.Context, obj runtime.Object, opts return false, err } - if res.Status == "Failed" { + if res.Status == pollclient.LongRunningOperationPollStatusFailed { instance.Status.Provisioning = false instance.Status.RequestedAt = nil ignore := []string{ diff --git a/pkg/resourcemanager/pollclient/pollclient.go b/pkg/resourcemanager/pollclient/pollclient.go index 9aa7927f909..a4e337b1718 100644 --- a/pkg/resourcemanager/pollclient/pollclient.go +++ b/pkg/resourcemanager/pollclient/pollclient.go @@ -14,6 +14,12 @@ import ( "github.com/Azure/go-autorest/tracing" ) +const ( + LongRunningOperationPollStatusFailed = "Failed" + LongRunningOperationPollStatusSucceeded = "Succeeded" + LongRunningOperationPollStatusCancelled = "Cancelled" +) + const fqdn = "github.com/Azure/azure-service-operator/pollingclient" // BaseClient was modeled off some of the other Baseclients in the go sdk and contains an autorest client @@ -51,8 +57,8 @@ func NewWithBaseURI(baseURI string, subscriptionID string) BaseClient { } } -// PollRespons models the expected response from the poll url -type PollRespons struct { +// PollResponse models the expected response from the poll url +type PollResponse struct { autorest.Response `json:"-"` Name string `json:"name,omitempty"` Status string `json:"status,omitempty"` @@ -60,7 +66,7 @@ type PollRespons struct { } // Get takes a context and a polling url and performs a Get request on the url -func (client PollClient) Get(ctx context.Context, pollURL string) (result PollRespons, err error) { +func (client PollClient) Get(ctx context.Context, pollURL string) (result PollResponse, err error) { if tracing.IsEnabled() { ctx = tracing.StartSpan(ctx, fqdn+"/PollClient.Get") defer func() { @@ -109,7 +115,7 @@ func (client PollClient) GetSender(req *http.Request) (*http.Response, error) { // GetResponder handles the response to the Get request. The method always // closes the http.Response Body. -func (client PollClient) GetResponder(resp *http.Response) (result PollRespons, err error) { +func (client PollClient) GetResponder(resp *http.Response) (result PollResponse, err error) { err = autorest.Respond( resp, client.ByInspecting(), diff --git a/pkg/resourcemanager/psql/server/server_reconcile.go b/pkg/resourcemanager/psql/server/server_reconcile.go index dd9656d807c..6b04d1bb2fa 100644 --- a/pkg/resourcemanager/psql/server/server_reconcile.go +++ b/pkg/resourcemanager/psql/server/server_reconcile.go @@ -103,7 +103,7 @@ func (p *PSQLServerClient) Ensure(ctx context.Context, obj runtime.Object, opts return false, err } - if res.Status == "Failed" { + if res.Status == pollclient.LongRunningOperationPollStatusFailed { instance.Status.Provisioning = false instance.Status.RequestedAt = nil ignore := []string{ diff --git a/pkg/resourcemanager/storages/storageaccount/storageaccount_reconcile.go b/pkg/resourcemanager/storages/storageaccount/storageaccount_reconcile.go index ca49ebaca28..941e62fa2f4 100644 --- a/pkg/resourcemanager/storages/storageaccount/storageaccount_reconcile.go +++ b/pkg/resourcemanager/storages/storageaccount/storageaccount_reconcile.go @@ -65,7 +65,7 @@ func (sa *azureStorageManager) Ensure(ctx context.Context, obj runtime.Object, o return false, err } - if res.Status == "Failed" { + if res.Status == pollclient.LongRunningOperationPollStatusFailed { instance.Status.Message = res.Error.Error() instance.Status.Provisioning = false return true, nil