From 24a04f6ca04ab35ae597c7a7afe86897cdfa5038 Mon Sep 17 00:00:00 2001
From: christian-calabrese
Date: Wed, 20 Nov 2024 15:19:51 +0100
Subject: [PATCH] [CES-517] - Cosmos account migration datafactory resources (#1315)

---
 src/_modules/data_factory_cosmos/data.tf      | 25 +++++++++
 src/_modules/data_factory_cosmos/datasets.tf  | 17 ++++++
 src/_modules/data_factory_cosmos/iam.tf       | 17 ++++++
 .../data_factory_cosmos/linked_services.tf    | 17 ++++++
 src/_modules/data_factory_cosmos/locals.tf    | 14 +++++
 src/_modules/data_factory_cosmos/main.tf      | 21 ++++++++
 src/_modules/data_factory_cosmos/network.tf   | 13 +++++
 src/_modules/data_factory_cosmos/outputs.tf   | 10 ++++
 src/_modules/data_factory_cosmos/pipelines.tf | 35 +++++++++++++
 src/_modules/data_factory_cosmos/variables.tf | 52 +++++++++++++++++++
 .../datasets_containers.tf                    |  4 +-
 .../datasets_tables.tf                        |  4 +-
 .../pipeline_containers.tf                    |  4 +-
 .../pipeline_tables.tf                        |  4 +-
 src/migration/prod/README.md                  |  2 +
 src/migration/prod/italynorth.tf              | 19 +++++++
 src/migration/prod/locals.tf                  | 22 ++++++--
 src/migration/prod/outputs.tf                 |  6 ++-
 18 files changed, 273 insertions(+), 13 deletions(-)
 create mode 100644 src/_modules/data_factory_cosmos/data.tf
 create mode 100644 src/_modules/data_factory_cosmos/datasets.tf
 create mode 100644 src/_modules/data_factory_cosmos/iam.tf
 create mode 100644 src/_modules/data_factory_cosmos/linked_services.tf
 create mode 100644 src/_modules/data_factory_cosmos/locals.tf
 create mode 100644 src/_modules/data_factory_cosmos/main.tf
 create mode 100644 src/_modules/data_factory_cosmos/network.tf
 create mode 100644 src/_modules/data_factory_cosmos/outputs.tf
 create mode 100644 src/_modules/data_factory_cosmos/pipelines.tf
 create mode 100644 src/_modules/data_factory_cosmos/variables.tf

diff --git a/src/_modules/data_factory_cosmos/data.tf b/src/_modules/data_factory_cosmos/data.tf
new file mode 100644
index 000000000..3ebeb1dcf
--- /dev/null
+++ b/src/_modules/data_factory_cosmos/data.tf
@@ -0,0 +1,25 @@
+data "azurerm_subscription" "current" {
+}
+
+data "azurerm_cosmosdb_account" "source" {
+  name                = var.cosmos_accounts.source.name
+  resource_group_name = var.cosmos_accounts.source.resource_group_name
+}
+
+data "azurerm_cosmosdb_account" "target" {
+  name                = var.cosmos_accounts.target.name
+  resource_group_name = var.cosmos_accounts.target.resource_group_name
+}
+
+data "azapi_resource_list" "databases" {
+  type                   = "Microsoft.DocumentDB/databaseAccounts/sqlDatabases@2024-05-15"
+  parent_id              = data.azurerm_cosmosdb_account.source.id
+  response_export_values = ["*"]
+}
+
+data "azapi_resource_list" "containers" {
+  for_each               = local.databases
+  type                   = "Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2024-05-15"
+  parent_id              = each.key
+  response_export_values = ["*"]
+}
\ No newline at end of file
diff --git a/src/_modules/data_factory_cosmos/datasets.tf b/src/_modules/data_factory_cosmos/datasets.tf
new file mode 100644
index 000000000..f3012b110
--- /dev/null
+++ b/src/_modules/data_factory_cosmos/datasets.tf
@@ -0,0 +1,17 @@
+resource "azurerm_data_factory_dataset_cosmosdb_sqlapi" "source_dataset" {
+  for_each            = local.containers_per_database
+  name                = replace(each.value.container.name, "/[$-]/", "_")
+  data_factory_id     = var.data_factory_id
+  folder              = "cosmos/account=${var.cosmos_accounts.source.name}/db=${each.value.container.database_name}/source"
+  linked_service_name = azurerm_data_factory_linked_service_cosmosdb.source_linked_service_cosmos[each.value.container.database_id].name
+  collection_name     = each.value.container.name
+}
+
+resource "azurerm_data_factory_dataset_cosmosdb_sqlapi" "target_dataset" {
+  for_each            = local.containers_per_database
+  name                = replace(each.value.container.name, "/[$-]/", "_")
+  data_factory_id     = var.data_factory_id
+  folder              = "cosmos/account=${var.cosmos_accounts.target.name}/db=${each.value.container.database_name}/target"
+  linked_service_name = azurerm_data_factory_linked_service_cosmosdb.target_linked_service_cosmos[each.value.container.database_id].name
+  collection_name     = each.value.container.name
+}
\ No newline at end of file
diff --git a/src/_modules/data_factory_cosmos/iam.tf b/src/_modules/data_factory_cosmos/iam.tf
new file mode 100644
index 000000000..91cb6fd54
--- /dev/null
+++ b/src/_modules/data_factory_cosmos/iam.tf
@@ -0,0 +1,17 @@
+module "roles" {
+  source       = "github.com/pagopa/dx//infra/modules/azure_role_assignments?ref=main"
+  principal_id = var.data_factory_principal_id
+
+  cosmos = [
+    {
+      account_name        = var.cosmos_accounts.source.name
+      resource_group_name = var.cosmos_accounts.source.resource_group_name
+      role                = "reader"
+    },
+    {
+      account_name        = var.cosmos_accounts.target.name
+      resource_group_name = var.cosmos_accounts.target.resource_group_name
+      role                = "writer"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/src/_modules/data_factory_cosmos/linked_services.tf b/src/_modules/data_factory_cosmos/linked_services.tf
new file mode 100644
index 000000000..d7cc9ad53
--- /dev/null
+++ b/src/_modules/data_factory_cosmos/linked_services.tf
@@ -0,0 +1,17 @@
+resource "azurerm_data_factory_linked_service_cosmosdb" "source_linked_service_cosmos" {
+  for_each         = local.databases
+  name             = "${var.cosmos_accounts.source.name}-${each.value}-cosmos"
+  data_factory_id  = var.data_factory_id
+  account_endpoint = data.azurerm_cosmosdb_account.source.endpoint
+  account_key      = data.azurerm_cosmosdb_account.source.primary_key
+  database         = each.value
+}
+
+resource "azurerm_data_factory_linked_service_cosmosdb" "target_linked_service_cosmos" {
+  for_each         = local.databases
+  name             = "${var.cosmos_accounts.target.name}-${each.value}-cosmos"
+  data_factory_id  = var.data_factory_id
+  account_endpoint = data.azurerm_cosmosdb_account.target.endpoint
+  account_key      = data.azurerm_cosmosdb_account.target.primary_key
+  database         = each.value
+}
\ No newline at end of file
diff --git a/src/_modules/data_factory_cosmos/locals.tf b/src/_modules/data_factory_cosmos/locals.tf
new file mode 100644
index 000000000..569041ed3
--- /dev/null
+++ b/src/_modules/data_factory_cosmos/locals.tf
@@ -0,0 +1,14 @@
+locals {
+  azapi_databases = jsondecode(data.azapi_resource_list.databases.output)
+  databases       = length(var.what_to_migrate.databases) > 0 ? { for database in var.what_to_migrate.databases : "${data.azurerm_cosmosdb_account.source.id}/sqlDatabases/${database}" => database } : { for database in local.azapi_databases.value : database.id => reverse(split("/", database.id))[0] }
+
+  containers = { for id, name in local.databases : name => [for container in jsondecode(data.azapi_resource_list.containers[id].output).value : { name = container.name, database_id = id, database_name = name }] }
+
+  containers_per_database = {
+    for pair in flatten([
+      for database, containers in local.containers :
+      [for container in containers : { database = database, container = container }]
+    ]) :
+    "${pair.database}|${pair.container.name}" => pair
+  }
+}
diff --git a/src/_modules/data_factory_cosmos/main.tf b/src/_modules/data_factory_cosmos/main.tf
new file mode 100644
index 000000000..70b9d5218
--- /dev/null
+++ b/src/_modules/data_factory_cosmos/main.tf
@@ -0,0 +1,21 @@
+terraform {
+  required_providers {
+    azapi = {
+      source  = "Azure/azapi"
+      version = "<= 1.15.0"
+    }
+  }
+}
+
+module "naming_convention" {
+  source = "github.com/pagopa/dx//infra/modules/azure_naming_convention/?ref=main"
+
+  environment = {
+    prefix          = var.environment.prefix
+    env_short       = var.environment.env_short
+    location        = var.environment.location
+    domain          = var.environment.domain
+    app_name        = var.environment.app_name
+    instance_number = var.environment.instance_number
+  }
+}
diff --git a/src/_modules/data_factory_cosmos/network.tf b/src/_modules/data_factory_cosmos/network.tf
new file mode 100644
index 000000000..b31b67dec
--- /dev/null
+++ b/src/_modules/data_factory_cosmos/network.tf
@@ -0,0 +1,13 @@
+resource "azurerm_data_factory_managed_private_endpoint" "cosmos_source" {
+  name               = "${module.naming_convention.prefix}-adf-${var.cosmos_accounts.source.name}-cosmos-${module.naming_convention.suffix}"
+  data_factory_id    = var.data_factory_id
+  target_resource_id = data.azurerm_cosmosdb_account.source.id
+  subresource_name   = "Sql"
+}
+
+resource "azurerm_data_factory_managed_private_endpoint" "cosmos_target" {
+  name               = "${module.naming_convention.prefix}-adf-${var.cosmos_accounts.target.name}-cosmos-${module.naming_convention.suffix}"
+  data_factory_id    = var.data_factory_id
+  target_resource_id = data.azurerm_cosmosdb_account.target.id
+  subresource_name   = "Sql"
+}
diff --git a/src/_modules/data_factory_cosmos/outputs.tf b/src/_modules/data_factory_cosmos/outputs.tf
new file mode 100644
index 000000000..71e9613da
--- /dev/null
+++ b/src/_modules/data_factory_cosmos/outputs.tf
@@ -0,0 +1,10 @@
+output "pipelines" {
+  value = {
+    for pipeline in azurerm_data_factory_pipeline.pipeline
+    : pipeline.name => {
+      id   = pipeline.id
+      name = pipeline.name
+      url  = "https://adf.azure.com/en/authoring/pipeline/${pipeline.name}?factory=${pipeline.data_factory_id}"
+    }
+  }
+}
diff --git a/src/_modules/data_factory_cosmos/pipelines.tf b/src/_modules/data_factory_cosmos/pipelines.tf
new file mode 100644
index 000000000..dd79b9ce4
--- /dev/null
+++ b/src/_modules/data_factory_cosmos/pipelines.tf
@@ -0,0 +1,35 @@
+resource "azurerm_data_factory_pipeline" "pipeline" {
+  for_each        = local.containers_per_database
+  name            = replace(each.value.container.name, "/[$-]/", "_")
+  data_factory_id = var.data_factory_id
+  description     = "Copy data from Cosmos (${var.cosmos_accounts.source.name}) to (${var.cosmos_accounts.target.name})"
+  folder          = "cosmos/account=${var.cosmos_accounts.source.name}/db=${each.value.container.database_name}"
+
+  activities_json = jsonencode([
+    {
+      name = "CopyFromCosmosToCosmos"
+      type = "Copy"
+      inputs = [
+        {
+          referenceName = azurerm_data_factory_dataset_cosmosdb_sqlapi.source_dataset[each.key].name
+          type          = "DatasetReference"
+        }
+      ]
+      outputs = [
+        {
+          referenceName = azurerm_data_factory_dataset_cosmosdb_sqlapi.target_dataset[each.key].name
+          type          = "DatasetReference"
+        }
+      ]
+      typeProperties = {
+        source = {
+          type = "CosmosDbSqlApiSource"
+        }
+        sink = {
+          type          = "CosmosDbSqlApiSink"
+          writeBehavior = var.cosmos_accounts.target.write_behavior
+        }
+      }
+    }
+  ])
+}
\ No newline at end of file
diff --git a/src/_modules/data_factory_cosmos/variables.tf b/src/_modules/data_factory_cosmos/variables.tf
new file mode 100644
index 000000000..6173f0ca0
--- /dev/null
+++ b/src/_modules/data_factory_cosmos/variables.tf
@@ -0,0 +1,52 @@
+variable "environment" {
+  type = object({
+    prefix          = string
+    env_short       = string
+    location        = string
+    domain          = optional(string)
+    app_name        = string
+    instance_number = string
+  })
+
+  description = "Values used to generate resource names and location short names. They are all mandatory except for domain, which should be omitted only when the resource is shared by multiple domains."
+}
+
+variable "data_factory_id" {
+  description = "Id of the Data Factory where resources will be created."
+  type        = string
+}
+
+variable "data_factory_principal_id" {
+  description = "Data Factory principal id to grant access to."
+  type        = string
+}
+
+variable "cosmos_accounts" {
+  type = object({
+    source = object({
+      name                = string
+      resource_group_name = string
+    })
+
+    target = object({
+      name                = string
+      resource_group_name = string
+      write_behavior      = optional(string, "upsert")
+    })
+  })
+
+  description = "Cosmos accounts to migrate. The target account may define a write_behavior, which must be either insert or upsert (default: upsert)."
+
+  validation {
+    condition     = contains(["insert", "upsert"], var.cosmos_accounts.target.write_behavior)
+    error_message = "The write_behavior must be one of the following values: insert or upsert."
+  }
+}
+
+variable "what_to_migrate" {
+  type = object({
+    databases = optional(list(string), [])
+  })
+
+  description = "List of database names of the source Cosmos DB account to migrate. If no database names are provided, all databases are migrated."
+}
diff --git a/src/_modules/data_factory_storage_account/datasets_containers.tf b/src/_modules/data_factory_storage_account/datasets_containers.tf
index 6e4080cc6..b43cf3e75 100644
--- a/src/_modules/data_factory_storage_account/datasets_containers.tf
+++ b/src/_modules/data_factory_storage_account/datasets_containers.tf
@@ -1,9 +1,9 @@
 resource "azurerm_data_factory_custom_dataset" "source_dataset_container" {
   for_each        = toset(local.containers)
-  name            = replace("${module.naming_convention.prefix}-adf-${var.storage_accounts.source.name}-${each.value}-blob-${module.naming_convention.suffix}", "/[$-]/", "_")
+  name            = replace(each.value, "/[$-]/", "_")
   data_factory_id = var.data_factory_id
   type            = "AzureBlob"
-  folder          = "${var.storage_accounts.source.name}/source/blob"
+  folder          = "storage/account=${var.storage_accounts.source.name}/source/blob"
 
   linked_service {
     name = azurerm_data_factory_linked_service_azure_blob_storage.source_linked_service_blob[0].name
diff --git a/src/_modules/data_factory_storage_account/datasets_tables.tf b/src/_modules/data_factory_storage_account/datasets_tables.tf
index 17a0dd8c3..63eeaacc6 100644
--- a/src/_modules/data_factory_storage_account/datasets_tables.tf
+++ b/src/_modules/data_factory_storage_account/datasets_tables.tf
@@ -1,9 +1,9 @@
 resource "azurerm_data_factory_custom_dataset" "source_dataset_table" {
   for_each        = toset(local.tables)
-  name            = replace("${module.naming_convention.prefix}-adf-${var.storage_accounts.source.name}-${each.value}-table-${module.naming_convention.suffix}", "/[$-]/", "_")
+  name            = replace(each.value, "/[$-]/", "_")
   data_factory_id = var.data_factory_id
   type            = "AzureTable"
-  folder          = "${var.storage_accounts.source.name}/source/table"
+  folder          = "storage/account=${var.storage_accounts.source.name}/source/table"
 
   linked_service {
     name = azurerm_data_factory_linked_service_azure_table_storage.source_linked_service_table[0].name
diff --git a/src/_modules/data_factory_storage_account/pipeline_containers.tf b/src/_modules/data_factory_storage_account/pipeline_containers.tf
index 7828c686f..0c0881e00 100644
--- a/src/_modules/data_factory_storage_account/pipeline_containers.tf
+++ b/src/_modules/data_factory_storage_account/pipeline_containers.tf
@@ -1,8 +1,8 @@
 resource "azurerm_data_factory_pipeline" "pipeline_container" {
   for_each        = toset(local.containers)
-  name            = replace("${module.naming_convention.prefix}-adf-${var.storage_accounts.source.name}-${each.value}-blob-${module.naming_convention.suffix}", "/[$-]/", "_")
+  name            = replace(each.value, "/[$-]/", "_")
   data_factory_id = var.data_factory_id
-  folder          = "${var.storage_accounts.source.name}/blob"
+  folder          = "storage/account=${var.storage_accounts.source.name}/blob"
 
   activities_json = jsonencode(
     [
diff --git a/src/_modules/data_factory_storage_account/pipeline_tables.tf b/src/_modules/data_factory_storage_account/pipeline_tables.tf
index 54f086b77..27279e5ab 100644
--- a/src/_modules/data_factory_storage_account/pipeline_tables.tf
+++ b/src/_modules/data_factory_storage_account/pipeline_tables.tf
@@ -1,8 +1,8 @@
 resource "azurerm_data_factory_pipeline" "pipeline_table" {
   for_each        = toset(local.tables)
-  name            = replace("${module.naming_convention.prefix}-adf-${var.storage_accounts.source.name}-${each.value}-table-${module.naming_convention.suffix}", "/[$-]/", "_")
+  name            = replace(each.value, "/[$-]/", "_")
   data_factory_id = var.data_factory_id
-  folder          = "${var.storage_accounts.source.name}/table"
+  folder          = "storage/account=${var.storage_accounts.source.name}/table"
 
   activities_json = jsonencode(
     [
diff --git a/src/migration/prod/README.md b/src/migration/prod/README.md
index 37297372f..d1ec50613 100644
--- a/src/migration/prod/README.md
+++ b/src/migration/prod/README.md
@@ -18,6 +18,7 @@
 
 | Name | Source | Version |
 |------|--------|---------|
+| [migrate\_cosmos\_accounts](#module\_migrate\_cosmos\_accounts) | ../../_modules/data_factory_cosmos | n/a |
 | [migrate\_storage\_accounts](#module\_migrate\_storage\_accounts) | ../../_modules/data_factory_storage_account | n/a |
 
 ## Resources
@@ -37,5 +38,6 @@ No inputs.
 | Name | Description |
 |------|-------------|
 | [data\_factory](#output\_data\_factory) | n/a |
+| [data\_factory\_cosmos\_pipelines](#output\_data\_factory\_cosmos\_pipelines) | n/a |
 | [data\_factory\_st\_pipelines](#output\_data\_factory\_st\_pipelines) | n/a |
 
diff --git a/src/migration/prod/italynorth.tf b/src/migration/prod/italynorth.tf
index 2d777fb85..64141e848 100644
--- a/src/migration/prod/italynorth.tf
+++ b/src/migration/prod/italynorth.tf
@@ -53,4 +53,23 @@ module "migrate_storage_accounts" {
       tables = try(each.value.table.tables, [])
     }
   }
+}
+
+module "migrate_cosmos_accounts" {
+  for_each = { for migration in local.cosmos_accounts : "${migration.source.name}|${migration.target.name}" => migration }
+  source   = "../../_modules/data_factory_cosmos"
+
+  environment = local.environment
+
+  data_factory_id           = azurerm_data_factory.this.id
+  data_factory_principal_id = azurerm_data_factory.this.identity[0].principal_id
+
+  cosmos_accounts = {
+    source = each.value.source
+    target = each.value.target
+  }
+
+  what_to_migrate = {
+    databases = try(each.value.databases, [])
+  }
 }
\ No newline at end of file
diff --git a/src/migration/prod/locals.tf b/src/migration/prod/locals.tf
index 5cd76e9d7..d5ca10284 100644
--- a/src/migration/prod/locals.tf
+++ b/src/migration/prod/locals.tf
@@ -26,16 +26,30 @@ locals {
   storage_accounts = [
     # Copy both containers and tables
     # {
-    #   source = { name = "stdevbiptest1", resource_group_name = "RG-BIP-DEV-TEST" }
-    #   target = { name = "stbipdevtest1", resource_group_name = "dev-fasanorg" }
+    #   source = { name = "", resource_group_name = "" }
+    #   target = { name = "", resource_group_name = "" }
     # },
     #
     # Copy only selected containers and tables
     # {
-    #   source = { name = "stdevbiptest1", resource_group_name = "RG-BIP-DEV-TEST" }
-    #   target = { name = "stbipdevtest1", resource_group_name = "dev-fasanorg" }
+    #   source = { name = "", resource_group_name = "" }
+    #   target = { name = "", resource_group_name = "" }
     #   blob = {enabled = true, containers = ["c1", "c2", "c3"]}
     #   table = {enabled = true, tables = ["t1", "t2", "t3"]}
     # }
   ]
+
+  cosmos_accounts = [
+    # Copy all databases (with write_behavior set to insert)
+    # {
+    #   source = { name = "", resource_group_name = "" }
+    #   target = { name = "", resource_group_name = "", write_behavior = "insert" }
+    # },
+    # Copy only selected databases (with write_behavior defaulting to upsert)
+    # {
+    #   source = { name = "", resource_group_name = "" }
+    #   target = { name = "", resource_group_name = "" }
+    #   databases = ["db1", "db2", "db3"]
+    # }
+  ]
 }
diff --git a/src/migration/prod/outputs.tf b/src/migration/prod/outputs.tf
index cc35cc082..085e9692a 100644
--- a/src/migration/prod/outputs.tf
+++ b/src/migration/prod/outputs.tf
@@ -8,4 +8,8 @@ output "data_factory" {
 
 output "data_factory_st_pipelines" {
   value = { for migration in local.storage_accounts : "${migration.source.name}|${migration.target.name}" => module.migrate_storage_accounts["${migration.source.name}|${migration.target.name}"].pipelines }
-}
\ No newline at end of file
+}
+
+output "data_factory_cosmos_pipelines" {
+  value = { for migration in local.cosmos_accounts : "${migration.source.name}|${migration.target.name}" => module.migrate_cosmos_accounts["${migration.source.name}|${migration.target.name}"].pipelines }
+}
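
Usage sketch (illustrative, not part of the patch): the account names, resource groups, and database names below are hypothetical. Populating local.cosmos_accounts in src/migration/prod/locals.tf is what drives the new module; each list entry becomes one module.migrate_cosmos_accounts["<source name>|<target name>"] instance in italynorth.tf and one entry in the data_factory_cosmos_pipelines output.

locals {
  cosmos_accounts = [
    # Migrate every database of the source account, inserting documents as new
    {
      source = { name = "cosmos-source-example", resource_group_name = "rg-source-example" }
      target = { name = "cosmos-target-example", resource_group_name = "rg-target-example", write_behavior = "insert" }
    },
    # Migrate only two databases, with write_behavior left at its default (upsert)
    {
      source    = { name = "cosmos-source-example-2", resource_group_name = "rg-source-example" }
      target    = { name = "cosmos-target-example-2", resource_group_name = "rg-target-example" }
      databases = ["db-example-1", "db-example-2"]
    }
  ]
}

With these two entries, a plan would create module.migrate_cosmos_accounts["cosmos-source-example|cosmos-target-example"] and module.migrate_cosmos_accounts["cosmos-source-example-2|cosmos-target-example-2"], each with its own linked services, datasets, managed private endpoints, and one copy pipeline per container.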
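For readers tracing the locals chain in data.tf and locals.tf, here is a hypothetical illustration of the intermediate shapes (the account, database, and container names are invented, and the values mirror what the module would compute when azapi lists one database with two containers):

locals {
  # Shape of local.databases: keyed by the database resource id, valued by the database name.
  example_databases = {
    "/subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.DocumentDB/databaseAccounts/cosmos-example/sqlDatabases/db-example" = "db-example"
  }

  # Shape of local.containers_per_database: one entry per "<database name>|<container name>",
  # which is also the for_each key of the datasets and pipelines.
  example_containers_per_database = {
    "db-example|profiles"       = { database = "db-example", container = { name = "profiles", database_id = "...", database_name = "db-example" } }
    "db-example|message-status" = { database = "db-example", container = { name = "message-status", database_id = "...", database_name = "db-example" } }
  }
}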
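One more sketch on naming: datasets.tf and pipelines.tf derive Data Factory resource names from container names with replace(name, "/[$-]/", "_"), and this patch applies the same simplification to the storage-account datasets and pipelines, dropping the long prefix/suffix names in favour of the bare container or table name. The container name below is hypothetical.

locals {
  # The "/.../" form makes replace() treat the pattern as a regex; the character
  # class [$-] matches a literal "$" or "-", each replaced with "_".
  example_adf_name = replace("message-status", "/[$-]/", "_") # => "message_status"
}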