From 8eca0dc7c23cd96139db9c80bc70ba27ca3f69a4 Mon Sep 17 00:00:00 2001 From: Webster Mudge Date: Thu, 12 Sep 2024 11:40:51 -0400 Subject: [PATCH 1/7] Initial commit for external data locations for Azure Signed-off-by: Webster Mudge --- roles/common/defaults/main.yml | 6 ++ roles/data/defaults/main.yml | 9 ++ roles/data/tasks/initialize_azure.yml | 76 ++++++++++++++++ roles/data/tasks/setup_azure.yml | 86 +++++++++++++++++++ .../data/tasks/setup_azure_data_locations.yml | 44 ++++++++++ roles/data/tasks/teardown.yml | 10 +-- ...down_aws_policies.yml => teardown_aws.yml} | 22 ++++- roles/data/tasks/teardown_aws_roles.yml | 33 ------- roles/data/tasks/teardown_azure.yml | 38 ++++++++ roles/data/tasks/validate.yml | 7 +- roles/data/tasks/validate_aws.yml | 13 +++ roles/data/tasks/validate_azure.yml | 28 ++++++ roles/platform/defaults/main.yml | 10 +-- 13 files changed, 332 insertions(+), 50 deletions(-) create mode 100644 roles/data/tasks/initialize_azure.yml create mode 100644 roles/data/tasks/setup_azure.yml create mode 100644 roles/data/tasks/setup_azure_data_locations.yml rename roles/data/tasks/{teardown_aws_policies.yml => teardown_aws.yml} (78%) delete mode 100644 roles/data/tasks/teardown_aws_roles.yml create mode 100644 roles/data/tasks/teardown_azure.yml create mode 100644 roles/data/tasks/validate_aws.yml create mode 100644 roles/data/tasks/validate_azure.yml diff --git a/roles/common/defaults/main.yml b/roles/common/defaults/main.yml index 759ef3a8..46aa656a 100644 --- a/roles/common/defaults/main.yml +++ b/roles/common/defaults/main.yml @@ -54,6 +54,7 @@ common__group_suffix: "{{ globals.labels.group | default('gr common__admin_suffix: "{{ globals.labels.admin | default('admin') }}" common__user_suffix: "{{ globals.labels.user | default('user') }}" common__ngw_suffix: "{{ globals.labels.nat_gateway | default('ngw') }}" +common__assignment_suffix: "{{ globals.labels.assignment | default('assign') }}" common__unique_storage_name_suffix: "{{ 
globals.storage.name | default((common__region + common__aws_profile) if 'aws' in common__infra_type else common__region) }}" @@ -137,6 +138,11 @@ common__azure_netapp_pool_name: "{{ infra.azure.netapp.pool.name | def common__azure_netapp_vol_name: "{{ infra.azure.netapp.volume.name | default([common__namespace, common__azure_netapp_suffix, common__azure_volume_suffix] | join('-')) }}" common__azure_netapp_nfs_version: "{{ infra.azure.netapp.nfs.version | default('3') }}" +common__azure_contributor_name_suffix: "{{ env.azure.role.name_suffix.contributor | default('contributor') }}" +common__azure_identity_suffix: "{{ env.azure.role.label.identity | default(common__identity_suffix) }}" +common__azure_datalake_admin_suffix: "{{ env.azure.role.label.datalake_admin | default(common__datalake_admin_suffix) }}" +common__azure_datalakeadmin_identity_name: "{{ env.azure.role.name.datalake_admin | default([common__namespace, common__azure_datalake_admin_suffix, common__azure_identity_suffix] | join('-')) }}" + # GCP Infra common__gcp_project: "{{ infra.gcp.project | default('gcp-se') }}" common__gcp_region: "{{ infra.gcp.region | default('europe-west1') }}" diff --git a/roles/data/defaults/main.yml b/roles/data/defaults/main.yml index f3be63bf..83f24bc4 100644 --- a/roles/data/defaults/main.yml +++ b/roles/data/defaults/main.yml @@ -26,9 +26,11 @@ data__storage: "{{ data.storage | default([]) }}" data__external_data_suffix: "{{ common__external_data_suffix }}" data__policy_suffix: "{{ data.policy.suffix | default(common__policy_suffix) }}" data__role_suffix: "{{ data.role.suffix | default(common__role_suffix) }}" +data__assignment_suffix: "{{ data.assignment.suffix | default(common__assignment_suffix) }}" data__teardown_deletes_policies: "{{ data.teardown.delete_policies | default(False) }}" data__teardown_deletes_roles: "{{ data.teardown.delete_roles | default(False) }}" +data__teardown_deletes_assignments: "{{ data.teardown.delete_assignments | default(False) }}" # AWS 
data__aws_policy_suffix: "{{ data.policy.aws.suffix | default(data__policy_suffix) }}" @@ -42,3 +44,10 @@ data__aws_read_write_policy_url: "{{ data.policy.aws.read_write.url data__aws_idbroker_role_name: "{{ common__aws_idbroker_role_name }}" data__aws_datalake_admin_role_name: "{{ common__aws_datalake_admin_role_name }}" + +# Azure +data__azure_metagroup_name: "{{ common__azure_metagroup_name }}" +data__azure_contributor_name_suffix: "{{ common__azure_contributor_name_suffix }}" +data__azure_reader_name_suffix: "{{ env.azure.role.name_suffix.reader | default('reader') }}" +data__azure_datalakeadmin_identity_name: "{{ common__azure_datalakeadmin_identity_name }}" +data__azure_assignment_suffix: "{{ data.assignment.azure.suffix | default(data__assignment_suffix) }}" diff --git a/roles/data/tasks/initialize_azure.yml b/roles/data/tasks/initialize_azure.yml new file mode 100644 index 00000000..92b9d47f --- /dev/null +++ b/roles/data/tasks/initialize_azure.yml @@ -0,0 +1,76 @@ +--- + +# Copyright 2021 Cloudera, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +- name: Get Azure Resource Group matching Namespace, if exists + azure.azcollection.azure_rm_resourcegroup_info: + name: "{{ data__azure_metagroup_name }}" + register: __azure_resource_metagroup_info + +- name: Set fact Azure Resource Group URI, if exists + ansible.builtin.set_fact: + data__azure_metagroup_uri: "{{ __azure_resource_metagroup_info.resourcegroups[0].id | default('') }}" + +- name: Query for the Azure Datalake Admin Identity + command: "az identity list -g {{ data__azure_metagroup_name }}" + register: __azure_identity_list + failed_when: __azure_identity_list.rc != 0 + delay: 5 + retries: 10 + until: + #- plat__azure_idbroker_identity_name in ( __azure_identity_list.stdout | from_json | community.general.json_query('[*].name') ) + - data__azure_datalakeadmin_identity_name in ( __azure_identity_list.stdout | from_json | community.general.json_query('[*].name') ) + #- plat__azure_log_identity_name in ( __azure_identity_list.stdout | from_json | community.general.json_query('[*].name') ) + #- plat__azure_ranger_audit_identity_name in ( __azure_identity_list.stdout | from_json | community.general.json_query('[*].name') ) + +- name: Extract Azure Identity Principals + ansible.builtin.set_fact: + #__azure_idbroker_identity_uuid: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_idbroker_uuid) | first }}" + #__azure_idbroker_identity_uri: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_idbroker_uri) | first }}" + __azure_datalakeadmin_identity_uuid: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_dtadmin_uuid) | first }}" + #__azure_datalakeadmin_identity_uri: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_dtadmin_uri) | first }}" + #__azure_log_identity_uuid: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_log_rl_uuid) | first }}" + #__azure_log_identity_uri: "{{ __azure_identity_list.stdout | from_json 
| community.general.json_query(jq_log_rl_uri) | first }}" + #__azure_ranger_audit_identity_uuid: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_rngr_rl_uuid) | first }}" + #__azure_ranger_audit_identity_uri: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_rngr_rl_uri) | first }}" + vars: + #jq_idbroker_uuid: "[?name=='{{ plat__azure_idbroker_identity_name }}'].principalId" + #jq_idbroker_uri: "[?name=='{{ plat__azure_idbroker_identity_name }}'].id" + jq_dtadmin_uuid: "[?name=='{{ data__azure_datalakeadmin_identity_name }}'].principalId" + #jq_dtadmin_uri: "[?name=='{{ data__azure_datalakeadmin_identity_name }}'].id" + #jq_log_rl_uuid: "[?name=='{{ plat__azure_log_identity_name }}'].principalId" + #jq_log_rl_uri: "[?name=='{{ plat__azure_log_identity_name }}'].id" + #jq_rngr_rl_uuid: "[?name=='{{ plat__azure_ranger_audit_identity_name }}'].principalId" + #jq_rngr_rl_uri: "[?name=='{{ plat__azure_ranger_audit_identity_name }}'].id" + +- name: Retrieve Storage Blob Data Contributor Role definition + azure_rm_roledefinition_info: + scope: data__azure_metagroup_uri + role_name: Storage Blob Data Contributor + register: __azure_storage_blob_data_contributor_info + +- name: Set fact Storage Blob Data Contributor Role URI + ansible.builtin.set_fact: + __azure_storage_blob_data_contributor_uri: "{{ __azure_storage_blob_data_contributor_info.id }}" + +- name: Get Storage Blob Data Reader Role definition + azure_rm_roledefinition_info: + scope: data__azure_metagroup_uri + role_name: Storage Blob Data Reader + register: __azure_storage_blob_data_reader_info + +- name: Set fact Storage Blob Data Reader Role URI + ansible.builtin.set_fact: + __azure_storage_blob_data_reader_uri: "{{ __azure_storage_blob_data_reader_info.id }}" \ No newline at end of file diff --git a/roles/data/tasks/setup_azure.yml b/roles/data/tasks/setup_azure.yml new file mode 100644 index 00000000..ce6a892b --- /dev/null +++ 
b/roles/data/tasks/setup_azure.yml @@ -0,0 +1,86 @@ +--- + +# Copyright 2021 Cloudera, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +- OLD__data: + storage: + # A list of lists of locations (read/[only|write]) defined in a policy and assigned to a role + - read_only: bool + locations: + aws: + - bucket: + azure: + - resource_group: # Default to data__azure_metagroup_uri + storage_account: + container: + policy: + aws: + name: + suffix: + delete: bool + role: + aws: + datalake_admin: bool + name: + suffix: + delete: bool + assignment: + azure: + suffix: + delete: bool + policy: + suffix: + aws: + suffix: + read_only: + suffix: + url: + read_write: + suffix: + url: + role: + suffix: + aws: + suffix: + assignment: + suffix: + azure: + suffix: + teardown: + delete_policies: + delete_roles: + +# Outer loop variable: __data_storage + +- name: Set fact for Azure data storage assignments + ansible.builtin.set_fact: + __azure_data_location_storage_assignments: [] + +- name: Prepare data storage location entries + ansible.builtin.include_tasks: "setup_azure_data_locations.yml" + loop: "{{ __data_storage.locations.azure }}" + loop_control: + loop_var: __data_store_azure_location + +- name: Process Azure role assignments for external data storage locations + azure.azcollection.azure_rm_roleassignment: # This Azure module is not idempotent on removals + state: present + scope: "{{ __azure_data_location_assignment.scope }}" + name: "{{ 
__azure_data_location_assignment.name }}" + assignee_object_id: "{{ __azure_data_location_assignment.assignee }}" + role_definition_id: "{{ __data_store.read_only | default(False) | ternary(__azure_storage_blob_data_reader_uri, __azure_storage_blob_data_contributor_uri) }}" + loop: "{{ __azure_data_location_storage_assignments }}" + loop_control: + loop_var: __azure_data_location_assignment \ No newline at end of file diff --git a/roles/data/tasks/setup_azure_data_locations.yml b/roles/data/tasks/setup_azure_data_locations.yml new file mode 100644 index 00000000..f5ce092e --- /dev/null +++ b/roles/data/tasks/setup_azure_data_locations.yml @@ -0,0 +1,44 @@ +--- + +# Copyright 2021 Cloudera, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# TODO: This can move to initialize, as it is shared between setup and teardown for Azure + +# Outer loop variable: __data_storage +# Inner loop variable: __data_store_azure_location + +- name: Resolve location-specific Resource Group URI + when: __data_store_azure_location.resource_group is defined + azure.azcollection.azure_rm_resourcegroup_info: + name: "{{ __data_store_azure_location.resource_group }}" + register: __azure_data_location_resource_metagroup_info + +- name: Set fact Azure Resource Group URI + ansible.builtin.set_fact: + __azure_data_location_metagroup_uri: "{{ __azure_data_location_resource_metagroup_info.resourcegroups[0].id | default(data__azure_metagroup_uri) }}" + +- name: Set fact Azure data storage assignment name + ansible.builtin.set_fact: + __azure_data_location_assignment_name: "{{ [data__namespace, __data_store.read_only | default(False) | ternary(data__azure_reader_name_suffix, data__azure_contributor_name_suffix), __data_store_azure_location.storage_account, __data_store_azure_location.container, __data_store_azure_location.assignment.azure.suffix | default(data__azure_assignment_suffix)] | join('-') }}" + +- name: Set fact for Azure storage location Role Assignments + ansible.builtin.set_fact: + __azure_data_location_storage_assignments: "{{ __azure_data_location_storage_assignments | default([]) | union([entry]) }}" + vars: + entry: + name: "{{ __azure_data_location_assignment_name | to_uuid }}" + scope: "{{ __azure_data_location_metagroup_uri }}/providers/Microsoft.Storage/storageAccounts/{{ __data_store_azure_location.storage_account }}/blobServices/default/containers/{{ __data_store_azure_location.container }}" + assignee: "{{ __azure_datalakeadmin_identity_uuid }}" + desc: external data location assignment \ No newline at end of file diff --git a/roles/data/tasks/teardown.yml b/roles/data/tasks/teardown.yml index 567d1e9d..88251c30 100644 --- a/roles/data/tasks/teardown.yml +++ b/roles/data/tasks/teardown.yml @@ -14,14 +14,8 
@@ # See the License for the specific language governing permissions and # limitations under the License. -- name: Tear down external storage location policies - ansible.builtin.include_tasks: "teardown_{{ data__infra_type }}_policies.yml" - loop: "{{ __data_storage_read_only | union(__data_storage_read_write) }}" - loop_control: - loop_var: __data_storage - -- name: Tear down external storage location access roles - ansible.builtin.include_tasks: "teardown_{{ data__infra_type }}_roles.yml" +- name: Tear down external storage locations + ansible.builtin.include_tasks: "teardown_{{ data__infra_type }}.yml" loop: "{{ __data_storage_read_only | union(__data_storage_read_write) }}" loop_control: loop_var: __data_storage diff --git a/roles/data/tasks/teardown_aws_policies.yml b/roles/data/tasks/teardown_aws.yml similarity index 78% rename from roles/data/tasks/teardown_aws_policies.yml rename to roles/data/tasks/teardown_aws.yml index 9c6b9b5a..5aae5726 100644 --- a/roles/data/tasks/teardown_aws_policies.yml +++ b/roles/data/tasks/teardown_aws.yml @@ -1,6 +1,6 @@ --- -# Copyright 2023 Cloudera, Inc. All Rights Reserved. +# Copyright 2021 Cloudera, Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -34,12 +34,12 @@ __data_role_name: "{{ __data_storage.role.datalake_admin | default(True) | ternary(data__aws_datalake_admin_role_name, __data_role_name_default) }}" __data_role_name_default: "{{ [data__namespace, data__external_data_suffix, __data_storage.role.suffix | default(data__aws_role_suffix)] | join('-') }}" - - name: Query the external data AWS role + - name: Query the external data AWS role community.aws.iam_role_info: region: "{{ data__region }}" name: "{{ __data_aws_access_role_name }}" register: __data_aws_access_role_info - + - name: Set facts for existing managed policies for external data AWS role when: __data_aws_access_role_info.iam_roles | length > 0 ansible.builtin.set_fact: @@ -69,3 +69,19 @@ region: "{{ data__region }}" policy_name: "{{ __data_aws_external_policy_name }}" state: absent + +- name: Tear down AWS external role + when: __data_storage.role.delete | default(data__teardown_deletes_roles) + block: + - name: Set fact for external data AWS role name + ansible.builtin.set_fact: + __data_aws_access_role_name: "{{ __data_storage.role.name | default(__data_role_name) }}" + vars: + __data_role_name: "{{ __data_storage.role.datalake_admin | default(True) | ternary(data__aws_datalake_admin_role_name, __data_role_name_default) }}" + __data_role_name_default: "{{ [data__namespace, data__external_data_suffix, __data_storage.role.suffix | default(data__aws_role_suffix)] | join('-') }}" + + - name: Delete external data AWS role + community.aws.iam_role: + region: "{{ data__region }}" + name: "{{ __data_aws_access_role_name }}" + state: absent \ No newline at end of file diff --git a/roles/data/tasks/teardown_aws_roles.yml b/roles/data/tasks/teardown_aws_roles.yml deleted file mode 100644 index 28a79bc9..00000000 --- a/roles/data/tasks/teardown_aws_roles.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- - -# Copyright 2023 Cloudera, Inc. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Outer loop variable: __data_storage - -- name: Tear down AWS external role - when: __data_storage.role.delete | default(data__teardown_deletes_roles) - block: - - name: Set fact for external data AWS role name - ansible.builtin.set_fact: - __data_aws_access_role_name: "{{ __data_storage.role.name | default(__data_role_name) }}" - vars: - __data_role_name: "{{ __data_storage.role.datalake_admin | default(True) | ternary(data__aws_datalake_admin_role_name, __data_role_name_default) }}" - __data_role_name_default: "{{ [data__namespace, data__external_data_suffix, __data_storage.role.suffix | default(data__aws_role_suffix)] | join('-') }}" - - - name: Delete external data AWS role - community.aws.iam_role: - region: "{{ data__region }}" - name: "{{ __data_aws_access_role_name }}" - state: absent diff --git a/roles/data/tasks/teardown_azure.yml b/roles/data/tasks/teardown_azure.yml new file mode 100644 index 00000000..adaf096f --- /dev/null +++ b/roles/data/tasks/teardown_azure.yml @@ -0,0 +1,38 @@ +# Copyright 2021 Cloudera, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Outer loop variable: __data_storage + +- name: Set fact for Azure data storage assignments + ansible.builtin.set_fact: + __azure_data_location_storage_assignments: [] + +- name: Prepare data storage location entries + ansible.builtin.include_tasks: "setup_azure_data_locations.yml" + loop: "{{ __data_storage.locations.azure }}" + loop_control: + loop_var: __data_store_azure_location + +- name: Tear down Azure external role assignments + when: __data_storage.assignment.delete | default(data__teardown_deletes_assignments) + block: + - name: Process Azure role assignment removal for external data storage locations + azure.azcollection.azure_rm_roleassignment: # This Azure module is not idempotent on removals + state: absent + scope: "{{ __azure_data_location_assignment.scope }}" + assignee_object_id: "{{ __azure_data_location_assignment.assignee }}" + role_definition_id: "{{ __data_store.read_only | default(False) | ternary(__azure_storage_blob_data_reader_uri, __azure_storage_blob_data_contributor_uri) }}" + loop: "{{ __azure_data_location_storage_assignments }}" + loop_control: + loop_var: __azure_data_location_assignment \ No newline at end of file diff --git a/roles/data/tasks/validate.yml b/roles/data/tasks/validate.yml index e3cf5040..82c13c9c 100644 --- a/roles/data/tasks/validate.yml +++ b/roles/data/tasks/validate.yml @@ -1,4 +1,6 @@ -# Copyright 2023 Cloudera, Inc. All Rights Reserved. +--- + +# Copyright 2021 Cloudera, Inc. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,3 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +- name: Validate external data storage locations + ansible.builtin.include_tasks: "validate_{{ data__infra_type }}.yml" diff --git a/roles/data/tasks/validate_aws.yml b/roles/data/tasks/validate_aws.yml new file mode 100644 index 00000000..37e30437 --- /dev/null +++ b/roles/data/tasks/validate_aws.yml @@ -0,0 +1,13 @@ +# Copyright 2021 Cloudera, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/roles/data/tasks/validate_azure.yml b/roles/data/tasks/validate_azure.yml new file mode 100644 index 00000000..fcac8449 --- /dev/null +++ b/roles/data/tasks/validate_azure.yml @@ -0,0 +1,28 @@ +--- + +# Copyright 2021 Cloudera, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +- name: Confirm required data location parameters + ansible.builtin.assert: + that: + - __data_location.storage_account is defined + - __data_location.storage_account is string + - __data_location.container is defined + - __data_location.container is string + fail_msg: "Each Azure external data location must provide both 'storage_account' and 'container' names." + quiet: yes + loop: "{{ __data_storage.locations.azure }}" + loop_control: + loop_var: __data_location \ No newline at end of file diff --git a/roles/platform/defaults/main.yml b/roles/platform/defaults/main.yml index 4543cb69..c4081031 100644 --- a/roles/platform/defaults/main.yml +++ b/roles/platform/defaults/main.yml @@ -187,18 +187,18 @@ plat__azure_storage_suffix: "{{ env.azure.storage.suffix | def plat__azure_xaccount_suffix: "{{ env.azure.role.label.xaccount | default(common__xaccount_suffix) }}" plat__azure_log_suffix: "{{ env.azure.role.label.log | default(common__logs_suffix) }}" plat__azure_data_suffix: "{{ env.azure.role.label.data | default(common__data_suffix) }}" -plat__azure_identity_suffix: "{{ env.azure.role.label.identity | default(common__identity_suffix) }}" -plat__azure_datalake_admin_suffix: "{{ env.azure.role.label.datalake_admin | default(common__datalake_admin_suffix) }}" +plat__azure_identity_suffix: "{{ common__azure_identity_suffix }}" +plat__azure_datalake_admin_suffix: "{{ common__azure_datalake_admin_suffix }}" plat__azure_ranger_audit_suffix: "{{ env.azure.role.label.ranger_audit | default(common__ranger_audit_suffix) }}" plat__azure_raz_suffix: "{{ 
env.azure.role.label.raz | default(common__raz_suffix) }}" plat__azure_idbroker_suffix: "{{ env.azure.role.label.idbroker | default(common__idbroker_suffix) }}" plat__azure_owner_name_suffix: "{{ env.azure.role.name_suffix.owner | default('owner') }}" -plat__azure_contributor_name_suffix: "{{ env.azure.role.name_suffix.contributor | default('contributor') }}" +plat__azure_contributor_name_suffix: "{{ common__azure_contributor_name_suffix }}" plat__azure_operator_name_suffix: "{{ env.azure.role.name_suffix.operator | default('operator') }}" plat__azure_admin_name_suffix: "{{ env.azure.role.name_suffix.admin | default(common__admin_suffix) }}" plat__azure_user_name_suffix: "{{ env.azure.role.name_suffix.user | default(common__user_suffix) }}" -plat__azure_assignment_name_suffix: "{{ env.azure.role.name_suffix.assignment | default('assignment') }}" +plat__azure_assignment_name_suffix: "{{ env.azure.role.name_suffix.assignment | default(common__assignment_suffix) }}" plat__azure_metagroup_name: "{{ common__azure_metagroup_name }}" plat__azure_storage_name: "{{ common__azure_storage_name }}" @@ -212,7 +212,7 @@ plat__azure_xaccount_role_name: "{{ env.azure.role.name.cross_acco plat__azure_policy_url: "{{ env.azure.policy.url | default('https://raw.githubusercontent.com/cloudera-labs/snippets/main/policies/azure/cloudbreak_minimal_multiple_rgs_v1.json') }}" plat__azure_log_identity_name: "{{ env.azure.role.name.log | default([plat__namespace, plat__azure_log_suffix, plat__azure_identity_suffix] | join('-')) }}" -plat__azure_datalakeadmin_identity_name: "{{ env.azure.role.name.datalake_admin | default([plat__namespace, plat__azure_datalake_admin_suffix, plat__azure_identity_suffix] | join('-')) }}" +plat__azure_datalakeadmin_identity_name: "{{ common__azure_datalakeadmin_identity_name }}" plat__azure_ranger_audit_identity_name: "{{ env.azure.role.name.ranger_audit | default([plat__namespace, plat__azure_ranger_audit_suffix, plat__azure_identity_suffix] | join('-')) }}" 
plat__azure_raz_identity_name: "{{ env.azure.role.name.raz | default([plat__namespace, plat__azure_raz_suffix, plat__azure_identity_suffix] | join('-')) }}" plat__azure_idbroker_identity_name: "{{ env.azure.role.name.idbroker | default([plat__namespace, plat__azure_idbroker_suffix, plat__azure_identity_suffix] | join('-')) }}" From 8f0d83e456f9489e0611737c2005b93900cb741e Mon Sep 17 00:00:00 2001 From: Webster Mudge Date: Thu, 12 Sep 2024 11:40:51 -0400 Subject: [PATCH 2/7] Remove WIP configuration example Signed-off-by: Webster Mudge --- roles/data/tasks/setup_azure.yml | 48 -------------------------------- 1 file changed, 48 deletions(-) diff --git a/roles/data/tasks/setup_azure.yml b/roles/data/tasks/setup_azure.yml index ce6a892b..d47aa1e9 100644 --- a/roles/data/tasks/setup_azure.yml +++ b/roles/data/tasks/setup_azure.yml @@ -13,54 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -- OLD__data: - storage: - # A list of lists of locations (read/[only|write]) defined in a policy and assigned to a role - - read_only: bool - locations: - aws: - - bucket: - azure: - - resource_group: # Default to data__azure_metagroup_uri - storage_account: - container: - policy: - aws: - name: - suffix: - delete: bool - role: - aws: - datalake_admin: bool - name: - suffix: - delete: bool - assignment: - azure: - suffix: - delete: bool - policy: - suffix: - aws: - suffix: - read_only: - suffix: - url: - read_write: - suffix: - url: - role: - suffix: - aws: - suffix: - assignment: - suffix: - azure: - suffix: - teardown: - delete_policies: - delete_roles: # Outer loop variable: __data_storage From a5d1cb71af8d4465560414a2cffcff9c3a0669ae Mon Sep 17 00:00:00 2001 From: Webster Mudge Date: Thu, 12 Sep 2024 11:40:51 -0400 Subject: [PATCH 3/7] Convert to using Storage Blob Data Owner for read/write access for DL Admin Signed-off-by: Webster Mudge --- roles/data/tasks/initialize_azure.yml | 33 +++++++-------------------- roles/data/tasks/setup_azure.yml | 2 +- roles/data/tasks/teardown_azure.yml | 2 +- 3 files changed, 10 insertions(+), 27 deletions(-) diff --git a/roles/data/tasks/initialize_azure.yml b/roles/data/tasks/initialize_azure.yml index 92b9d47f..bea6de8a 100644 --- a/roles/data/tasks/initialize_azure.yml +++ b/roles/data/tasks/initialize_azure.yml @@ -30,47 +30,30 @@ delay: 5 retries: 10 until: - #- plat__azure_idbroker_identity_name in ( __azure_identity_list.stdout | from_json | community.general.json_query('[*].name') ) - data__azure_datalakeadmin_identity_name in ( __azure_identity_list.stdout | from_json | community.general.json_query('[*].name') ) - #- plat__azure_log_identity_name in ( __azure_identity_list.stdout | from_json | community.general.json_query('[*].name') ) - #- plat__azure_ranger_audit_identity_name in ( __azure_identity_list.stdout | from_json | community.general.json_query('[*].name') ) - name: Extract Azure Identity Principals 
ansible.builtin.set_fact: - #__azure_idbroker_identity_uuid: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_idbroker_uuid) | first }}" - #__azure_idbroker_identity_uri: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_idbroker_uri) | first }}" __azure_datalakeadmin_identity_uuid: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_dtadmin_uuid) | first }}" - #__azure_datalakeadmin_identity_uri: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_dtadmin_uri) | first }}" - #__azure_log_identity_uuid: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_log_rl_uuid) | first }}" - #__azure_log_identity_uri: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_log_rl_uri) | first }}" - #__azure_ranger_audit_identity_uuid: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_rngr_rl_uuid) | first }}" - #__azure_ranger_audit_identity_uri: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_rngr_rl_uri) | first }}" vars: - #jq_idbroker_uuid: "[?name=='{{ plat__azure_idbroker_identity_name }}'].principalId" - #jq_idbroker_uri: "[?name=='{{ plat__azure_idbroker_identity_name }}'].id" jq_dtadmin_uuid: "[?name=='{{ data__azure_datalakeadmin_identity_name }}'].principalId" - #jq_dtadmin_uri: "[?name=='{{ data__azure_datalakeadmin_identity_name }}'].id" - #jq_log_rl_uuid: "[?name=='{{ plat__azure_log_identity_name }}'].principalId" - #jq_log_rl_uri: "[?name=='{{ plat__azure_log_identity_name }}'].id" - #jq_rngr_rl_uuid: "[?name=='{{ plat__azure_ranger_audit_identity_name }}'].principalId" - #jq_rngr_rl_uri: "[?name=='{{ plat__azure_ranger_audit_identity_name }}'].id" -- name: Retrieve Storage Blob Data Contributor Role definition +- name: Retrieve Storage Blob Data Owner Role definition azure_rm_roledefinition_info: - scope: data__azure_metagroup_uri - 
role_name: Storage Blob Data Contributor - register: __azure_storage_blob_data_contributor_info + scope: "{{ data__azure_metagroup_uri }}" + role_name: Storage Blob Data Owner + register: __azure_storage_blob_data_owner_info -- name: Set fact Storage Blob Data Contributor Role URI +- name: Set fact Storage Blob Data Owner Role URI ansible.builtin.set_fact: - __azure_storage_blob_data_contributor_uri: "{{ __azure_storage_blob_data_contributor_info.id }}" + __azure_storage_blob_data_owner_uri: "{{ __azure_storage_blob_data_owner_info.roledefinitions | map(attribute='id') | list | first }}" - name: Get Storage Blob Data Reader Role definition azure_rm_roledefinition_info: - scope: data__azure_metagroup_uri + scope: "{{ data__azure_metagroup_uri }}" role_name: Storage Blob Data Reader register: __azure_storage_blob_data_reader_info - name: Set fact Storage Blob Data Reader Role URI ansible.builtin.set_fact: - __azure_storage_blob_data_reader_uri: "{{ __azure_storage_blob_data_reader_info.id }}" \ No newline at end of file + __azure_storage_blob_data_reader_uri: "{{ __azure_storage_blob_data_reader_info.roledefinitions | map(attribute='id') | list | first }}" \ No newline at end of file diff --git a/roles/data/tasks/setup_azure.yml b/roles/data/tasks/setup_azure.yml index d47aa1e9..ccc000bf 100644 --- a/roles/data/tasks/setup_azure.yml +++ b/roles/data/tasks/setup_azure.yml @@ -32,7 +32,7 @@ scope: "{{ __azure_data_location_assignment.scope }}" name: "{{ __azure_data_location_assignment.name }}" assignee_object_id: "{{ __azure_data_location_assignment.assignee }}" - role_definition_id: "{{ __data_store.read_only | default(False) | ternary(__azure_storage_blob_data_reader_uri, __azure_storage_blob_data_contributor_uri) }}" + role_definition_id: "{{ __data_store.read_only | default(False) | ternary(__azure_storage_blob_data_reader_uri, __azure_storage_blob_data_owner_uri) }}" loop: "{{ __azure_data_location_storage_assignments }}" loop_control: loop_var: 
__azure_data_location_assignment \ No newline at end of file diff --git a/roles/data/tasks/teardown_azure.yml b/roles/data/tasks/teardown_azure.yml index adaf096f..675c99e5 100644 --- a/roles/data/tasks/teardown_azure.yml +++ b/roles/data/tasks/teardown_azure.yml @@ -32,7 +32,7 @@ state: absent scope: "{{ __azure_data_location_assignment.scope }}" assignee_object_id: "{{ __azure_data_location_assignment.assignee }}" - role_definition_id: "{{ __data_store.read_only | default(False) | ternary(__azure_storage_blob_data_reader_uri, __azure_storage_blob_data_contributor_uri) }}" + role_definition_id: "{{ __data_store.read_only | default(False) | ternary(__azure_storage_blob_data_reader_uri, __azure_storage_blob_data_owner_uri) }}" loop: "{{ __azure_data_location_storage_assignments }}" loop_control: loop_var: __azure_data_location_assignment \ No newline at end of file From 86d3cb9216f02f3d8deeec1321a35f8fbc017511 Mon Sep 17 00:00:00 2001 From: Webster Mudge Date: Thu, 12 Sep 2024 11:40:52 -0400 Subject: [PATCH 4/7] Update data location validation check Signed-off-by: Webster Mudge --- roles/data/tasks/validate_azure.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/data/tasks/validate_azure.yml b/roles/data/tasks/validate_azure.yml index fcac8449..0a318897 100644 --- a/roles/data/tasks/validate_azure.yml +++ b/roles/data/tasks/validate_azure.yml @@ -23,6 +23,6 @@ - __data_location.container is string fail_msg: "Each Azure external data location must provide both 'storage_account' and 'container' names." 
quiet: yes - loop: "{{ __data_storage.locations.azure }}" + loop: "{{ data__storage | map(attribute='locations') | map(attribute='azure', default=[]) | flatten | list }}" loop_control: loop_var: __data_location \ No newline at end of file From 9cbcc0e0e92d1b68d6af8d5656a3d8afb7b701a7 Mon Sep 17 00:00:00 2001 From: Webster Mudge Date: Thu, 12 Sep 2024 11:40:52 -0400 Subject: [PATCH 5/7] WIP of configuration Signed-off-by: Webster Mudge --- roles/data/config-wip.yml | 51 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 roles/data/config-wip.yml diff --git a/roles/data/config-wip.yml b/roles/data/config-wip.yml new file mode 100644 index 00000000..6b551969 --- /dev/null +++ b/roles/data/config-wip.yml @@ -0,0 +1,51 @@ +--- + +# Potential configuration for cloudera.exe.data + +data: + storage: + # A list of lists of locations (read/[only|write]) defined in a policy and assigned to a role + - read_only: bool + locations: + aws: + - bucket: + azure: + - resource_group: # Default to data__azure_metagroup_uri + storage_account: + container: + policy: + aws: + name: + suffix: + delete: bool + role: + aws: + datalake_admin: bool + name: + suffix: + delete: bool + assignment: + azure: + suffix: + delete: bool + policy: + suffix: + aws: + suffix: + read_only: + suffix: + url: + read_write: + suffix: + url: + role: + suffix: + aws: + suffix: + assignment: + suffix: + azure: + suffix: + teardown: + delete_policies: + delete_roles: \ No newline at end of file From 805d5ea4eb088befb52cc69728724997753f5cc1 Mon Sep 17 00:00:00 2001 From: Webster Mudge Date: Thu, 12 Sep 2024 11:40:52 -0400 Subject: [PATCH 6/7] Convert DL Admin identity details to use Azure module Signed-off-by: Webster Mudge --- roles/data/tasks/initialize_azure.yml | 24 +++++++++---------- .../data/tasks/setup_azure_data_locations.yml | 2 +- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/roles/data/tasks/initialize_azure.yml
b/roles/data/tasks/initialize_azure.yml index bea6de8a..92de71b0 100644 --- a/roles/data/tasks/initialize_azure.yml +++ b/roles/data/tasks/initialize_azure.yml @@ -23,20 +23,18 @@ ansible.builtin.set_fact: data__azure_metagroup_uri: "{{ __azure_resource_metagroup_info.resourcegroups[0].id | default('') }}" -- name: Query for the Azure Datalake Admin Identity - command: "az identity list -g {{ data__azure_metagroup_name }}" - register: __azure_identity_list - failed_when: __azure_identity_list.rc != 0 - delay: 5 - retries: 10 - until: - - data__azure_datalakeadmin_identity_name in ( __azure_identity_list.stdout | from_json | community.general.json_query('[*].name') ) - -- name: Extract Azure Identity Principals +- name: Retrieve Azure Datalake Admin Identity + azure.azcollection.azure_rm_resource_info: + resource_group: "{{ data__azure_metagroup_name }}" + provider: ManagedIdentity + resource_type: userAssignedIdentities + resource_name: "{{ data__azure_datalakeadmin_identity_name }}" + #api_version: '2018-11-30' + register: __azure_dl_admin_identity + +- name: Set fact Azure Datalake Admin Principal details ansible.builtin.set_fact: - __azure_datalakeadmin_identity_uuid: "{{ __azure_identity_list.stdout | from_json | community.general.json_query(jq_dtadmin_uuid) | first }}" - vars: - jq_dtadmin_uuid: "[?name=='{{ data__azure_datalakeadmin_identity_name }}'].principalId" + __azure_datalakeadmin_identity: "{{ __azure_dl_admin_identity.response | selectattr('name', 'eq', data__azure_datalakeadmin_identity_name) | first }}" - name: Retrieve Storage Blob Data Owner Role definition azure_rm_roledefinition_info: diff --git a/roles/data/tasks/setup_azure_data_locations.yml b/roles/data/tasks/setup_azure_data_locations.yml index f5ce092e..f566d54a 100644 --- a/roles/data/tasks/setup_azure_data_locations.yml +++ b/roles/data/tasks/setup_azure_data_locations.yml @@ -40,5 +40,5 @@ entry: name: "{{ __azure_data_location_assignment_name | to_uuid }}" scope: "{{ 
__azure_data_location_metagroup_uri }}/providers/Microsoft.Storage/storageAccounts/{{ __data_store_azure_location.storage_account }}/blobServices/default/containers/{{ __data_store_azure_location.container }}" - assignee: "{{ __azure_datalakeadmin_identity_uuid }}" + assignee: "{{ __azure_datalakeadmin_identity.properties.principalId }}" desc: external data location assignment \ No newline at end of file From a8127ddcd31b0c6ed50c88ee50420dea490d5f93 Mon Sep 17 00:00:00 2001 From: Webster Mudge Date: Thu, 12 Sep 2024 11:40:52 -0400 Subject: [PATCH 7/7] Remove comment Signed-off-by: Webster Mudge --- roles/data/tasks/setup_azure.yml | 8 ++++++-- roles/data/tasks/teardown_azure.yml | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/roles/data/tasks/setup_azure.yml b/roles/data/tasks/setup_azure.yml index ccc000bf..5aa8b2b4 100644 --- a/roles/data/tasks/setup_azure.yml +++ b/roles/data/tasks/setup_azure.yml @@ -27,7 +27,7 @@ loop_var: __data_store_azure_location - name: Process Azure role assignments for external data storage locations - azure.azcollection.azure_rm_roleassignment: # This Azure module is not idempotent on removals + azure.azcollection.azure_rm_roleassignment: state: present scope: "{{ __azure_data_location_assignment.scope }}" name: "{{ __azure_data_location_assignment.name }}" @@ -35,4 +35,8 @@ role_definition_id: "{{ __data_store.read_only | default(False) | ternary(__azure_storage_blob_data_reader_uri, __azure_storage_blob_data_owner_uri) }}" loop: "{{ __azure_data_location_storage_assignments }}" loop_control: - loop_var: __azure_data_location_assignment \ No newline at end of file + loop_var: __azure_data_location_assignment + register: __data_azure_assignment_results + until: __data_azure_assignment_results is not failed + retries: 3 + delay: 3 \ No newline at end of file diff --git a/roles/data/tasks/teardown_azure.yml b/roles/data/tasks/teardown_azure.yml index 675c99e5..dc45e414 100644 --- a/roles/data/tasks/teardown_azure.yml 
+++ b/roles/data/tasks/teardown_azure.yml @@ -28,7 +28,7 @@ when: __data_storage.assignment.delete | default(data__teardown_deletes_assignments) block: - name: Process Azure role assignment removal for external data storage locations - azure.azcollection.azure_rm_roleassignment: # This Azure module is not idempotent on removals + azure.azcollection.azure_rm_roleassignment: state: absent scope: "{{ __azure_data_location_assignment.scope }}" assignee_object_id: "{{ __azure_data_location_assignment.assignee }}"