Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update external data location support for Azure #87

Draft
wants to merge 7 commits into
base: devel
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions roles/common/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ common__group_suffix: "{{ globals.labels.group | default('gr
# Short label suffixes. Each one reads an optional override from
# globals.labels and otherwise uses the literal passed to default().
common__admin_suffix: "{{ globals.labels.admin | default('admin') }}"
common__user_suffix: "{{ globals.labels.user | default('user') }}"
common__ngw_suffix: "{{ globals.labels.nat_gateway | default('ngw') }}"
# Label for role assignments (override: globals.labels.assignment).
common__assignment_suffix: "{{ globals.labels.assignment | default('assign') }}"

common__unique_storage_name_suffix: "{{ globals.storage.name | default((common__region + common__aws_profile) if 'aws' in common__infra_type else common__region) }}"

Expand Down Expand Up @@ -137,6 +138,11 @@ common__azure_netapp_pool_name: "{{ infra.azure.netapp.pool.name | def
common__azure_netapp_vol_name: "{{ infra.azure.netapp.volume.name | default([common__namespace, common__azure_netapp_suffix, common__azure_volume_suffix] | join('-')) }}"
common__azure_netapp_nfs_version: "{{ infra.azure.netapp.nfs.version | default('3') }}"

# Azure role and identity naming; every value can be overridden under env.azure.role.
common__azure_contributor_name_suffix: "{{ env.azure.role.name_suffix.contributor | default('contributor') }}"
# These two labels fall back to common__identity_suffix / common__datalake_admin_suffix.
common__azure_identity_suffix: "{{ env.azure.role.label.identity | default(common__identity_suffix) }}"
common__azure_datalake_admin_suffix: "{{ env.azure.role.label.datalake_admin | default(common__datalake_admin_suffix) }}"
# Default name is "<namespace>-<datalake admin suffix>-<identity suffix>" (joined with '-').
common__azure_datalakeadmin_identity_name: "{{ env.azure.role.name.datalake_admin | default([common__namespace, common__azure_datalake_admin_suffix, common__azure_identity_suffix] | join('-')) }}"

# GCP Infra
common__gcp_project: "{{ infra.gcp.project | default('gcp-se') }}"
common__gcp_region: "{{ infra.gcp.region | default('europe-west1') }}"
Expand Down
51 changes: 51 additions & 0 deletions roles/data/config-wip.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---

# Potential configuration for cloudera.exe.data
#
# NOTE(review): this is a schema sketch, not a loadable variables file.
# `bool` marks a boolean option; keys without a value are free-form settings.
# The nesting below is reconstructed from key order - confirm against the
# tasks that read these keys before relying on it.

data:
  storage:
    # A list of entries; each entry holds a list of locations (read-only or
    # read-write) defined in a policy and assigned to a role
    - read_only: bool
      locations:
        aws:
          - bucket:
        azure:
          - resource_group:  # Default to data__azure_metagroup_uri
            storage_account:
            container:
      policy:
        aws:
          name:
          suffix:
          delete: bool
      role:
        aws:
          datalake_admin: bool
          name:
          suffix:
          delete: bool
      assignment:
        azure:
          suffix:
          delete: bool
  policy:
    suffix:
    aws:
      suffix:
      read_only:
        suffix:
        url:
      read_write:
        suffix:
        url:
  role:
    suffix:
    aws:
      suffix:
  assignment:
    suffix:
    azure:
      suffix:
  teardown:
    delete_policies:
    delete_roles:
    # Read by data__teardown_deletes_assignments in the role defaults.
    delete_assignments:
9 changes: 9 additions & 0 deletions roles/data/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ data__storage: "{{ data.storage | default([]) }}"
# Name suffixes. The policy, role and assignment suffixes take the data.*
# override when set and otherwise inherit the matching common__* value.
data__external_data_suffix: "{{ common__external_data_suffix }}"
data__policy_suffix: "{{ data.policy.suffix | default(common__policy_suffix) }}"
data__role_suffix: "{{ data.role.suffix | default(common__role_suffix) }}"
data__assignment_suffix: "{{ data.assignment.suffix | default(common__assignment_suffix) }}"

# Teardown switches, read from data.teardown.*; each defaults to False.
data__teardown_deletes_policies: "{{ data.teardown.delete_policies | default(False) }}"
data__teardown_deletes_roles: "{{ data.teardown.delete_roles | default(False) }}"
data__teardown_deletes_assignments: "{{ data.teardown.delete_assignments | default(False) }}"

# AWS
data__aws_policy_suffix: "{{ data.policy.aws.suffix | default(data__policy_suffix) }}"
Expand All @@ -42,3 +44,10 @@ data__aws_read_write_policy_url: "{{ data.policy.aws.read_write.url

data__aws_idbroker_role_name: "{{ common__aws_idbroker_role_name }}"
data__aws_datalake_admin_role_name: "{{ common__aws_datalake_admin_role_name }}"

# Azure
# Resource group, contributor suffix and datalake admin identity name are
# taken directly from the common role's variables.
data__azure_metagroup_name: "{{ common__azure_metagroup_name }}"
data__azure_contributor_name_suffix: "{{ common__azure_contributor_name_suffix }}"
# Reader suffix is read from env.azure.role.name_suffix.reader, default 'reader'.
data__azure_reader_name_suffix: "{{ env.azure.role.name_suffix.reader | default('reader') }}"
data__azure_datalakeadmin_identity_name: "{{ common__azure_datalakeadmin_identity_name }}"
# Azure-specific assignment suffix; falls back to data__assignment_suffix.
data__azure_assignment_suffix: "{{ data.assignment.azure.suffix | default(data__assignment_suffix) }}"
57 changes: 57 additions & 0 deletions roles/data/tasks/initialize_azure.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---

# Copyright 2021 Cloudera, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Look up the resource group named by data__azure_metagroup_name; the
# registered result feeds the URI fact set by the next task.
- name: Get Azure Resource Group matching Namespace, if exists
  register: __azure_resource_metagroup_info
  azure.azcollection.azure_rm_resourcegroup_info:
    name: "{{ data__azure_metagroup_name }}"

# Uses the id of the first returned resource group, or an empty string when
# that value is not defined.
- name: Set fact Azure Resource Group URI, if exists
  ansible.builtin.set_fact:
    data__azure_metagroup_uri: >-
      {{
        __azure_resource_metagroup_info.resourcegroups[0].id
        | default('')
      }}

# Query the user-assigned managed identity named by
# data__azure_datalakeadmin_identity_name inside the namespace resource group.
- name: Retrieve Azure Datalake Admin Identity
  register: __azure_dl_admin_identity
  azure.azcollection.azure_rm_resource_info:
    provider: ManagedIdentity
    resource_type: userAssignedIdentities
    resource_group: "{{ data__azure_metagroup_name }}"
    resource_name: "{{ data__azure_datalakeadmin_identity_name }}"
    # api_version: '2018-11-30'

# Keep the first response entry whose name matches the identity name exactly.
- name: Set fact Azure Datalake Admin Principal details
  ansible.builtin.set_fact:
    __azure_datalakeadmin_identity: >-
      {{
        __azure_dl_admin_identity.response
        | selectattr('name', 'eq', data__azure_datalakeadmin_identity_name)
        | first
      }}

# Resolve the ids of the two built-in role definitions that the setup and
# teardown tasks assign: Storage Blob Data Owner (read-write locations) and
# Storage Blob Data Reader (read-only locations). Both lookups are scoped to
# the namespace resource group URI.
#
# The role definition module is referenced by its fully qualified collection
# name so that it resolves the same way as the other azure.azcollection
# modules in this file, without relying on a `collections:` search path.

- name: Retrieve Storage Blob Data Owner Role definition
  azure.azcollection.azure_rm_roledefinition_info:
    scope: "{{ data__azure_metagroup_uri }}"
    role_name: Storage Blob Data Owner
  register: __azure_storage_blob_data_owner_info

# Take the id of the first matching role definition.
- name: Set fact Storage Blob Data Owner Role URI
  ansible.builtin.set_fact:
    __azure_storage_blob_data_owner_uri: "{{ __azure_storage_blob_data_owner_info.roledefinitions | map(attribute='id') | list | first }}"

- name: Get Storage Blob Data Reader Role definition
  azure.azcollection.azure_rm_roledefinition_info:
    scope: "{{ data__azure_metagroup_uri }}"
    role_name: Storage Blob Data Reader
  register: __azure_storage_blob_data_reader_info

# Take the id of the first matching role definition.
- name: Set fact Storage Blob Data Reader Role URI
  ansible.builtin.set_fact:
    __azure_storage_blob_data_reader_uri: "{{ __azure_storage_blob_data_reader_info.roledefinitions | map(attribute='id') | list | first }}"
42 changes: 42 additions & 0 deletions roles/data/tasks/setup_azure.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---

# Copyright 2021 Cloudera, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Outer loop variable: __data_storage

# Start every storage entry with an empty list; the included task file adds
# one assignment entry per Azure location.
- name: Set fact for Azure data storage assignments
  ansible.builtin.set_fact:
    __azure_data_location_storage_assignments: []

- name: Prepare data storage location entries
  ansible.builtin.include_tasks: "setup_azure_data_locations.yml"
  loop: "{{ __data_storage.locations.azure }}"
  loop_control:
    loop_var: __data_store_azure_location

# The access mode is read from the outer loop variable (__data_storage):
# read-only entries get the Storage Blob Data Reader role, all others the
# Storage Blob Data Owner role. Reading it from any other, undefined name
# would fall through default(False) and always assign Owner.
# The assignment is retried up to 3 times, 3 seconds apart, until it succeeds.
- name: Process Azure role assignments for external data storage locations
  azure.azcollection.azure_rm_roleassignment:
    state: present
    scope: "{{ __azure_data_location_assignment.scope }}"
    name: "{{ __azure_data_location_assignment.name }}"
    assignee_object_id: "{{ __azure_data_location_assignment.assignee }}"
    role_definition_id: >-
      {{
        __data_storage.read_only | default(False)
        | ternary(__azure_storage_blob_data_reader_uri, __azure_storage_blob_data_owner_uri)
      }}
  loop: "{{ __azure_data_location_storage_assignments }}"
  loop_control:
    loop_var: __azure_data_location_assignment
  register: __data_azure_assignment_results
  until: __data_azure_assignment_results is not failed
  retries: 3
  delay: 3
44 changes: 44 additions & 0 deletions roles/data/tasks/setup_azure_data_locations.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---

# Copyright 2021 Cloudera, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO: This can move to initialize, as it is shared between setup and teardown for Azure

# Outer loop variable: __data_storage
# Inner loop variable: __data_store_azure_location

# Runs only for locations that name their own resource group; otherwise the
# task is skipped and the next task falls back to data__azure_metagroup_uri.
- name: Resolve location-specific Resource Group URI
  when: __data_store_azure_location.resource_group is defined
  azure.azcollection.azure_rm_resourcegroup_info:
    name: "{{ __data_store_azure_location.resource_group }}"
  register: __azure_data_location_resource_metagroup_info

- name: Set fact Azure Resource Group URI
  ansible.builtin.set_fact:
    __azure_data_location_metagroup_uri: "{{ __azure_data_location_resource_metagroup_info.resourcegroups[0].id | default(data__azure_metagroup_uri) }}"

# Name parts, joined with '-': namespace, reader/contributor suffix, storage
# account, container, assignment suffix. The reader/contributor choice is read
# from the outer loop variable (__data_storage); an undefined name here would
# fall through default(False) and always pick the contributor suffix.
- name: Set fact Azure data storage assignment name
  ansible.builtin.set_fact:
    __azure_data_location_assignment_name: >-
      {{
        [
          data__namespace,
          __data_storage.read_only | default(False) | ternary(data__azure_reader_name_suffix, data__azure_contributor_name_suffix),
          __data_store_azure_location.storage_account,
          __data_store_azure_location.container,
          __data_store_azure_location.assignment.azure.suffix | default(data__azure_assignment_suffix)
        ] | join('-')
      }}

# Append one entry for this location. The assignment name is the to_uuid
# form of the readable name built above; the scope is the container path
# under the resolved resource group URI.
- name: Set fact for Azure storage location Role Assignments
  ansible.builtin.set_fact:
    __azure_data_location_storage_assignments: "{{ __azure_data_location_storage_assignments | default([]) | union([entry]) }}"
  vars:
    entry:
      name: "{{ __azure_data_location_assignment_name | to_uuid }}"
      scope: "{{ __azure_data_location_metagroup_uri }}/providers/Microsoft.Storage/storageAccounts/{{ __data_store_azure_location.storage_account }}/blobServices/default/containers/{{ __data_store_azure_location.container }}"
      assignee: "{{ __azure_datalakeadmin_identity.properties.principalId }}"
      desc: external data location assignment
10 changes: 2 additions & 8 deletions roles/data/tasks/teardown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

- name: Tear down external storage location policies
ansible.builtin.include_tasks: "teardown_{{ data__infra_type }}_policies.yml"
loop: "{{ __data_storage_read_only | union(__data_storage_read_write) }}"
loop_control:
loop_var: __data_storage

- name: Tear down external storage location access roles
ansible.builtin.include_tasks: "teardown_{{ data__infra_type }}_roles.yml"
- name: Tear down external storage locations
ansible.builtin.include_tasks: "teardown_{{ data__infra_type }}.yml"
loop: "{{ __data_storage_read_only | union(__data_storage_read_write) }}"
loop_control:
loop_var: __data_storage
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---

# Copyright 2023 Cloudera, Inc. All Rights Reserved.
# Copyright 2021 Cloudera, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,12 +34,12 @@
__data_role_name: "{{ __data_storage.role.datalake_admin | default(True) | ternary(data__aws_datalake_admin_role_name, __data_role_name_default) }}"
__data_role_name_default: "{{ [data__namespace, data__external_data_suffix, __data_storage.role.suffix | default(data__aws_role_suffix)] | join('-') }}"

- name: Query the external data AWS role
- name: Query the external data AWS role
community.aws.iam_role_info:
region: "{{ data__region }}"
name: "{{ __data_aws_access_role_name }}"
register: __data_aws_access_role_info

- name: Set facts for existing managed policies for external data AWS role
when: __data_aws_access_role_info.iam_roles | length > 0
ansible.builtin.set_fact:
Expand Down Expand Up @@ -69,3 +69,19 @@
region: "{{ data__region }}"
policy_name: "{{ __data_aws_external_policy_name }}"
state: absent

# Delete the AWS role for this storage entry. The entry's own role.delete
# flag decides; when it is not set, data__teardown_deletes_roles is used.
- name: Tear down AWS external role
  when: __data_storage.role.delete | default(data__teardown_deletes_roles)
  block:
    # Role name: explicit role.name if given; otherwise the datalake admin
    # role name (role.datalake_admin defaults to True), or
    # "<namespace>-<external data suffix>-<role suffix>" when that is false.
    - name: Set fact for external data AWS role name
      vars:
        __data_role_name_default: >-
          {{
            [
              data__namespace,
              data__external_data_suffix,
              __data_storage.role.suffix | default(data__aws_role_suffix)
            ] | join('-')
          }}
        __data_role_name: >-
          {{
            __data_storage.role.datalake_admin | default(True)
            | ternary(data__aws_datalake_admin_role_name, __data_role_name_default)
          }}
      ansible.builtin.set_fact:
        __data_aws_access_role_name: >-
          {{ __data_storage.role.name | default(__data_role_name) }}

    - name: Delete external data AWS role
      community.aws.iam_role:
        state: absent
        name: "{{ __data_aws_access_role_name }}"
        region: "{{ data__region }}"
33 changes: 0 additions & 33 deletions roles/data/tasks/teardown_aws_roles.yml

This file was deleted.

38 changes: 38 additions & 0 deletions roles/data/tasks/teardown_azure.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---

# Copyright 2021 Cloudera, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Outer loop variable: __data_storage

# Rebuild the list of role assignment entries for this storage entry using
# the task file shared with setup.
- name: Set fact for Azure data storage assignments
  ansible.builtin.set_fact:
    __azure_data_location_storage_assignments: []

- name: Prepare data storage location entries
  ansible.builtin.include_tasks: "setup_azure_data_locations.yml"
  loop: "{{ __data_storage.locations.azure }}"
  loop_control:
    loop_var: __data_store_azure_location

# Removal is controlled per storage entry by assignment.delete; when that is
# not set, data__teardown_deletes_assignments is used.
- name: Tear down Azure external role assignments
  when: __data_storage.assignment.delete | default(data__teardown_deletes_assignments)
  block:
    # Assignments are identified by scope, assignee and role definition. The
    # role must come from the outer loop variable (__data_storage) so that
    # read-only entries are matched against the Reader role; an undefined
    # name would fall through default(False) and always select Owner.
    - name: Process Azure role assignment removal for external data storage locations
      azure.azcollection.azure_rm_roleassignment:
        state: absent
        scope: "{{ __azure_data_location_assignment.scope }}"
        assignee_object_id: "{{ __azure_data_location_assignment.assignee }}"
        role_definition_id: >-
          {{
            __data_storage.read_only | default(False)
            | ternary(__azure_storage_blob_data_reader_uri, __azure_storage_blob_data_owner_uri)
          }}
      loop: "{{ __azure_data_location_storage_assignments }}"
      loop_control:
        loop_var: __azure_data_location_assignment
7 changes: 6 additions & 1 deletion roles/data/tasks/validate.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Copyright 2023 Cloudera, Inc. All Rights Reserved.
---

# Copyright 2021 Cloudera, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -11,3 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Dispatch to the validation task file for the configured infrastructure
# type (validate_<data__infra_type>.yml).
- name: Validate external data storage locations
  ansible.builtin.include_tasks:
    file: "validate_{{ data__infra_type }}.yml"
Loading
Loading