-
Notifications
You must be signed in to change notification settings - Fork 170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat!: migrating to TheLook Ecommerce dataset #257
Changes from 59 commits
eb68dcb
dd0e158
5831fa2
22271ea
dd4ac1d
d27e903
d8a45b4
9a559f8
cd6ad43
4b452cd
359148c
9f77d83
1eb414f
2277866
0fddd9d
2e94bb0
e0e76da
0fc4a22
7b63f3f
d7bc72d
ed0dd4a
8ffee33
559b426
7353572
afb5483
7e553ad
b3a8516
bf63e23
db37112
051c1f2
e6fba8e
98e55e4
4150cd0
dbf6672
8bdd2ed
c55a63d
bafe1db
6e9a7ed
3219110
6aa8f8e
562fd2e
e95dc23
667eeb9
765fa96
91c8923
990d813
ddcace7
4e77194
727ddf5
bbe9467
da0cf37
e14b5aa
292289f
592babc
3abcf76
83bd6f1
f13318c
3e585a2
87ec2ba
1f03801
7180b9b
747ffa9
37aa014
3faa38f
9b40a01
080b796
ec2527d
fb52bc6
781c1e4
9e7aa8b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,8 @@ resource "google_bigquery_dataset" "ds_edw" { | |
location = var.region | ||
labels = var.labels | ||
delete_contents_on_destroy = var.force_destroy | ||
|
||
depends_on = [time_sleep.wait_after_apis] | ||
} | ||
|
||
# # Create a BigQuery connection | ||
|
@@ -33,6 +35,7 @@ resource "google_bigquery_connection" "ds_connection" { | |
location = var.region | ||
friendly_name = "Storage Bucket Connection" | ||
cloud_resource {} | ||
depends_on = [time_sleep.wait_after_apis] | ||
} | ||
|
||
# # Grant IAM access to the BigQuery Connection account for Cloud Storage | ||
|
@@ -48,22 +51,127 @@ resource "google_storage_bucket_iam_binding" "bq_connection_iam_object_viewer" { | |
] | ||
} | ||
|
||
# # Create a BigQuery external table | ||
resource "google_bigquery_table" "tbl_edw_taxi" { | ||
# # Create a Biglake table for events with metadata caching | ||
resource "google_bigquery_table" "tbl_edw_events" { | ||
dataset_id = google_bigquery_dataset.ds_edw.dataset_id | ||
table_id = "events" | ||
project = module.project-services.project_id | ||
deletion_protection = var.deletion_protection | ||
# max_staleness = "1:0:0" | ||
|
||
schema = file("${path.module}/src/schema/events_schema.json") | ||
|
||
external_data_configuration { | ||
autodetect = true | ||
connection_id = google_bigquery_connection.ds_connection.name | ||
source_format = "PARQUET" | ||
source_uris = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/events.parquet"] | ||
# metadata_cache_mode = "AUTOMATIC" | ||
} | ||
|
||
labels = var.labels | ||
|
||
depends_on = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can remove these explicit `depends_on` entries, since the block already references those resources directly above, which gives Terraform the dependency implicitly (this applies to the other tables below as well). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Cleaned up these dependencies, as well as those across all .tf files. |
||
google_bigquery_connection.ds_connection, | ||
google_storage_bucket.raw_bucket, | ||
] | ||
} | ||
|
||
# # Create a Biglake table for inventory_items | ||
resource "google_bigquery_table" "tbl_edw_inventory_items" { | ||
dataset_id = google_bigquery_dataset.ds_edw.dataset_id | ||
table_id = "taxi_trips" | ||
table_id = "inventory_items" | ||
project = module.project-services.project_id | ||
deletion_protection = var.deletion_protection | ||
# max_staleness = "1:0:0" | ||
|
||
schema = file("${path.module}/src/schema/inventory_items_schema.json") | ||
|
||
external_data_configuration { | ||
autodetect = true | ||
connection_id = "${module.project-services.project_id}.${var.region}.ds_connection" | ||
connection_id = google_bigquery_connection.ds_connection.name | ||
source_format = "PARQUET" | ||
source_uris = ["gs://${google_storage_bucket.raw_bucket.name}/new-york-taxi-trips/tlc-yellow-trips-2022/taxi-*.Parquet"] | ||
source_uris = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/inventory_items.parquet"] | ||
# metadata_cache_mode = "AUTOMATIC" | ||
} | ||
|
||
labels = var.labels | ||
|
||
depends_on = [ | ||
google_bigquery_connection.ds_connection, | ||
google_storage_bucket.raw_bucket, | ||
] | ||
} | ||
|
||
# # Create a Biglake table with metadata caching for order_items | ||
resource "google_bigquery_table" "tbl_edw_order_items" { | ||
dataset_id = google_bigquery_dataset.ds_edw.dataset_id | ||
table_id = "order_items" | ||
project = module.project-services.project_id | ||
deletion_protection = var.deletion_protection | ||
# max_staleness = "1:0:0" | ||
|
||
schema = file("${path.module}/src/schema/order_items_schema.json") | ||
|
||
external_data_configuration { | ||
autodetect = true | ||
connection_id = google_bigquery_connection.ds_connection.name | ||
source_format = "PARQUET" | ||
source_uris = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/order_items.parquet"] | ||
# metadata_cache_mode = "AUTOMATIC" | ||
} | ||
|
||
labels = var.labels | ||
|
||
depends_on = [ | ||
google_bigquery_connection.ds_connection, | ||
google_storage_bucket.raw_bucket, | ||
] | ||
} | ||
|
||
# # Create a Biglake table for orders | ||
resource "google_bigquery_table" "tbl_edw_orders" { | ||
dataset_id = google_bigquery_dataset.ds_edw.dataset_id | ||
table_id = "orders" | ||
project = module.project-services.project_id | ||
deletion_protection = var.deletion_protection | ||
# max_staleness = "1:0:0" | ||
|
||
schema = file("${path.module}/src/schema/orders_schema.json") | ||
|
||
external_data_configuration { | ||
autodetect = true | ||
connection_id = google_bigquery_connection.ds_connection.name | ||
source_format = "PARQUET" | ||
source_uris = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/orders.parquet"] | ||
# metadata_cache_mode = "AUTOMATIC" | ||
} | ||
|
||
schema = file("${path.module}/src/taxi_trips_schema.json") | ||
labels = var.labels | ||
|
||
depends_on = [ | ||
google_bigquery_connection.ds_connection, | ||
google_storage_bucket.raw_bucket, | ||
] | ||
} | ||
|
||
# # Create a Biglake table for products | ||
resource "google_bigquery_table" "tbl_edw_products" { | ||
dataset_id = google_bigquery_dataset.ds_edw.dataset_id | ||
table_id = "products" | ||
project = module.project-services.project_id | ||
deletion_protection = var.deletion_protection | ||
# max_staleness = "1:0:0" | ||
|
||
schema = file("${path.module}/src/schema/products_schema.json") | ||
|
||
external_data_configuration { | ||
autodetect = true | ||
connection_id = google_bigquery_connection.ds_connection.name | ||
source_format = "PARQUET" | ||
source_uris = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/products.parquet"] | ||
# metadata_cache_mode = "AUTOMATIC" | ||
} | ||
|
||
labels = var.labels | ||
|
||
|
@@ -73,8 +181,33 @@ resource "google_bigquery_table" "tbl_edw_taxi" { | |
] | ||
} | ||
|
||
# # Create a Biglake table for users | ||
resource "google_bigquery_table" "tbl_edw_users" { | ||
dataset_id = google_bigquery_dataset.ds_edw.dataset_id | ||
table_id = "users" | ||
project = module.project-services.project_id | ||
deletion_protection = var.deletion_protection | ||
# max_staleness = "1:0:0" | ||
|
||
schema = file("${path.module}/src/schema/users_schema.json") | ||
|
||
external_data_configuration { | ||
autodetect = true | ||
connection_id = google_bigquery_connection.ds_connection.name | ||
source_format = "PARQUET" | ||
source_uris = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/users.parquet"] | ||
# metadata_cache_mode = "AUTOMATIC" | ||
} | ||
|
||
labels = var.labels | ||
depends_on = [ | ||
google_bigquery_connection.ds_connection, | ||
google_storage_bucket.raw_bucket, | ||
] | ||
} | ||
|
||
# Load Queries for Stored Procedure Execution | ||
# # Load Lookup Data Tables | ||
# # Load Distribution Center Lookup Data Tables | ||
resource "google_bigquery_routine" "sp_provision_lookup_tables" { | ||
project = module.project-services.project_id | ||
dataset_id = google_bigquery_dataset.ds_edw.dataset_id | ||
|
@@ -88,9 +221,8 @@ resource "google_bigquery_routine" "sp_provision_lookup_tables" { | |
] | ||
} | ||
|
||
|
||
# # Add Looker Studio Data Report Procedure | ||
resource "google_bigquery_routine" "sproc_sp_demo_datastudio_report" { | ||
# Add Looker Studio Data Report Procedure | ||
resource "google_bigquery_routine" "sproc_sp_demo_lookerstudio_report" { | ||
project = module.project-services.project_id | ||
dataset_id = google_bigquery_dataset.ds_edw.dataset_id | ||
routine_id = "sp_lookerstudio_report" | ||
|
@@ -99,7 +231,7 @@ resource "google_bigquery_routine" "sproc_sp_demo_datastudio_report" { | |
definition_body = templatefile("${path.module}/src/sql/sp_lookerstudio_report.sql", { project_id = module.project-services.project_id }) | ||
|
||
depends_on = [ | ||
google_bigquery_table.tbl_edw_taxi, | ||
google_bigquery_table.tbl_edw_inventory_items, | ||
davenportjw marked this conversation as resolved.
Show resolved
Hide resolved
|
||
] | ||
} | ||
|
||
|
@@ -113,11 +245,12 @@ resource "google_bigquery_routine" "sp_sample_queries" { | |
definition_body = templatefile("${path.module}/src/sql/sp_sample_queries.sql", { project_id = module.project-services.project_id }) | ||
|
||
depends_on = [ | ||
google_bigquery_table.tbl_edw_taxi, | ||
google_bigquery_table.tbl_edw_inventory_items, | ||
] | ||
} | ||
|
||
# # Add Bigquery ML Model | ||
|
||
# Add Bigquery ML Model | ||
resource "google_bigquery_routine" "sp_bigqueryml_model" { | ||
project = module.project-services.project_id | ||
dataset_id = google_bigquery_dataset.ds_edw.dataset_id | ||
|
@@ -127,21 +260,7 @@ resource "google_bigquery_routine" "sp_bigqueryml_model" { | |
definition_body = templatefile("${path.module}/src/sql/sp_bigqueryml_model.sql", { project_id = module.project-services.project_id }) | ||
|
||
depends_on = [ | ||
google_bigquery_table.tbl_edw_taxi, | ||
] | ||
} | ||
|
||
# # Add Translation Scripts | ||
resource "google_bigquery_routine" "sp_sample_translation_queries" { | ||
project = module.project-services.project_id | ||
dataset_id = google_bigquery_dataset.ds_edw.dataset_id | ||
routine_id = "sp_sample_translation_queries" | ||
routine_type = "PROCEDURE" | ||
language = "SQL" | ||
definition_body = templatefile("${path.module}/src/sql/sp_sample_translation_queries.sql", { project_id = module.project-services.project_id }) | ||
|
||
depends_on = [ | ||
google_bigquery_table.tbl_edw_taxi, | ||
google_bigquery_table.tbl_edw_inventory_items, | ||
] | ||
} | ||
|
||
|
@@ -151,6 +270,8 @@ resource "google_project_service_identity" "bigquery_data_transfer_sa" { | |
provider = google-beta | ||
project = module.project-services.project_id | ||
service = "bigquerydatatransfer.googleapis.com" | ||
|
||
depends_on = [time_sleep.wait_after_apis] | ||
} | ||
|
||
# # Grant the DTS service account access | ||
|
@@ -162,6 +283,8 @@ resource "google_project_iam_member" "dts_service_account_roles" { | |
project = module.project-services.project_id | ||
role = each.key | ||
member = "serviceAccount:${google_project_service_identity.bigquery_data_transfer_sa.email}" | ||
|
||
depends_on = [time_sleep.wait_after_apis] | ||
} | ||
|
||
# Create specific service account for DTS Run | ||
|
@@ -182,6 +305,8 @@ resource "google_project_iam_member" "dts_roles" { | |
project = module.project-services.project_id | ||
role = each.key | ||
member = "serviceAccount:${google_service_account.dts.email}" | ||
|
||
depends_on = [google_service_account.dts] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This one isn't needed, since line 307 already references an output attribute of the DTS service account resource (`google_service_account.dts.email`), which creates the dependency implicitly. |
||
} | ||
|
||
# # Grant the DTS specific service account Token Creator to the DTS Service Identity | ||
|
@@ -194,6 +319,8 @@ resource "google_service_account_iam_binding" "dts_token_creator" { | |
|
||
depends_on = [ | ||
google_project_iam_member.dts_service_account_roles, | ||
google_service_account.dts, | ||
google_project_service_identity.bigquery_data_transfer_sa | ||
] | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we aren't planning to use these, I'd probably just remove them to keep the code simple.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I kept them in here because I thought it would be good for folks to see the syntax, but commented them out so that the Looker Studio report works immediately.