Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sandbox #79

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ require-dbt-version: [">=1.0.0", "<2.0.0"]

models:
jaffle_shop:
materialized: table
staging:

materialized: table
staging:
materialized: view
airbnb:
raw:
materialized: view
schema: airbnb


14 changes: 14 additions & 0 deletions macros/generate_schema_name.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{% macro generate_schema_name(custom_schema_name, node) -%}
    {#-
        Resolves the schema name a node is built in.

        custom_schema_name: schema configured for the node, or none when no
            custom schema is configured.
        node: accepted as part of the macro signature; not read here.

        Renders target.schema when custom_schema_name is none. Otherwise renders
        the custom name with surrounding whitespace trimmed; the custom name is
        used on its own, without target.schema in front of it.
    -#}
    {{- target.schema if custom_schema_name is none else (custom_schema_name | trim) -}}
{%- endmacro %}
7 changes: 7 additions & 0 deletions macros/not_null_columns.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{#
    Renders a query that returns every row of `model` in which at least one
    column is NULL.

    model: the relation to inspect; its columns are looked up through
        adapter.get_columns_in_relation.

    Each column contributes one "<column> IS NULL OR" term. The trailing FALSE
    closes the OR chain, so the query stays valid (and returns no rows) even
    when the relation reports no columns.
#}
{% macro no_nulls_in_columns(model) %}
    {%- set relation_columns = adapter.get_columns_in_relation(model) %}
    SELECT *
    FROM {{ model }}
    WHERE
    {%- for relation_column in relation_columns %}
        {{ relation_column.column }} IS NULL OR
    {%- endfor %}
        FALSE
{% endmacro %}
20 changes: 20 additions & 0 deletions models/airbnb/marts/dim_listings_with_hosts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

-- One row per staged listing with the attributes of its host attached.
-- The left join keeps listings that have no matching host; their host
-- columns come back as NULL.
with listings as (

    select * from {{ ref('stg_listings') }}

),

hosts as (

    select * from {{ ref('stg_hosts') }}

)

select
    listings.listing_id,
    listings.listing_name,
    listings.room_type,
    listings.minimum_nights,
    listings.price,
    hosts.host_id,
    -- taken from the host record, not from the listing
    hosts.created_at,
    hosts.host_name,
    hosts.is_superhost
from listings
left join hosts
    on listings.host_id = hosts.host_id
16 changes: 16 additions & 0 deletions models/airbnb/marts/fct_reviews.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{#
    Incremental model holding the staged reviews that have review text.

    First run / full refresh: every stg_reviews row with non-null review_text.
    Incremental run: only rows whose review_date is later than the latest
    review_date already stored in this table.

    on_schema_change = 'fail' makes the run fail when the columns produced by
    this query no longer match the existing table.
#}
{{
    config(
        materialized = 'incremental',
        on_schema_change = 'fail'
    )
}}

with stg_reviews as (

    select * from {{ ref('stg_reviews') }}

)

select *
from stg_reviews
where review_text is not null
{% if is_incremental() %}
    -- max() over an empty table is NULL, and "review_date > NULL" is never
    -- true, so without the coalesce an existing-but-empty target table would
    -- never receive any rows. The fallback date is simply earlier than any
    -- expected review.
    and review_date > (
        select coalesce(max(review_date), '1900-01-01') from {{ this }}
    )
{% endif %}
10 changes: 10 additions & 0 deletions models/airbnb/raw/src_airbnb.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version: 2

# Declares the raw Airbnb tables that models read through
# source('airbnb', '<table name>').
sources:
  - name: airbnb
    # NOTE(review): this looks like a personal/sandbox database; confirm it is
    # the intended location before running in another environment.
    database: user_pchauhan
    schema: airbnb
    tables:
      - name: raw_hosts
      - name: raw_listings
      - name: raw_reviews
24 changes: 24 additions & 0 deletions models/airbnb/raw/src_hosts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{#-
    Reads the raw hosts table declared as source airbnb.raw_hosts and renames
    its generic id/name columns to host-specific names. No rows are filtered
    and no values are changed.
-#}
with raw_hosts as (

    select * from {{ source('airbnb', 'raw_hosts') }}

)

select
    id as host_id,
    name as host_name,
    is_superhost,
    created_at,
    updated_at
from raw_hosts
26 changes: 26 additions & 0 deletions models/airbnb/raw/src_listings.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{#-
    Reads the raw listings table declared as source airbnb.raw_listings and
    renames columns to listing-specific names. The price is passed through
    untouched under the name price_str; no rows are filtered.
-#}
with raw_listings as (

    select * from {{ source('airbnb', 'raw_listings') }}

)

select
    id as listing_id,
    listing_url,
    name as listing_name,
    room_type,
    minimum_nights,
    host_id,
    price as price_str,
    created_at,
    updated_at
from raw_listings
23 changes: 23 additions & 0 deletions models/airbnb/raw/src_reviews.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{#-
    Reads the raw reviews table declared as source airbnb.raw_reviews and
    renames date/comments/sentiment to review-specific names. No rows are
    filtered and no values are changed.
-#}
with raw_reviews as (

    select * from {{ source('airbnb', 'raw_reviews') }}

)

select
    listing_id,
    date as review_date,
    reviewer_name,
    comments as review_text,
    sentiment as review_sentiment
from raw_reviews
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ with source as (
Normally we would select from the table here, but we are using seeds to load
our data in this project
#}
select * from {{ ref('raw_customers') }}
select * from {{ ref('customers1') }}

),

renamed as (

select
id as customer_id,
customer_id,
first_name,
last_name

Expand Down
10 changes: 10 additions & 0 deletions models/airbnb/staging/stg_hosts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@


-- Staged hosts: every src_hosts row, with a missing host name replaced by
-- 'Anonymous' and the time of staging recorded in staged_at.
with src_hosts as (

    select * from {{ ref('src_hosts') }}

)

select
    host_id,
    -- coalesce is the standard-SQL equivalent of the two-argument NVL
    coalesce(host_name, 'Anonymous') as host_name,
    is_superhost,
    created_at,
    updated_at,
    current_timestamp as staged_at
from src_hosts
11 changes: 11 additions & 0 deletions models/airbnb/staging/stg_listings.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

-- Staged listings: every src_listings row with minimum_nights normalised,
-- the price string converted to a number, and the time of staging recorded
-- in staged_at.
with src_listings as (

    select * from {{ ref('src_listings') }}

)

select
    listing_id,
    listing_name,
    room_type,
    host_id,
    -- A minimum of 0 nights is treated as 1. The second branch uses >= so that
    -- a minimum of exactly 1 night is kept; with "> 1" it matched no branch
    -- and became NULL. Negative or NULL values still yield NULL.
    case
        when minimum_nights = 0 then 1
        when minimum_nights >= 1 then minimum_nights
        else null
    end as minimum_nights,
    -- Strip the currency symbol before casting. An explicit scale keeps the
    -- cents: a bare "decimal" has scale 0 on warehouses such as Snowflake.
    -- NOTE(review): assumes prices fit decimal(10, 2) and contain no thousands
    -- separators - confirm against the raw data.
    cast(replace(price_str, '$', '') as decimal(10, 2)) as price,
    created_at,
    updated_at,
    current_timestamp as staged_at
from src_listings
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ with source as (
Normally we would select from the table here, but we are using seeds to load
our data in this project
#}
select * from {{ ref('raw_orders') }}
select * from user_pchauhan.jaffle_shop.orders

),

renamed as (

select
id as order_id,
user_id as customer_id,
order_id,
customer_id,
order_date,
status

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ with source as (
Normally we would select from the table here, but we are using seeds to load
our data in this project
#}
select * from {{ ref('raw_payments') }}
select * from {{ source('stripe','payment') }}

),

renamed as (

select
id as payment_id,
order_id,
payment_method,
orderid as order_id,
paymentmethod as payment_method ,

-- `amount` is currently stored in cents, so we convert it to dollars
amount / 100 as amount
Expand Down
9 changes: 9 additions & 0 deletions models/airbnb/staging/stg_reviews.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@



-- Staged reviews: every src_reviews row passed through unchanged, with the
-- time of staging recorded in staged_at.
with src_reviews as (

    select * from {{ ref('src_reviews') }}

)

select
    listing_id,
    review_date,
    reviewer_name,
    review_text,
    review_sentiment,
    current_timestamp as staged_at
from src_reviews
58 changes: 22 additions & 36 deletions models/customers.sql
Original file line number Diff line number Diff line change
@@ -1,47 +1,38 @@
with customers as (

select * from {{ ref('stg_customers') }}
select
customer_id,
first_name,
last_name

from {{ ref('stg_customers') }}

),

orders as (

select * from {{ ref('stg_orders') }}

),

payments as (
select
order_id,
customer_id,
order_date,
status

select * from {{ ref('stg_payments') }}
from user_pchauhan.jaffle_shop.orders

),

customer_orders as (

select
select
customer_id,

min(order_date) as first_order,
max(order_date) as most_recent_order,
min(order_date) as first_order_date,
max(order_date) as most_recent_order_date,
count(order_id) as number_of_orders
from orders

group by customer_id

),

customer_payments as (

select
orders.customer_id,
sum(amount) as total_amount

from payments

left join orders on
payments.order_id = orders.order_id
from orders

group by orders.customer_id
group by 1

),

Expand All @@ -51,19 +42,14 @@ final as (
customers.customer_id,
customers.first_name,
customers.last_name,
customer_orders.first_order,
customer_orders.most_recent_order,
customer_orders.number_of_orders,
customer_payments.total_amount as customer_lifetime_value
customer_orders.first_order_date,
customer_orders.most_recent_order_date,
coalesce(customer_orders.number_of_orders, 0) as number_of_orders

from customers

left join customer_orders
on customers.customer_id = customer_orders.customer_id

left join customer_payments
on customers.customer_id = customer_payments.customer_id
left join customer_orders using (customer_id)

)

select * from final
select * from final
Loading