[v5.0.0] Add query acceleration costs, incrementalize cost per query,…

… account for new service type (#141) * Lotta tings * Fix typos * Add changelog * Update desc * Make sqlfluff happy
get-select · Jan 14, 2024 · b69f913 · b69f913
1 parent 4533476
commit b69f913
Show file tree

Hide file tree

Showing 11 changed files with 113 additions and 34 deletions.
diff --git a/.changes/5.0.0.md b/.changes/5.0.0.md
@@ -0,0 +1,19 @@
+## dbt-snowflake-monitoring 5.0.0 - January 14, 2024
+
+### Features
+
+- Add query acceleration costs and update cost per query algorithm to include them ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))
+- Make cost_per_query model incremental ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))
+
+### Breaking Changes
+
+- Add entity_id to stg_metering_history ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))
+
+To upgrade from 4.x.x, you'll need to full refresh the `stg_metering_history` model.
+
+### Fixes
+
+- Support quoting: true ([#139](https://github.com/get-select/dbt-snowflake-monitoring/pull/139))
+
+### Contributors
+- [@ernestoongaro](https://github.com/ernestoongaro) (Fixes)
diff --git a/.changes/unreleased/Fixes-20231130-140709.yaml b/.changes/unreleased/Fixes-20231130-140709.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,27 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html),
 and is generated by [Changie](https://github.com/miniscruff/changie).
 
+## dbt-snowflake-monitoring 5.0.0 - January 14, 2024
+
+### Features
+
+- Add query acceleration costs and update cost per query algorithm to include them ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))
+- Make cost_per_query model incremental ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))
+
+### Breaking Changes
+
+- Add entity_id to stg_metering_history ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))
+
+To upgrade from 4.x.x, you'll need to full refresh the `stg_metering_history` model.
+
+### Fixes
+
+- Support quoting: true ([#139](https://github.com/get-select/dbt-snowflake-monitoring/pull/139))
+
+### Contributors
+- [@ernestoongaro](https://github.com/ernestoongaro) (Fixes)
+
+
 ## dbt-snowflake-monitoring 4.6.0 - November 09, 2023
 
 ### Features

diff --git a/dbt_project.yml b/dbt_project.yml
@@ -1,5 +1,5 @@
 name: 'dbt_snowflake_monitoring'
-version: '4.6.0'
+version: '5.0.0'
 config-version: 2
 
 profile: dbt_snowflake_monitoring

diff --git a/models/cost_per_query.sql b/models/cost_per_query.sql
@@ -1,4 +1,7 @@
-{{ config(materialized='table') }}
+{{ config(
+    materialized='incremental',
+    unique_key=['query_id', 'start_time'],
+) }}
 
 with
 stop_threshold as (
@@ -21,9 +24,15 @@ filtered_queries as (
             start_time
         ) as execution_start_time,
         start_time,
-        end_time
+        end_time,
+        query_acceleration_bytes_scanned
     from {{ ref('stg_query_history') }}
-    where end_time <= (select latest_ts from stop_threshold)
+    where true
+        and end_time <= (select latest_ts from stop_threshold)
+        {% if is_incremental() %}
+        -- account for late arriving queries
+        and end_time > (select coalesce(dateadd(day, -3, max(end_time)), '1970-01-01') from {{ this }})
+        {% endif %}
 ),
 
 hours_list as (
@@ -34,7 +43,12 @@ hours_list as (
             dateadd('day', '+1', current_date::timestamp_tz)
         ) as hour_start,
         dateadd('hour', '+1', hour_start) as hour_end
+
+    {% if is_incremental() %}
+    from table(generator(rowcount => (24 * 7)))
+    {% else %}
     from table(generator(rowcount => (24 * 730)))
+    {% endif %}
 ),
 
 -- 1 row per hour a query ran
@@ -56,25 +70,32 @@ query_seconds_per_hour as (
         datediff('millisecond', greatest(execution_start_time, hour_start), least(end_time, hour_end)) as num_milliseconds_query_ran,
         sum(num_milliseconds_query_ran) over (partition by warehouse_id, hour_start) as total_query_milliseconds_in_hour,
         div0(num_milliseconds_query_ran, total_query_milliseconds_in_hour) as fraction_of_total_query_time_in_hour,
+        sum(query_acceleration_bytes_scanned) over (partition by warehouse_id, hour_start) as total_query_acceleration_bytes_scanned_in_hour,
+        div0(query_acceleration_bytes_scanned, total_query_acceleration_bytes_scanned_in_hour) as fraction_of_total_query_acceleration_bytes_scanned_in_hour,
         hour_start as hour
     from query_hours
 ),
 
 credits_billed_hourly as (
     select
         start_time as hour,
-        warehouse_id,
-        credits_used_compute,
-        credits_used_cloud_services
-    from {{ ref('stg_warehouse_metering_history') }}
+        entity_id as warehouse_id,
+        sum(iff(service_type = 'WAREHOUSE_METERING', credits_used_compute, 0)) as credits_used_compute,
+        sum(iff(service_type = 'WAREHOUSE_METERING', credits_used_cloud_services, 0)) as credits_used_cloud_services,
+        sum(iff(service_type = 'QUERY_ACCELERATION', credits_used_compute, 0)) as credits_used_query_acceleration
+    from {{ ref('stg_metering_history') }}
+    where true
+        and service_type in ('QUERY_ACCELERATION', 'WAREHOUSE_METERING')
+    group by 1, 2
 ),
 
 query_cost as (
     select
         query_seconds_per_hour.*,
-        credits_billed_hourly.credits_used_compute * daily_rates.effective_rate as actual_warehouse_cost,
-        credits_billed_hourly.credits_used_compute * query_seconds_per_hour.fraction_of_total_query_time_in_hour * daily_rates.effective_rate as allocated_compute_cost_in_hour,
-        credits_billed_hourly.credits_used_compute * query_seconds_per_hour.fraction_of_total_query_time_in_hour as allocated_compute_credits_in_hour
+        credits_billed_hourly.credits_used_compute * query_seconds_per_hour.fraction_of_total_query_time_in_hour as allocated_compute_credits_in_hour,
+        allocated_compute_credits_in_hour * daily_rates.effective_rate as allocated_compute_cost_in_hour,
+        credits_billed_hourly.credits_used_query_acceleration * query_seconds_per_hour.fraction_of_total_query_acceleration_bytes_scanned_in_hour as allocated_query_acceleration_credits_in_hour,
+        allocated_query_acceleration_credits_in_hour * daily_rates.effective_rate as allocated_query_acceleration_cost_in_hour
     from query_seconds_per_hour
     inner join credits_billed_hourly
         on query_seconds_per_hour.warehouse_id = credits_billed_hourly.warehouse_id
@@ -93,6 +114,8 @@ cost_per_query as (
         any_value(execution_start_time) as execution_start_time,
         sum(allocated_compute_cost_in_hour) as compute_cost,
         sum(allocated_compute_credits_in_hour) as compute_credits,
+        sum(allocated_query_acceleration_cost_in_hour) as query_acceleration_cost,
+        sum(allocated_query_acceleration_credits_in_hour) as query_acceleration_credits,
         any_value(credits_used_cloud_services) as credits_used_cloud_services,
         any_value(ran_on_warehouse) as ran_on_warehouse
     from query_cost
@@ -117,6 +140,8 @@ all_queries as (
         execution_start_time,
         compute_cost,
         compute_credits,
+        query_acceleration_cost,
+        query_acceleration_credits,
         credits_used_cloud_services,
         ran_on_warehouse
     from cost_per_query
@@ -130,6 +155,8 @@ all_queries as (
         execution_start_time,
         0 as compute_cost,
         0 as compute_credits,
+        0 as query_acceleration_cost,
+        0 as query_acceleration_credits,
         credits_used_cloud_services,
         ran_on_warehouse
     from filtered_queries
@@ -144,14 +171,16 @@ select
     all_queries.execution_start_time,
     all_queries.compute_cost,
     all_queries.compute_credits,
+    all_queries.query_acceleration_cost,
+    all_queries.query_acceleration_credits,
     -- For the most recent day, which is not yet complete, this calculation won't be perfect.
     -- For example, at 12PM on the latest day, it's possible that cloud credits make up <10% of compute cost, so the queries
     -- from that day are not allocated any cloud_services_cost. The next time the model runs, after we have the full day of data,
     -- this may change if cloud credits make up >10% of compute cost.
     (div0(all_queries.credits_used_cloud_services, credits_billed_daily.daily_credits_used_cloud_services) * credits_billed_daily.daily_billable_cloud_services) * coalesce(daily_rates.effective_rate, current_rates.effective_rate) as cloud_services_cost,
     div0(all_queries.credits_used_cloud_services, credits_billed_daily.daily_credits_used_cloud_services) * credits_billed_daily.daily_billable_cloud_services as cloud_services_credits,
-    all_queries.compute_cost + cloud_services_cost as query_cost,
-    all_queries.compute_credits + cloud_services_credits as query_credits,
+    all_queries.compute_cost + all_queries.query_acceleration_cost + cloud_services_cost as query_cost,
+    all_queries.compute_credits + all_queries.query_acceleration_credits + cloud_services_credits as query_credits,
     all_queries.ran_on_warehouse,
     coalesce(daily_rates.currency, current_rates.currency) as currency
 from all_queries

diff --git a/models/dbt_queries.sql b/models/dbt_queries.sql
@@ -51,7 +51,7 @@ select
 from {{ ref('query_history_enriched') }}
 where dbt_metadata is not null
     {% if is_incremental() %}
-        -- Conservatively re-process the last 7 days to account for late arriving rates data
+        -- Conservatively re-process the last 3 days to account for late arriving rates data
         -- which changes the cost per query
-        and end_time > (select dateadd(day, -7, max(end_time)) from {{ this }})
+        and end_time > (select dateadd(day, -3, max(end_time)) from {{ this }})
     {% endif %}
diff --git a/models/hourly_spend.sql b/models/hourly_spend.sql
@@ -365,7 +365,7 @@ query_acceleration_spend_hourly as (
         hours.hour,
         'Query Acceleration' as service,
         null as storage_type,
-        null as warehouse_name,
+        stg_metering_history.name as warehouse_name,
         null as database_name,
         coalesce(
             sum(

diff --git a/models/query_history_enriched.sql b/models/query_history_enriched.sql
@@ -24,26 +24,28 @@ query_history as (
     from {{ ref('stg_query_history') }}
 
     {% if is_incremental() %}
-        -- Conservatively re-process the last 7 days to account for late arriving rates data
+        -- Conservatively re-process the last 3 days to account for late arriving rates data
         -- which changes the cost per query
-        where end_time > (select dateadd(day, -7, max(end_time)) from {{ this }})
+        where end_time > (select dateadd(day, -3, max(end_time)) from {{ this }})
     {% endif %}
 ),
 
 cost_per_query as (
     select *
     from {{ ref('cost_per_query') }}
     {% if is_incremental() %}
-        -- Conservatively re-process the last 7 days to account for late arriving rates data
+        -- Conservatively re-process the last 3 days to account for late arriving rates data
         -- which changes the cost per query
-        where end_time > (select dateadd(day, -7, max(end_time)) from {{ this }})
+        where end_time > (select dateadd(day, -3, max(end_time)) from {{ this }})
     {% endif %}
 )
 
 select
     cost_per_query.query_id,
     cost_per_query.compute_cost,
     cost_per_query.compute_credits,
+    cost_per_query.query_acceleration_cost,
+    cost_per_query.query_acceleration_credits,
     cost_per_query.cloud_services_cost,
     cost_per_query.cloud_services_credits,
     cost_per_query.query_cost,

diff --git a/models/staging/stg_metering_history.sql b/models/staging/stg_metering_history.sql
@@ -1,17 +1,22 @@
-{{ config(materialized='incremental') }}
+{{ config(
+    materialized='incremental', 
+    unique_key=['service_type', 'start_time', 'entity_id'],
+) }}
 
 select
-    name,
-    credits_used_compute,
+    service_type,
     start_time,
     end_time,
-    service_type,
+    entity_id,
+    name,
+    credits_used_compute,
     credits_used_cloud_services,
     credits_used
 from {{ source('snowflake_account_usage', 'metering_history') }}
 
 {% if is_incremental() %}
-    where end_time > (select max(end_time) from {{ this }})
+    -- account for changing metering data
+    where end_time > (select coalesce(dateadd(day, -7, max(end_time)), '1970-01-01') from {{ this }})
 {% endif %}
 
 order by start_time asc
diff --git a/models/staging/stg_rate_sheet_daily.sql b/models/staging/stg_rate_sheet_daily.sql
@@ -12,6 +12,11 @@ select
     usage_type,
     currency,
     effective_rate,
-    service_type
+    case
+        -- Have only seen this on one account. Normally it is COMPUTE, and all our downstream models rely on that
+        -- May adjust this in the future if Snowflake is permanently changing these fields for all accounts
+        when service_type = 'WAREHOUSE_METERING' then 'COMPUTE'
+        else service_type
+    end as service_type
 from {{ source('snowflake_organization_usage', 'rate_sheet_daily') }}
 order by date
diff --git a/models/staging/stg_warehouse_metering_history.sql b/models/staging/stg_warehouse_metering_history.sql
@@ -1,4 +1,7 @@
-{{ config(materialized='incremental') }}
+{{ config(
+    materialized='incremental', 
+    unique_key=['start_time', 'warehouse_id'],
+) }}
 
 select
     start_time,
@@ -11,7 +14,8 @@ select
 from {{ source('snowflake_account_usage', 'warehouse_metering_history') }}
 
 {% if is_incremental() %}
-    where end_time > (select max(end_time) from {{ this }})
+    -- account for changing metering data
+    where end_time > (select coalesce(dateadd(day, -7, max(end_time)), '1970-01-01') from {{ this }})
 {% endif %}
 
 order by start_time