Skip to content

Commit

Permalink
[v5.0.0] Add query acceleration costs, incrementalize cost per query,…
Browse files Browse the repository at this point in the history
… account for new service type (#141)

* Lotta tings

* Fix typos

* Add changelog

* Update desc

* Make sqlfluff happy
  • Loading branch information
ian-whitestone authored Jan 14, 2024
1 parent 4533476 commit b69f913
Show file tree
Hide file tree
Showing 11 changed files with 113 additions and 34 deletions.
19 changes: 19 additions & 0 deletions .changes/5.0.0.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
## dbt-snowflake-monitoring 5.0.0 - January 14, 2024

### Features

- Add query acceleration costs and update cost per query algorithm to include them ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))
- Make cost_per_query model incremental ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))

### Breaking Changes

- Add entity_id to stg_metering_history ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))

To upgrade from 4.x.x, you'll need to full refresh the `stg_metering_history` model.

### Fixes

- Support quoting: true ([#139](https://github.com/get-select/dbt-snowflake-monitoring/pull/139))

### Contributors
- [@ernestoongaro](https://github.com/ernestoongaro) (Fixes)
6 changes: 0 additions & 6 deletions .changes/unreleased/Fixes-20231130-140709.yaml

This file was deleted.

21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,27 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html),
and is generated by [Changie](https://github.com/miniscruff/changie).

## dbt-snowflake-monitoring 5.0.0 - January 14, 2024

### Features

- Add query acceleration costs and update cost per query algorithm to include them ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))
- Make cost_per_query model incremental ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))

### Breaking Changes

- Add entity_id to stg_metering_history ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141))

To upgrade from 4.x.x, you'll need to full refresh the `stg_metering_history` model.

### Fixes

- Support quoting: true ([#139](https://github.com/get-select/dbt-snowflake-monitoring/pull/139))

### Contributors
- [@ernestoongaro](https://github.com/ernestoongaro) (Fixes)


## dbt-snowflake-monitoring 4.6.0 - November 09, 2023

### Features
Expand Down
2 changes: 1 addition & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: 'dbt_snowflake_monitoring'
version: '4.6.0'
version: '5.0.0'
config-version: 2

profile: dbt_snowflake_monitoring
Expand Down
53 changes: 41 additions & 12 deletions models/cost_per_query.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
{{ config(materialized='table') }}
{{ config(
materialized='incremental',
unique_key=['query_id', 'start_time'],
) }}

with
stop_threshold as (
Expand All @@ -21,9 +24,15 @@ filtered_queries as (
start_time
) as execution_start_time,
start_time,
end_time
end_time,
query_acceleration_bytes_scanned
from {{ ref('stg_query_history') }}
where end_time <= (select latest_ts from stop_threshold)
where true
and end_time <= (select latest_ts from stop_threshold)
{% if is_incremental() %}
-- account for late arriving queries
and end_time > (select coalesce(dateadd(day, -3, max(end_time)), '1970-01-01') from {{ this }})
{% endif %}
),

hours_list as (
Expand All @@ -34,7 +43,12 @@ hours_list as (
dateadd('day', '+1', current_date::timestamp_tz)
) as hour_start,
dateadd('hour', '+1', hour_start) as hour_end

{% if is_incremental() %}
from table(generator(rowcount => (24 * 7)))
{% else %}
from table(generator(rowcount => (24 * 730)))
{% endif %}
),

-- 1 row per hour a query ran
Expand All @@ -56,25 +70,32 @@ query_seconds_per_hour as (
datediff('millisecond', greatest(execution_start_time, hour_start), least(end_time, hour_end)) as num_milliseconds_query_ran,
sum(num_milliseconds_query_ran) over (partition by warehouse_id, hour_start) as total_query_milliseconds_in_hour,
div0(num_milliseconds_query_ran, total_query_milliseconds_in_hour) as fraction_of_total_query_time_in_hour,
sum(query_acceleration_bytes_scanned) over (partition by warehouse_id, hour_start) as total_query_acceleration_bytes_scanned_in_hour,
div0(query_acceleration_bytes_scanned, total_query_acceleration_bytes_scanned_in_hour) as fraction_of_total_query_acceleration_bytes_scanned_in_hour,
hour_start as hour
from query_hours
),

credits_billed_hourly as (
select
start_time as hour,
warehouse_id,
credits_used_compute,
credits_used_cloud_services
from {{ ref('stg_warehouse_metering_history') }}
entity_id as warehouse_id,
sum(iff(service_type = 'WAREHOUSE_METERING', credits_used_compute, 0)) as credits_used_compute,
sum(iff(service_type = 'WAREHOUSE_METERING', credits_used_cloud_services, 0)) as credits_used_cloud_services,
sum(iff(service_type = 'QUERY_ACCELERATION', credits_used_compute, 0)) as credits_used_query_acceleration
from {{ ref('stg_metering_history') }}
where true
and service_type in ('QUERY_ACCELERATION', 'WAREHOUSE_METERING')
group by 1, 2
),

query_cost as (
select
query_seconds_per_hour.*,
credits_billed_hourly.credits_used_compute * daily_rates.effective_rate as actual_warehouse_cost,
credits_billed_hourly.credits_used_compute * query_seconds_per_hour.fraction_of_total_query_time_in_hour * daily_rates.effective_rate as allocated_compute_cost_in_hour,
credits_billed_hourly.credits_used_compute * query_seconds_per_hour.fraction_of_total_query_time_in_hour as allocated_compute_credits_in_hour
credits_billed_hourly.credits_used_compute * query_seconds_per_hour.fraction_of_total_query_time_in_hour as allocated_compute_credits_in_hour,
allocated_compute_credits_in_hour * daily_rates.effective_rate as allocated_compute_cost_in_hour,
credits_billed_hourly.credits_used_query_acceleration * query_seconds_per_hour.fraction_of_total_query_acceleration_bytes_scanned_in_hour as allocated_query_acceleration_credits_in_hour,
allocated_query_acceleration_credits_in_hour * daily_rates.effective_rate as allocated_query_acceleration_cost_in_hour
from query_seconds_per_hour
inner join credits_billed_hourly
on query_seconds_per_hour.warehouse_id = credits_billed_hourly.warehouse_id
Expand All @@ -93,6 +114,8 @@ cost_per_query as (
any_value(execution_start_time) as execution_start_time,
sum(allocated_compute_cost_in_hour) as compute_cost,
sum(allocated_compute_credits_in_hour) as compute_credits,
sum(allocated_query_acceleration_cost_in_hour) as query_acceleration_cost,
sum(allocated_query_acceleration_credits_in_hour) as query_acceleration_credits,
any_value(credits_used_cloud_services) as credits_used_cloud_services,
any_value(ran_on_warehouse) as ran_on_warehouse
from query_cost
Expand All @@ -117,6 +140,8 @@ all_queries as (
execution_start_time,
compute_cost,
compute_credits,
query_acceleration_cost,
query_acceleration_credits,
credits_used_cloud_services,
ran_on_warehouse
from cost_per_query
Expand All @@ -130,6 +155,8 @@ all_queries as (
execution_start_time,
0 as compute_cost,
0 as compute_credits,
0 as query_acceleration_cost,
0 as query_acceleration_credits,
credits_used_cloud_services,
ran_on_warehouse
from filtered_queries
Expand All @@ -144,14 +171,16 @@ select
all_queries.execution_start_time,
all_queries.compute_cost,
all_queries.compute_credits,
all_queries.query_acceleration_cost,
all_queries.query_acceleration_credits,
-- For the most recent day, which is not yet complete, this calculation won't be perfect.
-- For example, at 12PM on the latest day, it's possible that cloud credits make up <10% of compute cost, so the queries
-- from that day are not allocated any cloud_services_cost. The next time the model runs, after we have the full day of data,
-- this may change if cloud credits make up >10% of compute cost.
(div0(all_queries.credits_used_cloud_services, credits_billed_daily.daily_credits_used_cloud_services) * credits_billed_daily.daily_billable_cloud_services) * coalesce(daily_rates.effective_rate, current_rates.effective_rate) as cloud_services_cost,
div0(all_queries.credits_used_cloud_services, credits_billed_daily.daily_credits_used_cloud_services) * credits_billed_daily.daily_billable_cloud_services as cloud_services_credits,
all_queries.compute_cost + cloud_services_cost as query_cost,
all_queries.compute_credits + cloud_services_credits as query_credits,
all_queries.compute_cost + all_queries.query_acceleration_cost + cloud_services_cost as query_cost,
all_queries.compute_credits + all_queries.query_acceleration_credits + cloud_services_credits as query_credits,
all_queries.ran_on_warehouse,
coalesce(daily_rates.currency, current_rates.currency) as currency
from all_queries
Expand Down
4 changes: 2 additions & 2 deletions models/dbt_queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ select
from {{ ref('query_history_enriched') }}
where dbt_metadata is not null
{% if is_incremental() %}
-- Conservatively re-process the last 7 days to account for late arriving rates data
-- Conservatively re-process the last 3 days to account for late arriving rates data
-- which changes the cost per query
and end_time > (select dateadd(day, -7, max(end_time)) from {{ this }})
and end_time > (select dateadd(day, -3, max(end_time)) from {{ this }})
{% endif %}
2 changes: 1 addition & 1 deletion models/hourly_spend.sql
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ query_acceleration_spend_hourly as (
hours.hour,
'Query Acceleration' as service,
null as storage_type,
null as warehouse_name,
stg_metering_history.name as warehouse_name,
null as database_name,
coalesce(
sum(
Expand Down
10 changes: 6 additions & 4 deletions models/query_history_enriched.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,28 @@ query_history as (
from {{ ref('stg_query_history') }}

{% if is_incremental() %}
-- Conservatively re-process the last 7 days to account for late arriving rates data
-- Conservatively re-process the last 3 days to account for late arriving rates data
-- which changes the cost per query
where end_time > (select dateadd(day, -7, max(end_time)) from {{ this }})
where end_time > (select dateadd(day, -3, max(end_time)) from {{ this }})
{% endif %}
),

cost_per_query as (
select *
from {{ ref('cost_per_query') }}
{% if is_incremental() %}
-- Conservatively re-process the last 7 days to account for late arriving rates data
-- Conservatively re-process the last 3 days to account for late arriving rates data
-- which changes the cost per query
where end_time > (select dateadd(day, -7, max(end_time)) from {{ this }})
where end_time > (select dateadd(day, -3, max(end_time)) from {{ this }})
{% endif %}
)

select
cost_per_query.query_id,
cost_per_query.compute_cost,
cost_per_query.compute_credits,
cost_per_query.query_acceleration_cost,
cost_per_query.query_acceleration_credits,
cost_per_query.cloud_services_cost,
cost_per_query.cloud_services_credits,
cost_per_query.query_cost,
Expand Down
15 changes: 10 additions & 5 deletions models/staging/stg_metering_history.sql
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
{{ config(materialized='incremental') }}
{{ config(
materialized='incremental',
unique_key=['service_type', 'start_time', 'entity_id'],
) }}

select
name,
credits_used_compute,
service_type,
start_time,
end_time,
service_type,
entity_id,
name,
credits_used_compute,
credits_used_cloud_services,
credits_used
from {{ source('snowflake_account_usage', 'metering_history') }}

{% if is_incremental() %}
where end_time > (select max(end_time) from {{ this }})
-- account for changing metering data
where end_time > (select coalesce(dateadd(day, -7, max(end_time)), '1970-01-01') from {{ this }})
{% endif %}

order by start_time asc
7 changes: 6 additions & 1 deletion models/staging/stg_rate_sheet_daily.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ select
usage_type,
currency,
effective_rate,
service_type
case
-- Have only seen this on one account. Normally it is COMPUTE, and all our downstream models rely on that
-- May adjust this in the future if Snowflake is permanently changing these fields for all accounts
when service_type = 'WAREHOUSE_METERING' then 'COMPUTE'
else service_type
end as service_type
from {{ source('snowflake_organization_usage', 'rate_sheet_daily') }}
order by date
8 changes: 6 additions & 2 deletions models/staging/stg_warehouse_metering_history.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
{{ config(materialized='incremental') }}
{{ config(
materialized='incremental',
unique_key=['start_time', 'warehouse_id'],
) }}

select
start_time,
Expand All @@ -11,7 +14,8 @@ select
from {{ source('snowflake_account_usage', 'warehouse_metering_history') }}

{% if is_incremental() %}
where end_time > (select max(end_time) from {{ this }})
-- account for changing metering data
where end_time > (select coalesce(dateadd(day, -7, max(end_time)), '1970-01-01') from {{ this }})
{% endif %}

order by start_time

0 comments on commit b69f913

Please sign in to comment.