From c63e128c3e4b5f4b0520dc6c2a3ec302198fccb5 Mon Sep 17 00:00:00 2001 From: Diego Quintana Date: Thu, 9 May 2024 21:34:39 +0200 Subject: [PATCH] new: add mhcil transformations in dbt to datamarts (et/somenergia-jardiner!38) * ref: increase error margin in dbt test this need fixing in another branch * fix: bad param in dbt config oops * ref: disable incremental model since it's not finished * new: add select permissions to user mercat_electric * fix: remove extra comma in last cte * chore: update style * new: add file traces to mhcil files from airbyte metadata * new: create incremental model based on surrogate key * new: change rank order to always increasing and set combined rank * change: update models and add best ranked row * new: add ranked mhcil file to dbt lineage * change: lint model using sqlfluff * new: add intermediate mhcil model with transformed data filename and version parsed from airbyte file_url. When using nested files, value should be also split with / but now is not the case * change: update mhcil raw model name to reflect it only has PEN data * change: move dbt raw model to /raw instead of /int * dev: update dbt dependencies --- dbt_jardiner/dbt_project.yml | 9 +++ .../intermediate/simel/_simel__sources.yaml | 9 --- ...mel_mhcil__pen_best_ranked_incremental.sql | 47 +++++++++++++++ .../int_simel_mhcil__pen_best_ranked_view.sql | 28 +++++++++ .../simel/int_simel_mhcil__pen_extended.sql | 59 +++++++++++++++++++ .../models/jardiner/raw/simel/.gitkeep | 0 .../simel/_simel__models.yaml | 2 +- .../jardiner/raw/simel/_simel__sources.yaml | 10 ++++ .../simel/raw_airbyte_simel__mhcil_pen.sql} | 0 .../seeds/jardiner/_jardiner__seeds.yaml | 9 +++ .../seed_simel_mhcil_rankings__firmeza.csv | 5 ++ .../seed_simel_mhcil_rankings__maturity.csv | 6 ++ .../test_dset_signals_receiver_last_hour.sql | 2 +- 13 files changed, 175 insertions(+), 11 deletions(-) delete mode 100644 dbt_jardiner/models/jardiner/intermediate/simel/_simel__sources.yaml create mode 100644 
dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_best_ranked_incremental.sql create mode 100644 dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_best_ranked_view.sql create mode 100644 dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_extended.sql create mode 100644 dbt_jardiner/models/jardiner/raw/simel/.gitkeep rename dbt_jardiner/models/jardiner/{intermediate => raw}/simel/_simel__models.yaml (98%) create mode 100644 dbt_jardiner/models/jardiner/raw/simel/_simel__sources.yaml rename dbt_jardiner/models/jardiner/{intermediate/simel/raw_airbyte_simel__simel_mhcil_nas.sql => raw/simel/raw_airbyte_simel__mhcil_pen.sql} (100%) create mode 100644 dbt_jardiner/seeds/jardiner/seed_simel_mhcil_rankings__firmeza.csv create mode 100644 dbt_jardiner/seeds/jardiner/seed_simel_mhcil_rankings__maturity.csv diff --git a/dbt_jardiner/dbt_project.yml b/dbt_jardiner/dbt_project.yml index 2d8c01f9..47b2e2d7 100644 --- a/dbt_jardiner/dbt_project.yml +++ b/dbt_jardiner/dbt_project.yml @@ -40,16 +40,25 @@ models: node_color: "#d17f4c" dset: +tags: dset + simel: + +grants: + select: ['mercat_electric'] intermediate: +docs: node_color: "#d05808" dset: +tags: dset + simel: + +grants: + select: ['mercat_electric'] marts: +docs: node_color: "goldenrod" dset: +tags: dset + simel: + +grants: + select: ['mercat_electric'] observability: +docs: node_color: "#c3c557" diff --git a/dbt_jardiner/models/jardiner/intermediate/simel/_simel__sources.yaml b/dbt_jardiner/models/jardiner/intermediate/simel/_simel__sources.yaml deleted file mode 100644 index 2f40f3db..00000000 --- a/dbt_jardiner/models/jardiner/intermediate/simel/_simel__sources.yaml +++ /dev/null @@ -1,9 +0,0 @@ ---- -version: 2 - -sources: - - name: airbyte_simel - schema: airbyte_imported - tables: - - name: simel_mhcil_nas - diff --git a/dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_best_ranked_incremental.sql 
b/dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_best_ranked_incremental.sql new file mode 100644 index 00000000..39cb5928 --- /dev/null +++ b/dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_best_ranked_incremental.sql @@ -0,0 +1,47 @@ +{{ + config( + enabled=false, + materialized="incremental", + incremental_strategy="append", + unique_key="surrogate_key", + ) +}} + + +{% if not is_incremental() %} + + select {{ dbt_utils.star(from=ref("int_simel_mhcil__pen_best_ranked_view")) }} + from {{ ref("int_simel_mhcil__pen_best_ranked_view") }} + +{% else %} + + with existing as ( + select + surrogate_key, + total_rank, + ranked_at + from {{ this }} + ), + + incoming as ( + select {{ dbt_utils.star(from=ref("int_simel_mhcil__pen_best_ranked_view")) }} + from {{ ref("int_simel_mhcil__pen_best_ranked_view") }} + ), + + new_rankings as ( + select + incoming.surrogate_key, + incoming.total_rank, + incoming.ranked_at + from existing + left join incoming + on existing.surrogate_key = incoming.surrogate_key + where + existing.total_rank < incoming.total_rank + and existing.ranked_at < incoming.ranked_at + ) + + select * + from new_rankings + +{% endif %} diff --git a/dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_best_ranked_view.sql b/dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_best_ranked_view.sql new file mode 100644 index 00000000..a70ea87f --- /dev/null +++ b/dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_best_ranked_view.sql @@ -0,0 +1,28 @@ +{{ config(materialized="view") }} + +with ranked as ( + select + *, + extract(epoch from start_at) + file_version + firmeza_rank + maturity_rank as total_rank + from {{ ref("int_simel_mhcil__pen_extended") }} +), + +ordered as ( + select + *, + row_number() over ( + partition by cil, start_at + order by total_rank desc nulls last -- best row = highest combined rank; unranked rows go last + ) as row_order + from ranked +), + +best_ranked as ( + select + *, + now() as ranked_at, + {{ 
dbt_utils.generate_surrogate_key(["cil", "start_at"]) }} as surrogate_key + from ordered where row_order = 1 +) + +select * from best_ranked diff --git a/dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_extended.sql b/dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_extended.sql new file mode 100644 index 00000000..59d34eb0 --- /dev/null +++ b/dbt_jardiner/models/jardiner/intermediate/simel/int_simel_mhcil__pen_extended.sql @@ -0,0 +1,59 @@ +{{ config(materialized="view") }} + + +with timestamp_crafted as ( + select + *, + make_timestamp( + cast("year" as int), cast("month" as int), cast("day" as int), cast("hour" as int), 0, 0 + ) as measurement_timestamp + from {{ ref("raw_airbyte_simel__mhcil_pen") }} +), + +timestamp_localized as ( + select + *, + case + when is_summer + then measurement_timestamp at time zone 'CEST' + else measurement_timestamp at time zone 'CET' + end as measurement_timestamptz + from timestamp_crafted +), + +extended_mhcil as ( + select + cil, + measurement_timestamptz as start_at, + hour, + is_summer, + energy_kwh, + reactive_energy_2_kvarh, + reactive_energy_3_kvarh, + measurement_type, + _ab_source_file_url as file_name, + _airbyte_normalized_at as ingested_at, + cast(_ab_source_file_last_modified as timestamptz) as file_last_modified_at, + cast(split_part(_ab_source_file_url, '.', 2) as int) as file_version, + cast(split_part(_ab_source_file_url, '_', 1) as varchar(5)) as file_type, -- file type as defined by SIMEL + cast(split_part(_ab_source_file_url, '_', 2) as varchar(2)) as release_period, -- publication period + cast(split_part(_ab_source_file_url, '_', 3) as varchar(4)) as member_code, -- participant code + cast(split_part(_ab_source_file_url, '_', 4) as varchar(2)) as file_receiver_type, -- file receiver type + to_date(right(split_part(_ab_source_file_url, '.', 1), 8), 'YYYYMMDD') as file_date -- date the data corresponds to + from timestamp_localized +), + +ranked 
as ( + select + exmhcil.*, + cast(firmeza_ranking.rank as int) as firmeza_rank, + cast(maturity_ranking.rank as int) as maturity_rank + from extended_mhcil as exmhcil + left join {{ ref("seed_simel_mhcil_rankings__firmeza") }} as firmeza_ranking + on exmhcil.measurement_type = firmeza_ranking.name + left join {{ ref("seed_simel_mhcil_rankings__maturity") }} as maturity_ranking + on exmhcil.release_period = maturity_ranking.name +) + +select * +from ranked diff --git a/dbt_jardiner/models/jardiner/raw/simel/.gitkeep b/dbt_jardiner/models/jardiner/raw/simel/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/dbt_jardiner/models/jardiner/intermediate/simel/_simel__models.yaml b/dbt_jardiner/models/jardiner/raw/simel/_simel__models.yaml similarity index 98% rename from dbt_jardiner/models/jardiner/intermediate/simel/_simel__models.yaml rename to dbt_jardiner/models/jardiner/raw/simel/_simel__models.yaml index c0e66314..d6719712 100644 --- a/dbt_jardiner/models/jardiner/intermediate/simel/_simel__models.yaml +++ b/dbt_jardiner/models/jardiner/raw/simel/_simel__models.yaml @@ -2,7 +2,7 @@ version: 2 models: - - name: raw_airbyte_simel__simel_mhcil_nas + - name: raw_airbyte_simel__mhcil_pen columns: - name: cil description: A - Código CIL (25 characters) diff --git a/dbt_jardiner/models/jardiner/raw/simel/_simel__sources.yaml b/dbt_jardiner/models/jardiner/raw/simel/_simel__sources.yaml new file mode 100644 index 00000000..dedee063 --- /dev/null +++ b/dbt_jardiner/models/jardiner/raw/simel/_simel__sources.yaml @@ -0,0 +1,10 @@ +--- +version: 2 + +sources: + - name: airbyte_simel + schema: airbyte_imported + tables: + - name: simel_mhcil_nas + description: Dades des de SIMEL sincronitzades des de minio.somenergia.coop. 
Només conté dades de sistema peninsular (PEN) + diff --git a/dbt_jardiner/models/jardiner/intermediate/simel/raw_airbyte_simel__simel_mhcil_nas.sql b/dbt_jardiner/models/jardiner/raw/simel/raw_airbyte_simel__mhcil_pen.sql similarity index 100% rename from dbt_jardiner/models/jardiner/intermediate/simel/raw_airbyte_simel__simel_mhcil_nas.sql rename to dbt_jardiner/models/jardiner/raw/simel/raw_airbyte_simel__mhcil_pen.sql diff --git a/dbt_jardiner/seeds/jardiner/_jardiner__seeds.yaml b/dbt_jardiner/seeds/jardiner/_jardiner__seeds.yaml index e69de29b..24245cc2 100644 --- a/dbt_jardiner/seeds/jardiner/_jardiner__seeds.yaml +++ b/dbt_jardiner/seeds/jardiner/_jardiner__seeds.yaml @@ -0,0 +1,9 @@ +version: 2 + +seeds: + - name: seed_simel_mhcil_rankings__firmeza + description: | + Seed que genera un ranking dels fitxers segons la seva firmeza i el document SIMEL_Ficheros_Intercambio_Informacion_v40_Oct2022.pdf + - name: seed_simel_mhcil_rankings__maturity + description: | + Seed que genera un ranking dels fitxers segons la seva maduresa i el document SIMEL_Ficheros_Intercambio_Informacion_v40_Oct2022.pdf diff --git a/dbt_jardiner/seeds/jardiner/seed_simel_mhcil_rankings__firmeza.csv b/dbt_jardiner/seeds/jardiner/seed_simel_mhcil_rankings__firmeza.csv new file mode 100644 index 00000000..d5fdb466 --- /dev/null +++ b/dbt_jardiner/seeds/jardiner/seed_simel_mhcil_rankings__firmeza.csv @@ -0,0 +1,5 @@ +id,name,rank,description +1,R,4,Real con reparto por potencia +2,L,3,Estimación con reparto por potencia +3,E,2,Real con reparto por energía individualizada de CIL +4,M,1,Estimación con reparto por energía individualizada de CIL diff --git a/dbt_jardiner/seeds/jardiner/seed_simel_mhcil_rankings__maturity.csv b/dbt_jardiner/seeds/jardiner/seed_simel_mhcil_rankings__maturity.csv new file mode 100644 index 00000000..93f73511 --- /dev/null +++ b/dbt_jardiner/seeds/jardiner/seed_simel_mhcil_rankings__maturity.csv @@ -0,0 +1,6 @@ +id,name,rank,description +1,HD,1,valores de cierre 
diario +2,H2,2,valores de cierre de mes m-1 +3,H3,3,valores de cierre de mes m-3 +4,HP,4,valores de cierre provisional +5,HC,5,valores de cierre definitivo diff --git a/dbt_jardiner/tests/dset/test_dset_signals_receiver_last_hour.sql b/dbt_jardiner/tests/dset/test_dset_signals_receiver_last_hour.sql index fc430df3..4d16aaa7 100644 --- a/dbt_jardiner/tests/dset/test_dset_signals_receiver_last_hour.sql +++ b/dbt_jardiner/tests/dset/test_dset_signals_receiver_last_hour.sql @@ -1,4 +1,4 @@ -{{ config(error_if=">500") }} +{{ config(error_if=">1000") }} {# error limit is set on half the number of signal uuids available #} with