Skip to content

Commit

Permalink
add dset basics raw + signals receiver check (et/somenergia-jardiner!5)
Browse files Browse the repository at this point in the history
* Add monthly inverters energy agg for tecnical dashboards

* finished obt, needs more testing

* finish obt (particularly dset) without omie

* dset miro pipeline, update plant names on seed, some best practices

* docs: afegir plantes

* WIP spine and obt

* fix tests of devices integrity

* add device uuid 1-to-1 test and other tests

* WIP device table with tests

* update singal_device_relation with device_parent

* signal to device with hierarchy csv seed

* fix models after signal_device_relation table changes

* WIP reciver check

* add dset basics raw + signals reciver check
  • Loading branch information
Roger Sanjaume authored and polmonso committed Sep 1, 2023
1 parent 73551e3 commit 608fd7b
Show file tree
Hide file tree
Showing 32 changed files with 1,165 additions and 47 deletions.
5 changes: 5 additions & 0 deletions dbt_jardiner/models/operational/dset/devices.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{{ config(materialized='table') }}

select distinct plant, device, device_uuid, device_type, device_parent
from {{ ref('signal_device_relation_raw') }}
order by plant, device
36 changes: 36 additions & 0 deletions dbt_jardiner/models/operational/dset/dset_metrics_long_hourly.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{{ config(materialized='view') }}

{# inverter_energy resets between 1 and 2 +02, which gives us energy spikes.
We therefore set it to 0 between midnight and 3 since all plants are fotovoltaic #}

{# TODO we should pass the inverter_energy to incremental instead of accumulative
since this is ugly as fuck #}

with dset_key_metrics as (
select
date_trunc('hour', ts) as start_hour,
plant,
device_type,
metric as split_metric,
signal_value
from {{ ref('dset_values_incremental') }}
where metric in ('inverter_energy','irradiance','exported_energy')
)
select
start_hour,
plant,
device_type,
case when split_metric = 'irradiance' then 'irradiation' else split_metric end as metric,
case
when split_metric = 'inverter_energy' and device_type = 'inverter'
then (extract(hour from start_hour) > 3)::integer * (max(signal_value) - min(signal_value)) {# we have random-ish resets #}
when split_metric = 'irradiance' and device_type in ('sensor','module','inverter')
then avg(signal_value)
when split_metric = 'exported_energy' and device_type = 'meter'
then max(signal_value) - min(signal_value)
else NULL
end as metric_value
from dset_key_metrics
group by start_hour, plant, device_type, split_metric


Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{{ config(materialized='view') }}

select
start_hour,
plant,
{{ pivot(column='metric', names=dbt_utils.get_column_values(table=ref('dset_metrics_long_hourly'), column='metric'), value_column='metric_value', agg='max') }}
from {{ ref('dset_metrics_long_hourly') }}
group by start_hour, plant
29 changes: 29 additions & 0 deletions dbt_jardiner/models/operational/dset/dset_responses_raw.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{{ config(materialized='view') }}

with unnest_groups as (
SELECT
jsonb_array_elements(response)->>'group_name' as group_name,
jsonb_array_elements(response) as response
FROM {{ source('lake', 'dset_responses') }}
), filter_dummy_group as (
SELECT *
FROM unnest_groups
WHERE group_name != 'SOMENERGIA'
)
SELECT
group_name,
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_id')::text as signal_id,
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_tz')::text as signal_tz,
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_code')::text as signal_code,
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_type')::text as signal_type,
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_unit')::text as signal_unit,
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_last_ts')::timestamp at time zone
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_tz')::text as signal_last_ts,
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_frequency')::text as signal_frequency,
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_is_virtual')::text as signal_is_virtual,
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_last_value')::numeric as signal_last_value,
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_description')::uuid as signal_uuid,
(jsonb_array_elements(jsonb_array_elements((response)::jsonb#>'{signals}')#>'{data}')->>'ts')::timestamp at time zone
(jsonb_array_elements((response)::jsonb#>'{signals}')->>'signal_tz')::text as ts,
(jsonb_array_elements(jsonb_array_elements((response)::jsonb#>'{signals}')#>'{data}')->>'value')::numeric as signal_value
FROM filter_dummy_group
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{{ config(materialized='view') }}

{#select {{ dbt_utils.star(from=ref('dset_responses_raw'), except=['signal_uuid'] }}#}

SELECT
metadata.plant,
metadata.signal,
metadata.metric,
metadata.device,
metadata.device_type,
metadata.device_uuid,
metadata.device_parent,
metadata.signal_uuid,
valors.group_name,
valors.signal_id,
valors.signal_tz,
valors.signal_code,
valors.signal_type,
valors.signal_unit,
valors.signal_last_ts,
valors.signal_frequency,
valors.signal_is_virtual,
valors.signal_last_value,
valors.ts,
valors.signal_value as signal_value
FROM {{ ref('signal_device_relation_raw') }} AS metadata
LEFT JOIN {{ ref('dset_responses_raw') }} AS valors USING(signal_uuid)
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{{ config(materialized='view') }}

with valors as (
select True as rebut_from_dset, signal_uuid
from {{ ref('dset_responses_raw') }}
where ts = (select max(ts) from {{ ref('dset_responses_raw') }})
)
select
signals.plant,
signals.signal,
signals.signal_uuid,
signals.device,
signals.device_type,
signals.device_uuid,
coalesce(rebut_from_dset, False) as rebut_from_dset
from {{ref('signal_device_relation_raw')}} as signals
left join valors on signals.signal_uuid = valors.signal_uuid
order by plant, signal
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{{ config(materialized='view') }}

with inverters_energy as (
select
date_trunc('hour', ts) as start_hour,
plant,
device,
metric,
signal_unit,
signal_value
from {{ ref('dset_values_incremental') }}
where device_type in ('inverter') and metric = 'inverter_energy'
), production_hourly as (
select
start_hour,
plant,
device,
signal_unit,
(extract(hour from start_hour) > 3)::integer * (max(signal_value) - min(signal_value)) as inverter_energy
from inverters_energy
group by start_hour, plant, device, metric, signal_unit
), singal_unit_standardization as (
select
start_hour,
plant,
device,
signal_unit,
case
when signal_unit = 'MWh' then round(inverter_energy,3)
when signal_unit = 'kWh' then round(inverter_energy/1000,3)
else NULL
end as inverter_energy_MWh
from production_hourly
), production_monthly as (
select
date_trunc('month', start_hour) as month_date,
plant,
device,
sum(inverter_energy_MWh) as inverter_energy_MWh
from singal_unit_standardization
group by date_trunc('month', start_hour), device, plant

)
select * from production_monthly
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{{ config(materialized='incremental') }}

select * from {{ ref('dset_responses_with_signal_metadata') }}

{% if is_incremental() %}
where ts >= coalesce((select max(ts) from {{ this }}), '1900-01-01')
{% endif %}

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{{ config(materialized='table') }}

select *
from {{ ref('dset_responses_with_signal_metadata') }}
where signal_last_ts > (current_date - interval '30 days')
21 changes: 0 additions & 21 deletions dbt_jardiner/models/operational/dset/lake_dset_raw.sql

This file was deleted.

54 changes: 54 additions & 0 deletions dbt_jardiner/models/operational/dset/schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
version: 2

models:
- name: lake_dset_raw
description: Conté les lectures crues que venen de la ingesta
columns:
- name: signal_description
description: |
Conté el tipus de senyal al que fa referència la lectura
- Etotal: Sum d'energia acumulada des de l'inici dels temps
- Eayer: Suma d'energia acumulada el dia d'ahir només
- Ediaria: Suma d'energia acumulada avui fins al moment de la lectura
És a dir, trobem aquestes equivalències
- Etotal@avui - Etotal@ahir = Ediaria@ahir
- Eayer@avui es aproximadament Ediaria@ahir a les 23:55
- name: dset_responses_raw
description: Conté les lectures crues que venen de la ingesta. Consulta la doc del model dset_responses (el source) per més info.

- name: dset_signals_receiver_status
description: >
Model temporal que comprova quins senyals de dset estem rebent respecte el maping que hem enviat.
Agafem l'últim batch rebut i el leftjoinem a la taula signal_device_relation amb un petit processat que ens indica
si hem rebut aquell senyal amb la columna rebut_from_dset.
columns:
- name: rebut_from_dset
description: Booleà que indica si s'ha rebut o no en l'últim batch.

- name: devices
description: >
Taula amb la llista de devices, els seus uuids i si tenen un parent.
columns:
- name: device_uuid
tests:
- unique


- name: test_device_uuid_repeted
description: >
Taula amb els devices_uuids que es repeteixen en més d'un device, entenent per device una combinació
única de plant, device, device_type i device_parent.
config:
severity: error

- name: test_to_debug_device_uuid_errors
description: >
Taula que recull tots els signals que comparteixen un devide_uuid erroni. Ha de servir per identificar
el motiu pel qual un device_uuid no es unique per un device, entenent per device una convinació
única de plant, device, device_type i device_parent.
config:
severity: error

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{{ config(materialized='view') }}

SELECT
plant,
signal,
metric,
device,
device_type,
device_uuid,
device_parent,
signal_uuid
FROM {{ ref('signal_device_relation') }}
15 changes: 15 additions & 0 deletions dbt_jardiner/models/operational/external_data/forecasts_best.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{{ config(materialized='view') }}

{# TODO check that this selects the previous day or the same day forecast #}

with forecasts_denormalized as (
select * from {{ref('forecasts_denormalized')}}
)
select distinct on(plant_id, "time")
forecastdate,
"time" - INTERVAL '1 hour' as start_hour,
plant_id,
plant_name as plant,
energy_kwh
from forecasts_denormalized fd
order by plant_id, "time" desc, forecastdate desc
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ with forecast_denormalized as (
plant.id as plant_id,
plant.name as plant_name,
forecastmetadata.forecastdate as forecastdate,
forecast.percentil50/1000.0 as energy_kwh,
TRUE
ROUND(forecast.percentil50/1000.0,2) as energy_kwh
FROM {{source('plantmonitordb','forecast')}}
LEFT JOIN {{source('plantmonitordb','forecastmetadata')}} ON forecastmetadata.id = forecast.forecastmetadata
LEFT JOIN {{source('plantmonitordb','plant')}} ON plant.id = forecastmetadata.plant
Expand Down
6 changes: 6 additions & 0 deletions dbt_jardiner/models/operational/external_data/omie_raw.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{{ config(materialized='view') }}

SELECT
date as start_hour,
price
FROM {{source('plantlake','omie_historical_price_hour')}}
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

select
time,
plant as plant_id,
request_time as request_time,
global_horizontal_irradiation_wh_m2 as horizontal_irradiation_wh_m2,
global_tilted_irradiation_wh_m2 as tilted_irradiation_wh_m2,
module_temperature_dc as module_temperature_dc,
photovoltaic_energy_output_wh as energy_output_kwh,
TRUE
from {{ source('plantmonitordb','satellite_readings') }}
sg.plant_id,
plant_name,
request_time,
horizontal_irradiation_wh_m2,
tilted_irradiation_wh_m2,
module_temperature_dc,
energy_output_kwh
from {{ ref('satellite_readings_raw') }} sg
left join {{ref('som_plants_raw')}} p on p.plant_id = sg.plant_id

-- SolarGis PVOUT (aquí photovoltaic_energy_output_wh) retorna l'energia en kwh però plantmonitor per error ho registra com a wh sense fer cap transformació.
-- Entenem que al redash s'està corregint a mà abans de mostrar el valor.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{{ config(materialized='view') }}

select
time,
plant as plant_id,
request_time as request_time,
global_horizontal_irradiation_wh_m2 as horizontal_irradiation_wh_m2,
global_tilted_irradiation_wh_m2 as tilted_irradiation_wh_m2,
module_temperature_dc as module_temperature_dc,
photovoltaic_energy_output_wh as energy_output_kwh
from {{ source('plantmonitordb','satellite_readings') }} sg

-- SolarGis PVOUT (aquí photovoltaic_energy_output_wh) retorna l'energia en kwh però plantmonitor per error ho registra com a wh sense fer cap transformació.
-- Entenem que al redash s'està corregint a mà abans de mostrar el valor.
-- Aquí canviem el nom perquè s'ajusti a la realitat del valor.
12 changes: 12 additions & 0 deletions dbt_jardiner/models/operational/external_data/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
version: 2

models:
- name: forecast_best
description: energy forecast of the previous day
tests:
- dbt_utils.unique_combination_of_columns:
config:
severity: warn
combination_of_columns:
- forecast_date
- plant
Loading

0 comments on commit 608fd7b

Please sign in to comment.