Skip to content

Commit

Permalink
Enables bias adjusted wind direction (#3149)
Browse files Browse the repository at this point in the history
  • Loading branch information
conbrad authored Oct 16, 2023
1 parent 46a86b5 commit 22ed18a
Show file tree
Hide file tree
Showing 8 changed files with 222 additions and 69 deletions.
5 changes: 3 additions & 2 deletions api/app/jobs/common_model_fetchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
delete_weather_station_model_predictions,
refresh_morecast2_materialized_view)
from app.weather_models.machine_learning import StationMachineLearning
from app.weather_models import ModelEnum, construct_interpolated_noon_prediction
from app.weather_models import SCALAR_MODEL_VALUE_KEYS, ModelEnum, construct_interpolated_noon_prediction
from app.schemas.stations import WeatherStation
from app import config, configure_logging
import app.utils.time as time_utils
Expand Down Expand Up @@ -362,7 +362,8 @@ def _process_model_run_for_station(self,
if (prev_prediction is not None
and prev_prediction.prediction_timestamp.hour == 18
and prediction.prediction_timestamp.hour == 21):
noon_prediction = construct_interpolated_noon_prediction(prev_prediction, prediction)
noon_prediction = construct_interpolated_noon_prediction(
prev_prediction, prediction, SCALAR_MODEL_VALUE_KEYS)
self._process_prediction(
noon_prediction, station, model_run, machine)
self._process_prediction(
Expand Down
12 changes: 11 additions & 1 deletion api/app/tests/weather_models/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,58 +14,68 @@ def get_actuals_left_outer_join_with_predictions(*args):
weather_date=datetime(2020, 10, 10, 18),
temperature=20,
temp_valid=True,
wind_direction=90,
relative_humidity=50,
rh_valid=True),
ModelRunPrediction(
ModelRunPrediction(
tmp_tgl_2=2,
rh_tgl_2=10,
apcp_sfc_0=2,
wdir_tgl_10=97,
prediction_timestamp=datetime(2020, 10, 10, 18))],
[HourlyActual(weather_date=datetime(2020, 10, 10, 19)), None],
[HourlyActual(weather_date=datetime(2020, 10, 10, 20),
temperature=25,
wind_direction=270,
temp_valid=True,
relative_humidity=70,
rh_valid=True), None],
[HourlyActual(
weather_date=datetime(2020, 10, 10, 21),
temperature=30,
temp_valid=True,
wind_direction=120,
relative_humidity=100,
rh_valid=True),
ModelRunPrediction(
tmp_tgl_2=1,
rh_tgl_2=20,
apcp_sfc_0=3,
wdir_tgl_10=101,
prediction_timestamp=datetime(2020, 10, 10, 21))],
# day 2
[HourlyActual(
weather_date=datetime(2020, 10, 11, 18),
temperature=20,
temp_valid=True,
wind_direction=121,
relative_humidity=50,
rh_valid=True),
ModelRunPrediction(
tmp_tgl_2=2,
rh_tgl_2=10,
apcp_sfc_0=2,
wdir_tgl_10=110,
prediction_timestamp=datetime(2020, 10, 11, 18))],
[HourlyActual(weather_date=datetime(2020, 10, 11, 19)), None],
[HourlyActual(weather_date=datetime(2020, 10, 11, 20),
temperature=27,
temp_valid=True,
wind_direction=98,
relative_humidity=60,
rh_valid=True), None],
[HourlyActual(
weather_date=datetime(2020, 10, 11, 21),
temperature=30,
wind_direction=118,
temp_valid=True,
relative_humidity=100,
rh_valid=True),
ModelRunPrediction(
tmp_tgl_2=1,
rh_tgl_2=20,
apcp_sfc_0=3,
wdir_tgl_10=111,
prediction_timestamp=datetime(2020, 10, 11, 21))]
]
return result
5 changes: 3 additions & 2 deletions api/app/weather_models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ def interpolate_wind_direction(prediction_a: ModelRunPrediction,


def construct_interpolated_noon_prediction(prediction_a: ModelRunPrediction,
prediction_b: ModelRunPrediction):
prediction_b: ModelRunPrediction,
model_keys):
""" Construct a noon prediction by interpolating.
"""
# create a noon prediction. (using utc hour 20, as that is solar noon in B.C.)
Expand All @@ -121,7 +122,7 @@ def construct_interpolated_noon_prediction(prediction_a: ModelRunPrediction,
timestamp_b = prediction_b.prediction_timestamp.timestamp()
noon_timestamp = noon_prediction.prediction_timestamp.timestamp()
# calculate interpolated values.
for key in SCALAR_MODEL_VALUE_KEYS:
for key in model_keys:
value_a = getattr(prediction_a, key)
value_b = getattr(prediction_b, key)
if value_a is None or value_b is None:
Expand Down
2 changes: 1 addition & 1 deletion api/app/weather_models/fetch/predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ async def fetch_latest_model_run_predictions_by_station_code_and_date_range(sess
temperature=bias_adjusted_temp,
relative_humidity=bias_adjusted_rh,
wind_speed=bias_adjusted_wind_speed,
wind_dir=bias_adjusted_wdir
wind_direction=bias_adjusted_wdir
))
return post_process_fetched_predictions(results)

Expand Down
85 changes: 23 additions & 62 deletions api/app/weather_models/machine_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@
from app.db.models.weather_models import (PredictionModel, ModelRunPrediction)
from app.db.models.observations import HourlyActual
from app.db.crud.observations import get_actuals_left_outer_join_with_predictions
from app.weather_models.regression_model import RegressionModelsV2
from app.weather_models.sample import Samples


logger = getLogger(__name__)

# Corresponding key values on HourlyActual and SampleCollection
SAMPLE_VALUE_KEYS = ('temperature', 'relative_humidity', 'wind_speed', 'wind_direction')
SAMPLE_VALUE_KEYS = ('temperature', 'relative_humidity', 'wind_speed')
# Number of days of historical actual data to learn from when training model
MAX_DAYS_TO_LEARN = 19

Expand All @@ -38,59 +40,12 @@ class RegressionModels:
"""

keys = ('temperature_wrapper', 'relative_humidity_wrapper',
'wind_speed_wrapper', 'wind_direction_wrapper')
'wind_speed_wrapper')

def __init__(self):
self.temperature_wrapper = LinearRegressionWrapper()
self.relative_humidity_wrapper = LinearRegressionWrapper()
self.wind_speed_wrapper = LinearRegressionWrapper()
self.wind_direction_wrapper = LinearRegressionWrapper()


class Samples:
    """ Class for storing samples in buckets of hours.

    e.g. a temperature sample consists of an x axis (predicted values) and a y axis (observed values) put
    together in hour buckets. The bucket key is the `hour` attribute of the timestamp supplied with
    each value.
    """

    def __init__(self):
        # Predicted (model) values, keyed by hour.
        self._x = defaultdict(list)
        # Observed (actual) values, keyed by hour.
        self._y = defaultdict(list)

    def hours(self):
        """ Return all the hours used to bucket samples together. """
        return self._x.keys()

    def append_x(self, value, timestamp: datetime):
        """ Append another predicted value to the bucket for timestamp.hour. """
        self._x[timestamp.hour].append(value)

    def append_y(self, value, timestamp: datetime):
        """ Append another observed value to the bucket for timestamp.hour. """
        self._y[timestamp.hour].append(value)

    def np_x(self, hour):
        """ Return numpy array of the predicted values for the given hour, reshaped to one column. """
        # Use .get() instead of indexing: indexing a defaultdict would insert an empty
        # bucket for an hour that has no samples, which would then show up in hours().
        return np.array(self._x.get(hour, [])).reshape((-1, 1))

    def np_y(self, hour):
        """ Return a numpy array of the observed values for the given hour. """
        # Same reasoning as np_x: reading must not create a bucket.
        return np.array(self._y.get(hour, []))

    def add_sample(self,
                   model_value: float,
                   actual_value: float,
                   timestamp: datetime,
                   model_key: str,
                   sample_key: str):
        """ Add a sample: one predicted value paired with one observed value.

        :param model_value: The predicted value (stored on the x axis).
        :param actual_value: The observed value (stored on the y axis).
        :param timestamp: Its hour selects the bucket both values are stored in.
        :param model_key: Name of the model field; only used in the log message.
        :param sample_key: Name of the sample field; only used in the log message.
        """
        # Additional logging to assist with finding errors:
        logger.info('adding sample for %s->%s with: model_values %s, actual_value: %s',
                    model_key, sample_key, model_value, actual_value)
        # Add to the data we're going to learn from; both values land in the same hour bucket.
        self.append_x(model_value, timestamp)
        self.append_y(actual_value, timestamp)


class SampleCollection:
Expand All @@ -100,7 +55,6 @@ def __init__(self):
self.temperature = Samples()
self.relative_humidity = Samples()
self.wind_speed = Samples()
self.wind_direction = Samples()


class StationMachineLearning:
Expand All @@ -126,6 +80,7 @@ def __init__(self,
self.target_coordinate = target_coordinate
self.station_code = station_code
self.regression_models = defaultdict(RegressionModels)
self.regression_models_v2 = RegressionModelsV2()
self.max_learn_date = max_learn_date
# Maximum number of days to try to learn from. Experimentation has shown that
# about two weeks worth of data starts giving fairly good results compared to human forecasters.
Expand All @@ -152,12 +107,9 @@ def _add_sample_to_collection(self,
# are None.
logger.warning('no model value for %s->%s', model_key, sample_key)

def _collect_data(self):
def _collect_data(self, start_date: datetime):
""" Collect data to use for machine learning.
"""
# Calculate the date to start learning from.
start_date = self.max_learn_date - \
timedelta(days=self.max_days_to_learn)
# Create a convenient structure to store samples in.
sample_collection = SampleCollection()

Expand All @@ -177,7 +129,8 @@ def _collect_data(self):
and prev_prediction.prediction_timestamp.hour == 18):
# If there's a gap in the data (like with the GLOBAL model) - then make up
# a noon prediction using interpolation, and add it as a sample.
noon_prediction = construct_interpolated_noon_prediction(prev_prediction, prediction)
noon_prediction = construct_interpolated_noon_prediction(
prev_prediction, prediction, SCALAR_MODEL_VALUE_KEYS)
self._add_sample_to_collection(
noon_prediction, prev_actual, sample_collection)

Expand All @@ -190,8 +143,12 @@ def _collect_data(self):
def learn(self):
""" Collect data and perform linear regression.
"""
# Calculate the date to start learning from.
start_date = self.max_learn_date - \
timedelta(days=self.max_days_to_learn)

# collect data
data = self._collect_data()
data = self._collect_data(start_date)

# iterate through the data, creating a regression model for each variable
# and each hour.
Expand All @@ -206,6 +163,12 @@ def learn(self):
# how much sample data we actually had etc., and then not mark the model as being "good".
regression_model.good_model = True

# Wind direction is handled separately, using the new regression structure (RegressionModelsV2).
query = get_actuals_left_outer_join_with_predictions(
self.session, self.model.id, self.station_code, start_date, self.max_learn_date)
self.regression_models_v2.collect_data(query)
self.regression_models_v2.train()

def predict_temperature(self, model_temperature: float, timestamp: datetime):
""" Predict the bias adjusted temperature for a given point in time, given a corresponding model
temperature.
Expand Down Expand Up @@ -255,9 +218,7 @@ def predict_wind_direction(self, model_wind_dir: int, timestamp: datetime):
: return: The bias-adjusted wind direction as predicted by the linear regression model.
"""
hour = timestamp.hour
if self.regression_models[hour].wind_direction_wrapper.good_model and model_wind_dir is not None:
predicted_wind_dir = self.regression_models[hour].wind_direction_wrapper.model.predict([[model_wind_dir]])[
0]
# a valid wind direction value is between 0 and 360. If the returned value is outside these bounds, correct it
return predicted_wind_dir % 360
return None
predicted_wind_dir = self.regression_models_v2._models[0].predict(hour, [[model_wind_dir]])
if predicted_wind_dir is None:
return None
return predicted_wind_dir % 360
Loading

0 comments on commit 22ed18a

Please sign in to comment.