diff --git a/README.md b/README.md
index 78bec70..386c6b9 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ ForecastPFN is more accurate and faster compared to state-of-the-art forecasting
The codebase has these parts:
- `./src/` contains all code to replicate the ForecastPFN synthetic data generation and training procedure
-- `./benchmark/` contains all the code to replicate the benchmark of ForecastPFN against the the other baselines.
+- `./benchmark/` contains all the code to replicate the benchmark of ForecastPFN against the other baselines.
# Table of contents
1. [Installation](#installation-)
@@ -80,7 +80,7 @@ The arguments that are passed are:
See how our model performs:
![alt text](img/fpfn_performance.png?raw=true)
-The above figure shows analysis of performance vs. train budget, aggregated across datasets and prediction lengths. We plot the number of total MSE wins (left) where a higher value is better and mean MSE rank (right) where a lower values is better. Error bars show one standard deviation across training runs. ForecastPFN and Meta-N-BEATS are disadvantaged in these comparisons given that they see no training data for these series, only the length 36 input.
+The above figure shows an analysis of performance vs. train budget, aggregated across datasets and prediction lengths. We plot the total number of MSE wins (left), where a higher value is better, and the mean MSE rank (right), where a lower value is better. Error bars show one standard deviation across training runs. ForecastPFN and Meta-N-BEATS are disadvantaged in these comparisons given that they see no training data for these series, only the length-36 input.
# Synthetic Data Generation
ForecastPFN is completely trained on synthetic data.
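To make the aggregation described in the README hunk above concrete, here is a minimal, illustrative sketch of how "MSE wins" and "mean MSE rank" can be computed from a table of per-(dataset, prediction-length) MSE values. This is not the repository's analysis code (which lives in `benchmark/analyze_results.ipynb`), and the numbers and setting names below are hypothetical.

```python
# Illustrative sketch only (hypothetical numbers, not results from the paper):
# rows are (dataset, prediction-length) settings, columns are methods, values are test MSE.
import pandas as pd

mse = pd.DataFrame(
    {
        "ForecastPFN": [0.91, 1.20, 0.40],
        "Arima": [1.05, 1.10, 0.55],
        "Informer": [0.97, 1.40, 0.38],
    },
    index=["illness/24", "exchange/96", "weather/36"],
)

# "MSE wins": the number of settings on which each method achieves the lowest MSE (higher is better).
wins = mse.idxmin(axis=1).value_counts()

# "Mean MSE rank": rank methods within each setting (1 = best), then average across settings (lower is better).
mean_rank = mse.rank(axis=1, method="min").mean()

print(wins)
print(mean_rank)
```

Higher wins and lower mean rank correspond to the left and right panels of the figure, respectively.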
diff --git a/benchmark/.DS_Store b/benchmark/.DS_Store
deleted file mode 100644
index d6bded0..0000000
Binary files a/benchmark/.DS_Store and /dev/null differ
diff --git a/benchmark/README.md b/benchmark/README.md
index 7b051d3..6ec8da7 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -1,4 +1,4 @@
-This directory is for evaluation of ForecastPFN. We have evaluated ForecastPFN on seven real-world datasets which have been used in the literature. The datasets are in the `../academic_data` folder. The datasets include Illness, Exchange, ECL, ETTh1 and ETTh2, Weather and Traffic.
+This directory is for the evaluation of ForecastPFN. We have evaluated ForecastPFN on seven real-world datasets that have been used in the literature: Illness, Exchange, ECL, ETTh1, ETTh2, Weather, and Traffic. The datasets are in the `../academic_data` folder.
The evaluation has been done against multiple baselines which include Arima, Prophet, Informer, Fedformer-w, Autoformer, Transformer and Metalearn, as well as more simple baselines Mean, Last, and NaiveSeasonal.
@@ -24,12 +24,12 @@ The arguments that are passed are:
- `root_path` : This denotes the parent directory which contains the required dataset.
- `data_path` : This denotes the name of the file which contains the data. Look into the academic_data folder for information regarding other dataset files.
- `model` : This is one of (ForecastPFN, Metalearn, Arima, Autoformer, Informer, Transformer, FEDformer-w, Prophet)
-- `seq_len` : The length of input sequence to be used. In our default setting, we have this set to 96 for exchange and 36 for all other datasets.
+- `seq_len` : The length of the input sequence to be used. In our default setting, we have this set to 96 for exchange and 36 for all other datasets.
- `label_len` : In our default setting, we have this set to 48 for exchange and 18 for all other datasets.
-- `pred_len` : This is the length of prediction to be made. We have evaluated our model with various prediction lengths.
-- `train_budget` : This denotes the number of training examples that are available to the models which they can use for training. ForecastPFN and Metalearn use 0 examples since they are zero-shot.
+- `pred_len` : This is the length of the prediction to be made. We have evaluated our model with various prediction lengths.
+- `train_budget` : This denotes the number of training examples available to the models for training. ForecastPFN and Metalearn use 0 examples since they are zero-shot.
- `itr` : Number of times evaluation should be repeated. This affects the transformer-based models since they are non-deterministic.
All experiments that have been run for this paper can be found in `run.sh`.
-Repliaction of the paper tables and plots can be found in the jupyter notebook `./analyze_results.ipynb`.
\ No newline at end of file
+Replication of the paper tables and plots can be found in the jupyter notebook `./analyze_results.ipynb`.
\ No newline at end of file
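For orientation, here is a hypothetical sketch of how one evaluation run could be launched with the arguments documented in the README hunk above. The entry-point name (`run.py`), the flag spellings, and the dataset file name are assumptions, not taken from the repository; the authoritative commands are in `benchmark/run.sh`.

```python
# Hypothetical invocation sketch; consult benchmark/run.sh for the exact commands used in the paper.
import subprocess

cmd = [
    "python", "run.py",                          # assumed entry point
    "--root_path", "../academic_data/illness/",  # parent directory containing the dataset
    "--data_path", "national_illness.csv",       # dataset file name (assumed)
    "--model", "ForecastPFN",                    # one of the models listed above
    "--seq_len", "36",                           # 36 for all datasets except exchange (96)
    "--label_len", "18",                         # 18 for all datasets except exchange (48)
    "--pred_len", "24",                          # forecast horizon under evaluation
    "--train_budget", "0",                       # ForecastPFN and Metalearn are zero-shot
    "--itr", "1",                                # repetitions; matters for non-deterministic models
]
subprocess.run(cmd, check=True)
```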
diff --git a/benchmark/data_provider/UnivariateTimeseriesSampler_WithStamps.py b/benchmark/data_provider/UnivariateTimeseriesSampler_WithStamps.py
index cdbc5aa..e87ab68 100644
--- a/benchmark/data_provider/UnivariateTimeseriesSampler_WithStamps.py
+++ b/benchmark/data_provider/UnivariateTimeseriesSampler_WithStamps.py
@@ -1,17 +1,17 @@
import numpy as np
-import pandas as pd
-import datetime
+
class UnivariateTimeseriesSampler_WithStamps:
- def __init__(self,
- timeseries: np.ndarray,
- time_stamps: np.ndarray,
- insample_size: int,
- outsample_size: int,
- window_sampling_limit: int,
- batch_size: int,
- time_features,
- ):
+ def __init__(
+ self,
+ timeseries: np.ndarray,
+ time_stamps: np.ndarray,
+ insample_size: int,
+ outsample_size: int,
+ window_sampling_limit: int,
+ batch_size: int,
+ time_features,
+ ):
self.timeseries = [ts for ts in timeseries]
self.time_stamps = [ts for ts in time_stamps]
self.window_sampling_limit = window_sampling_limit
@@ -20,7 +20,6 @@ def __init__(self,
self.outsample_size = outsample_size
self.time_features = time_features
self.time_embedding_dim = self.time_features(self.time_stamps[0]).T.shape[0]
-
def __iter__(self):
while True:
@@ -28,47 +27,79 @@ def __iter__(self):
insample_mask = np.zeros((self.batch_size, self.insample_size))
outsample = np.zeros((self.batch_size, self.outsample_size))
outsample_mask = np.zeros((self.batch_size, self.outsample_size))
- sampled_ts_indices = np.random.randint(len(self.timeseries), size=self.batch_size)
+ sampled_ts_indices = np.random.randint(
+ len(self.timeseries), size=self.batch_size
+ )
insample_time_stamps = np.zeros(
- (self.batch_size, self.insample_size, self.time_embedding_dim), dtype=object)
+ (self.batch_size, self.insample_size, self.time_embedding_dim),
+ dtype=object,
+ )
outsample_time_stamps = np.zeros(
- (self.batch_size, self.outsample_size, self.time_embedding_dim), dtype=object)
+ (self.batch_size, self.outsample_size, self.time_embedding_dim),
+ dtype=object,
+ )
for i, sampled_index in enumerate(sampled_ts_indices):
sampled_timeseries = self.timeseries[sampled_index]
- cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit),
- high=len(sampled_timeseries),
- size=1)[0]
+ cut_point = np.random.randint(
+ low=max(1, len(sampled_timeseries) - self.window_sampling_limit),
+ high=len(sampled_timeseries),
+ size=1,
+ )[0]
- insample_window = sampled_timeseries[max(0, cut_point - self.insample_size):cut_point]
- insample[i, -len(insample_window):] = insample_window
- insample_mask[i, -len(insample_window):] = 1.0
+ insample_window = sampled_timeseries[
+ max(0, cut_point - self.insample_size) : cut_point
+ ]
+ insample[i, -len(insample_window) :] = insample_window
+ insample_mask[i, -len(insample_window) :] = 1.0
outsample_window = sampled_timeseries[
- cut_point:min(len(sampled_timeseries), cut_point + self.outsample_size)]
- outsample[i, :len(outsample_window)] = outsample_window
- outsample_mask[i, :len(outsample_window)] = 1.0
+ cut_point : min(
+ len(sampled_timeseries), cut_point + self.outsample_size
+ )
+ ]
+ outsample[i, : len(outsample_window)] = outsample_window
+ outsample_mask[i, : len(outsample_window)] = 1.0
sampled_timestamps = self.time_stamps[sampled_index]
- insample_window_time_stamps = sampled_timestamps[max(0, cut_point - self.insample_size):cut_point]
- insample_time_stamps[i, -len(insample_window_time_stamps):] = self.time_features(insample_window_time_stamps)
+ insample_window_time_stamps = sampled_timestamps[
+ max(0, cut_point - self.insample_size) : cut_point
+ ]
+ insample_time_stamps[
+ i, -len(insample_window_time_stamps) :
+ ] = self.time_features(insample_window_time_stamps)
outsample_window_timestamps = sampled_timestamps[
- cut_point:min(len(sampled_timestamps), cut_point + self.outsample_size)]
- outsample_time_stamps[i, :len(outsample_window_timestamps)] = self.time_features(outsample_window_timestamps)
- yield insample, insample_mask, outsample, outsample_mask, insample_time_stamps, outsample_time_stamps
+ cut_point : min(
+ len(sampled_timestamps), cut_point + self.outsample_size
+ )
+ ]
+ outsample_time_stamps[
+ i, : len(outsample_window_timestamps)
+ ] = self.time_features(outsample_window_timestamps)
+ yield (
+ insample,
+ insample_mask,
+ outsample,
+ outsample_mask,
+ insample_time_stamps,
+ outsample_time_stamps,
+ )
def sequential_latest_insamples(self):
batch_size = len(self.timeseries)
insample = np.zeros((batch_size, self.insample_size))
insample_mask = np.zeros((batch_size, self.insample_size))
insample_time_stamps = np.zeros(
- (batch_size, self.insample_size, self.time_embedding_dim), dtype=object)
+ (batch_size, self.insample_size, self.time_embedding_dim), dtype=object
+ )
for i, (ts, time_stamp) in enumerate(zip(self.timeseries, self.time_stamps)):
- ts_last_window = ts[-self.insample_size:]
- insample[i, -len(ts):] = ts_last_window
- insample_mask[i, -len(ts):] = 1.0
+ ts_last_window = ts[-self.insample_size :]
+ insample[i, -len(ts) :] = ts_last_window
+ insample_mask[i, -len(ts) :] = 1.0
sampled_timestamps = time_stamp
- insample_window_time_stamps = sampled_timestamps[-self.insample_size:]
- insample_time_stamps[i, -len(insample_window_time_stamps):] = self.time_features(insample_window_time_stamps)
+ insample_window_time_stamps = sampled_timestamps[-self.insample_size :]
+ insample_time_stamps[
+ i, -len(insample_window_time_stamps) :
+ ] = self.time_features(insample_window_time_stamps)
return insample, insample_mask, insample_time_stamps
diff --git a/benchmark/data_provider/data_factory.py b/benchmark/data_provider/data_factory.py
index 42f96ff..6ed4bc4 100644
--- a/benchmark/data_provider/data_factory.py
+++ b/benchmark/data_provider/data_factory.py
@@ -1,5 +1,7 @@
-from data_provider.data_loader import Dataset_Custom
from torch.utils.data import DataLoader
+
+from data_provider.data_loader import Dataset_Custom
+
# from metalearned.resources.electricity.dataset import ElectricityDataset, ElectricityMeta
# from metalearned.resources.m3.dataset import M3Dataset, M3Meta
# from metalearned.resources.m4.dataset import M4Dataset, M4Meta
@@ -74,5 +76,6 @@ def data_provider(args, flag):
batch_size=batch_size,
shuffle=shuffle_flag,
num_workers=args.num_workers,
- drop_last=drop_last)
+ drop_last=drop_last,
+ )
return data_set, data_loader
diff --git a/benchmark/data_provider/data_loader.py b/benchmark/data_provider/data_loader.py
index 616590c..566b74c 100644
--- a/benchmark/data_provider/data_loader.py
+++ b/benchmark/data_provider/data_loader.py
@@ -1,23 +1,33 @@
import os
-import numpy as np
+import warnings
+
import pandas as pd
-import os
-import torch
-from torch.utils.data import Dataset, DataLoader
-from utils.timefeatures import time_features
from sklearn.preprocessing import StandardScaler
-import warnings
+from torch.utils.data import Dataset
+
+from utils.timefeatures import time_features
warnings.filterwarnings('ignore')
+
class Dataset_Custom(Dataset):
- def __init__(self, root_path, flag='train', size=None,
- features='S', data_path='ETTh1.csv',
- target='OT', scale=True, timeenc=0, freq='h',
- scaler=StandardScaler(), train_budget=None):
+ def __init__(
+ self,
+ root_path,
+ flag='train',
+ size=None,
+ features='S',
+ data_path='ETTh1.csv',
+ target='OT',
+ scale=True,
+ timeenc=0,
+ freq='h',
+ scaler=StandardScaler(),
+ train_budget=None,
+ ):
# size [seq_len, label_len, pred_len]
# info
- if size == None:
+ if size is None:
self.seq_len = 24 * 4 * 4
self.label_len = 24 * 4
self.pred_len = 24 * 4
@@ -43,12 +53,11 @@ def __init__(self, root_path, flag='train', size=None,
self.__read_data__()
def __read_data__(self):
- df_raw = pd.read_csv(os.path.join(self.root_path,
- self.data_path))
+ df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))
- '''
+ """
df_raw.columns: ['date', ...(other features), target feature]
- '''
+ """
cols = list(df_raw.columns)
cols.remove(self.target)
cols.remove('date')
@@ -60,10 +69,13 @@ def __read_data__(self):
train_start = 0
if self.train_budget:
- train_start = max(train_start, num_train -
- self.seq_len - self.train_budget)
+ train_start = max(train_start, num_train - self.seq_len - self.train_budget)
- border1s = [train_start, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
+ border1s = [
+ train_start,
+ num_train - self.seq_len,
+ len(df_raw) - num_test - self.seq_len,
+ ]
border2s = [num_train, num_train + num_vali, len(df_raw)]
border1 = border1s[self.set_type]
border2 = border2s[self.set_type]
@@ -75,7 +87,7 @@ def __read_data__(self):
df_data = df_raw[[self.target]]
if self.scale:
- train_data = df_data[0:border2s[0]]
+ train_data = df_data[0 : border2s[0]]
self.scaler.fit(train_data.values)
data = self.scaler.transform(df_data.values)
else:
@@ -91,7 +103,9 @@ def __read_data__(self):
df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1)
data_stamp = df_stamp.drop(['date'], 1).values
elif self.timeenc == 1:
- data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
+ data_stamp = time_features(
+ pd.to_datetime(df_stamp['date'].values), freq=self.freq
+ )
data_stamp = data_stamp.transpose(1, 0)
self.data_x = data[border1:border2]
@@ -108,10 +122,10 @@ def __getitem__(self, index):
seq_y = self.data_y[r_begin:r_end]
seq_x_mark = self.data_stamp[s_begin:s_end]
seq_y_mark = self.data_stamp[r_begin:r_end]
- seq_x_original = self.data_stamp_original['date'].values[s_begin:s_end]
- seq_y_original = self.data_stamp_original['date'].values[r_begin:r_end]
+ # seq_x_original = self.data_stamp_original["date"].values[s_begin:s_end]
+ # seq_y_original = self.data_stamp_original["date"].values[r_begin:r_end]
- return seq_x, seq_y, seq_x_mark, seq_y_mark#, seq_x_original, seq_y_original
+ return seq_x, seq_y, seq_x_mark, seq_y_mark # , seq_x_original, seq_y_original
def __len__(self):
return len(self.data_x) - self.seq_len - self.pred_len + 1
@@ -121,13 +135,24 @@ def inverse_transform(self, data):
class Dataset_Pred(Dataset):
- def __init__(self, root_path, flag='pred', size=None,
- features='S', data_path='ETTh1.csv',
- target='OT', scale=True, inverse=False, timeenc=0, freq='15min', cols=None,
- scaler=StandardScaler()):
+ def __init__(
+ self,
+ root_path,
+ flag='pred',
+ size=None,
+ features='S',
+ data_path='ETTh1.csv',
+ target='OT',
+ scale=True,
+ inverse=False,
+ timeenc=0,
+ freq='15min',
+ cols=None,
+ scaler=StandardScaler(),
+ ):
# size [seq_len, label_len, pred_len]
# info
- if size == None:
+ if size is None:
self.seq_len = 24 * 4 * 4
self.label_len = 24 * 4
self.pred_len = 24 * 4
@@ -151,11 +176,10 @@ def __init__(self, root_path, flag='pred', size=None,
self.__read_data__()
def __read_data__(self):
- df_raw = pd.read_csv(os.path.join(self.root_path,
- self.data_path))
- '''
+ df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))
+ """
df_raw.columns: ['date', ...(other features), target feature]
- '''
+ """
if self.cols:
cols = self.cols.copy()
cols.remove(self.target)
@@ -181,7 +205,9 @@ def __read_data__(self):
tmp_stamp = df_raw[['date']][border1:border2]
tmp_stamp['date'] = pd.to_datetime(tmp_stamp.date)
- pred_dates = pd.date_range(tmp_stamp.date.values[-1], periods=self.pred_len + 1, freq=self.freq)
+ pred_dates = pd.date_range(
+ tmp_stamp.date.values[-1], periods=self.pred_len + 1, freq=self.freq
+ )
df_stamp = pd.DataFrame(columns=['date'])
df_stamp.date = list(tmp_stamp.date.values) + list(pred_dates[1:])
@@ -194,7 +220,9 @@ def __read_data__(self):
df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15)
data_stamp = df_stamp.drop(['date'], 1).values
elif self.timeenc == 1:
- data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
+ data_stamp = time_features(
+ pd.to_datetime(df_stamp['date'].values), freq=self.freq
+ )
data_stamp = data_stamp.transpose(1, 0)
self.data_x = data[border1:border2]
@@ -212,9 +240,9 @@ def __getitem__(self, index):
seq_x = self.data_x[s_begin:s_end]
if self.inverse:
- seq_y = self.data_x[r_begin:r_begin + self.label_len]
+ seq_y = self.data_x[r_begin : r_begin + self.label_len]
else:
- seq_y = self.data_y[r_begin:r_begin + self.label_len]
+ seq_y = self.data_y[r_begin : r_begin + self.label_len]
seq_x_mark = self.data_stamp[s_begin:s_end]
seq_y_mark = self.data_stamp[r_begin:r_end]
diff --git a/benchmark/exp/exp_ForecastPFN.py b/benchmark/exp/exp_ForecastPFN.py
index 2e17d7a..b880da7 100644
--- a/benchmark/exp/exp_ForecastPFN.py
+++ b/benchmark/exp/exp_ForecastPFN.py
@@ -1,25 +1,26 @@
+import datetime
import os
+import time
import warnings
+
import numpy as np
-import torch
-import torch.nn as nn
import pandas as pd
-import datetime
-import time
-from data_provider.data_factory import data_provider
+import tensorflow as tf
+import torch
+from sklearn.preprocessing import StandardScaler
+
from exp.exp_basic import Exp_Basic
-from utils.metrics import metric
from utils.metrics import smape
-import tensorflow as tf
-import tensorflow_io
-from sklearn.preprocessing import StandardScaler, MinMaxScaler
+
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
- try:
- tf.config.experimental.set_virtual_device_configuration(
- gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
- except RuntimeError as e:
- print(e)
+ try:
+ tf.config.experimental.set_virtual_device_configuration(
+ gpus[0],
+ [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)],
+ )
+ except RuntimeError as e:
+ print(e)
warnings.filterwarnings('ignore')
@@ -30,24 +31,25 @@ def __init__(self, args):
super(Exp_ForecastPFN, self).__init__(args)
def _build_model(self):
- return
+ pass
def train(self, setting):
- return
-
+ pass
+
def _ForecastPFN_time_features(self, ts: np.ndarray):
if type(ts[0]) == datetime.datetime:
year = [x.year for x in ts]
month = [x.month for x in ts]
day = [x.day for x in ts]
- day_of_week = [x.weekday()+1 for x in ts]
+ day_of_week = [x.weekday() + 1 for x in ts]
day_of_year = [x.timetuple().tm_yday for x in ts]
return np.stack([year, month, day, day_of_week, day_of_year], axis=-1)
ts = pd.to_datetime(ts)
- return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1)
+ return np.stack(
+ [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1
+ )
- def _process_tuple(self,x,x_mark,y_mark,
- model, horizon):
+ def _process_tuple(self, x, x_mark, y_mark, model, horizon):
"""
x: tensor of shape (n, 1)
x_mark: tensor of shape (n, d)
@@ -74,7 +76,7 @@ def _process_tuple(self,x,x_mark,y_mark,
history_std = np.nanstd(history[-6:])
# local scale, don't know why defined so
- local_scale = (history_mean + history_std + 1e-4)
+ local_scale = history_mean + history_std + 1e-4
# change history based on local scale, to normalize it between 0 and 1
history = np.clip(history / local_scale, a_min=0, a_max=1)
@@ -85,44 +87,60 @@ def _process_tuple(self,x,x_mark,y_mark,
target = tf.convert_to_tensor(x_mark)[-100:, :]
history = tf.convert_to_tensor(history)[-100:, :]
else:
- target = tf.pad(x_mark.cpu(), [[100-x.shape[0], 0], [0, 0]])
- history = tf.pad(history, [[100-x.shape[0], 0], [0, 0]])
+ target = tf.pad(x_mark.cpu(), [[100 - x.shape[0], 0], [0, 0]])
+ history = tf.pad(history, [[100 - x.shape[0], 0], [0, 0]])
- history = tf.repeat(tf.expand_dims(history, axis=0), [
- horizon], axis=0)[:, :, 0]
- ts = tf.repeat(tf.expand_dims(
- target, axis=0), [horizon], axis=0)
+ history = tf.repeat(tf.expand_dims(history, axis=0), [horizon], axis=0)[
+ :, :, 0
+ ]
+ ts = tf.repeat(tf.expand_dims(target, axis=0), [horizon], axis=0)
else:
- ts = tf.convert_to_tensor(x_mark.unsqueeze(0).repeat(
- horizon, 1, 1), dtype=tf.int64)
+ ts = tf.convert_to_tensor(
+ x_mark.unsqueeze(0).repeat(horizon, 1, 1), dtype=tf.int64
+ )
history = tf.convert_to_tensor(history, dtype=tf.float32)
- task = tf.fill([horizon, ], 1)
+ task = tf.fill(
+ [
+ horizon,
+ ],
+ 1,
+ )
target_ts = tf.convert_to_tensor(
- y_mark.cpu()[-horizon:, :].unsqueeze(1), dtype=tf.int64)
-
- model_input = {'ts': ts, 'history': history,
- 'target_ts': target_ts, 'task': task}
+ y_mark.cpu()[-horizon:, :].unsqueeze(1), dtype=tf.int64
+ )
+
+ model_input = {
+ 'ts': ts,
+ 'history': history,
+ 'target_ts': target_ts,
+ 'task': task,
+ }
t1 = time.time()
pred_vals = model(model_input)
time_diff = time.time() - t1
- scaled_vals = pred_vals['result'].numpy(
- ).T.reshape(-1) * pred_vals['scale'].numpy().reshape(-1)
+ scaled_vals = pred_vals['result'].numpy().T.reshape(-1) * pred_vals[
+ 'scale'
+ ].numpy().reshape(-1)
scaled_vals = scaler.inverse_transform([scaled_vals])
return scaled_vals, time_diff
-
- def _ForecastPFN_process_batch(self, model, batch_x, batch_y, batch_x_mark, batch_y_mark):
+
+ def _ForecastPFN_process_batch(
+ self, model, batch_x, batch_y, batch_x_mark, batch_y_mark
+ ):
preds = []
trues = []
- for idx, (x, y, x_mark, y_mark) in enumerate(zip(batch_x, batch_y, batch_x_mark, batch_y_mark)):
-
+ for idx, (x, y, x_mark, y_mark) in enumerate(
+ zip(batch_x, batch_y, batch_x_mark, batch_y_mark)
+ ):
pred, time_diff = self._process_tuple(
- x, x_mark, y_mark, model, self.args.pred_len)
+ x, x_mark, y_mark, model, self.args.pred_len
+ )
- y = y[-self.args.pred_len:, :].to(self.device)
+ y = y[-self.args.pred_len :, :].to(self.device)
true = y.detach().cpu().numpy()
-
+
preds += [pred]
trues += [true]
return preds, trues, time_diff
@@ -130,11 +148,13 @@ def _ForecastPFN_process_batch(self, model, batch_x, batch_y, batch_x_mark, batc
def test(self, setting, test=0):
test_data, test_loader = self._get_data(flag='test')
test_data.data_stamp = self._ForecastPFN_time_features(
- list(test_data.data_stamp_original['date']))
+ list(test_data.data_stamp_original['date'])
+ )
if test:
print('loading model')
pretrained = tf.keras.models.load_model(
- self.args.model_path, custom_objects={'smape': smape})
+ self.args.model_path, custom_objects={'smape': smape}
+ )
preds = []
trues = []
@@ -145,7 +165,9 @@ def test(self, setting, test=0):
self.test_timer.start_timer()
timer = 0
with torch.no_grad():
- for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
+ for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(
+ test_loader
+ ):
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float().to(self.device)
@@ -153,9 +175,10 @@ def test(self, setting, test=0):
batch_y_mark = batch_y_mark.float().to(self.device)
pred, true, time = self._ForecastPFN_process_batch(
- pretrained, batch_x, batch_y, batch_x_mark, batch_y_mark)
+ pretrained, batch_x, batch_y, batch_x_mark, batch_y_mark
+ )
timer += time
-
+
preds.append(pred)
trues.append(true)
diff --git a/benchmark/exp/exp_arima.py b/benchmark/exp/exp_arima.py
index d4e007b..03fdd97 100644
--- a/benchmark/exp/exp_arima.py
+++ b/benchmark/exp/exp_arima.py
@@ -1,15 +1,9 @@
-import os
-import time
import warnings
-import numpy as np
-import torch
-import torch.nn as nn
-from torch import optim
-from data_provider.data_factory import data_provider
-from exp.exp_basic import Exp_Basic
-from utils.metrics import metric
-import pmdarima
+
import pandas as pd
+import pmdarima
+
+from exp.exp_basic import Exp_Basic
warnings.filterwarnings('ignore')
@@ -21,11 +15,14 @@ def __init__(self, args):
def _build_model(self):
return pmdarima.auto_arima
-
def train(self, setting):
train_data, train_loader = self._get_data(flag='train')
- train_df = pd.DataFrame({'y': train_data.data_y.T[0], 'ds': list(
- pd.to_datetime(train_data.data_stamp_original['date']))})
+ train_df = pd.DataFrame(
+ {
+ 'y': train_data.data_y.T[0],
+ 'ds': list(pd.to_datetime(train_data.data_stamp_original['date'])),
+ }
+ )
self.train_timer.start_timer()
self.model = pmdarima.auto_arima(train_df.y.values)
self.train_timer.end_timer()
@@ -35,23 +32,29 @@ def test(self, setting, test=0):
horizon = self.args.pred_len
test_data, test_loader = self._get_data(flag='test')
- test_df = pd.DataFrame({'y': test_data.data_y.T[0], 'ds': list(
- pd.to_datetime(test_data.data_stamp_original['date']))})
-
- cmp = pd.DataFrame({
- 'date': test_df['ds'].values,
- 'y': test_df['y'].values,
- 'yhat': self.model.predict(test_df.shape[0])
- })
+ test_df = pd.DataFrame(
+ {
+ 'y': test_data.data_y.T[0],
+ 'ds': list(pd.to_datetime(test_data.data_stamp_original['date'])),
+ }
+ )
+
+ cmp = pd.DataFrame(
+ {
+ 'date': test_df['ds'].values,
+ 'y': test_df['y'].values,
+ 'yhat': self.model.predict(test_df.shape[0]),
+ }
+ )
preds, trues = [], []
self.test_timer.start_timer()
- for i in range(self.args.seq_len, cmp.shape[0]-horizon+1):
- pred = cmp[i:i+horizon]['yhat'].values
- true = cmp[i:i+horizon]['y'].values
+ for i in range(self.args.seq_len, cmp.shape[0] - horizon + 1):
+ pred = cmp[i : i + horizon]['yhat'].values
+ true = cmp[i : i + horizon]['y'].values
preds += [pred]
trues += [true]
self.test_timer.end_timer()
-
+
return self._save_test_data(setting, preds, trues)
diff --git a/benchmark/exp/exp_basic.py b/benchmark/exp/exp_basic.py
index 81e526e..e9b2b09 100644
--- a/benchmark/exp/exp_basic.py
+++ b/benchmark/exp/exp_basic.py
@@ -1,9 +1,12 @@
import os
-import torch
+
import numpy as np
+import torch
+
from data_provider.data_factory import data_provider
-from utils.tools import TimeBudget
from utils.metrics import metric
+from utils.tools import TimeBudget
+
class Exp_Basic(object):
def __init__(self, args):
@@ -20,8 +23,9 @@ def _build_model(self):
def _acquire_device(self):
if self.args.use_gpu:
- os.environ["CUDA_VISIBLE_DEVICES"] = str(
- self.args.gpu) if not self.args.use_multi_gpu else self.args.devices
+ os.environ['CUDA_VISIBLE_DEVICES'] = (
+ str(self.args.gpu) if not self.args.use_multi_gpu else self.args.devices
+ )
device = torch.device('cuda:{}'.format(self.args.gpu))
print('Use GPU: cuda:{}'.format(self.args.gpu))
else:
@@ -48,8 +52,8 @@ def _save_test_data(self, setting, preds, trues):
mae, mse, rmse, mape, mspe = metric(preds, trues)
print('mse:{}, mae:{}'.format(mse, mae))
- f = open("result.txt", 'a')
- f.write(setting + " \n")
+ f = open('result.txt', 'a')
+ f.write(setting + ' \n')
f.write('mse:{}, mae:{}'.format(mse, mae))
f.write('\n')
f.write('\n')
@@ -58,15 +62,15 @@ def _save_test_data(self, setting, preds, trues):
output = {
'metrics': {
'mae': mae,
- 'mse': mse,
- 'rmse': rmse,
- 'mape': mape,
+ 'mse': mse,
+ 'rmse': rmse,
+ 'mape': mape,
'mspe': mspe,
},
'train_timer': self.train_timer.total_time,
'vali_timer': self.vali_timer.total_time,
'test_timer': self.test_timer.total_time,
- 'args': self.args
+ 'args': self.args,
}
print(output)
diff --git a/benchmark/exp/exp_last.py b/benchmark/exp/exp_last.py
index 8be4ec4..7d8311d 100644
--- a/benchmark/exp/exp_last.py
+++ b/benchmark/exp/exp_last.py
@@ -1,15 +1,9 @@
-import os
-import time
import warnings
-import numpy as np
+
+import pmdarima
import torch
-import torch.nn as nn
-from torch import optim
-from data_provider.data_factory import data_provider
+
from exp.exp_basic import Exp_Basic
-from utils.metrics import metric
-import pmdarima
-import pandas as pd
warnings.filterwarnings('ignore')
@@ -21,25 +15,30 @@ def __init__(self, args):
def _build_model(self):
return pmdarima.auto_arima
-
def train(self, setting):
- return
+ pass
def test(self, setting, test=0):
- horizon = self.args.pred_len
-
test_data, test_loader = self._get_data(flag='test')
preds, trues = [], []
self.test_timer.start_timer()
with torch.no_grad():
- for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
+ for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(
+ test_loader
+ ):
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float().to(self.device)
- true = batch_y[:, -self.args.pred_len:].detach().cpu().numpy()
- pred = batch_x[:,-1,:].unsqueeze(1).repeat(
- 1, true.shape[1], 1).detach().cpu().numpy()
+ true = batch_y[:, -self.args.pred_len :].detach().cpu().numpy()
+ pred = (
+ batch_x[:, -1, :]
+ .unsqueeze(1)
+ .repeat(1, true.shape[1], 1)
+ .detach()
+ .cpu()
+ .numpy()
+ )
preds.append(pred)
trues.append(true)
diff --git a/benchmark/exp/exp_mean.py b/benchmark/exp/exp_mean.py
index 326acd7..8546e13 100644
--- a/benchmark/exp/exp_mean.py
+++ b/benchmark/exp/exp_mean.py
@@ -1,15 +1,9 @@
-import os
-import time
import warnings
-import numpy as np
+
+import pmdarima
import torch
-import torch.nn as nn
-from torch import optim
-from data_provider.data_factory import data_provider
+
from exp.exp_basic import Exp_Basic
-from utils.metrics import metric
-import pmdarima
-import pandas as pd
warnings.filterwarnings('ignore')
@@ -21,25 +15,30 @@ def __init__(self, args):
def _build_model(self):
return pmdarima.auto_arima
-
def train(self, setting):
- return
+ pass
def test(self, setting, test=0):
- horizon = self.args.pred_len
-
test_data, test_loader = self._get_data(flag='test')
preds, trues = [], []
self.test_timer.start_timer()
with torch.no_grad():
- for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
+ for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(
+ test_loader
+ ):
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float().to(self.device)
- true = batch_y[:, -self.args.pred_len:].detach().cpu().numpy()
- pred = batch_x.mean(1).unsqueeze(1).repeat(
- 1, true.shape[1], 1).detach().cpu().numpy()
+ true = batch_y[:, -self.args.pred_len :].detach().cpu().numpy()
+ pred = (
+ batch_x.mean(1)
+ .unsqueeze(1)
+ .repeat(1, true.shape[1], 1)
+ .detach()
+ .cpu()
+ .numpy()
+ )
preds.append(pred)
trues.append(true)
diff --git a/benchmark/exp/exp_metalearn.py b/benchmark/exp/exp_metalearn.py
index 075056e..bb317e1 100644
--- a/benchmark/exp/exp_metalearn.py
+++ b/benchmark/exp/exp_metalearn.py
@@ -1,36 +1,30 @@
import os
+import time
import warnings
-import numpy as np
+
+import tensorflow as tf
import torch
-import torch.nn as nn
-import pandas as pd
-import datetime
-from data_provider.data_factory import data_provider
+
from exp.exp_basic import Exp_Basic
-from utils.metrics import metric
-from utils.metrics import smape
-import tensorflow as tf
-import tensorflow_io
-import time
-from sklearn.preprocessing import StandardScaler, MinMaxScaler
+
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
- try:
- tf.config.experimental.set_virtual_device_configuration(
- gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
- except RuntimeError as e:
- print(e)
+ try:
+ tf.config.experimental.set_virtual_device_configuration(
+ gpus[0],
+ [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)],
+ )
+ except RuntimeError as e:
+ print(e)
import sys
+
sys.path.append('metalearned')
from metalearned.common.experiment import load_experiment_parameters
-from metalearned.common.torch_utils import SnapshotManager, to_device, to_tensor, mase_loss, mape_loss, smape_2_loss
+from metalearned.common.torch_utils import SnapshotManager
from metalearned.models.nbeats_torch import nbeats_generic, nbeats_interpretable
-
-
-
warnings.filterwarnings('ignore')
@@ -39,7 +33,6 @@ def __init__(self, args):
super(Exp_Metalearn, self).__init__(args)
def _build_model(self):
-
self.args.path = f'metalearned/experiments/tl/ForecastPFN/loss_name=MAPE,input_size={self.args.seq_len},horizon={self.args.pred_len}/'
experiment_parameters = load_experiment_parameters(self.args.path)
@@ -49,35 +42,39 @@ def _build_model(self):
horizon = experiment_parameters['horizon']
if experiment_parameters['model_type'] == 'generic':
- model = nbeats_generic(input_size=input_size,
- output_size=horizon,
- blocks=experiment_parameters['blocks'],
- stacks=experiment_parameters['stacks'],
- fc_layers=experiment_parameters['layers'],
- fc_layers_size=experiment_parameters['width'],
- scaling=experiment_parameters['scaling'],
- mode=experiment_parameters['mode'])
+ model = nbeats_generic(
+ input_size=input_size,
+ output_size=horizon,
+ blocks=experiment_parameters['blocks'],
+ stacks=experiment_parameters['stacks'],
+ fc_layers=experiment_parameters['layers'],
+ fc_layers_size=experiment_parameters['width'],
+ scaling=experiment_parameters['scaling'],
+ mode=experiment_parameters['mode'],
+ )
else:
- model = nbeats_interpretable(input_size=input_size,
- output_size=horizon,
- trend_blocks=experiment_parameters['trend_blocks'],
- trend_fc_layers=experiment_parameters['layers'],
- trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'],
- degree_of_polynomial=experiment_parameters['degree_of_polynomial'],
- seasonality_blocks=experiment_parameters['seasonality_blocks'],
- seasonality_fc_layers=experiment_parameters['layers'],
- seasonality_fc_layers_size=experiment_parameters[
- 'seasonality_fc_layers_size'],
- num_of_harmonics=experiment_parameters['num_of_harmonics'],
- scaling=experiment_parameters['scaling'],
- mode=experiment_parameters['mode'])
+ model = nbeats_interpretable(
+ input_size=input_size,
+ output_size=horizon,
+ trend_blocks=experiment_parameters['trend_blocks'],
+ trend_fc_layers=experiment_parameters['layers'],
+ trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'],
+ degree_of_polynomial=experiment_parameters['degree_of_polynomial'],
+ seasonality_blocks=experiment_parameters['seasonality_blocks'],
+ seasonality_fc_layers=experiment_parameters['layers'],
+ seasonality_fc_layers_size=experiment_parameters[
+ 'seasonality_fc_layers_size'
+ ],
+ num_of_harmonics=experiment_parameters['num_of_harmonics'],
+ scaling=experiment_parameters['scaling'],
+ mode=experiment_parameters['mode'],
+ )
return model.to(self.device)
def train(self, setting):
- return
-
-
+ pass
+
def test(self, setting, test=0):
test_data, test_loader = self._get_data(flag='test')
@@ -90,14 +87,15 @@ def test(self, setting, test=0):
experiment_parameters = self.args.experiment_parameters
snapshot_dir = os.path.join(path, 'snapshots', time_freq)
- snapshot_manager = SnapshotManager(snapshot_dir=snapshot_dir,
- logging_frequency=experiment_parameters['logging_frequency'],
- snapshot_frequency=experiment_parameters['snapshot_frequency'])
+ snapshot_manager = SnapshotManager(
+ snapshot_dir=snapshot_dir,
+ logging_frequency=experiment_parameters['logging_frequency'],
+ snapshot_frequency=experiment_parameters['snapshot_frequency'],
+ )
self.model.load_state_dict(torch.load(snapshot_manager.model_snapshot_file))
self.model.to(self.device)
-
preds = []
trues = []
folder_path = './test_results/' + setting + '/'
@@ -107,9 +105,11 @@ def test(self, setting, test=0):
self.test_timer.start_timer()
timer = 0
with torch.no_grad():
- for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
- batch_x = batch_x.float().to(self.device)[:,:,0]
- batch_y = batch_y.float().to(self.device)[:,:,0]
+ for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(
+ test_loader
+ ):
+ batch_x = batch_x.float().to(self.device)[:, :, 0]
+ batch_y = batch_y.float().to(self.device)[:, :, 0]
print(batch_x.shape, batch_y.shape)
@@ -118,10 +118,10 @@ def test(self, setting, test=0):
t1 = time.time()
pred = self.model(batch_x, torch.ones(batch_x.shape).to(self.device))
- timer += time.time()-t1
+ timer += time.time() - t1
pred = pred.detach().cpu().numpy()
- true = batch_y[:,-self.args.pred_len:].detach().cpu().numpy()
+ true = batch_y[:, -self.args.pred_len :].detach().cpu().numpy()
preds.append(pred)
trues.append(true)
diff --git a/benchmark/exp/exp_prophet.py b/benchmark/exp/exp_prophet.py
index a7e30fb..b0710b6 100644
--- a/benchmark/exp/exp_prophet.py
+++ b/benchmark/exp/exp_prophet.py
@@ -1,13 +1,9 @@
-import os
-import time
import warnings
-import numpy as np
-import torch
-import torch.nn as nn
-from torch import optim
-from exp.exp_basic import Exp_Basic
-import prophet
+
import pandas as pd
+import prophet
+
+from exp.exp_basic import Exp_Basic
warnings.filterwarnings('ignore')
@@ -21,8 +17,12 @@ def _build_model(self):
def train(self, setting):
train_data, train_loader = self._get_data(flag='train')
- train_df = pd.DataFrame({'y': train_data.data_y.T[0], 'ds': list(
- pd.to_datetime(train_data.data_stamp_original['date']))})
+ train_df = pd.DataFrame(
+ {
+ 'y': train_data.data_y.T[0],
+ 'ds': list(pd.to_datetime(train_data.data_stamp_original['date'])),
+ }
+ )
self.train_timer.start_timer()
self.model.fit(train_df)
self.train_timer.end_timer()
@@ -32,24 +32,29 @@ def test(self, setting, test=0):
horizon = self.args.pred_len
test_data, test_loader = self._get_data(flag='test')
- test_df = pd.DataFrame({'y': test_data.data_y.T[0], 'ds': list(
- pd.to_datetime(test_data.data_stamp_original['date']))})
- predict_frame = self.model.make_future_dataframe(
- test_data.data_x.shape[0])
+ test_df = pd.DataFrame(
+ {
+ 'y': test_data.data_y.T[0],
+ 'ds': list(pd.to_datetime(test_data.data_stamp_original['date'])),
+ }
+ )
+ self.model.make_future_dataframe(test_data.data_x.shape[0])
forecast = self.model.predict(test_df)
- cmp = pd.DataFrame({
- 'date': test_df['ds'].values,
- 'ds': forecast.ds.values,
- 'y': test_df['y'].values,
- 'yhat': forecast.yhat.values
- })
+ cmp = pd.DataFrame(
+ {
+ 'date': test_df['ds'].values,
+ 'ds': forecast.ds.values,
+ 'y': test_df['y'].values,
+ 'yhat': forecast.yhat.values,
+ }
+ )
preds, trues = [], []
self.test_timer.start_timer()
- for i in range(self.args.seq_len, cmp.shape[0]-horizon+1):
- pred = cmp[i:i+horizon]['yhat'].values
- true = cmp[i:i+horizon]['y'].values
+ for i in range(self.args.seq_len, cmp.shape[0] - horizon + 1):
+ pred = cmp[i : i + horizon]['yhat'].values
+ true = cmp[i : i + horizon]['y'].values
preds += [pred]
trues += [true]
diff --git a/benchmark/exp/exp_resolver.py b/benchmark/exp/exp_resolver.py
index 6af7dba..e761189 100644
--- a/benchmark/exp/exp_resolver.py
+++ b/benchmark/exp/exp_resolver.py
@@ -1,14 +1,14 @@
import warnings
-from exp.exp_transformer import Exp_Transformer
-from exp.exp_transformer_metalearn import Exp_Transformer_Meta
-from exp.exp_ForecastPFN import Exp_ForecastPFN
+
from exp.exp_arima import Exp_Arima
-from exp.exp_prophet import Exp_Prophet
-from exp.exp_metalearn import Exp_Metalearn
-from exp.exp_mean import Exp_Mean
+from exp.exp_ForecastPFN import Exp_ForecastPFN
from exp.exp_last import Exp_Last
+from exp.exp_mean import Exp_Mean
+from exp.exp_metalearn import Exp_Metalearn
+from exp.exp_prophet import Exp_Prophet
from exp.exp_seasonalNaive import Exp_SeasonalNaive
-
+from exp.exp_transformer import Exp_Transformer
+from exp.exp_transformer_metalearn import Exp_Transformer_Meta
warnings.filterwarnings('ignore')
diff --git a/benchmark/exp/exp_seasonalNaive.py b/benchmark/exp/exp_seasonalNaive.py
index 8a06136..4668e9f 100644
--- a/benchmark/exp/exp_seasonalNaive.py
+++ b/benchmark/exp/exp_seasonalNaive.py
@@ -1,15 +1,9 @@
-import os
-import time
import warnings
-import numpy as np
+
+import pmdarima
import torch
-import torch.nn as nn
-from torch import optim
-from data_provider.data_factory import data_provider
+
from exp.exp_basic import Exp_Basic
-from utils.metrics import metric
-import pmdarima
-import pandas as pd
warnings.filterwarnings('ignore')
@@ -21,25 +15,29 @@ def __init__(self, args):
def _build_model(self):
return pmdarima.auto_arima
-
def train(self, setting):
- return
+ pass
def test(self, setting, test=0):
- horizon = self.args.pred_len
-
test_data, test_loader = self._get_data(flag='test')
preds, trues = [], []
self.test_timer.start_timer()
with torch.no_grad():
- for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
+ for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(
+ test_loader
+ ):
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float().to(self.device)
- true = batch_y[:, -self.args.pred_len:].detach().cpu().numpy()
- pred = batch_x[:,-7:,:].repeat(
- 1,int(true.shape[1]/7)+1,1)[:,:true.shape[1],:].detach().cpu().numpy()
+ true = batch_y[:, -self.args.pred_len :].detach().cpu().numpy()
+ pred = (
+ batch_x[:, -7:, :]
+ .repeat(1, int(true.shape[1] / 7) + 1, 1)[:, : true.shape[1], :]
+ .detach()
+ .cpu()
+ .numpy()
+ )
preds.append(pred)
trues.append(true)
diff --git a/benchmark/exp/exp_transformer.py b/benchmark/exp/exp_transformer.py
index 8281fb1..f53c411 100644
--- a/benchmark/exp/exp_transformer.py
+++ b/benchmark/exp/exp_transformer.py
@@ -1,16 +1,15 @@
import os
import time
import warnings
+
import numpy as np
import torch
import torch.nn as nn
from torch import optim
-from data_provider.data_factory import data_provider
-from exp.exp_basic import Exp_Basic
-from transformer_models.models import FEDformer, Autoformer, Informer, Transformer
-from utils.tools import EarlyStopping, TimeBudget, adjust_learning_rate, visual
-from utils.metrics import metric
+from exp.exp_basic import Exp_Basic
+from transformer_models.models import Autoformer, FEDformer, Informer, Transformer
+from utils.tools import EarlyStopping, TimeBudget, adjust_learning_rate
warnings.filterwarnings('ignore')
@@ -38,8 +37,7 @@ def _build_model(self):
return model.to(self.device)
def _select_optimizer(self):
- model_optim = optim.Adam(
- self.model.parameters(), lr=self.args.learning_rate)
+ model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
return model_optim
def _select_criterion(self):
@@ -51,7 +49,9 @@ def vali(self, vali_data, vali_loader, criterion):
self.model.eval()
self.vali_timer.start_timer()
with torch.no_grad():
- for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
+ for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(
+ vali_loader
+ ):
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float()
@@ -59,29 +59,34 @@ def vali(self, vali_data, vali_loader, criterion):
batch_y_mark = batch_y_mark.float().to(self.device)
# decoder input
- dec_inp = torch.zeros_like(
- batch_y[:, -self.args.pred_len:, :]).float()
- dec_inp = torch.cat(
- [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
+ dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float()
+ dec_inp = (
+ torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1)
+ .float()
+ .to(self.device)
+ )
# encoder - decoder
if self.args.use_amp:
with torch.cuda.amp.autocast():
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
else:
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
f_dim = -1 if self.args.features == 'MS' else 0
- batch_y = batch_y[:, -self.args.pred_len:,
- f_dim:].to(self.device)
+ batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device)
pred = outputs.detach().cpu()
true = batch_y.detach().cpu()
@@ -106,11 +111,10 @@ def train(self, setting):
else:
raise NotImplementedError
- time_now = time.time()
+ time.time()
train_steps = len(train_loader)
- early_stopping = EarlyStopping(
- patience=self.args.patience, verbose=False)
+ early_stopping = EarlyStopping(patience=self.args.patience, verbose=False)
model_optim = self._select_optimizer()
criterion = self._select_criterion()
@@ -126,7 +130,9 @@ def train(self, setting):
self.model.train()
epoch_time = time.time()
- for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
+ for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(
+ train_loader
+ ):
iter_count += 1
model_optim.zero_grad()
batch_x = batch_x.float().to(self.device)
@@ -136,37 +142,43 @@ def train(self, setting):
batch_y_mark = batch_y_mark.float().to(self.device)
# decoder input
- dec_inp = torch.zeros_like(
- batch_y[:, -self.args.pred_len:, :]).float()
- dec_inp = torch.cat(
- [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
+ dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float()
+ dec_inp = (
+ torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1)
+ .float()
+ .to(self.device)
+ )
# encoder - decoder
if self.args.use_amp:
with torch.cuda.amp.autocast():
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
f_dim = -1 if self.args.features == 'MS' else 0
- batch_y = batch_y[:, -self.args.pred_len:,
- f_dim:].to(self.device)
+ batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(
+ self.device
+ )
loss = criterion(outputs, batch_y)
train_loss.append(loss.item())
else:
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
f_dim = -1 if self.args.features == 'MS' else 0
- batch_y = batch_y[:, -self.args.pred_len:,
- f_dim:].to(self.device)
+ batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device)
loss = criterion(outputs, batch_y)
train_loss.append(loss.item())
@@ -190,17 +202,19 @@ def train(self, setting):
return self.model
- print("Epoch: {} cost time: {}".format(
- epoch + 1, time.time() - epoch_time))
+ print('Epoch: {} cost time: {}'.format(epoch + 1, time.time() - epoch_time))
train_loss = np.average(train_loss)
vali_loss = self.vali(vali_data, vali_loader, criterion)
test_loss = self.vali(test_data, test_loader, criterion)
- print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
- epoch + 1, train_steps, train_loss, vali_loss, test_loss))
+ print(
+ 'Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}'.format(
+ epoch + 1, train_steps, train_loss, vali_loss, test_loss
+ )
+ )
early_stopping(vali_loss, self.model, path)
if early_stopping.early_stop:
- print("Early stopping")
+ print('Early stopping')
break
adjust_learning_rate(model_optim, epoch + 1, self.args)
@@ -217,11 +231,18 @@ def test(self, setting, test=0):
if test:
print('loading model')
if self.args.use_gpu:
- self.model.load_state_dict(torch.load(os.path.join(
- './checkpoints/' + setting, 'checkpoint.pth')))
+ self.model.load_state_dict(
+ torch.load(
+ os.path.join('./checkpoints/' + setting, 'checkpoint.pth')
+ )
+ )
else:
- self.model.load_state_dict(torch.load(os.path.join(
- './checkpoints/' + setting, 'checkpoint.pth'), map_location=torch.device('cpu')))
+ self.model.load_state_dict(
+ torch.load(
+ os.path.join('./checkpoints/' + setting, 'checkpoint.pth'),
+ map_location=torch.device('cpu'),
+ )
+ )
preds = []
trues = []
@@ -230,10 +251,11 @@ def test(self, setting, test=0):
os.makedirs(folder_path)
self.model.eval()
- j = 0
self.test_timer.start_timer()
with torch.no_grad():
- for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
+ for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(
+ test_loader
+ ):
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float().to(self.device)
@@ -241,32 +263,37 @@ def test(self, setting, test=0):
batch_y_mark = batch_y_mark.float().to(self.device)
# decoder input
- dec_inp = torch.zeros_like(
- batch_y[:, -self.args.pred_len:, :]).float()
- dec_inp = torch.cat(
- [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
+ dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float()
+ dec_inp = (
+ torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1)
+ .float()
+ .to(self.device)
+ )
# encoder - decoder
if self.args.use_amp:
with torch.cuda.amp.autocast():
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
else:
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
f_dim = -1 if self.args.features == 'MS' else 0
- batch_y = batch_y[:, -self.args.pred_len:,
- f_dim:].to(self.device)
+ batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device)
outputs = outputs.detach().cpu().numpy()
batch_y = batch_y.detach().cpu().numpy()
@@ -300,33 +327,41 @@ def predict(self, setting, load=False):
self.model.eval()
with torch.no_grad():
- for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(pred_loader):
+ for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(
+ pred_loader
+ ):
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float()
batch_x_mark = batch_x_mark.float().to(self.device)
batch_y_mark = batch_y_mark.float().to(self.device)
# decoder input
- dec_inp = torch.zeros_like(
- batch_y[:, -self.args.pred_len:, :]).float()
- dec_inp = torch.cat(
- [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
+ dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float()
+ dec_inp = (
+ torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1)
+ .float()
+ .to(self.device)
+ )
# encoder - decoder
if self.args.use_amp:
with torch.cuda.amp.autocast():
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
else:
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
pred = outputs.detach().cpu().numpy() # .squeeze()
preds.append(pred)
diff --git a/benchmark/exp/exp_transformer_metalearn.py b/benchmark/exp/exp_transformer_metalearn.py
index 6081d12..1a62e89 100644
--- a/benchmark/exp/exp_transformer_metalearn.py
+++ b/benchmark/exp/exp_transformer_metalearn.py
@@ -1,33 +1,32 @@
+import sys
import time
+
import yaml
-import sys
+
sys.path.append('/home/ubuntu/ForecastPFN/academic_comparison/')
import os
-import time
import warnings
+from typing import Dict
+
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
-from tqdm import tqdm
from torch import optim
-from typing import Dict
+from tqdm import tqdm
+
from data_provider.data_factory import data_provider
from exp.exp_basic import Exp_Basic
from exp.torch_utils import *
-from transformer_models.models import FEDformer, Autoformer, Informer, Transformer
-from utils.tools import EarlyStopping, TimeBudget, adjust_learning_rate, visual
-from utils.metrics import metric
+from transformer_models.models import Autoformer, FEDformer, Informer, Transformer
+from utils.tools import EarlyStopping, TimeBudget, adjust_learning_rate
sys.path.append('/home/ubuntu/ForecastPFN/src/')
sys.path.append('/home/ubuntu/ForecastPFN/src/training/')
-from training.create_train_test_df import create_train_test_df
import tensorflow as tf
-
-
from training.config_variables import Config
-from training.constants import PADDING, HISTORY_LEN, TARGET_LEN, TRIM_LEN, TARGET_INDEX
+from training.constants import HISTORY_LEN, PADDING
from training.prepare_dataset import filter_unusable_points
from training.utils import load_tf_dataset
@@ -61,12 +60,9 @@ def _build_model(self):
return model.to(self.device)
def _get_data(self, flag):
-
-
TARGET_LEN = self.args.label_len + self.args.pred_len
TRIM_LEN = self.args.label_len + self.args.pred_len
- TARGET_INDEX = 2*TRIM_LEN
-
+ TARGET_INDEX = 2 * TRIM_LEN
def compute_time_features(ts: np.ndarray):
"""
@@ -76,24 +72,26 @@ def compute_time_features(ts: np.ndarray):
"""
ts = pd.to_datetime(ts)
if Config.is_sub_day:
- return np.stack([ts.minute, ts.hour, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1)
+ return np.stack(
+ [ts.minute, ts.hour, ts.day, ts.day_of_week + 1, ts.day_of_year],
+ axis=-1,
+ )
return np.stack([ts.month, ts.day, ts.day_of_week, ts.hour], axis=-1)
-
def build_frames(r: Dict[str, tf.Tensor]):
raw_date_info = tf.numpy_function(
- compute_time_features, inp=[r['ts']], Tout=tf.int64)
+ compute_time_features, inp=[r['ts']], Tout=tf.int64
+ )
date_info = tf.signal.frame(
- tf.pad(raw_date_info, [[PADDING, 0], [0, 0]]),
- HISTORY_LEN,
- 1,
- axis=0
+ tf.pad(raw_date_info, [[PADDING, 0], [0, 0]]), HISTORY_LEN, 1, axis=0
)
history = tf.signal.frame(
- tf.pad(r['y'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1)
+ tf.pad(r['y'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1
+ )
noise = tf.signal.frame(
- tf.pad(r['noise'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1)
+ tf.pad(r['noise'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1
+ )
target_dates = tf.signal.frame(raw_date_info, TARGET_LEN, 1, axis=0)
target_values = tf.signal.frame(r['y'], TARGET_LEN, 1, axis=-1)
@@ -107,7 +105,7 @@ def build_frames(r: Dict[str, tf.Tensor]):
noise[-start_index:-TARGET_LEN],
target_dates[TARGET_INDEX:],
target_values[TARGET_INDEX:],
- target_noise[TARGET_INDEX:]
+ target_noise[TARGET_INDEX:],
)
@tf.function
@@ -117,15 +115,14 @@ def gen_random_single_point(
noise: tf.Tensor,
target_dates: tf.Tensor,
target_values: tf.Tensor,
- target_noise: tf.Tensor
+ target_noise: tf.Tensor,
):
-
return dict(
ts=date_info,
- history=history*noise,
+ history=history * noise,
noise=noise,
target_ts=target_dates,
- target_noise=target_noise
+ target_noise=target_noise,
), target_values
@tf.function
@@ -135,25 +132,24 @@ def gen_random_single_point_no_noise(
noise: tf.Tensor,
target_dates: tf.Tensor,
target_values: tf.Tensor,
- target_noise: tf.Tensor
+ target_noise: tf.Tensor,
):
-
return dict(
ts=date_info,
history=history,
noise=noise,
target_ts=target_dates,
- target_noise=target_noise
+ target_noise=target_noise,
), target_values
-
def remove_noise(x, y):
return (
{
'ts': x['ts'],
'history': x['history'],
'target_ts': x['target_ts'],
- }, y
+ },
+ y,
)
def create_train_test_df(combined_ds, test_noise=False):
@@ -166,9 +162,14 @@ def create_train_test_df(combined_ds, test_noise=False):
base_train_df.map(func, num_parallel_calls=tf.data.AUTOTUNE)
for func in task_map.values()
]
- train_df = tf.data.Dataset.choose_from_datasets(
- train_tasks_dfs, tf.data.Dataset.range(len(train_tasks_dfs)).repeat()
- ).unbatch().filter(filter_unusable_points)
+ train_df = (
+ tf.data.Dataset.choose_from_datasets(
+ train_tasks_dfs,
+ tf.data.Dataset.range(len(train_tasks_dfs)).repeat(),
+ )
+ .unbatch()
+ .filter(filter_unusable_points)
+ )
task_map_test = {
'point': gen_random_single_point_no_noise,
@@ -185,21 +186,24 @@ def create_train_test_df(combined_ds, test_noise=False):
for func in task_map_test.values()
]
- test_df = tf.data.Dataset.choose_from_datasets(
- test_tasks_dfs, tf.data.Dataset.range(len(test_tasks_dfs)).repeat()
- ).unbatch().filter(filter_unusable_points)
+ test_df = (
+ tf.data.Dataset.choose_from_datasets(
+ test_tasks_dfs, tf.data.Dataset.range(len(test_tasks_dfs)).repeat()
+ )
+ .unbatch()
+ .filter(filter_unusable_points)
+ )
test_df = test_df.map(remove_noise)
return train_df, test_df
-
def get_combined_ds(config):
- version = config["version"]
+ version = config['version']
datasets = [
# load_tf_dataset(config["prefix"] + f"{version}/minute.tfrecords"),
# load_tf_dataset(config["prefix"] + f"{version}/hourly.tfrecords"),
- load_tf_dataset(config["prefix"] + f"{version}/daily.tfrecords"),
+ load_tf_dataset(config['prefix'] + f'{version}/daily.tfrecords'),
# load_tf_dataset(config["prefix"] + f"{version}/weekly.tfrecords"),
# load_tf_dataset(config["prefix"] + f"{version}/monthly.tfrecords"),
]
@@ -209,36 +213,36 @@ def get_combined_ds(config):
return combined_ds
-
-
if flag == 'test':
data_set, data_loader = data_provider(self.args, flag)
elif flag == 'train':
- with open('/home/ubuntu/ForecastPFN/src/training/config_mf_replicate_testnoiseF.yaml') as config_file:
+ with open(
+ '/home/ubuntu/ForecastPFN/src/training/config_mf_replicate_testnoiseF.yaml'
+ ) as config_file:
config = yaml.load(config_file, yaml.loader.SafeLoader)
combined_ds = get_combined_ds(config)
- train_df, vali_df = create_train_test_df(
- combined_ds, config["test_noise"])
+ train_df, vali_df = create_train_test_df(combined_ds, config['test_noise'])
data_loader = TFRecordDataLoader(
- train_df, self.args.batch_size, True, 10_000)
+ train_df, self.args.batch_size, True, 10_000
+ )
data_set = None
elif flag == 'val':
- with open('/home/ubuntu/ForecastPFN/src/training/config_mf_replicate_testnoiseF.yaml') as config_file:
+ with open(
+ '/home/ubuntu/ForecastPFN/src/training/config_mf_replicate_testnoiseF.yaml'
+ ) as config_file:
config = yaml.load(config_file, yaml.loader.SafeLoader)
combined_ds = get_combined_ds(config)
- train_df, vali_df = create_train_test_df(
- combined_ds, config["test_noise"])
+ train_df, vali_df = create_train_test_df(combined_ds, config['test_noise'])
data_set = None
data_loader = TFRecordDataLoader(
- vali_df, self.args.batch_size, True, 10_000)
+ vali_df, self.args.batch_size, True, 10_000
+ )
return data_set, data_loader
-
def _select_optimizer(self):
- model_optim = optim.Adam(
- self.model.parameters(), lr=self.args.learning_rate)
+ model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
return model_optim
def _select_criterion(self):
@@ -255,37 +259,41 @@ def vali(self, vali_data, vali_loader, criterion):
X_batch = numpy_to_torch(batch_data[0], self.device)
y_batch = torch.from_numpy(batch_data[1]).to(self.device)
- batch_x = X_batch['history'].float().to(
- self.device).unsqueeze(2)
+ batch_x = X_batch['history'].float().to(self.device).unsqueeze(2)
batch_y = y_batch.float().to(self.device).unsqueeze(2)
batch_x_mark = X_batch['ts'].float().to(self.device)
batch_y_mark = X_batch['target_ts'].float().to(self.device)
# decoder input
- dec_inp = torch.zeros_like(
- batch_y[:, -self.args.pred_len:, :]).float()
- dec_inp = torch.cat(
- [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
+ dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float()
+ dec_inp = (
+ torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1)
+ .float()
+ .to(self.device)
+ )
# encoder - decoder
if self.args.use_amp:
with torch.cuda.amp.autocast():
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
else:
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
f_dim = -1 if self.args.features == 'MS' else 0
- batch_y = batch_y[:, -self.args.pred_len:,
- f_dim:].to(self.device)
+ batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device)
pred = outputs.detach().cpu()
true = batch_y.detach().cpu()
@@ -304,7 +312,9 @@ def test(self, vali_data, vali_loader, criterion):
self.model.eval()
self.vali_timer.start_timer()
with torch.no_grad():
- for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
+ for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(
+ vali_loader
+ ):
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float()
@@ -312,29 +322,34 @@ def test(self, vali_data, vali_loader, criterion):
batch_y_mark = batch_y_mark.float().to(self.device)
# decoder input
- dec_inp = torch.zeros_like(
- batch_y[:, -self.args.pred_len:, :]).float()
- dec_inp = torch.cat(
- [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
+ dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float()
+ dec_inp = (
+ torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1)
+ .float()
+ .to(self.device)
+ )
# encoder - decoder
if self.args.use_amp:
with torch.cuda.amp.autocast():
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
else:
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
f_dim = -1 if self.args.features == 'MS' else 0
- batch_y = batch_y[:, -self.args.pred_len:,
- f_dim:].to(self.device)
+ batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device)
pred = outputs.detach().cpu()
true = batch_y.detach().cpu()
@@ -346,7 +361,7 @@ def test(self, vali_data, vali_loader, criterion):
self.model.train()
self.vali_timer.end_timer()
return total_loss
-
+
def train(self, setting):
print(setting)
@@ -360,11 +375,10 @@ def train(self, setting):
else:
raise NotImplementedError
- time_now = time.time()
+ time.time()
train_steps = -1
- early_stopping = EarlyStopping(
- patience=self.args.patience, verbose=False)
+ early_stopping = EarlyStopping(patience=self.args.patience, verbose=False)
model_optim = self._select_optimizer()
criterion = self._select_criterion()
@@ -387,7 +401,7 @@ def train(self, setting):
batch_x = X_batch['history'].float().to(self.device).unsqueeze(2)
batch_y = y_batch.float().to(self.device).unsqueeze(2)
-
+
batch_x_mark = X_batch['ts'].float().to(self.device)
batch_y_mark = X_batch['target_ts'].float().to(self.device)
@@ -395,37 +409,43 @@ def train(self, setting):
model_optim.zero_grad()
# decoder input
- dec_inp = torch.zeros_like(
- batch_y[:, -self.args.pred_len:, :]).float()
- dec_inp = torch.cat(
- [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
+ dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float()
+ dec_inp = (
+ torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1)
+ .float()
+ .to(self.device)
+ )
# encoder - decoder
if self.args.use_amp:
with torch.cuda.amp.autocast():
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
f_dim = -1 if self.args.features == 'MS' else 0
- batch_y = batch_y[:, -self.args.pred_len:,
- f_dim:].to(self.device)
+ batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(
+ self.device
+ )
loss = criterion(outputs, batch_y)
train_loss.append(loss.item())
else:
if self.args.output_attention:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )[0]
else:
outputs = self.model(
- batch_x, batch_x_mark, dec_inp, batch_y_mark)
+ batch_x, batch_x_mark, dec_inp, batch_y_mark
+ )
f_dim = -1 if self.args.features == 'MS' else 0
- batch_y = batch_y[:, -self.args.pred_len:,
- f_dim:].to(self.device)
+ batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device)
loss = criterion(outputs, batch_y)
train_loss.append(loss.item())
@@ -448,18 +468,20 @@ def train(self, setting):
self.model.load_state_dict(torch.load(best_model_path))
return self.model
-
+
if batch_i >= 1_000:
break
- print("Epoch: {} cost time: {}".format(
- epoch + 1, time.time() - epoch_time))
+ print('Epoch: {} cost time: {}'.format(epoch + 1, time.time() - epoch_time))
train_loss = np.average(train_loss)
vali_loss = self.vali(vali_data, vali_loader, criterion)
test_loss = self.test(test_data, test_loader, criterion)
- print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
- epoch + 1, train_steps, train_loss, vali_loss, test_loss))
+ print(
+ 'Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}'.format(
+ epoch + 1, train_steps, train_loss, vali_loss, test_loss
+ )
+ )
early_stopping(vali_loss, self.model, path)
# if early_stopping.early_stop:
# print("Early stopping")
@@ -473,4 +495,3 @@ def train(self, setting):
self.model.load_state_dict(torch.load(best_model_path))
return self.model
-
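Note (not part of the patch): the reformatted hunks above repeatedly touch the decoder-input construction shared by the Informer/Autoformer-style baselines. A minimal sketch of that pattern, with assumed tensor shapes:

```python
# Sketch only: batch size, label_len and pred_len are illustrative assumptions.
import torch

batch_y = torch.randn(32, 66, 1)     # (batch, label_len + pred_len, features)
label_len, pred_len = 18, 48

dec_inp = torch.zeros_like(batch_y[:, -pred_len:, :])             # zeros where predictions go
dec_inp = torch.cat([batch_y[:, :label_len, :], dec_inp], dim=1)  # known prefix + placeholders
assert dec_inp.shape == batch_y.shape
```

The decoder therefore sees the ground-truth "warm-up" segment followed by zeros, and only the last `pred_len` positions are scored against `batch_y`.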
diff --git a/benchmark/exp/torch_utils.py b/benchmark/exp/torch_utils.py
index ca30a23..aae5e6a 100644
--- a/benchmark/exp/torch_utils.py
+++ b/benchmark/exp/torch_utils.py
@@ -1,18 +1,21 @@
-import torch
import os
from collections import OrderedDict
from functools import partial
+
+import tensorflow as tf
+import tensorflow_datasets as tfds
+import torch
from torch.nn import MSELoss
from torch.optim import Adam
-import tensorflow_datasets as tfds
-import tensorflow as tf
from tqdm import tqdm
DEFAULT_LOSS = MSELoss()
DEFAULT_OPTIMIZER = partial(Adam, lr=0.001)
-load_dir = 'tensorboard/mf_replicate_testnoiseT_shuffle5Millilon.20230714-133237/models/51'
+load_dir = (
+ 'tensorboard/mf_replicate_testnoiseT_shuffle5Millilon.20230714-133237/models/51'
+)
def numpy_to_torch(X, device):
@@ -63,12 +66,16 @@ def __next__(self):
class AdditionalValidationSets:
- def __init__(self, validation_sets, batch_size=1, metrics=[], loss=DEFAULT_LOSS, device=None):
+ def __init__(
+ self, validation_sets, batch_size=1, metrics=[], loss=DEFAULT_LOSS, device=None
+ ):
self.validation_sets = []
for validation_set in validation_sets:
if len(validation_set) not in [2]:
raise ValueError()
- self.validation_sets.append([tfds.as_numpy(validation_set[0]), validation_set[1]])
+ self.validation_sets.append(
+ [tfds.as_numpy(validation_set[0]), validation_set[1]]
+ )
self.epoch = []
self.metrics = metrics
self.loss = loss
@@ -88,7 +95,16 @@ def on_epoch_end(self, model, epoch, tbCallback=None):
validation_data, validation_set_name = validation_set
else:
raise ValueError()
- results = add_metrics_to_log(model, validation_data, self.loss, self.metrics, tbCallback, f'add_valid/{validation_set_name}/', epoch, self.device)
+ results = add_metrics_to_log(
+ model,
+ validation_data,
+ self.loss,
+ self.metrics,
+ tbCallback,
+ f'add_valid/{validation_set_name}/',
+ epoch,
+ self.device,
+ )
log.update(results)
self.logs[epoch] = log
return log
@@ -106,13 +122,17 @@ def predict(model, data, device, steps_per_epoch=None):
y_batch_pred = model(X_batch)
y_batch_pred, y_batch = model.transform_output(y_batch_pred, y_batch)
y_true = y_batch if y_true is None else torch.concat([y_true, y_batch])
- y_pred = y_batch_pred if y_pred is None else torch.concat([y_pred, y_batch_pred])
+ y_pred = (
+ y_batch_pred if y_pred is None else torch.concat([y_pred, y_batch_pred])
+ )
if steps_per_epoch is not None and batch_i >= steps_per_epoch:
break
return y_true, y_pred
-def add_metrics_to_log(model, data, loss, metrics, writer, prefix, epoch, device, steps_per_epoch=None):
+def add_metrics_to_log(
+ model, data, loss, metrics, writer, prefix, epoch, device, steps_per_epoch=None
+):
with torch.no_grad():
y_true, y_pred = predict(model, data, device, steps_per_epoch)
y_true = y_true.reshape(-1)
@@ -130,23 +150,25 @@ def add_metrics_to_log(model, data, loss, metrics, writer, prefix, epoch, device
return log
-def fit(model,
- train_df,
- batch_size=1024,
- epochs=1,
- verbose=1,
- valid_df=None,
- shuffle=0,
- initial_epoch=0,
- seed=None,
- loss=DEFAULT_LOSS,
- optimizer=DEFAULT_OPTIMIZER,
- metrics=None,
- writer=None,
- device='cpu',
- steps_per_epoch=None,
- logdir=None,
- additional_validation_sets=[]):
+def fit(
+ model,
+ train_df,
+ batch_size=1024,
+ epochs=1,
+ verbose=1,
+ valid_df=None,
+ shuffle=0,
+ initial_epoch=0,
+ seed=None,
+ loss=DEFAULT_LOSS,
+ optimizer=DEFAULT_OPTIMIZER,
+ metrics=None,
+ writer=None,
+ device='cpu',
+ steps_per_epoch=None,
+ logdir=None,
+ additional_validation_sets=[],
+):
"""Trains the model similar to Keras' .fit(...) method
# Arguments
@@ -183,7 +205,9 @@ def fit(model,
# Build DataLoaders
valid_data = TFRecordDataLoader(valid_df, batch_size)
- additional_valid_data = AdditionalValidationSets(additional_validation_sets, metrics=metrics, loss=loss, device=device)
+ additional_valid_data = AdditionalValidationSets(
+ additional_validation_sets, metrics=metrics, loss=loss, device=device
+ )
# Compile optimizer
opt = optimizer(model.parameters())
# load = torch.load(load_dir)
@@ -192,11 +216,11 @@ def fit(model,
# Run training loop
logs = []
for t in tqdm(range(initial_epoch, epochs)):
- logfile.write(f"Epoch: {t+1}\n")
+ logfile.write(f'Epoch: {t+1}\n')
train_data = TFRecordDataLoader(train_df, batch_size, True, shuffle)
model.train()
if verbose and t % 10 == 0:
- print("Epoch {0} / {1}".format(t + 1, epochs))
+ print('Epoch {0} / {1}'.format(t + 1, epochs))
log = OrderedDict()
epoch_loss = 0.0
# Run batches
@@ -222,18 +246,27 @@ def fit(model,
# train_metric_log = add_metrics_to_log(model, train_data, loss, metrics, writer, prefix='train/metrics/', epoch=t, device=device, steps_per_epoch=steps_per_epoch)
# log.update(train_metric_log)
if valid_data is not None:
- val_metric_log = add_metrics_to_log(model, valid_data, loss, metrics, writer, prefix='valid/metrics/', epoch=t, device=device)
+ val_metric_log = add_metrics_to_log(
+ model,
+ valid_data,
+ loss,
+ metrics,
+ writer,
+ prefix='valid/metrics/',
+ epoch=t,
+ device=device,
+ )
log.update(val_metric_log)
# Additional validation set
if t % 10 == 0:
add_log = additional_valid_data.on_epoch_end(model, t, writer)
- logfile.write(str(add_log)+'\n')
+ logfile.write(str(add_log) + '\n')
to_save = {
- "model": model.state_dict(),
- "optimizer": opt.state_dict(),
+ 'model': model.state_dict(),
+ 'optimizer': opt.state_dict(),
}
torch.save(to_save, logdir + f'/models/{t+1}')
- logfile.write(str(log)+'\n')
+ logfile.write(str(log) + '\n')
logfile.flush()
logs.append(log)
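Note (not part of the patch): `torch_utils.py` hands TensorFlow datasets to a PyTorch training loop by iterating them as NumPy batches. A minimal sketch of that hand-off under assumed, synthetic data (the feature names below are stand-ins, not the real training records):

```python
# Sketch only: the dataset contents are synthetic stand-ins.
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

ds = tf.data.Dataset.from_tensor_slices(
    {'history': np.random.rand(8, 36).astype(np.float32),
     'target': np.random.rand(8, 1).astype(np.float32)}
).batch(4)

for batch in tfds.as_numpy(ds):                        # dict of NumPy arrays per batch
    x = torch.from_numpy(batch['history']).to(device)  # same idea as numpy_to_torch()
    y = torch.from_numpy(batch['target']).to(device)
    print(x.shape, y.shape)
```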
diff --git a/benchmark/layers/AutoCorrelation.py b/benchmark/layers/AutoCorrelation.py
index 2fda13a..6fb6ec6 100644
--- a/benchmark/layers/AutoCorrelation.py
+++ b/benchmark/layers/AutoCorrelation.py
@@ -1,8 +1,8 @@
+import math
import time
+
import torch
import torch.nn as nn
-import numpy as np
-import math
from torch.nn.functional import interpolate
@@ -13,6 +13,7 @@ def func2(*args, **kw):
t = time.time() - now
print('call <{}>, time={}'.format(func.__name__, t))
return y
+
return func2
@@ -23,7 +24,16 @@ class AutoCorrelation(nn.Module):
(2) time delay aggregation
This block can replace the self-attention family mechanism seamlessly.
"""
- def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False, configs=None):
+
+ def __init__(
+ self,
+ mask_flag=True,
+ factor=1,
+ scale=None,
+ attention_dropout=0.1,
+ output_attention=False,
+ configs=None,
+ ):
super(AutoCorrelation, self).__init__()
print('Autocorrelation used !')
self.factor = factor
@@ -55,8 +65,13 @@ def time_delay_agg_training(self, values, corr):
delays_agg = torch.zeros_like(values).float()
for i in range(top_k):
pattern = torch.roll(tmp_values, -int(index[i]), -1)
- delays_agg = delays_agg + pattern * \
- (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
+ delays_agg = delays_agg + pattern * (
+ tmp_corr[:, i]
+ .unsqueeze(1)
+ .unsqueeze(1)
+ .unsqueeze(1)
+ .repeat(1, head, channel, length)
+ )
return delays_agg # size=[B, H, d, S]
def time_delay_agg_inference(self, values, corr):
@@ -69,7 +84,14 @@ def time_delay_agg_inference(self, values, corr):
channel = values.shape[2]
length = values.shape[3]
# index init
- init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda()
+ init_index = (
+ torch.arange(length)
+ .unsqueeze(0)
+ .unsqueeze(0)
+ .unsqueeze(0)
+ .repeat(batch, head, channel, 1)
+ .cuda()
+ )
# find top k
top_k = int(self.factor * math.log(length))
mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
@@ -81,10 +103,17 @@ def time_delay_agg_inference(self, values, corr):
tmp_values = values.repeat(1, 1, 1, 2)
delays_agg = torch.zeros_like(values).float()
for i in range(top_k):
- tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)
+ tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(
+ 1
+ ).repeat(1, head, channel, length)
pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
- delays_agg = delays_agg + pattern * \
- (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
+ delays_agg = delays_agg + pattern * (
+ tmp_corr[:, i]
+ .unsqueeze(1)
+ .unsqueeze(1)
+ .unsqueeze(1)
+ .repeat(1, head, channel, length)
+ )
return delays_agg
def time_delay_agg_full(self, values, corr):
@@ -96,7 +125,14 @@ def time_delay_agg_full(self, values, corr):
channel = values.shape[2]
length = values.shape[3]
# index init
- init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda()
+ init_index = (
+ torch.arange(length)
+ .unsqueeze(0)
+ .unsqueeze(0)
+ .unsqueeze(0)
+ .repeat(batch, head, channel, 1)
+ .cuda()
+ )
# find top k
top_k = int(self.factor * math.log(length))
weights = torch.topk(corr, top_k, dim=-1)[0]
@@ -116,7 +152,7 @@ def forward(self, queries, keys, values, attn_mask):
B, L, H, E = queries.shape
_, S, _, D = values.shape
if L > S:
- zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
+ zeros = torch.zeros_like(queries[:, : (L - S), :]).float()
values = torch.cat([values, zeros], dim=1)
keys = torch.cat([keys, zeros], dim=1)
else:
@@ -138,20 +174,34 @@ def forward(self, queries, keys, values, attn_mask):
for q, k, j in zip(qs, ks, j_list):
q_list += [interpolate(q, scale_factor=j, mode='linear')[:, :, -L:]]
k_list += [interpolate(k, scale_factor=j, mode='linear')[:, :, -L:]]
- queries = torch.stack([i.reshape([B, H, E, L]) for i in q_list], dim=3).reshape([B, H, -1, L]).permute(0, 3, 1, 2)
- keys = torch.stack([i.reshape([B, H, E, L]) for i in k_list], dim=3).reshape([B, H, -1, L]).permute(0, 3, 1, 2)
+ queries = (
+ torch.stack([i.reshape([B, H, E, L]) for i in q_list], dim=3)
+ .reshape([B, H, -1, L])
+ .permute(0, 3, 1, 2)
+ )
+ keys = (
+ torch.stack([i.reshape([B, H, E, L]) for i in k_list], dim=3)
+ .reshape([B, H, -1, L])
+ .permute(0, 3, 1, 2)
+ )
else:
pass
- q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1) # size=[B, H, E, L]
+ q_fft = torch.fft.rfft(
+ queries.permute(0, 2, 3, 1).contiguous(), dim=-1
+ ) # size=[B, H, E, L]
k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
res = q_fft * torch.conj(k_fft)
- corr = torch.fft.irfft(res, dim=-1) # size=[B, H, E, L]
+ corr = torch.fft.irfft(res, dim=-1) # size=[B, H, E, L]
# time delay agg
if self.training:
- V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) # [B, L, H, E], [B, H, E, L] -> [B, L, H, E]
+ V = self.time_delay_agg_training(
+ values.permute(0, 2, 3, 1).contiguous(), corr
+ ).permute(0, 3, 1, 2) # [B, L, H, E], [B, H, E, L] -> [B, L, H, E]
else:
- V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
+ V = self.time_delay_agg_inference(
+ values.permute(0, 2, 3, 1).contiguous(), corr
+ ).permute(0, 3, 1, 2)
else:
V_list = []
queries = queries.reshape([B, L, -1])
@@ -172,12 +222,16 @@ def forward(self, queries, keys, values, attn_mask):
res = q_fft * torch.conj(k_fft)
corr = torch.fft.irfft(res, dim=-1) # [B, H, E, L]
if self.training:
- V = self.time_delay_agg_training(v.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
+ V = self.time_delay_agg_training(
+ v.permute(0, 2, 3, 1).contiguous(), corr
+ ).permute(0, 3, 1, 2)
else:
- V = self.time_delay_agg_inference(v.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
+ V = self.time_delay_agg_inference(
+ v.permute(0, 2, 3, 1).contiguous(), corr
+ ).permute(0, 3, 1, 2)
V_list += [V]
- Vl = V_list[-1].reshape([B, -1, H*E]).transpose(1, 2)
- Vh_list = [i.reshape([B, -1, H*E]).transpose(1, 2) for i in V_list[:-1]]
+ Vl = V_list[-1].reshape([B, -1, H * E]).transpose(1, 2)
+ Vh_list = [i.reshape([B, -1, H * E]).transpose(1, 2) for i in V_list[:-1]]
V = self.dwt1div((Vl, Vh_list)).reshape([B, H, E, -1]).permute(0, 3, 1, 2)
# corr = self.dwt1div((V_list[-1], V_list[:-1]))
@@ -188,8 +242,7 @@ def forward(self, queries, keys, values, attn_mask):
class AutoCorrelationLayer(nn.Module):
- def __init__(self, correlation, d_model, n_heads, d_keys=None,
- d_values=None):
+ def __init__(self, correlation, d_model, n_heads, d_keys=None, d_values=None):
super(AutoCorrelationLayer, self).__init__()
d_keys = d_keys or (d_model // n_heads)
@@ -211,12 +264,7 @@ def forward(self, queries, keys, values, attn_mask):
keys = self.key_projection(keys).view(B, S, H, -1)
values = self.value_projection(values).view(B, S, H, -1)
- out, attn = self.inner_correlation(
- queries,
- keys,
- values,
- attn_mask
- )
+ out, attn = self.inner_correlation(queries, keys, values, attn_mask)
out = out.view(B, L, -1)
- return self.out_projection(out), attn
\ No newline at end of file
+ return self.out_projection(out), attn
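Note (not part of the patch): the AutoCorrelation block above computes series correlations in the frequency domain. A minimal sketch of the underlying FFT trick (Wiener-Khinchin: the correlation is the inverse FFT of `rfft(q) * conj(rfft(k))`), with assumed shapes:

```python
# Sketch only: batch, heads, channels and length are illustrative assumptions.
import math
import torch

B, H, E, L = 2, 4, 8, 96
q = torch.randn(B, H, E, L)
k = torch.randn(B, H, E, L)

q_fft = torch.fft.rfft(q, dim=-1)
k_fft = torch.fft.rfft(k, dim=-1)
corr = torch.fft.irfft(q_fft * torch.conj(k_fft), n=L, dim=-1)   # [B, H, E, L]

# The top-k lags by mean correlation drive the time-delay aggregation.
top_k = int(math.log(L))
mean_corr = corr.mean(dim=(1, 2))                                # [B, L]
weights, delays = torch.topk(mean_corr, top_k, dim=-1)
print(delays.shape)                                              # (B, top_k)
```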
diff --git a/benchmark/layers/Autoformer_EncDec.py b/benchmark/layers/Autoformer_EncDec.py
index 5bcae4e..d97c916 100644
--- a/benchmark/layers/Autoformer_EncDec.py
+++ b/benchmark/layers/Autoformer_EncDec.py
@@ -1,14 +1,15 @@
+import math
+
import torch
import torch.nn as nn
import torch.nn.functional as F
-import math
-from layers.SelfAttention_Family import FullAttention
class my_Layernorm(nn.Module):
"""
Special designed layernorm for the seasonal part
"""
+
def __init__(self, channels):
super(my_Layernorm, self).__init__()
self.layernorm = nn.LayerNorm(channels)
@@ -23,6 +24,7 @@ class moving_avg(nn.Module):
"""
Moving average block to highlight the trend of time series
"""
+
def __init__(self, kernel_size, stride):
super(moving_avg, self).__init__()
if type(kernel_size) == list:
@@ -32,7 +34,9 @@ def __init__(self, kernel_size, stride):
def forward(self, x):
# padding on the both ends of time series
- front = x[:, 0:1, :].repeat(1, self.kernel_size - 1-math.floor((self.kernel_size - 1) // 2), 1)
+ front = x[:, 0:1, :].repeat(
+ 1, self.kernel_size - 1 - math.floor((self.kernel_size - 1) // 2), 1
+ )
end = x[:, -1:, :].repeat(1, math.floor((self.kernel_size - 1) // 2), 1)
x = torch.cat([front, x, end], dim=1)
x = self.avg(x.permute(0, 2, 1))
@@ -44,6 +48,7 @@ class series_decomp(nn.Module):
"""
Series decomposition block
"""
+
def __init__(self, kernel_size):
super(series_decomp, self).__init__()
self.moving_avg = moving_avg(kernel_size, stride=1)
@@ -58,20 +63,23 @@ class series_decomp_multi(nn.Module):
"""
Series decomposition block
"""
+
def __init__(self, kernel_size):
super(series_decomp_multi, self).__init__()
self.moving_avg = [moving_avg(kernel, stride=1) for kernel in kernel_size]
self.layer = torch.nn.Linear(1, len(kernel_size))
def forward(self, x):
- moving_mean=[]
+ moving_mean = []
for func in self.moving_avg:
moving_avg = func(x)
moving_mean.append(moving_avg.unsqueeze(-1))
- moving_mean=torch.cat(moving_mean,dim=-1)
- moving_mean = torch.sum(moving_mean*nn.Softmax(-1)(self.layer(x.unsqueeze(-1))),dim=-1)
+ moving_mean = torch.cat(moving_mean, dim=-1)
+ moving_mean = torch.sum(
+ moving_mean * nn.Softmax(-1)(self.layer(x.unsqueeze(-1))), dim=-1
+ )
res = x - moving_mean
- return res, moving_mean
+ return res, moving_mean
class FourierDecomp(nn.Module):
@@ -80,19 +88,32 @@ def __init__(self):
pass
def forward(self, x):
- x_ft = torch.fft.rfft(x, dim=-1)
+ torch.fft.rfft(x, dim=-1)
class EncoderLayer(nn.Module):
"""
Autoformer encoder layer with the progressive decomposition architecture
"""
- def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
+
+ def __init__(
+ self,
+ attention,
+ d_model,
+ d_ff=None,
+ moving_avg=25,
+ dropout=0.1,
+ activation='relu',
+ ):
super(EncoderLayer, self).__init__()
d_ff = d_ff or 4 * d_model
self.attention = attention
- self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
- self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
+ self.conv1 = nn.Conv1d(
+ in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False
+ )
+ self.conv2 = nn.Conv1d(
+ in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False
+ )
if isinstance(moving_avg, list):
self.decomp1 = series_decomp_multi(moving_avg)
@@ -102,13 +123,10 @@ def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, ac
self.decomp2 = series_decomp(moving_avg)
self.dropout = nn.Dropout(dropout)
- self.activation = F.relu if activation == "relu" else F.gelu
+ self.activation = F.relu if activation == 'relu' else F.gelu
def forward(self, x, attn_mask=None):
- new_x, attn = self.attention(
- x, x, x,
- attn_mask=attn_mask
- )
+ new_x, attn = self.attention(x, x, x, attn_mask=attn_mask)
x = x + self.dropout(new_x)
x, _ = self.decomp1(x)
y = x
@@ -122,10 +140,13 @@ class Encoder(nn.Module):
"""
Autoformer encoder
"""
+
def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
super(Encoder, self).__init__()
self.attn_layers = nn.ModuleList(attn_layers)
- self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
+ self.conv_layers = (
+ nn.ModuleList(conv_layers) if conv_layers is not None else None
+ )
self.norm = norm_layer
def forward(self, x, attn_mask=None):
@@ -152,14 +173,28 @@ class DecoderLayer(nn.Module):
"""
Autoformer decoder layer with the progressive decomposition architecture
"""
- def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
- moving_avg=25, dropout=0.1, activation="relu"):
+
+ def __init__(
+ self,
+ self_attention,
+ cross_attention,
+ d_model,
+ c_out,
+ d_ff=None,
+ moving_avg=25,
+ dropout=0.1,
+ activation='relu',
+ ):
super(DecoderLayer, self).__init__()
d_ff = d_ff or 4 * d_model
self.self_attention = self_attention
self.cross_attention = cross_attention
- self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
- self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
+ self.conv1 = nn.Conv1d(
+ in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False
+ )
+ self.conv2 = nn.Conv1d(
+ in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False
+ )
if isinstance(moving_avg, list):
self.decomp1 = series_decomp_multi(moving_avg)
@@ -171,21 +206,24 @@ def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
self.decomp3 = series_decomp(moving_avg)
self.dropout = nn.Dropout(dropout)
- self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
- padding_mode='circular', bias=False)
- self.activation = F.relu if activation == "relu" else F.gelu
+ self.projection = nn.Conv1d(
+ in_channels=d_model,
+ out_channels=c_out,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ padding_mode='circular',
+ bias=False,
+ )
+ self.activation = F.relu if activation == 'relu' else F.gelu
def forward(self, x, cross, x_mask=None, cross_mask=None):
- x = x + self.dropout(self.self_attention(
- x, x, x,
- attn_mask=x_mask
- )[0])
+ x = x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0])
x, trend1 = self.decomp1(x)
- x = x + self.dropout(self.cross_attention(
- x, cross, cross,
- attn_mask=cross_mask
- )[0])
+ x = x + self.dropout(
+ self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
+ )
x, trend2 = self.decomp2(x)
y = x
@@ -194,7 +232,9 @@ def forward(self, x, cross, x_mask=None, cross_mask=None):
x, trend3 = self.decomp3(x + y)
residual_trend = trend1 + trend2 + trend3
- residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2)
+ residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(
+ 1, 2
+ )
return x, residual_trend
@@ -202,6 +242,7 @@ class Decoder(nn.Module):
"""
Autoformer encoder
"""
+
def __init__(self, layers, norm_layer=None, projection=None):
super(Decoder, self).__init__()
self.layers = nn.ModuleList(layers)
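Note (not part of the patch): `series_decomp` above splits a series into trend and seasonal parts with a moving average whose edges are padded by replication. A minimal, self-contained sketch with an assumed kernel size:

```python
# Sketch only: kernel_size and the input series are illustrative assumptions.
import torch
import torch.nn.functional as F

def decompose(x: torch.Tensor, kernel_size: int = 25):
    """x: [batch, length, channels] -> (seasonal, trend), both the same shape as x."""
    pad_front = (kernel_size - 1) - (kernel_size - 1) // 2
    pad_end = (kernel_size - 1) // 2
    front = x[:, 0:1, :].repeat(1, pad_front, 1)     # replicate first value
    end = x[:, -1:, :].repeat(1, pad_end, 1)         # replicate last value
    padded = torch.cat([front, x, end], dim=1)
    trend = F.avg_pool1d(padded.permute(0, 2, 1), kernel_size, stride=1).permute(0, 2, 1)
    return x - trend, trend

seasonal, trend = decompose(torch.randn(2, 96, 1))
print(seasonal.shape, trend.shape)                   # torch.Size([2, 96, 1]) twice
```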
diff --git a/benchmark/layers/Embed.py b/benchmark/layers/Embed.py
index 1cc5034..1921d66 100644
--- a/benchmark/layers/Embed.py
+++ b/benchmark/layers/Embed.py
@@ -1,8 +1,7 @@
+import math
+
import torch
import torch.nn as nn
-import torch.nn.functional as F
-from torch.nn.utils import weight_norm
-import math
class PositionalEmbedding(nn.Module):
@@ -13,7 +12,9 @@ def __init__(self, d_model, max_len=5000):
pe.require_grad = False
position = torch.arange(0, max_len).float().unsqueeze(1)
- div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
+ div_term = (
+ torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)
+ ).exp()
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
@@ -22,18 +23,26 @@ def __init__(self, d_model, max_len=5000):
self.register_buffer('pe', pe)
def forward(self, x):
- return self.pe[:, :x.size(1)]
+ return self.pe[:, : x.size(1)]
class TokenEmbedding(nn.Module):
def __init__(self, c_in, d_model):
super(TokenEmbedding, self).__init__()
padding = 1 if torch.__version__ >= '1.5.0' else 2
- self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
- kernel_size=3, padding=padding, padding_mode='circular', bias=False)
+ self.tokenConv = nn.Conv1d(
+ in_channels=c_in,
+ out_channels=d_model,
+ kernel_size=3,
+ padding=padding,
+ padding_mode='circular',
+ bias=False,
+ )
for m in self.modules():
if isinstance(m, nn.Conv1d):
- nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
+ nn.init.kaiming_normal_(
+ m.weight, mode='fan_in', nonlinearity='leaky_relu'
+ )
def forward(self, x):
x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
@@ -48,7 +57,9 @@ def __init__(self, c_in, d_model):
w.require_grad = False
position = torch.arange(0, c_in).float().unsqueeze(1)
- div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
+ div_term = (
+ torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)
+ ).exp()
w[:, 0::2] = torch.sin(position * div_term)
w[:, 1::2] = torch.cos(position * div_term)
@@ -81,7 +92,9 @@ def __init__(self, d_model, embed_type='fixed', freq='h'):
def forward(self, x):
x = x.long()
- minute_x = self.minute_embed(x[:, :, 4]) if hasattr(self, 'minute_embed') else 0.
+ minute_x = (
+ self.minute_embed(x[:, :, 4]) if hasattr(self, 'minute_embed') else 0.0
+ )
hour_x = self.hour_embed(x[:, :, 3])
weekday_x = self.weekday_embed(x[:, :, 2])
day_x = self.day_embed(x[:, :, 1])
@@ -108,15 +121,22 @@ def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
self.position_embedding = PositionalEmbedding(d_model=d_model)
- self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
- freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
- d_model=d_model, embed_type=embed_type, freq=freq)
+ self.temporal_embedding = (
+ TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
+ if embed_type != 'timeF'
+ else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
+ )
self.dropout = nn.Dropout(p=dropout)
def forward(self, x, x_mark):
- x = self.value_embedding(x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
+ x = (
+ self.value_embedding(x)
+ + self.temporal_embedding(x_mark)
+ + self.position_embedding(x)
+ )
return self.dropout(x)
+
class DataEmbedding_onlypos(nn.Module):
def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
super(DataEmbedding_onlypos, self).__init__()
@@ -128,16 +148,19 @@ def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
def forward(self, x, x_mark):
x = self.value_embedding(x) + self.position_embedding(x)
return self.dropout(x)
-
+
+
class DataEmbedding_wo_pos(nn.Module):
def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
super(DataEmbedding_wo_pos, self).__init__()
self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
self.position_embedding = PositionalEmbedding(d_model=d_model)
- self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
- freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
- d_model=d_model, embed_type=embed_type, freq=freq)
+ self.temporal_embedding = (
+ TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
+ if embed_type != 'timeF'
+ else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
+ )
self.dropout = nn.Dropout(p=dropout)
def forward(self, x, x_mark):
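Note (not part of the patch): the embeddings in `Embed.py` build the usual fixed sinusoidal table. A short sketch of the formula the `div_term` lines above implement, pe[pos, 2i] = sin(pos / 10000^(2i/d)) and pe[pos, 2i+1] = cos(pos / 10000^(2i/d)), with assumed sizes:

```python
# Sketch only: d_model and max_len are illustrative assumptions.
import math
import torch

d_model, max_len = 512, 5000
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len).float().unsqueeze(1)
div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
print(pe.shape)   # torch.Size([5000, 512])
```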
diff --git a/benchmark/layers/FourierCorrelation.py b/benchmark/layers/FourierCorrelation.py
index 12892cd..9567d55 100644
--- a/benchmark/layers/FourierCorrelation.py
+++ b/benchmark/layers/FourierCorrelation.py
@@ -13,7 +13,7 @@ def get_frequency_modes(seq_len, modes=64, mode_select_method='random'):
'random' means sampling randomly;
'else' means sampling the lowest modes;
"""
- modes = min(modes, seq_len//2)
+ modes = min(modes, seq_len // 2)
if mode_select_method == 'random':
index = list(range(0, seq_len // 2))
np.random.shuffle(index)
@@ -26,7 +26,9 @@ def get_frequency_modes(seq_len, modes=64, mode_select_method='random'):
# ########## fourier layer #############
class FourierBlock(nn.Module):
- def __init__(self, in_channels, out_channels, seq_len, modes=0, mode_select_method='random'):
+ def __init__(
+ self, in_channels, out_channels, seq_len, modes=0, mode_select_method='random'
+ ):
super(FourierBlock, self).__init__()
print('fourier enhanced block used!')
"""
@@ -34,17 +36,27 @@ def __init__(self, in_channels, out_channels, seq_len, modes=0, mode_select_meth
it does FFT, linear transform, and Inverse FFT.
"""
# get modes on frequency domain
- self.index = get_frequency_modes(seq_len, modes=modes, mode_select_method=mode_select_method)
+ self.index = get_frequency_modes(
+ seq_len, modes=modes, mode_select_method=mode_select_method
+ )
print('modes={}, index={}'.format(modes, self.index))
- self.scale = (1 / (in_channels * out_channels))
+ self.scale = 1 / (in_channels * out_channels)
self.weights1 = nn.Parameter(
- self.scale * torch.rand(8, in_channels // 8, out_channels // 8, len(self.index), dtype=torch.cfloat))
+ self.scale
+ * torch.rand(
+ 8,
+ in_channels // 8,
+ out_channels // 8,
+ len(self.index),
+ dtype=torch.cfloat,
+ )
+ )
# Complex multiplication
def compl_mul1d(self, input, weights):
# (batch, in_channel, x ), (in_channel, out_channel, x) -> (batch, out_channel, x)
- return torch.einsum("bhi,hio->bho", input, weights)
+ return torch.einsum('bhi,hio->bho', input, weights)
def forward(self, q, k, v, mask):
# size = [B, L, H, E]
@@ -55,7 +67,9 @@ def forward(self, q, k, v, mask):
# Perform Fourier neural operations
out_ft = torch.zeros(B, H, E, L // 2 + 1, device=x.device, dtype=torch.cfloat)
for wi, i in enumerate(self.index):
- out_ft[:, :, :, wi] = self.compl_mul1d(x_ft[:, :, :, i], self.weights1[:, :, :, wi])
+ out_ft[:, :, :, wi] = self.compl_mul1d(
+ x_ft[:, :, :, i], self.weights1[:, :, :, wi]
+ )
# Return to time domain
x = torch.fft.irfft(out_ft, n=x.size(-1))
return (x, None)
@@ -63,8 +77,17 @@ def forward(self, q, k, v, mask):
# ########## Fourier Cross Former ####################
class FourierCrossAttention(nn.Module):
- def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, mode_select_method='random',
- activation='tanh', policy=0):
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ seq_len_q,
+ seq_len_kv,
+ modes=64,
+ mode_select_method='random',
+ activation='tanh',
+ policy=0,
+ ):
super(FourierCrossAttention, self).__init__()
print(' fourier enhanced cross attention used!')
"""
@@ -74,56 +97,72 @@ def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, m
self.in_channels = in_channels
self.out_channels = out_channels
# get modes for queries and keys (& values) on frequency domain
- self.index_q = get_frequency_modes(seq_len_q, modes=modes, mode_select_method=mode_select_method)
- self.index_kv = get_frequency_modes(seq_len_kv, modes=modes, mode_select_method=mode_select_method)
+ self.index_q = get_frequency_modes(
+ seq_len_q, modes=modes, mode_select_method=mode_select_method
+ )
+ self.index_kv = get_frequency_modes(
+ seq_len_kv, modes=modes, mode_select_method=mode_select_method
+ )
print('modes_q={}, index_q={}'.format(len(self.index_q), self.index_q))
print('modes_kv={}, index_kv={}'.format(len(self.index_kv), self.index_kv))
- self.scale = (1 / (in_channels * out_channels))
+ self.scale = 1 / (in_channels * out_channels)
self.weights1 = nn.Parameter(
- self.scale * torch.rand(8, in_channels // 8, out_channels // 8, len(self.index_q), dtype=torch.cfloat))
+ self.scale
+ * torch.rand(
+ 8,
+ in_channels // 8,
+ out_channels // 8,
+ len(self.index_q),
+ dtype=torch.cfloat,
+ )
+ )
# Complex multiplication
def compl_mul1d(self, input, weights):
# (batch, in_channel, x ), (in_channel, out_channel, x) -> (batch, out_channel, x)
- return torch.einsum("bhi,hio->bho", input, weights)
+ return torch.einsum('bhi,hio->bho', input, weights)
def forward(self, q, k, v, mask):
# size = [B, L, H, E]
B, L, H, E = q.shape
xq = q.permute(0, 2, 3, 1) # size = [B, H, E, L]
xk = k.permute(0, 2, 3, 1)
- xv = v.permute(0, 2, 3, 1)
+ v.permute(0, 2, 3, 1)
# Compute Fourier coefficients
- xq_ft_ = torch.zeros(B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat)
+ xq_ft_ = torch.zeros(
+ B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat
+ )
xq_ft = torch.fft.rfft(xq, dim=-1)
for i, j in enumerate(self.index_q):
xq_ft_[:, :, :, i] = xq_ft[:, :, :, j]
- xk_ft_ = torch.zeros(B, H, E, len(self.index_kv), device=xq.device, dtype=torch.cfloat)
+ xk_ft_ = torch.zeros(
+ B, H, E, len(self.index_kv), device=xq.device, dtype=torch.cfloat
+ )
xk_ft = torch.fft.rfft(xk, dim=-1)
for i, j in enumerate(self.index_kv):
xk_ft_[:, :, :, i] = xk_ft[:, :, :, j]
# perform attention mechanism on frequency domain
- xqk_ft = (torch.einsum("bhex,bhey->bhxy", xq_ft_, xk_ft_))
+ xqk_ft = torch.einsum('bhex,bhey->bhxy', xq_ft_, xk_ft_)
if self.activation == 'tanh':
xqk_ft = xqk_ft.tanh()
elif self.activation == 'softmax':
xqk_ft = torch.softmax(abs(xqk_ft), dim=-1)
xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft))
else:
- raise Exception('{} actiation function is not implemented'.format(self.activation))
- xqkv_ft = torch.einsum("bhxy,bhey->bhex", xqk_ft, xk_ft_)
- xqkvw = torch.einsum("bhex,heox->bhox", xqkv_ft, self.weights1)
+ raise Exception(
+ '{} activation function is not implemented'.format(self.activation)
+ )
+ xqkv_ft = torch.einsum('bhxy,bhey->bhex', xqk_ft, xk_ft_)
+ xqkvw = torch.einsum('bhex,heox->bhox', xqkv_ft, self.weights1)
out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat)
for i, j in enumerate(self.index_q):
out_ft[:, :, :, j] = xqkvw[:, :, :, i]
# Return to time domain
- out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1))
+ out = torch.fft.irfft(
+ out_ft / self.in_channels / self.out_channels, n=xq.size(-1)
+ )
return (out, None)
-
-
-
-
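Note (not part of the patch): `FourierBlock` above filters the sequence in the frequency domain: take the rFFT, keep a subset of modes, mix them with learnable complex weights, and invert. A minimal sketch of that operation; the shapes, mode subset and weight layout are assumptions, not the module's exact parameterisation:

```python
# Sketch only: shapes and the chosen modes are illustrative assumptions.
import torch

B, H, E, L = 2, 8, 64, 96
x = torch.randn(B, H, E, L)
modes = list(range(0, 16))                               # keep the 16 lowest frequencies

x_ft = torch.fft.rfft(x, dim=-1)                         # [B, H, E, L//2 + 1]
weights = torch.randn(H, E, E, len(modes), dtype=torch.cfloat) / (E * E)

out_ft = torch.zeros(B, H, E, L // 2 + 1, dtype=torch.cfloat)
for wi, i in enumerate(modes):
    # (batch, head, in_ch) x (head, in_ch, out_ch) -> (batch, head, out_ch)
    out_ft[:, :, :, i] = torch.einsum('bhi,hio->bho', x_ft[:, :, :, i], weights[:, :, :, wi])

out = torch.fft.irfft(out_ft, n=L)                       # back to the time domain, [B, H, E, L]
print(out.shape)
```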
diff --git a/benchmark/layers/MultiWaveletCorrelation.py b/benchmark/layers/MultiWaveletCorrelation.py
index 5d8d7fc..71bfde2 100644
--- a/benchmark/layers/MultiWaveletCorrelation.py
+++ b/benchmark/layers/MultiWaveletCorrelation.py
@@ -1,20 +1,15 @@
-import torch
+import math
+from typing import List, Tuple
+
import numpy as np
+import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
-from typing import List, Tuple
-import math
-from functools import partial
-from einops import rearrange, reduce, repeat
-from torch import nn, einsum, diagonal
-from math import log2, ceil
-import pdb
-from utils.masking import LocalMask
from layers.utils import get_filter
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class MultiWaveletTransform(nn.Module):
@@ -22,8 +17,17 @@ class MultiWaveletTransform(nn.Module):
1D multiwavelet block.
"""
- def __init__(self, ich=1, k=8, alpha=16, c=128,
- nCZ=1, L=0, base='legendre', attention_dropout=0.1):
+ def __init__(
+ self,
+ ich=1,
+ k=8,
+ alpha=16,
+ c=128,
+ nCZ=1,
+ L=0,
+ base='legendre',
+ attention_dropout=0.1,
+ ):
super(MultiWaveletTransform, self).__init__()
print('base', base)
self.k = k
@@ -39,7 +43,7 @@ def forward(self, queries, keys, values, attn_mask):
B, L, H, E = queries.shape
_, S, _, D = values.shape
if L > S:
- zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
+ zeros = torch.zeros_like(queries[:, : (L - S), :]).float()
values = torch.cat([values, zeros], dim=1)
keys = torch.cat([keys, zeros], dim=1)
else:
@@ -63,13 +67,23 @@ class MultiWaveletCross(nn.Module):
1D Multiwavelet Cross Attention layer.
"""
- def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes, c=64,
- k=8, ich=512,
- L=0,
- base='legendre',
- mode_select_method='random',
- initializer=None, activation='tanh',
- **kwargs):
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ seq_len_q,
+ seq_len_kv,
+ modes,
+ c=64,
+ k=8,
+ ich=512,
+ L=0,
+ base='legendre',
+ mode_select_method='random',
+ initializer=None,
+ activation='tanh',
+ **kwargs,
+ ):
super(MultiWaveletCross, self).__init__()
print('base', base)
@@ -88,28 +102,48 @@ def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes, c=64
G1r[np.abs(G1r) < 1e-8] = 0
self.max_item = 3
- self.attn1 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q,
- seq_len_kv=seq_len_kv, modes=modes, activation=activation,
- mode_select_method=mode_select_method)
- self.attn2 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q,
- seq_len_kv=seq_len_kv, modes=modes, activation=activation,
- mode_select_method=mode_select_method)
- self.attn3 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q,
- seq_len_kv=seq_len_kv, modes=modes, activation=activation,
- mode_select_method=mode_select_method)
- self.attn4 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q,
- seq_len_kv=seq_len_kv, modes=modes, activation=activation,
- mode_select_method=mode_select_method)
+ self.attn1 = FourierCrossAttentionW(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ seq_len_q=seq_len_q,
+ seq_len_kv=seq_len_kv,
+ modes=modes,
+ activation=activation,
+ mode_select_method=mode_select_method,
+ )
+ self.attn2 = FourierCrossAttentionW(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ seq_len_q=seq_len_q,
+ seq_len_kv=seq_len_kv,
+ modes=modes,
+ activation=activation,
+ mode_select_method=mode_select_method,
+ )
+ self.attn3 = FourierCrossAttentionW(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ seq_len_q=seq_len_q,
+ seq_len_kv=seq_len_kv,
+ modes=modes,
+ activation=activation,
+ mode_select_method=mode_select_method,
+ )
+ self.attn4 = FourierCrossAttentionW(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ seq_len_q=seq_len_q,
+ seq_len_kv=seq_len_kv,
+ modes=modes,
+ activation=activation,
+ mode_select_method=mode_select_method,
+ )
self.T0 = nn.Linear(k, k)
- self.register_buffer('ec_s', torch.Tensor(
- np.concatenate((H0.T, H1.T), axis=0)))
- self.register_buffer('ec_d', torch.Tensor(
- np.concatenate((G0.T, G1.T), axis=0)))
+ self.register_buffer('ec_s', torch.Tensor(np.concatenate((H0.T, H1.T), axis=0)))
+ self.register_buffer('ec_d', torch.Tensor(np.concatenate((G0.T, G1.T), axis=0)))
- self.register_buffer('rc_e', torch.Tensor(
- np.concatenate((H0r, G0r), axis=0)))
- self.register_buffer('rc_o', torch.Tensor(
- np.concatenate((H1r, G1r), axis=0)))
+ self.register_buffer('rc_e', torch.Tensor(np.concatenate((H0r, G0r), axis=0)))
+ self.register_buffer('rc_o', torch.Tensor(np.concatenate((H1r, G1r), axis=0)))
self.Lk = nn.Linear(ich, c * k)
self.Lq = nn.Linear(ich, c * k)
@@ -132,7 +166,7 @@ def forward(self, q, k, v, mask=None):
v = v.view(v.shape[0], v.shape[1], self.c, self.k)
if N > S:
- zeros = torch.zeros_like(q[:, :(N - S), :]).float()
+ zeros = torch.zeros_like(q[:, : (N - S), :]).float()
v = torch.cat([v, zeros], dim=1)
k = torch.cat([k, zeros], dim=1)
else:
@@ -141,9 +175,9 @@ def forward(self, q, k, v, mask=None):
ns = math.floor(np.log2(N))
nl = pow(2, math.ceil(np.log2(N)))
- extra_q = q[:, 0:nl - N, :, :]
- extra_k = k[:, 0:nl - N, :, :]
- extra_v = v[:, 0:nl - N, :, :]
+ extra_q = q[:, 0 : nl - N, :, :]
+ extra_k = k[:, 0 : nl - N, :, :]
+ extra_v = v[:, 0 : nl - N, :, :]
q = torch.cat([q, extra_q], 1)
k = torch.cat([k, extra_k], 1)
v = torch.cat([v, extra_v], 1)
@@ -177,7 +211,10 @@ def forward(self, q, k, v, mask=None):
dk, sk = Ud_k[i], Us_k[i]
dq, sq = Ud_q[i], Us_q[i]
dv, sv = Ud_v[i], Us_v[i]
- Ud += [self.attn1(dq[0], dk[0], dv[0], mask)[0] + self.attn2(dq[1], dk[1], dv[1], mask)[0]]
+ Ud += [
+ self.attn1(dq[0], dk[0], dv[0], mask)[0]
+ + self.attn2(dq[1], dk[1], dv[1], mask)[0]
+ ]
Us += [self.attn3(sq, sk, sv, mask)[0]]
v = self.attn4(q, k, v, mask)[0]
@@ -190,9 +227,13 @@ def forward(self, q, k, v, mask=None):
return (v.contiguous(), None)
def wavelet_transform(self, x):
- xa = torch.cat([x[:, ::2, :, :],
- x[:, 1::2, :, :],
- ], -1)
+ xa = torch.cat(
+ [
+ x[:, ::2, :, :],
+ x[:, 1::2, :, :],
+ ],
+ -1,
+ )
d = torch.matmul(xa, self.ec_d)
s = torch.matmul(xa, self.ec_s)
return d, s
@@ -203,16 +244,23 @@ def evenOdd(self, x):
x_e = torch.matmul(x, self.rc_e)
x_o = torch.matmul(x, self.rc_o)
- x = torch.zeros(B, N * 2, c, self.k,
- device=x.device)
+ x = torch.zeros(B, N * 2, c, self.k, device=x.device)
x[..., ::2, :, :] = x_e
x[..., 1::2, :, :] = x_o
return x
class FourierCrossAttentionW(nn.Module):
- def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=16, activation='tanh',
- mode_select_method='random'):
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ seq_len_q,
+ seq_len_kv,
+ modes=16,
+ activation='tanh',
+ mode_select_method='random',
+ ):
super(FourierCrossAttentionW, self).__init__()
print('corss fourier correlation used!')
self.in_channels = in_channels
@@ -230,52 +278,58 @@ def forward(self, q, k, v, mask):
self.index_k_v = list(range(0, min(int(xv.shape[3] // 2), self.modes1)))
# Compute Fourier coefficients
- xq_ft_ = torch.zeros(B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat)
+ xq_ft_ = torch.zeros(
+ B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat
+ )
xq_ft = torch.fft.rfft(xq, dim=-1)
for i, j in enumerate(self.index_q):
xq_ft_[:, :, :, i] = xq_ft[:, :, :, j]
- xk_ft_ = torch.zeros(B, H, E, len(self.index_k_v), device=xq.device, dtype=torch.cfloat)
+ xk_ft_ = torch.zeros(
+ B, H, E, len(self.index_k_v), device=xq.device, dtype=torch.cfloat
+ )
xk_ft = torch.fft.rfft(xk, dim=-1)
for i, j in enumerate(self.index_k_v):
xk_ft_[:, :, :, i] = xk_ft[:, :, :, j]
- xqk_ft = (torch.einsum("bhex,bhey->bhxy", xq_ft_, xk_ft_))
+ xqk_ft = torch.einsum('bhex,bhey->bhxy', xq_ft_, xk_ft_)
if self.activation == 'tanh':
xqk_ft = xqk_ft.tanh()
elif self.activation == 'softmax':
xqk_ft = torch.softmax(abs(xqk_ft), dim=-1)
xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft))
else:
- raise Exception('{} actiation function is not implemented'.format(self.activation))
- xqkv_ft = torch.einsum("bhxy,bhey->bhex", xqk_ft, xk_ft_)
+ raise Exception(
+ '{} activation function is not implemented'.format(self.activation)
+ )
+ xqkv_ft = torch.einsum('bhxy,bhey->bhex', xqk_ft, xk_ft_)
xqkvw = xqkv_ft
out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat)
for i, j in enumerate(self.index_q):
out_ft[:, :, :, j] = xqkvw[:, :, :, i]
- out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1)).permute(0, 3, 2, 1)
+ out = torch.fft.irfft(
+ out_ft / self.in_channels / self.out_channels, n=xq.size(-1)
+ ).permute(0, 3, 2, 1)
# size = [B, L, H, E]
return (out, None)
class sparseKernelFT1d(nn.Module):
- def __init__(self,
- k, alpha, c=1,
- nl=1,
- initializer=None,
- **kwargs):
+ def __init__(self, k, alpha, c=1, nl=1, initializer=None, **kwargs):
super(sparseKernelFT1d, self).__init__()
self.modes1 = alpha
- self.scale = (1 / (c * k * c * k))
- self.weights1 = nn.Parameter(self.scale * torch.rand(c * k, c * k, self.modes1, dtype=torch.cfloat))
+ self.scale = 1 / (c * k * c * k)
+ self.weights1 = nn.Parameter(
+ self.scale * torch.rand(c * k, c * k, self.modes1, dtype=torch.cfloat)
+ )
self.weights1.requires_grad = True
self.k = k
def compl_mul1d(self, x, weights):
# (batch, in_channel, x ), (in_channel, out_channel, x) -> (batch, out_channel, x)
- return torch.einsum("bix,iox->box", x, weights)
+ return torch.einsum('bix,iox->box', x, weights)
def forward(self, x):
B, N, c, k = x.shape # (B, N, c, k)
@@ -295,12 +349,9 @@ def forward(self, x):
# ##
class MWT_CZ1d(nn.Module):
- def __init__(self,
- k=3, alpha=64,
- L=0, c=1,
- base='legendre',
- initializer=None,
- **kwargs):
+ def __init__(
+ self, k=3, alpha=64, L=0, c=1, base='legendre', initializer=None, **kwargs
+ ):
super(MWT_CZ1d, self).__init__()
self.k = k
@@ -323,21 +374,17 @@ def __init__(self,
self.T0 = nn.Linear(k, k)
- self.register_buffer('ec_s', torch.Tensor(
- np.concatenate((H0.T, H1.T), axis=0)))
- self.register_buffer('ec_d', torch.Tensor(
- np.concatenate((G0.T, G1.T), axis=0)))
+ self.register_buffer('ec_s', torch.Tensor(np.concatenate((H0.T, H1.T), axis=0)))
+ self.register_buffer('ec_d', torch.Tensor(np.concatenate((G0.T, G1.T), axis=0)))
- self.register_buffer('rc_e', torch.Tensor(
- np.concatenate((H0r, G0r), axis=0)))
- self.register_buffer('rc_o', torch.Tensor(
- np.concatenate((H1r, G1r), axis=0)))
+ self.register_buffer('rc_e', torch.Tensor(np.concatenate((H0r, G0r), axis=0)))
+ self.register_buffer('rc_o', torch.Tensor(np.concatenate((H1r, G1r), axis=0)))
def forward(self, x):
B, N, c, k = x.shape # (B, N, k)
ns = math.floor(np.log2(N))
nl = pow(2, math.ceil(np.log2(N)))
- extra_x = x[:, 0:nl - N, :, :]
+ extra_x = x[:, 0 : nl - N, :, :]
x = torch.cat([x, extra_x], 1)
Ud = torch.jit.annotate(List[Tensor], [])
Us = torch.jit.annotate(List[Tensor], [])
@@ -359,22 +406,24 @@ def forward(self, x):
return x
def wavelet_transform(self, x):
- xa = torch.cat([x[:, ::2, :, :],
- x[:, 1::2, :, :],
- ], -1)
+ xa = torch.cat(
+ [
+ x[:, ::2, :, :],
+ x[:, 1::2, :, :],
+ ],
+ -1,
+ )
d = torch.matmul(xa, self.ec_d)
s = torch.matmul(xa, self.ec_s)
return d, s
def evenOdd(self, x):
-
B, N, c, ich = x.shape # (B, N, c, k)
assert ich == 2 * self.k
x_e = torch.matmul(x, self.rc_e)
x_o = torch.matmul(x, self.rc_o)
- x = torch.zeros(B, N * 2, c, self.k,
- device=x.device)
+ x = torch.zeros(B, N * 2, c, self.k, device=x.device)
x[..., ::2, :, :] = x_e
x[..., 1::2, :, :] = x_o
return x
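Note (not part of the patch): the `wavelet_transform`/`evenOdd` pair above implements one level of a multiwavelet analysis: split the sequence into even- and odd-indexed samples, apply the analysis filters, and later re-interleave. A minimal sketch with random stand-ins for the filters produced by `get_filter()`:

```python
# Sketch only: the filter matrices are random stand-ins, not real wavelet filters.
import torch

B, N, c, k = 2, 64, 4, 8
x = torch.randn(B, N, c, k)
ec_d = torch.randn(2 * k, k)          # "detail" analysis filter (assumption)
ec_s = torch.randn(2 * k, k)          # "smooth" analysis filter (assumption)

xa = torch.cat([x[:, ::2, :, :], x[:, 1::2, :, :]], dim=-1)   # [B, N/2, c, 2k]
d = torch.matmul(xa, ec_d)                                     # detail coefficients
s = torch.matmul(xa, ec_s)                                     # smooth coefficients
print(d.shape, s.shape)                                        # [B, N/2, c, k] twice
```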
diff --git a/benchmark/layers/SelfAttention_Family.py b/benchmark/layers/SelfAttention_Family.py
index c8138e2..8e1c820 100644
--- a/benchmark/layers/SelfAttention_Family.py
+++ b/benchmark/layers/SelfAttention_Family.py
@@ -1,18 +1,21 @@
+from math import sqrt
+
+import numpy as np
import torch
import torch.nn as nn
-import torch.nn.functional as F
-
-import matplotlib.pyplot as plt
-import numpy as np
-import math
-from math import sqrt
-from utils.masking import TriangularCausalMask, ProbMask
-import os
+from utils.masking import ProbMask, TriangularCausalMask
class FullAttention(nn.Module):
- def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
+ def __init__(
+ self,
+ mask_flag=True,
+ factor=5,
+ scale=None,
+ attention_dropout=0.1,
+ output_attention=False,
+ ):
super(FullAttention, self).__init__()
self.scale = scale
self.mask_flag = mask_flag
@@ -22,9 +25,9 @@ def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1,
def forward(self, queries, keys, values, attn_mask):
B, L, H, E = queries.shape
_, S, _, D = values.shape
- scale = self.scale or 1. / sqrt(E)
+ scale = self.scale or 1.0 / sqrt(E)
- scores = torch.einsum("blhe,bshe->bhls", queries, keys)
+ scores = torch.einsum('blhe,bshe->bhls', queries, keys)
if self.mask_flag:
if attn_mask is None:
@@ -33,7 +36,7 @@ def forward(self, queries, keys, values, attn_mask):
scores.masked_fill_(attn_mask.mask, -np.inf)
A = self.dropout(torch.softmax(scale * scores, dim=-1))
- V = torch.einsum("bhls,bshd->blhd", A, values)
+ V = torch.einsum('bhls,bshd->blhd', A, values)
if self.output_attention:
return (V.contiguous(), A)
@@ -42,7 +45,14 @@ def forward(self, queries, keys, values, attn_mask):
class ProbAttention(nn.Module):
- def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
+ def __init__(
+ self,
+ mask_flag=True,
+ factor=5,
+ scale=None,
+ attention_dropout=0.1,
+ output_attention=False,
+ ):
super(ProbAttention, self).__init__()
self.factor = factor
self.scale = scale
@@ -57,18 +67,20 @@ def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
# calculate the sampled Q_K
K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
- index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q
+ index_sample = torch.randint(
+ L_K, (L_Q, sample_k)
+ ) # real U = U_part(factor*ln(L_k))*L_q
K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()
- # find the Top_k query with sparisty measurement
+ # find the Top_k query with sparsity measurement
M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
M_top = M.topk(n_top, sorted=False)[1]
# use the reduced Q to calculate Q_K
- Q_reduce = Q[torch.arange(B)[:, None, None],
- torch.arange(H)[None, :, None],
- M_top, :] # factor*ln(L_q)
+ Q_reduce = Q[
+ torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], M_top, :
+ ] # factor*ln(L_q)
Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
return Q_K, M_top
@@ -80,7 +92,7 @@ def _get_initial_context(self, V, L_Q):
V_sum = V.mean(dim=-2)
contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
else: # use mask
- assert (L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
+ assert L_Q == L_V # requires that L_Q == L_V, i.e. for self-attention only
contex = V.cumsum(dim=-2)
return contex
@@ -93,12 +105,14 @@ def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
- context_in[torch.arange(B)[:, None, None],
- torch.arange(H)[None, :, None],
- index, :] = torch.matmul(attn, V).type_as(context_in)
+ context_in[
+ torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :
+ ] = torch.matmul(attn, V).type_as(context_in)
if self.output_attention:
attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device)
- attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
+ attns[
+ torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :
+ ] = attn
return (context_in, attns)
else:
return (context_in, None)
@@ -120,20 +134,21 @@ def forward(self, queries, keys, values, attn_mask):
scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)
# add scale factor
- scale = self.scale or 1. / sqrt(D)
+ scale = self.scale or 1.0 / sqrt(D)
if scale is not None:
scores_top = scores_top * scale
# get the context
context = self._get_initial_context(values, L_Q)
# update the context with selected top_k queries
- context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)
+ context, attn = self._update_context(
+ context, values, scores_top, index, L_Q, attn_mask
+ )
return context.contiguous(), attn
class AttentionLayer(nn.Module):
- def __init__(self, attention, d_model, n_heads, d_keys=None,
- d_values=None):
+ def __init__(self, attention, d_model, n_heads, d_keys=None, d_values=None):
super(AttentionLayer, self).__init__()
d_keys = d_keys or (d_model // n_heads)
@@ -155,12 +170,7 @@ def forward(self, queries, keys, values, attn_mask):
keys = self.key_projection(keys).view(B, S, H, -1)
values = self.value_projection(values).view(B, S, H, -1)
- out, attn = self.inner_attention(
- queries,
- keys,
- values,
- attn_mask
- )
+ out, attn = self.inner_attention(queries, keys, values, attn_mask)
out = out.view(B, L, -1)
return self.out_projection(out), attn
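Note (not part of the patch): `ProbAttention` above keeps only the "active" queries, scored on a random sample of keys by M = max_j(q·k_j) − mean_j(q·k_j). A minimal sketch of that query-selection step with assumed sizes:

```python
# Sketch only: all sizes are illustrative assumptions.
import math
import torch

B, H, L_Q, L_K, E = 2, 8, 96, 96, 64
Q = torch.randn(B, H, L_Q, E)
K = torch.randn(B, H, L_K, E)
factor = 5

sample_k = min(L_K, int(factor * math.ceil(math.log(L_K))))   # keys sampled per query
u = min(L_Q, int(factor * math.ceil(math.log(L_Q))))          # queries kept

index_sample = torch.randint(L_K, (L_Q, sample_k))
K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]   # [B, H, L_Q, sample_k, E]
Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)

M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)   # sparsity measure per query
M_top = M.topk(u, sorted=False)[1]                               # indices of "active" queries
print(M_top.shape)                                               # (B, H, u)
```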
diff --git a/benchmark/layers/Transformer_EncDec.py b/benchmark/layers/Transformer_EncDec.py
index c0c5789..2e614b5 100644
--- a/benchmark/layers/Transformer_EncDec.py
+++ b/benchmark/layers/Transformer_EncDec.py
@@ -1,4 +1,3 @@
-import torch
import torch.nn as nn
import torch.nn.functional as F
@@ -6,11 +5,13 @@
class ConvLayer(nn.Module):
def __init__(self, c_in):
super(ConvLayer, self).__init__()
- self.downConv = nn.Conv1d(in_channels=c_in,
- out_channels=c_in,
- kernel_size=3,
- padding=2,
- padding_mode='circular')
+ self.downConv = nn.Conv1d(
+ in_channels=c_in,
+ out_channels=c_in,
+ kernel_size=3,
+ padding=2,
+ padding_mode='circular',
+ )
self.norm = nn.BatchNorm1d(c_in)
self.activation = nn.ELU()
self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
@@ -25,7 +26,7 @@ def forward(self, x):
class EncoderLayer(nn.Module):
- def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
+ def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation='relu'):
super(EncoderLayer, self).__init__()
d_ff = d_ff or 4 * d_model
self.attention = attention
@@ -34,13 +35,10 @@ def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
self.dropout = nn.Dropout(dropout)
- self.activation = F.relu if activation == "relu" else F.gelu
+ self.activation = F.relu if activation == 'relu' else F.gelu
def forward(self, x, attn_mask=None):
- new_x, attn = self.attention(
- x, x, x,
- attn_mask=attn_mask
- )
+ new_x, attn = self.attention(x, x, x, attn_mask=attn_mask)
x = x + self.dropout(new_x)
y = x = self.norm1(x)
@@ -54,7 +52,9 @@ class Encoder(nn.Module):
def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
super(Encoder, self).__init__()
self.attn_layers = nn.ModuleList(attn_layers)
- self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
+ self.conv_layers = (
+ nn.ModuleList(conv_layers) if conv_layers is not None else None
+ )
self.norm = norm_layer
def forward(self, x, attn_mask=None):
@@ -79,8 +79,15 @@ def forward(self, x, attn_mask=None):
class DecoderLayer(nn.Module):
- def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
- dropout=0.1, activation="relu"):
+ def __init__(
+ self,
+ self_attention,
+ cross_attention,
+ d_model,
+ d_ff=None,
+ dropout=0.1,
+ activation='relu',
+ ):
super(DecoderLayer, self).__init__()
d_ff = d_ff or 4 * d_model
self.self_attention = self_attention
@@ -91,19 +98,15 @@ def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
self.norm2 = nn.LayerNorm(d_model)
self.norm3 = nn.LayerNorm(d_model)
self.dropout = nn.Dropout(dropout)
- self.activation = F.relu if activation == "relu" else F.gelu
+ self.activation = F.relu if activation == 'relu' else F.gelu
def forward(self, x, cross, x_mask=None, cross_mask=None):
- x = x + self.dropout(self.self_attention(
- x, x, x,
- attn_mask=x_mask
- )[0])
+ x = x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0])
x = self.norm1(x)
- x = x + self.dropout(self.cross_attention(
- x, cross, cross,
- attn_mask=cross_mask
- )[0])
+ x = x + self.dropout(
+ self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
+ )
y = x = self.norm2(x)
y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
diff --git a/benchmark/layers/utils.py b/benchmark/layers/utils.py
index abad383..4c8673c 100644
--- a/benchmark/layers/utils.py
+++ b/benchmark/layers/utils.py
@@ -1,100 +1,130 @@
-import torch
-import torch.nn as nn
-
-import numpy as np
from functools import partial
+import numpy as np
+import torch
+import torch.nn as nn
from scipy.special import eval_legendre
-from sympy import Poly, legendre, Symbol, chebyshevt
+from sympy import Poly, Symbol, chebyshevt, legendre
def legendreDer(k, x):
def _legendre(k, x):
- return (2*k+1) * eval_legendre(k, x)
+ return (2 * k + 1) * eval_legendre(k, x)
+
out = 0
- for i in np.arange(k-1,-1,-2):
+ for i in np.arange(k - 1, -1, -2):
out += _legendre(i, x)
return out
-def phi_(phi_c, x, lb = 0, ub = 1):
- mask = np.logical_or(x<lb, x>ub) * 1.0
- return np.polynomial.polynomial.Polynomial(phi_c)(x) * (1-mask)
+def phi_(phi_c, x, lb=0, ub=1):
+ mask = np.logical_or(x < lb, x > ub) * 1.0
+ return np.polynomial.polynomial.Polynomial(phi_c)(x) * (1 - mask)
def get_phi_psi(k, base):
-
x = Symbol('x')
- phi_coeff = np.zeros((k,k))
- phi_2x_coeff = np.zeros((k,k))
+ phi_coeff = np.zeros((k, k))
+ phi_2x_coeff = np.zeros((k, k))
if base == 'legendre':
for ki in range(k):
- coeff_ = Poly(legendre(ki, 2*x-1), x).all_coeffs()
- phi_coeff[ki,:ki+1] = np.flip(np.sqrt(2*ki+1) * np.array(coeff_).astype(np.float64))
- coeff_ = Poly(legendre(ki, 4*x-1), x).all_coeffs()
- phi_2x_coeff[ki,:ki+1] = np.flip(np.sqrt(2) * np.sqrt(2*ki+1) * np.array(coeff_).astype(np.float64))
-
+ coeff_ = Poly(legendre(ki, 2 * x - 1), x).all_coeffs()
+ phi_coeff[ki, : ki + 1] = np.flip(
+ np.sqrt(2 * ki + 1) * np.array(coeff_).astype(np.float64)
+ )
+ coeff_ = Poly(legendre(ki, 4 * x - 1), x).all_coeffs()
+ phi_2x_coeff[ki, : ki + 1] = np.flip(
+ np.sqrt(2) * np.sqrt(2 * ki + 1) * np.array(coeff_).astype(np.float64)
+ )
+
psi1_coeff = np.zeros((k, k))
psi2_coeff = np.zeros((k, k))
for ki in range(k):
- psi1_coeff[ki,:] = phi_2x_coeff[ki,:]
+ psi1_coeff[ki, :] = phi_2x_coeff[ki, :]
for i in range(k):
- a = phi_2x_coeff[ki,:ki+1]
- b = phi_coeff[i, :i+1]
+ a = phi_2x_coeff[ki, : ki + 1]
+ b = phi_coeff[i, : i + 1]
prod_ = np.convolve(a, b)
- prod_[np.abs(prod_)<1e-8] = 0
- proj_ = (prod_ * 1/(np.arange(len(prod_))+1) * np.power(0.5, 1+np.arange(len(prod_)))).sum()
- psi1_coeff[ki,:] -= proj_ * phi_coeff[i,:]
- psi2_coeff[ki,:] -= proj_ * phi_coeff[i,:]
+ prod_[np.abs(prod_) < 1e-8] = 0
+ proj_ = (
+ prod_
+ * 1
+ / (np.arange(len(prod_)) + 1)
+ * np.power(0.5, 1 + np.arange(len(prod_)))
+ ).sum()
+ psi1_coeff[ki, :] -= proj_ * phi_coeff[i, :]
+ psi2_coeff[ki, :] -= proj_ * phi_coeff[i, :]
for j in range(ki):
- a = phi_2x_coeff[ki,:ki+1]
+ a = phi_2x_coeff[ki, : ki + 1]
b = psi1_coeff[j, :]
prod_ = np.convolve(a, b)
- prod_[np.abs(prod_)<1e-8] = 0
- proj_ = (prod_ * 1/(np.arange(len(prod_))+1) * np.power(0.5, 1+np.arange(len(prod_)))).sum()
- psi1_coeff[ki,:] -= proj_ * psi1_coeff[j,:]
- psi2_coeff[ki,:] -= proj_ * psi2_coeff[j,:]
-
- a = psi1_coeff[ki,:]
+ prod_[np.abs(prod_) < 1e-8] = 0
+ proj_ = (
+ prod_
+ * 1
+ / (np.arange(len(prod_)) + 1)
+ * np.power(0.5, 1 + np.arange(len(prod_)))
+ ).sum()
+ psi1_coeff[ki, :] -= proj_ * psi1_coeff[j, :]
+ psi2_coeff[ki, :] -= proj_ * psi2_coeff[j, :]
+
+ a = psi1_coeff[ki, :]
prod_ = np.convolve(a, a)
- prod_[np.abs(prod_)<1e-8] = 0
- norm1 = (prod_ * 1/(np.arange(len(prod_))+1) * np.power(0.5, 1+np.arange(len(prod_)))).sum()
-
- a = psi2_coeff[ki,:]
+ prod_[np.abs(prod_) < 1e-8] = 0
+ norm1 = (
+ prod_
+ * 1
+ / (np.arange(len(prod_)) + 1)
+ * np.power(0.5, 1 + np.arange(len(prod_)))
+ ).sum()
+
+ a = psi2_coeff[ki, :]
prod_ = np.convolve(a, a)
- prod_[np.abs(prod_)<1e-8] = 0
- norm2 = (prod_ * 1/(np.arange(len(prod_))+1) * (1-np.power(0.5, 1+np.arange(len(prod_))))).sum()
+ prod_[np.abs(prod_) < 1e-8] = 0
+ norm2 = (
+ prod_
+ * 1
+ / (np.arange(len(prod_)) + 1)
+ * (1 - np.power(0.5, 1 + np.arange(len(prod_))))
+ ).sum()
norm_ = np.sqrt(norm1 + norm2)
- psi1_coeff[ki,:] /= norm_
- psi2_coeff[ki,:] /= norm_
- psi1_coeff[np.abs(psi1_coeff)<1e-8] = 0
- psi2_coeff[np.abs(psi2_coeff)<1e-8] = 0
-
- phi = [np.poly1d(np.flip(phi_coeff[i,:])) for i in range(k)]
- psi1 = [np.poly1d(np.flip(psi1_coeff[i,:])) for i in range(k)]
- psi2 = [np.poly1d(np.flip(psi2_coeff[i,:])) for i in range(k)]
-
+ psi1_coeff[ki, :] /= norm_
+ psi2_coeff[ki, :] /= norm_
+ psi1_coeff[np.abs(psi1_coeff) < 1e-8] = 0
+ psi2_coeff[np.abs(psi2_coeff) < 1e-8] = 0
+
+ phi = [np.poly1d(np.flip(phi_coeff[i, :])) for i in range(k)]
+ psi1 = [np.poly1d(np.flip(psi1_coeff[i, :])) for i in range(k)]
+ psi2 = [np.poly1d(np.flip(psi2_coeff[i, :])) for i in range(k)]
+
elif base == 'chebyshev':
for ki in range(k):
if ki == 0:
- phi_coeff[ki,:ki+1] = np.sqrt(2/np.pi)
- phi_2x_coeff[ki,:ki+1] = np.sqrt(2/np.pi) * np.sqrt(2)
+ phi_coeff[ki, : ki + 1] = np.sqrt(2 / np.pi)
+ phi_2x_coeff[ki, : ki + 1] = np.sqrt(2 / np.pi) * np.sqrt(2)
else:
- coeff_ = Poly(chebyshevt(ki, 2*x-1), x).all_coeffs()
- phi_coeff[ki,:ki+1] = np.flip(2/np.sqrt(np.pi) * np.array(coeff_).astype(np.float64))
- coeff_ = Poly(chebyshevt(ki, 4*x-1), x).all_coeffs()
- phi_2x_coeff[ki,:ki+1] = np.flip(np.sqrt(2) * 2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64))
-
- phi = [partial(phi_, phi_coeff[i,:]) for i in range(k)]
-
+ coeff_ = Poly(chebyshevt(ki, 2 * x - 1), x).all_coeffs()
+ phi_coeff[ki, : ki + 1] = np.flip(
+ 2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64)
+ )
+ coeff_ = Poly(chebyshevt(ki, 4 * x - 1), x).all_coeffs()
+ phi_2x_coeff[ki, : ki + 1] = np.flip(
+ np.sqrt(2)
+ * 2
+ / np.sqrt(np.pi)
+ * np.array(coeff_).astype(np.float64)
+ )
+
+ phi = [partial(phi_, phi_coeff[i, :]) for i in range(k)]
+
x = Symbol('x')
- kUse = 2*k
- roots = Poly(chebyshevt(kUse, 2*x-1)).all_roots()
+ kUse = 2 * k
+ roots = Poly(chebyshevt(kUse, 2 * x - 1)).all_roots()
x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64)
# x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1)
# not needed for our purpose here, we use even k always to avoid
wm = np.pi / kUse / 2
-
+
psi1_coeff = np.zeros((k, k))
psi2_coeff = np.zeros((k, k))
@@ -102,71 +132,82 @@ def get_phi_psi(k, base):
psi2 = [[] for _ in range(k)]
for ki in range(k):
- psi1_coeff[ki,:] = phi_2x_coeff[ki,:]
+ psi1_coeff[ki, :] = phi_2x_coeff[ki, :]
for i in range(k):
- proj_ = (wm * phi[i](x_m) * np.sqrt(2)* phi[ki](2*x_m)).sum()
- psi1_coeff[ki,:] -= proj_ * phi_coeff[i,:]
- psi2_coeff[ki,:] -= proj_ * phi_coeff[i,:]
+ proj_ = (wm * phi[i](x_m) * np.sqrt(2) * phi[ki](2 * x_m)).sum()
+ psi1_coeff[ki, :] -= proj_ * phi_coeff[i, :]
+ psi2_coeff[ki, :] -= proj_ * phi_coeff[i, :]
for j in range(ki):
- proj_ = (wm * psi1[j](x_m) * np.sqrt(2) * phi[ki](2*x_m)).sum()
- psi1_coeff[ki,:] -= proj_ * psi1_coeff[j,:]
- psi2_coeff[ki,:] -= proj_ * psi2_coeff[j,:]
+ proj_ = (wm * psi1[j](x_m) * np.sqrt(2) * phi[ki](2 * x_m)).sum()
+ psi1_coeff[ki, :] -= proj_ * psi1_coeff[j, :]
+ psi2_coeff[ki, :] -= proj_ * psi2_coeff[j, :]
- psi1[ki] = partial(phi_, psi1_coeff[ki,:], lb = 0, ub = 0.5)
- psi2[ki] = partial(phi_, psi2_coeff[ki,:], lb = 0.5, ub = 1)
+ psi1[ki] = partial(phi_, psi1_coeff[ki, :], lb=0, ub=0.5)
+ psi2[ki] = partial(phi_, psi2_coeff[ki, :], lb=0.5, ub=1)
norm1 = (wm * psi1[ki](x_m) * psi1[ki](x_m)).sum()
norm2 = (wm * psi2[ki](x_m) * psi2[ki](x_m)).sum()
norm_ = np.sqrt(norm1 + norm2)
- psi1_coeff[ki,:] /= norm_
- psi2_coeff[ki,:] /= norm_
- psi1_coeff[np.abs(psi1_coeff)<1e-8] = 0
- psi2_coeff[np.abs(psi2_coeff)<1e-8] = 0
-
- psi1[ki] = partial(phi_, psi1_coeff[ki,:], lb = 0, ub = 0.5+1e-16)
- psi2[ki] = partial(phi_, psi2_coeff[ki,:], lb = 0.5+1e-16, ub = 1)
-
+ psi1_coeff[ki, :] /= norm_
+ psi2_coeff[ki, :] /= norm_
+ psi1_coeff[np.abs(psi1_coeff) < 1e-8] = 0
+ psi2_coeff[np.abs(psi2_coeff) < 1e-8] = 0
+
+ psi1[ki] = partial(phi_, psi1_coeff[ki, :], lb=0, ub=0.5 + 1e-16)
+ psi2[ki] = partial(phi_, psi2_coeff[ki, :], lb=0.5 + 1e-16, ub=1)
+
return phi, psi1, psi2
def get_filter(base, k):
-
def psi(psi1, psi2, i, inp):
- mask = (inp<=0.5) * 1.0
- return psi1[i](inp) * mask + psi2[i](inp) * (1-mask)
-
+ mask = (inp <= 0.5) * 1.0
+ return psi1[i](inp) * mask + psi2[i](inp) * (1 - mask)
+
if base not in ['legendre', 'chebyshev']:
raise Exception('Base not supported')
-
+
x = Symbol('x')
- H0 = np.zeros((k,k))
- H1 = np.zeros((k,k))
- G0 = np.zeros((k,k))
- G1 = np.zeros((k,k))
- PHI0 = np.zeros((k,k))
- PHI1 = np.zeros((k,k))
+ H0 = np.zeros((k, k))
+ H1 = np.zeros((k, k))
+ G0 = np.zeros((k, k))
+ G1 = np.zeros((k, k))
+ PHI0 = np.zeros((k, k))
+ PHI1 = np.zeros((k, k))
phi, psi1, psi2 = get_phi_psi(k, base)
if base == 'legendre':
- roots = Poly(legendre(k, 2*x-1)).all_roots()
+ roots = Poly(legendre(k, 2 * x - 1)).all_roots()
x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64)
- wm = 1/k/legendreDer(k,2*x_m-1)/eval_legendre(k-1,2*x_m-1)
-
+ wm = 1 / k / legendreDer(k, 2 * x_m - 1) / eval_legendre(k - 1, 2 * x_m - 1)
+
for ki in range(k):
for kpi in range(k):
- H0[ki, kpi] = 1/np.sqrt(2) * (wm * phi[ki](x_m/2) * phi[kpi](x_m)).sum()
- G0[ki, kpi] = 1/np.sqrt(2) * (wm * psi(psi1, psi2, ki, x_m/2) * phi[kpi](x_m)).sum()
- H1[ki, kpi] = 1/np.sqrt(2) * (wm * phi[ki]((x_m+1)/2) * phi[kpi](x_m)).sum()
- G1[ki, kpi] = 1/np.sqrt(2) * (wm * psi(psi1, psi2, ki, (x_m+1)/2) * phi[kpi](x_m)).sum()
-
+ H0[ki, kpi] = (
+ 1 / np.sqrt(2) * (wm * phi[ki](x_m / 2) * phi[kpi](x_m)).sum()
+ )
+ G0[ki, kpi] = (
+ 1
+ / np.sqrt(2)
+ * (wm * psi(psi1, psi2, ki, x_m / 2) * phi[kpi](x_m)).sum()
+ )
+ H1[ki, kpi] = (
+ 1 / np.sqrt(2) * (wm * phi[ki]((x_m + 1) / 2) * phi[kpi](x_m)).sum()
+ )
+ G1[ki, kpi] = (
+ 1
+ / np.sqrt(2)
+ * (wm * psi(psi1, psi2, ki, (x_m + 1) / 2) * phi[kpi](x_m)).sum()
+ )
+
PHI0 = np.eye(k)
PHI1 = np.eye(k)
-
+
elif base == 'chebyshev':
x = Symbol('x')
- kUse = 2*k
- roots = Poly(chebyshevt(kUse, 2*x-1)).all_roots()
+ kUse = 2 * k
+ roots = Poly(chebyshevt(kUse, 2 * x - 1)).all_roots()
x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64)
# x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1)
# not needed for our purpose here, we use even k always to avoid
@@ -174,72 +215,97 @@ def psi(psi1, psi2, i, inp):
for ki in range(k):
for kpi in range(k):
- H0[ki, kpi] = 1/np.sqrt(2) * (wm * phi[ki](x_m/2) * phi[kpi](x_m)).sum()
- G0[ki, kpi] = 1/np.sqrt(2) * (wm * psi(psi1, psi2, ki, x_m/2) * phi[kpi](x_m)).sum()
- H1[ki, kpi] = 1/np.sqrt(2) * (wm * phi[ki]((x_m+1)/2) * phi[kpi](x_m)).sum()
- G1[ki, kpi] = 1/np.sqrt(2) * (wm * psi(psi1, psi2, ki, (x_m+1)/2) * phi[kpi](x_m)).sum()
-
- PHI0[ki, kpi] = (wm * phi[ki](2*x_m) * phi[kpi](2*x_m)).sum() * 2
- PHI1[ki, kpi] = (wm * phi[ki](2*x_m-1) * phi[kpi](2*x_m-1)).sum() * 2
-
- PHI0[np.abs(PHI0)<1e-8] = 0
- PHI1[np.abs(PHI1)<1e-8] = 0
-
- H0[np.abs(H0)<1e-8] = 0
- H1[np.abs(H1)<1e-8] = 0
- G0[np.abs(G0)<1e-8] = 0
- G1[np.abs(G1)<1e-8] = 0
-
+ H0[ki, kpi] = (
+ 1 / np.sqrt(2) * (wm * phi[ki](x_m / 2) * phi[kpi](x_m)).sum()
+ )
+ G0[ki, kpi] = (
+ 1
+ / np.sqrt(2)
+ * (wm * psi(psi1, psi2, ki, x_m / 2) * phi[kpi](x_m)).sum()
+ )
+ H1[ki, kpi] = (
+ 1 / np.sqrt(2) * (wm * phi[ki]((x_m + 1) / 2) * phi[kpi](x_m)).sum()
+ )
+ G1[ki, kpi] = (
+ 1
+ / np.sqrt(2)
+ * (wm * psi(psi1, psi2, ki, (x_m + 1) / 2) * phi[kpi](x_m)).sum()
+ )
+
+ PHI0[ki, kpi] = (wm * phi[ki](2 * x_m) * phi[kpi](2 * x_m)).sum() * 2
+ PHI1[ki, kpi] = (
+ wm * phi[ki](2 * x_m - 1) * phi[kpi](2 * x_m - 1)
+ ).sum() * 2
+
+ PHI0[np.abs(PHI0) < 1e-8] = 0
+ PHI1[np.abs(PHI1) < 1e-8] = 0
+
+ H0[np.abs(H0) < 1e-8] = 0
+ H1[np.abs(H1) < 1e-8] = 0
+ G0[np.abs(G0) < 1e-8] = 0
+ G1[np.abs(G1) < 1e-8] = 0
+
return H0, H1, G0, G1, PHI0, PHI1
-def train(model, train_loader, optimizer, epoch, device, verbose = 0,
- lossFn = None, lr_schedule=None,
- post_proc = lambda args: args):
-
+def train(
+ model,
+ train_loader,
+ optimizer,
+ epoch,
+ device,
+ verbose=0,
+ lossFn=None,
+ lr_schedule=None,
+ post_proc=lambda args: args,
+):
if lossFn is None:
lossFn = nn.MSELoss()
model.train()
-
- total_loss = 0.
+
+ total_loss = 0.0
for batch_idx, (data, target) in enumerate(train_loader):
-
bs = len(data)
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
-
+
output = model(data)
-
+
target = post_proc(target)
output = post_proc(output)
loss = lossFn(output.view(bs, -1), target.view(bs, -1))
-
+
loss.backward()
optimizer.step()
total_loss += loss.sum().item()
- if lr_schedule is not None: lr_schedule.step()
-
- if verbose>0:
- print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
- epoch, batch_idx * len(data), len(train_loader.dataset),
- 100. * batch_idx / len(train_loader), loss.item()))
-
- return total_loss/len(train_loader.dataset)
-
-
-def test(model, test_loader, device, verbose=0, lossFn=None,
- post_proc = lambda args: args):
-
+ if lr_schedule is not None:
+ lr_schedule.step()
+
+ if verbose > 0:
+ print(
+ 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+ epoch,
+ batch_idx * len(data),
+ len(train_loader.dataset),
+ 100.0 * batch_idx / len(train_loader),
+ loss.item(),
+ )
+ )
+
+ return total_loss / len(train_loader.dataset)
+
+
+def test(
+ model, test_loader, device, verbose=0, lossFn=None, post_proc=lambda args: args
+):
model.eval()
if lossFn is None:
lossFn = nn.MSELoss()
-
-
- total_loss = 0.
- predictions = []
-
+
+ total_loss = 0.0
+
with torch.no_grad():
for data, target in test_loader:
bs = len(data)
@@ -247,17 +313,18 @@ def test(model, test_loader, device, verbose=0, lossFn=None,
data, target = data.to(device), target.to(device)
output = model(data)
output = post_proc(output)
-
+
loss = lossFn(output.view(bs, -1), target.view(bs, -1))
total_loss += loss.sum().item()
-
- return total_loss/len(test_loader.dataset)
+
+ return total_loss / len(test_loader.dataset)
# Till EoF
# taken from FNO paper:
# https://github.com/zongyi-li/fourier_neural_operator
+
# normalization, pointwise gaussian
class UnitGaussianNormalizer(object):
def __init__(self, x, eps=0.00001):
@@ -274,15 +341,15 @@ def encode(self, x):
def decode(self, x, sample_idx=None):
if sample_idx is None:
- std = self.std + self.eps # n
+ std = self.std + self.eps # n
mean = self.mean
else:
if len(self.mean.shape) == len(sample_idx[0].shape):
std = self.std[sample_idx] + self.eps # batch*n
mean = self.mean[sample_idx]
if len(self.mean.shape) > len(sample_idx[0].shape):
- std = self.std[:,sample_idx]+ self.eps # T*batch*n
- mean = self.mean[:,sample_idx]
+ std = self.std[:, sample_idx] + self.eps # T*batch*n
+ mean = self.mean[:, sample_idx]
# x is in shape of batch*n or T*batch*n
x = (x * std) + mean
@@ -330,28 +397,29 @@ def __init__(self, x, low=0.0, high=1.0):
mymin = torch.min(x, 0)[0].view(-1)
mymax = torch.max(x, 0)[0].view(-1)
- self.a = (high - low)/(mymax - mymin)
- self.b = -self.a*mymax + high
+ self.a = (high - low) / (mymax - mymin)
+ self.b = -self.a * mymax + high
def encode(self, x):
s = x.size()
x = x.view(s[0], -1)
- x = self.a*x + self.b
+ x = self.a * x + self.b
x = x.view(s)
return x
def decode(self, x):
s = x.size()
x = x.view(s[0], -1)
- x = (x - self.b)/self.a
+ x = (x - self.b) / self.a
x = x.view(s)
return x
-
+
+
class LpLoss(object):
def __init__(self, d=2, p=2, size_average=True, reduction=True):
super(LpLoss, self).__init__()
- #Dimension and Lp-norm type are postive
+ # Dimension and Lp-norm type are positive
assert d > 0 and p > 0
self.d = d
@@ -365,7 +433,9 @@ def abs(self, x, y):
# Assume uniform mesh
h = 1.0 / (x.size()[1] - 1.0)
- all_norms = (h**(self.d/self.p))*torch.norm(x.view(num_examples,-1) - y.view(num_examples,-1), self.p, 1)
+ all_norms = (h ** (self.d / self.p)) * torch.norm(
+ x.view(num_examples, -1) - y.view(num_examples, -1), self.p, 1
+ )
if self.reduction:
if self.size_average:
@@ -378,16 +448,18 @@ def abs(self, x, y):
def rel(self, x, y):
num_examples = x.size()[0]
- diff_norms = torch.norm(x.reshape(num_examples,-1) - y.reshape(num_examples,-1), self.p, 1)
- y_norms = torch.norm(y.reshape(num_examples,-1), self.p, 1)
+ diff_norms = torch.norm(
+ x.reshape(num_examples, -1) - y.reshape(num_examples, -1), self.p, 1
+ )
+ y_norms = torch.norm(y.reshape(num_examples, -1), self.p, 1)
if self.reduction:
if self.size_average:
- return torch.mean(diff_norms/y_norms)
+ return torch.mean(diff_norms / y_norms)
else:
- return torch.sum(diff_norms/y_norms)
+ return torch.sum(diff_norms / y_norms)
- return diff_norms/y_norms
+ return diff_norms / y_norms
def __call__(self, x, y):
- return self.rel(x, y)
\ No newline at end of file
+ return self.rel(x, y)
diff --git a/benchmark/metalearned/common/evaluator.py b/benchmark/metalearned/common/evaluator.py
index 6ed209e..bb092b0 100644
--- a/benchmark/metalearned/common/evaluator.py
+++ b/benchmark/metalearned/common/evaluator.py
@@ -1,10 +1,13 @@
from dataclasses import dataclass
+
from common.timeseries import TimeseriesBundle
+
@dataclass
class Evaluator:
test_set: TimeseriesBundle
+
@dataclass
class EvaluationResult:
- test_set: TimeseriesBundle
\ No newline at end of file
+ test_set: TimeseriesBundle
diff --git a/benchmark/metalearned/common/experiment.py b/benchmark/metalearned/common/experiment.py
index 21c28ff..80235d7 100644
--- a/benchmark/metalearned/common/experiment.py
+++ b/benchmark/metalearned/common/experiment.py
@@ -12,10 +12,12 @@
command_file_name = 'experiment.cmd'
-def create_experiment(experiment_path: str,
- parameters: Dict,
- command: Callable[[str, Dict], str],
- callback: Callable[[str, Dict], None] = lambda path, params: None) -> None:
+def create_experiment(
+ experiment_path: str,
+ parameters: Dict,
+ command: Callable[[str, Dict], str],
+ callback: Callable[[str, Dict], None] = lambda path, params: None,
+) -> None:
"""
Create experiment.
If parameters contain keys with multiple values, then multiple sub-experiments will be created.
@@ -40,8 +42,13 @@ def create_experiment(experiment_path: str,
logging.info('Generating experiments ...')
for variables_instance in tqdm(product(*experiment_variables)):
sub_experiment_name = ','.join(
- ['%s=%.4g' % (name, value) if isinstance(value, float) else '%s=%s' % (name, str(value).replace(' ', '_'))
- for name, value in dict(variables_instance).items()])
+ [
+ '%s=%.4g' % (name, value)
+ if isinstance(value, float)
+ else '%s=%s' % (name, str(value).replace(' ', '_'))
+ for name, value in dict(variables_instance).items()
+ ]
+ )
sub_experiment_path = os.path.join(experiment_path, sub_experiment_name)
Path(sub_experiment_path).mkdir(parents=True, exist_ok=False)
@@ -51,7 +58,9 @@ def create_experiment(experiment_path: str,
# write command file
with open(os.path.join(sub_experiment_path, command_file_name), 'w') as f:
f.write(command(sub_experiment_path, dict(variables_instance)))
- callback(sub_experiment_path, dict(**{**parameters, **dict(variables_instance)}))
+ callback(
+ sub_experiment_path, dict(**{**parameters, **dict(variables_instance)})
+ )
def load_experiment_parameters(experiment_path: str) -> Dict:
diff --git a/benchmark/metalearned/common/metrics.py b/benchmark/metalearned/common/metrics.py
index 93402b5..c6e7d37 100644
--- a/benchmark/metalearned/common/metrics.py
+++ b/benchmark/metalearned/common/metrics.py
@@ -4,7 +4,9 @@
Target = np.ndarray
-def mase(forecast: Forecast, insample: np.ndarray, outsample: Target, frequency: int) -> np.ndarray:
+def mase(
+ forecast: Forecast, insample: np.ndarray, outsample: Target, frequency: int
+) -> np.ndarray:
"""
Calculate MASE of each point for each timeseries.
https://en.wikipedia.org/wiki/Mean_absolute_scaled_error
@@ -15,7 +17,9 @@ def mase(forecast: Forecast, insample: np.ndarray, outsample: Target, frequency:
:param frequency:
:return:
"""
- return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))
+ return np.mean(np.abs(forecast - outsample)) / np.mean(
+ np.abs(insample[:-frequency] - insample[frequency:])
+ )
def nd(forecast: Forecast, target: Target) -> float:
@@ -37,7 +41,9 @@ def nrmse(forecast: Forecast, target: Target) -> float:
:param target:
:return:
"""
- return np.sqrt(np.mean(np.power((forecast - target), 2))) / (np.mean(np.abs(target)))
+ return np.sqrt(np.mean(np.power((forecast - target), 2))) / (
+ np.mean(np.abs(target))
+ )
def mape(forecast: Forecast, target: Target) -> np.ndarray:
@@ -77,30 +83,29 @@ def smape_2(forecast: Forecast, target: Target) -> np.ndarray:
:return: Same shape array with sMAPE calculated for each time step of each timeseries.
"""
denom = np.abs(target) + np.abs(forecast)
- denom[denom == 0.0] = 1.0 # divide by 1.0 instead of 0.0, in case when denom is zero the enum will be 0.0 anyways.
+ denom[
+ denom == 0.0
+ ] = 1.0  # divide by 1.0 instead of 0.0; when denom is zero the numerator will be 0.0 anyway.
return 200 * np.abs(forecast - target) / denom
-
-
import tensorflow as tf
from keras import backend
def smape(y_true, y_pred):
- """ Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`.
- `loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred), axis=-1)`
- Args:
- y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
- y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
- Returns:
- Symmetric mean absolute percentage error values. shape = `[batch_size, d0, ..
- dN-1]`.
- """
+ """Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`.
+ `loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred)), axis=-1)`
+ Args:
+ y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
+ y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+ Returns:
+ Symmetric mean absolute percentage error values. shape = `[batch_size, d0, ..
+ dN-1]`.
+ """
y_pred = tf.convert_to_tensor(y_pred)
y_true = tf.cast(y_true, y_pred.dtype)
diff = tf.abs(
- (y_true - y_pred) /
- backend.maximum(y_true + y_pred, backend.epsilon())
+ (y_true - y_pred) / backend.maximum(y_true + y_pred, backend.epsilon())
)
return 200.0 * backend.mean(diff, axis=-1)
diff --git a/benchmark/metalearned/common/samplers.py b/benchmark/metalearned/common/samplers.py
index ebb2306..498a62f 100644
--- a/benchmark/metalearned/common/samplers.py
+++ b/benchmark/metalearned/common/samplers.py
@@ -2,12 +2,14 @@
class UnivariateTimeseriesSampler:
- def __init__(self,
- timeseries: np.ndarray,
- insample_size: int,
- outsample_size: int,
- window_sampling_limit: int,
- batch_size: int):
+ def __init__(
+ self,
+ timeseries: np.ndarray,
+ insample_size: int,
+ outsample_size: int,
+ window_sampling_limit: int,
+ batch_size: int,
+ ):
self.timeseries = [ts for ts in timeseries]
self.window_sampling_limit = window_sampling_limit
self.batch_size = batch_size
@@ -20,20 +22,29 @@ def __iter__(self):
insample_mask = np.zeros((self.batch_size, self.insample_size))
outsample = np.zeros((self.batch_size, self.outsample_size))
outsample_mask = np.zeros((self.batch_size, self.outsample_size))
- sampled_ts_indices = np.random.randint(len(self.timeseries), size=self.batch_size)
+ sampled_ts_indices = np.random.randint(
+ len(self.timeseries), size=self.batch_size
+ )
for i, sampled_index in enumerate(sampled_ts_indices):
sampled_timeseries = self.timeseries[sampled_index]
- cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit),
- high=len(sampled_timeseries),
- size=1)[0]
+ cut_point = np.random.randint(
+ low=max(1, len(sampled_timeseries) - self.window_sampling_limit),
+ high=len(sampled_timeseries),
+ size=1,
+ )[0]
- insample_window = sampled_timeseries[max(0, cut_point - self.insample_size):cut_point]
- insample[i, -len(insample_window):] = insample_window
- insample_mask[i, -len(insample_window):] = 1.0
+ insample_window = sampled_timeseries[
+ max(0, cut_point - self.insample_size) : cut_point
+ ]
+ insample[i, -len(insample_window) :] = insample_window
+ insample_mask[i, -len(insample_window) :] = 1.0
outsample_window = sampled_timeseries[
- cut_point:min(len(sampled_timeseries), cut_point + self.outsample_size)]
- outsample[i, :len(outsample_window)] = outsample_window
- outsample_mask[i, :len(outsample_window)] = 1.0
+ cut_point : min(
+ len(sampled_timeseries), cut_point + self.outsample_size
+ )
+ ]
+ outsample[i, : len(outsample_window)] = outsample_window
+ outsample_mask[i, : len(outsample_window)] = 1.0
yield insample, insample_mask, outsample, outsample_mask
def sequential_latest_insamples(self):
@@ -41,7 +52,7 @@ def sequential_latest_insamples(self):
insample = np.zeros((batch_size, self.insample_size))
insample_mask = np.zeros((batch_size, self.insample_size))
for i, ts in enumerate(self.timeseries):
- ts_last_window = ts[-self.insample_size:]
- insample[i, -len(ts):] = ts_last_window
- insample_mask[i, -len(ts):] = 1.0
+ ts_last_window = ts[-self.insample_size :]
+ insample[i, -len(ts) :] = ts_last_window
+ insample_mask[i, -len(ts) :] = 1.0
return insample, insample_mask
diff --git a/benchmark/metalearned/common/summary_utils.py b/benchmark/metalearned/common/summary_utils.py
index 704f16b..a784ad3 100644
--- a/benchmark/metalearned/common/summary_utils.py
+++ b/benchmark/metalearned/common/summary_utils.py
@@ -42,40 +42,69 @@ def __init__(self, filter_path: str, evaluator):
self.groups[parameter_key] = {}
if parameter_value not in self.groups[parameter_key]:
self.groups[parameter_key][parameter_value] = []
- self.groups[parameter_key][parameter_value].append(len(self.predictions) - 1)
+ self.groups[parameter_key][parameter_value].append(
+ len(self.predictions) - 1
+ )
self.group_names = ', '.join(self.groups.keys())
logging.debug(f'Loaded {len(self.predictions)} predictions')
logging.debug(f'Parameters: {self.group_names}')
- def bootstrap(self,
- ensemble_keys: List[str],
- bootstrap_key: str,
- bootstrap_size: int,
- number_of_samples: int):
+ def bootstrap(
+ self,
+ ensemble_keys: List[str],
+ bootstrap_key: str,
+ bootstrap_size: int,
+ number_of_samples: int,
+ ):
group_keys = self.groups.keys() - set(ensemble_keys)
- group_values = list(itertools.product(*map(lambda g: self.groups[g].keys(), group_keys)))
+ group_values = list(
+ itertools.product(*map(lambda g: self.groups[g].keys(), group_keys))
+ )
results = []
for group_instance in tqdm(group_values):
- group_ids = [set(self.groups[group_key][group_value]) for group_key, group_value in
- list(zip(group_keys, group_instance))]
+ group_ids = [
+ set(self.groups[group_key][group_value])
+ for group_key, group_value in list(zip(group_keys, group_instance))
+ ]
group_filter = set.intersection(*group_ids) if len(group_ids) > 0 else None
- if group_instance != () and (group_filter is None or len(group_filter) == 0):
+ if group_instance != () and (
+ group_filter is None or len(group_filter) == 0
+ ):
continue
for _ in range(number_of_samples):
sampled_ids = set(
- itertools.chain(*random.sample(list(self.groups[bootstrap_key].values()), k=bootstrap_size)))
- ensemble_ids = sampled_ids.intersection(group_filter) if group_filter is not None else sampled_ids
+ itertools.chain(
+ *random.sample(
+ list(self.groups[bootstrap_key].values()), k=bootstrap_size
+ )
+ )
+ )
+ ensemble_ids = (
+ sampled_ids.intersection(group_filter)
+ if group_filter is not None
+ else sampled_ids
+ )
if ensemble_ids is None or len(ensemble_ids) == 0:
continue
- ensemble_predictions = pd.concat([self.predictions[i]
- for i in ensemble_ids],
- sort=False).groupby(level='id', sort=False).median()
+ ensemble_predictions = (
+ pd.concat([self.predictions[i] for i in ensemble_ids], sort=False)
+ .groupby(level='id', sort=False)
+ .median()
+ )
group_columns = dict(zip(group_keys, group_instance))
- evaluation_results = self.evaluator.evaluate(ensemble_predictions.values)
+ evaluation_results = self.evaluator.evaluate(
+ ensemble_predictions.values
+ )
for evaluation_key, evaluation_value in evaluation_results.items():
- results.append(pd.DataFrame({
- 'metric': evaluation_value,
- 'evaluation_key': evaluation_key,
- **group_columns}, index=[0]))
+ results.append(
+ pd.DataFrame(
+ {
+ 'metric': evaluation_value,
+ 'evaluation_key': evaluation_key,
+ **group_columns,
+ },
+ index=[0],
+ )
+ )
return pd.concat(results, sort=False).reset_index()
diff --git a/benchmark/metalearned/common/timeseries.py b/benchmark/metalearned/common/timeseries.py
index 33428e4..e65c907 100644
--- a/benchmark/metalearned/common/timeseries.py
+++ b/benchmark/metalearned/common/timeseries.py
@@ -5,9 +5,9 @@
from pathlib import Path
from typing import Any, Callable, Dict, List, Tuple
+import dill
import numpy as np
from dateutil.relativedelta import relativedelta
-import dill
class TimeUnit(ABC):
@@ -98,25 +98,33 @@ class Timeseries:
meta: Dict[str, Any]
def copy(self, start_date: datetime, values: np.ndarray) -> 'Timeseries':
- return Timeseries(id=self.id,
- start_date=start_date,
- time_unit=self.time_unit,
- frequency=self.frequency,
- period=self.period,
- values=values,
- meta=self.meta)
+ return Timeseries(
+ id=self.id,
+ start_date=start_date,
+ time_unit=self.time_unit,
+ frequency=self.frequency,
+ period=self.period,
+ values=values,
+ meta=self.meta,
+ )
def future_values(self, values: np.ndarray) -> 'Timeseries':
- return self.copy(start_date=self.time_unit.add(self.start_date, len(self.values)), values=values)
+ return self.copy(
+ start_date=self.time_unit.add(self.start_date, len(self.values)),
+ values=values,
+ )
def split(self, n: int) -> TimeseriesSplit:
time_shift = n if n >= 0 else len(self.values) + n
split_time = self.time_unit.add(self.start_date, time_shift * self.frequency)
- return self.copy(start_date=self.start_date, values=self.values[:n]), self.copy(start_date=split_time,
- values=self.values[n:])
+ return self.copy(start_date=self.start_date, values=self.values[:n]), self.copy(
+ start_date=split_time, values=self.values[n:]
+ )
def split_by_time(self, split_date: datetime) -> TimeseriesSplit:
- points_to_include = int(self.time_unit.delta(split_date, self.start_date) // self.frequency)
+ points_to_include = int(
+ self.time_unit.delta(split_date, self.start_date) // self.frequency
+ )
if points_to_include < 0:
before = self.copy(split_date, np.empty(0))
on_and_after = self
@@ -135,8 +143,12 @@ def values(self) -> List[np.ndarray]:
def time_stamps(self) -> List[np.ndarray]:
def _make_time_stamps(ts):
- return np.array([ts.time_unit.add(ts.start_date, ts.frequency*i)
- for i in range(len(ts.values))])
+ return np.array(
+ [
+ ts.time_unit.add(ts.start_date, ts.frequency * i)
+ for i in range(len(ts.values))
+ ]
+ )
return list(map(_make_time_stamps, self.timeseries))
@@ -152,7 +164,9 @@ def filter(self, f: Callable[[Timeseries], bool]) -> 'TimeseriesBundle':
def map(self, f: Callable[[Timeseries], Timeseries]) -> 'TimeseriesBundle':
return TimeseriesBundle(list(map(f, self.timeseries)))
- def split(self, f: Callable[[Timeseries], TimeseriesSplit]) -> Tuple['TimeseriesBundle', 'TimeseriesBundle']:
+ def split(
+ self, f: Callable[[Timeseries], TimeseriesSplit]
+ ) -> Tuple['TimeseriesBundle', 'TimeseriesBundle']:
bucket_1 = []
bucket_2 = []
for timeseries in self.timeseries:
@@ -161,14 +175,20 @@ def split(self, f: Callable[[Timeseries], TimeseriesSplit]) -> Tuple['Timeseries
bucket_2.append(part_2)
return TimeseriesBundle(bucket_1), TimeseriesBundle(bucket_2)
- def intersection_by_id(self, bundle: 'TimeseriesBundle') -> Tuple['TimeseriesBundle', 'TimeseriesBundle']:
+ def intersection_by_id(
+ self, bundle: 'TimeseriesBundle'
+ ) -> Tuple['TimeseriesBundle', 'TimeseriesBundle']:
bundle_ids = bundle.ids()
ids = [ts_id for ts_id in self.ids() if ts_id in bundle_ids]
- return self.filter(lambda ts: ts.id in ids), bundle.filter(lambda ts: ts.id in ids)
+ return self.filter(lambda ts: ts.id in ids), bundle.filter(
+ lambda ts: ts.id in ids
+ )
def future_values(self, values: np.array) -> 'TimeseriesBundle':
assert len(values) == len(self.timeseries)
- return TimeseriesBundle([ts.future_values(values[i]) for i, ts in enumerate(self.timeseries)])
+ return TimeseriesBundle(
+ [ts.future_values(values[i]) for i, ts in enumerate(self.timeseries)]
+ )
class TimeseriesLoader(ABC):
@@ -192,4 +212,3 @@ def download(self) -> TimeseriesBundle:
:return: Training and test splits.
"""
pass
-
diff --git a/benchmark/metalearned/common/torch_utils.py b/benchmark/metalearned/common/torch_utils.py
index cdaf534..1ed055e 100644
--- a/benchmark/metalearned/common/torch_utils.py
+++ b/benchmark/metalearned/common/torch_utils.py
@@ -22,8 +22,8 @@ def to_device(module: t.nn.Module, use_cuda: bool = True):
def div_no_nan(a, b):
result = a / b
- result[result != result] = .0
- result[result == np.inf] = .0
+ result[result != result] = 0.0
+ result[result == np.inf] = 0.0
return result
@@ -33,11 +33,16 @@ def mape_loss(forecast, target, mask):
def smape_1_loss(forecast, target, mask):
- return 200 * t.mean(div_no_nan(t.abs(forecast - target), forecast.data + target.data) * mask)
+ return 200 * t.mean(
+ div_no_nan(t.abs(forecast - target), forecast.data + target.data) * mask
+ )
def smape_2_loss(forecast, target, mask):
- return 200 * t.mean(div_no_nan(t.abs(forecast - target), t.abs(forecast.data) + t.abs(target.data)) * mask)
+ return 200 * t.mean(
+ div_no_nan(t.abs(forecast - target), t.abs(forecast.data) + t.abs(target.data))
+ * mask
+ )
def mase_loss(insample, freq, forecast, target, mask):
@@ -47,7 +52,9 @@ def mase_loss(insample, freq, forecast, target, mask):
class SnapshotManager:
- def __init__(self, snapshot_dir: str, logging_frequency: int, snapshot_frequency: int):
+ def __init__(
+ self, snapshot_dir: str, logging_frequency: int, snapshot_frequency: int
+ ):
self.model_snapshot_file = os.path.join(snapshot_dir, 'model')
self.optimizer_snapshot_file = os.path.join(snapshot_dir, 'optimizer')
self.losses_file = os.path.join(snapshot_dir, 'losses')
@@ -59,16 +66,26 @@ def __init__(self, snapshot_dir: str, logging_frequency: int, snapshot_frequency
self.losses = {'training': {}, 'validation': {}}
self.time_track = {}
- def restore(self, model: Optional[t.nn.Module], optimizer: Optional[t.optim.Optimizer]) -> int:
+ def restore(
+ self, model: Optional[t.nn.Module], optimizer: Optional[t.optim.Optimizer]
+ ) -> int:
if model is not None and os.path.isfile(self.model_snapshot_file):
model.load_state_dict(t.load(self.model_snapshot_file))
if optimizer is not None and os.path.isfile(self.optimizer_snapshot_file):
optimizer.load_state_dict(t.load(self.optimizer_snapshot_file))
- iteration = t.load(self.iteration_file)['iteration'] if os.path.isfile(self.iteration_file) else 0
+ iteration = (
+ t.load(self.iteration_file)['iteration']
+ if os.path.isfile(self.iteration_file)
+ else 0
+ )
if os.path.isfile(self.losses_file):
losses = t.load(self.losses_file)
- training_losses = {k: v for k, v in losses['training'].items() if k <= iteration}
- validation_losses = {k: v for k, v in losses['validation'].items() if k <= iteration}
+ training_losses = {
+ k: v for k, v in losses['training'].items() if k <= iteration
+ }
+ validation_losses = {
+ k: v for k, v in losses['validation'].items() if k <= iteration
+ }
# when restoring remove losses which were after the last snapshot
self.losses = {'training': training_losses, 'validation': validation_losses}
self.snapshot(self.losses_file, self.losses)
@@ -86,12 +103,14 @@ def load_training_losses(self) -> pd.DataFrame:
def enable_time_tracking(self):
self.start_time = time.time()
- def register(self,
- iteration: int,
- training_loss: float,
- validation_loss: float,
- model: t.nn.Module,
- optimizer: Optional[t.optim.Optimizer]) -> None:
+ def register(
+ self,
+ iteration: int,
+ training_loss: float,
+ validation_loss: float,
+ model: t.nn.Module,
+ optimizer: Optional[t.optim.Optimizer],
+ ) -> None:
if iteration == 1 or iteration % self.logging_frequency == 0:
self.losses['training'][iteration] = training_loss
self.losses['validation'][iteration] = validation_loss
diff --git a/benchmark/metalearned/common/utils.py b/benchmark/metalearned/common/utils.py
index c456c46..2da7bdf 100644
--- a/benchmark/metalearned/common/utils.py
+++ b/benchmark/metalearned/common/utils.py
@@ -4,15 +4,15 @@
import pathlib
import sys
import urllib
-from decimal import Decimal, ROUND_HALF_UP
+from decimal import ROUND_HALF_UP, Decimal
from glob import glob
from itertools import dropwhile, takewhile
+from math import pow
from typing import Any, Callable, List
from urllib import request
import numpy as np
import pandas as pd
-from math import pow
from tqdm import tqdm
@@ -24,17 +24,32 @@ def get_module_path():
def round_half_up(n, precision):
- return int(Decimal(n * pow(10, precision)).to_integral_value(rounding=ROUND_HALF_UP)) / pow(10, precision)
-
-
-def median_ensemble(experiment_path: str,
- summary_filter: str = '**',
- forecast_file: str = 'forecast.csv',
- group_by: str = 'id'):
- return pd.concat([pd.read_csv(file)
- for file in
- tqdm(glob(os.path.join(experiment_path, summary_filter, forecast_file)))], sort=False) \
- .set_index(group_by).groupby(level=group_by, sort=False).median().values
+ return int(
+ Decimal(n * pow(10, precision)).to_integral_value(rounding=ROUND_HALF_UP)
+ ) / pow(10, precision)
+
+
+def median_ensemble(
+ experiment_path: str,
+ summary_filter: str = '**',
+ forecast_file: str = 'forecast.csv',
+ group_by: str = 'id',
+):
+ return (
+ pd.concat(
+ [
+ pd.read_csv(file)
+ for file in tqdm(
+ glob(os.path.join(experiment_path, summary_filter, forecast_file))
+ )
+ ],
+ sort=False,
+ )
+ .set_index(group_by)
+ .groupby(level=group_by, sort=False)
+ .median()
+ .values
+ )
def group_values(values: np.ndarray, groups: np.ndarray, group_name: str):
@@ -50,8 +65,11 @@ def download_url(url: str, file_path: str) -> None:
"""
def progress(count, block_size, total_size):
- sys.stdout.write('\rDownloading {} from {} {:.1f}%'.format(file_path, url, float(count * block_size) / float(
- total_size) * 100.0))
+ sys.stdout.write(
+ '\rDownloading {} from {} {:.1f}%'.format(
+ file_path, url, float(count * block_size) / float(total_size) * 100.0
+ )
+ )
sys.stdout.flush()
if not os.path.isfile(file_path):
@@ -63,7 +81,9 @@ def progress(count, block_size, total_size):
sys.stdout.write('\n')
sys.stdout.flush()
file_info = os.stat(f)
- logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.')
+ logging.info(
+ f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.'
+ )
else:
file_info = os.stat(file_path)
logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.')
@@ -107,5 +127,8 @@ def ordered_insert(ordered_stack: List, value, f: Callable[[Any, Any], bool]):
(and truncated if necessary).
:return: New instance of stack with inserted element.
"""
- return (list(takewhile(lambda x: f(x, value), ordered_stack)) + [value] +
- list(dropwhile(lambda x: f(x, value), ordered_stack)))[:len(ordered_stack)]
+ return (
+ list(takewhile(lambda x: f(x, value), ordered_stack))
+ + [value]
+ + list(dropwhile(lambda x: f(x, value), ordered_stack))
+ )[: len(ordered_stack)]
diff --git a/benchmark/metalearned/dataset.py b/benchmark/metalearned/dataset.py
index 6b41842..d3aad4c 100644
--- a/benchmark/metalearned/dataset.py
+++ b/benchmark/metalearned/dataset.py
@@ -7,11 +7,18 @@
import numpy as np
import pandas as pd
import patoolib
-from tqdm import tqdm
-
from common.settings import RESOURCES_DIR
-from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Year, Month, Day, Hour
+from common.timeseries import (
+ Day,
+ Hour,
+ Month,
+ Timeseries,
+ TimeseriesBundle,
+ TimeseriesLoader,
+ Year,
+)
from common.utils import download_url
+from tqdm import tqdm
@dataclass(frozen=True)
@@ -31,29 +38,37 @@ def period_map(self):
class M4Dataset(TimeseriesLoader):
def download(self) -> TimeseriesBundle:
url_template = 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/{}/{}-{}.csv'
- m4_info_url = 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/M4-info.csv'
+ m4_info_url = (
+ 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/M4-info.csv'
+ )
m4_info_path = os.path.join(self.path, 'M4info.csv')
ssl._create_default_https_context = ssl._create_unverified_context
download_url(m4_info_url, m4_info_path)
for sp in M4Meta.seasonal_patterns:
- training_url = url_template.format("Train", sp, "train")
- download_url(training_url, os.path.join(M4Meta.dataset_path, f'{sp}-train.csv'))
- test_url = url_template.format("Test", sp, "test")
+ training_url = url_template.format('Train', sp, 'train')
+ download_url(
+ training_url, os.path.join(M4Meta.dataset_path, f'{sp}-train.csv')
+ )
+ test_url = url_template.format('Test', sp, 'test')
download_url(test_url, os.path.join(M4Meta.dataset_path, f'{sp}-test.csv'))
# Download naive2 forecasts, needed for OWA metric
m4_naive2_archive = os.path.join(self.path, 'naive2.rar')
- download_url('https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-Naive2.rar',
- m4_naive2_archive)
+ download_url(
+ 'https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-Naive2.rar',
+ m4_naive2_archive,
+ )
patoolib.extract_archive(m4_naive2_archive, outdir=self.path)
os.remove(m4_naive2_archive)
# Download m4 competition winner predictions, for summary testing purposes only
m4_winner_archive = os.path.join(self.path, 'submission-118.rar')
- download_url('https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-118.rar',
- m4_winner_archive)
+ download_url(
+ 'https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-118.rar',
+ m4_winner_archive,
+ )
patoolib.extract_archive(m4_winner_archive, outdir=self.path)
os.remove(m4_winner_archive)
@@ -66,12 +81,14 @@ def download(self) -> TimeseriesBundle:
'Monthly': (Month(), 1),
'Weekly': (Day(), 7),
'Daily': (Day(), 1),
- 'Hourly': (Hour(), 1)
+ 'Hourly': (Hour(), 1),
}
all_timeseries = []
for sp in M4Meta.seasonal_patterns:
- training_set = pd.read_csv(os.path.join(M4Meta.dataset_path, f'{sp}-train.csv'))
+ training_set = pd.read_csv(
+ os.path.join(M4Meta.dataset_path, f'{sp}-train.csv')
+ )
test_set = pd.read_csv(os.path.join(M4Meta.dataset_path, f'{sp}-test.csv'))
time_unit, frequency = time_units_mapping[sp]
@@ -89,11 +106,15 @@ def download(self) -> TimeseriesBundle:
parsed_date = None
for parsing_format in parsing_formats:
try:
- parsed_date = datetime.strptime(timeseries_info.StartingDate, parsing_format)
+ parsed_date = datetime.strptime(
+ timeseries_info.StartingDate, parsing_format
+ )
except Exception:
continue
if parsed_date is None:
- raise ValueError(f'Could not parse {timeseries_info.StartingDate} for {timeseries_id}')
+ raise ValueError(
+ f'Could not parse {timeseries_info.StartingDate} for {timeseries_id}'
+ )
# all M4 years are in the 1900s or 1800s
if parsed_date.year > 2000:
parsed_date = parsed_date.replace(year=parsed_date.year - 100)
@@ -101,14 +122,15 @@ def download(self) -> TimeseriesBundle:
if parsed_date.year > 2000:
print('over')
- timeseries = Timeseries(id=timeseries_id,
- start_date=parsed_date,
- time_unit=time_unit,
- frequency=frequency,
- period=int(timeseries_info.Frequency),
- values=np.concatenate([training_values, test_values]),
- meta={'seasonal_pattern': sp}
- )
+ timeseries = Timeseries(
+ id=timeseries_id,
+ start_date=parsed_date,
+ time_unit=time_unit,
+ frequency=frequency,
+ period=int(timeseries_info.Frequency),
+ values=np.concatenate([training_values, test_values]),
+ meta={'seasonal_pattern': sp},
+ )
all_timeseries.append(timeseries)
return TimeseriesBundle(all_timeseries)
@@ -116,7 +138,9 @@ def download(self) -> TimeseriesBundle:
def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]:
bundle = self.load_cache()
horizons_map = M4Meta().horizons_map()
- return bundle.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]))
+ return bundle.split(
+ lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])
+ )
@staticmethod
def filter(bundle: TimeseriesBundle, seasonal_pattern: str) -> TimeseriesBundle:
diff --git a/benchmark/metalearned/experiments/tl/main.py b/benchmark/metalearned/experiments/tl/main.py
index 04173ec..1249558 100644
--- a/benchmark/metalearned/experiments/tl/main.py
+++ b/benchmark/metalearned/experiments/tl/main.py
@@ -4,18 +4,21 @@
import numpy as np
import pandas as pd
import torch as t
-from fire import Fire
-from scipy.interpolate import interp1d
-from torch import optim
-
-from common.experiment import create_experiment
-from common.experiment import load_experiment_parameters
+from common.experiment import create_experiment, load_experiment_parameters
from common.samplers import UnivariateTimeseriesSampler
from common.settings import experiment_path
from common.timeseries import TimeseriesBundle
-from common.torch_utils import SnapshotManager, to_device, to_tensor, mase_loss, mape_loss, smape_2_loss
+from common.torch_utils import (
+ SnapshotManager,
+ mape_loss,
+ mase_loss,
+ smape_2_loss,
+ to_device,
+ to_tensor,
+)
from common.utils import get_module_path
from experiments.tl.parameters import parameters
+from fire import Fire
from models.nbeats_torch import nbeats_generic, nbeats_interpretable
from resources.electricity.dataset import ElectricityDataset, ElectricityMeta
from resources.fred.dataset import FredDataset, FredMeta
@@ -23,19 +26,27 @@
from resources.m4.dataset import M4Dataset, M4Meta
from resources.tourism.dataset import TourismDataset, TourismMeta
from resources.traffic.dataset import TrafficDataset, TrafficMeta
+from scipy.interpolate import interp1d
+from torch import optim
module_path = get_module_path()
def init(name: str):
- create_experiment(experiment_path=experiment_path(module_path, name),
- parameters=parameters[name],
- command=lambda path, params: f'python {module_path}/main.py run --path={path}')
+ create_experiment(
+ experiment_path=experiment_path(module_path, name),
+ parameters=parameters[name],
+ command=lambda path, params: f'python {module_path}/main.py run --path={path}',
+ )
def run(path: str):
experiment_parameters = load_experiment_parameters(path)
- source_dataset_name = experiment_parameters['source_dataset'] if 'source_dataset' in experiment_parameters else 'M4'
+ source_dataset_name = (
+ experiment_parameters['source_dataset']
+ if 'source_dataset' in experiment_parameters
+ else 'M4'
+ )
loss_name = experiment_parameters['loss_name']
model_horizons = {
@@ -95,67 +106,90 @@ def run(path: str):
tl_models = {}
for model_name, horizon in model_horizons.items():
sp = model_sps[model_name]
- training_subset = source_dataset.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
+ training_subset = source_dataset.filter(
+ lambda ts: ts.meta['seasonal_pattern'] == sp
+ )
training_values = np.array(training_subset.values())
if source_dataset_name == 'FRED': # interpolate monthly data
if model_name == 'H24':
training_values = []
for values in training_subset.values():
- interpolation_fn = interp1d(x=np.array(range(len(values))), y=values, kind='linear')
- training_values.append(interpolation_fn(np.arange(0, len(values) - 0.5, 0.5)))
+ interpolation_fn = interp1d(
+ x=np.array(range(len(values))), y=values, kind='linear'
+ )
+ training_values.append(
+ interpolation_fn(np.arange(0, len(values) - 0.5, 0.5))
+ )
training_values = np.array(training_values)
elif model_name == 'H48':
training_values = []
for values in training_subset.values():
- interpolation_fn = interp1d(x=np.array(range(len(values))), y=values, kind='linear')
- training_values.append(interpolation_fn(np.arange(0, len(values) - 0.75, 0.25)))
+ interpolation_fn = interp1d(
+ x=np.array(range(len(values))), y=values, kind='linear'
+ )
+ training_values.append(
+ interpolation_fn(np.arange(0, len(values) - 0.75, 0.25))
+ )
training_values = np.array(training_values)
input_size = experiment_parameters['lookback_period'] * horizon
- training_dataset = UnivariateTimeseriesSampler(timeseries=training_values,
- insample_size=input_size,
- outsample_size=horizon,
- window_sampling_limit=int(
- experiment_parameters['history_horizons'] * horizon),
- batch_size=experiment_parameters['batch_size'])
+ training_dataset = UnivariateTimeseriesSampler(
+ timeseries=training_values,
+ insample_size=input_size,
+ outsample_size=horizon,
+ window_sampling_limit=int(
+ experiment_parameters['history_horizons'] * horizon
+ ),
+ batch_size=experiment_parameters['batch_size'],
+ )
#
# Training
#
snapshot_dir = os.path.join(path, 'snapshots', model_name)
- snapshot_manager = SnapshotManager(snapshot_dir=snapshot_dir,
- logging_frequency=experiment_parameters['logging_frequency'],
- snapshot_frequency=experiment_parameters['snapshot_frequency'])
+ snapshot_manager = SnapshotManager(
+ snapshot_dir=snapshot_dir,
+ logging_frequency=experiment_parameters['logging_frequency'],
+ snapshot_frequency=experiment_parameters['snapshot_frequency'],
+ )
if experiment_parameters['model_type'] == 'generic':
- model = nbeats_generic(input_size=input_size,
- output_size=horizon,
- blocks=experiment_parameters['blocks'],
- stacks=experiment_parameters['stacks'],
- fc_layers=experiment_parameters['layers'],
- fc_layers_size=experiment_parameters['width'],
- scaling=experiment_parameters['scaling'],
- mode=experiment_parameters['mode'])
+ model = nbeats_generic(
+ input_size=input_size,
+ output_size=horizon,
+ blocks=experiment_parameters['blocks'],
+ stacks=experiment_parameters['stacks'],
+ fc_layers=experiment_parameters['layers'],
+ fc_layers_size=experiment_parameters['width'],
+ scaling=experiment_parameters['scaling'],
+ mode=experiment_parameters['mode'],
+ )
else:
- model = nbeats_interpretable(input_size=input_size,
- output_size=horizon,
- trend_blocks=experiment_parameters['trend_blocks'],
- trend_fc_layers=experiment_parameters['layers'],
- trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'],
- degree_of_polynomial=experiment_parameters['degree_of_polynomial'],
- seasonality_blocks=experiment_parameters['seasonality_blocks'],
- seasonality_fc_layers=experiment_parameters['layers'],
- seasonality_fc_layers_size=experiment_parameters['seasonality_fc_layers_size'],
- num_of_harmonics=experiment_parameters['num_of_harmonics'],
- scaling=experiment_parameters['scaling'],
- mode=experiment_parameters['mode'])
+ model = nbeats_interpretable(
+ input_size=input_size,
+ output_size=horizon,
+ trend_blocks=experiment_parameters['trend_blocks'],
+ trend_fc_layers=experiment_parameters['layers'],
+ trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'],
+ degree_of_polynomial=experiment_parameters['degree_of_polynomial'],
+ seasonality_blocks=experiment_parameters['seasonality_blocks'],
+ seasonality_fc_layers=experiment_parameters['layers'],
+ seasonality_fc_layers_size=experiment_parameters[
+ 'seasonality_fc_layers_size'
+ ],
+ num_of_harmonics=experiment_parameters['num_of_harmonics'],
+ scaling=experiment_parameters['scaling'],
+ mode=experiment_parameters['mode'],
+ )
model = to_device(model)
- optimizer = optim.Adam(model.parameters(),
- lr=experiment_parameters['learning_rate'],
- weight_decay=0.0)
+ optimizer = optim.Adam(
+ model.parameters(),
+ lr=experiment_parameters['learning_rate'],
+ weight_decay=0.0,
+ )
lr_decay_step = experiment_parameters['iterations'] // 3
if lr_decay_step == 0:
@@ -176,7 +210,9 @@ def run(path: str):
if loss_name == 'MAPE':
training_loss = mape_loss(forecast, y, y_mask)
elif loss_name == 'MASE':
- training_loss = mase_loss(x, training_subset.timeseries[0].period, forecast, y, y_mask)
+ training_loss = mase_loss(
+ x, training_subset.timeseries[0].period, forecast, y, y_mask
+ )
elif loss_name == 'SMAPE':
training_loss = smape_2_loss(forecast, y, y_mask)
else:
@@ -190,13 +226,22 @@ def run(path: str):
optimizer.step()
for param_group in optimizer.param_groups:
- param_group['lr'] = experiment_parameters['learning_rate'] * 0.5 ** (i // lr_decay_step)
-
- snapshot_manager.register(iteration=i,
- training_loss=float(training_loss),
- validation_loss=np.nan, model=model,
- optimizer=optimizer)
- tl_models[model_name] = {'p_model': model, 'p_input_size': input_size, 'p_horizon': horizon}
+ param_group['lr'] = experiment_parameters['learning_rate'] * 0.5 ** (
+ i // lr_decay_step
+ )
+
+ snapshot_manager.register(
+ iteration=i,
+ training_loss=float(training_loss),
+ validation_loss=np.nan,
+ model=model,
+ optimizer=optimizer,
+ )
+ tl_models[model_name] = {
+ 'p_model': model,
+ 'p_input_size': input_size,
+ 'p_horizon': horizon,
+ }
#
# Predictions
@@ -205,113 +250,233 @@ def run(path: str):
def forecast(bundle: TimeseriesBundle, p_model, p_input_size, p_horizon):
forecasts = []
input_set = np.array(bundle.values())
- input_set = UnivariateTimeseriesSampler(timeseries=input_set,
- insample_size=p_input_size,
- outsample_size=0,
- window_sampling_limit=1,
- batch_size=1)
+ input_set = UnivariateTimeseriesSampler(
+ timeseries=input_set,
+ insample_size=p_input_size,
+ outsample_size=0,
+ window_sampling_limit=1,
+ batch_size=1,
+ )
p_x, p_x_mask = map(to_tensor, input_set.sequential_latest_insamples())
p_model.eval()
with t.no_grad():
forecasts.extend(p_model(p_x, p_x_mask).cpu().detach().numpy())
- forecasts_df = pd.DataFrame(forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)])
+ forecasts_df = pd.DataFrame(
+ forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)]
+ )
forecasts_df.index = bundle.ids()
forecasts_df.index.name = 'id'
return forecasts_df
- def rolling_daily_forecast(base_insample: TimeseriesBundle, rolling_insample: TimeseriesBundle,
- p_model, p_input_size, p_horizon):
+ def rolling_daily_forecast(
+ base_insample: TimeseriesBundle,
+ rolling_insample: TimeseriesBundle,
+ p_model,
+ p_input_size,
+ p_horizon,
+ ):
forecasts = []
base_insample_values = np.array(base_insample.values())
rolling_insample_values = np.array(rolling_insample.values())
for window_id in range(7):
- insample = np.concatenate([base_insample_values, rolling_insample_values[:, :window_id * p_horizon]],
- axis=1)
- input_set = UnivariateTimeseriesSampler(timeseries=insample,
- insample_size=p_input_size,
- outsample_size=0,
- window_sampling_limit=1,
- batch_size=1)
+ insample = np.concatenate(
+ [
+ base_insample_values,
+ rolling_insample_values[:, : window_id * p_horizon],
+ ],
+ axis=1,
+ )
+ input_set = UnivariateTimeseriesSampler(
+ timeseries=insample,
+ insample_size=p_input_size,
+ outsample_size=0,
+ window_sampling_limit=1,
+ batch_size=1,
+ )
p_x, p_x_mask = map(to_tensor, input_set.sequential_latest_insamples())
p_model.eval()
with t.no_grad():
window_forecast = p_model(p_x, p_x_mask).cpu().detach().numpy()
- forecasts = window_forecast if len(forecasts) == 0 else np.concatenate([forecasts, window_forecast],
- axis=1)
-
- forecasts_df = pd.DataFrame(forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon * 7)])
+ forecasts = (
+ window_forecast
+ if len(forecasts) == 0
+ else np.concatenate([forecasts, window_forecast], axis=1)
+ )
+
+ forecasts_df = pd.DataFrame(
+ forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon * 7)]
+ )
forecasts_df.index = base_insample.ids()
forecasts_df.index.name = 'id'
- forecasts_df.columns = [f'V{i}' for i in range(1, len(forecasts_df.columns) + 1)]
+ forecasts_df.columns = [
+ f'V{i}' for i in range(1, len(forecasts_df.columns) + 1)
+ ]
return forecasts_df
# M4
target_input, _ = M4Dataset(M4Meta.dataset_path).standard_split()
- yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), **tl_models['Y6'])
- quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), **tl_models['Q8'])
- monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), **tl_models['M18'])
- weekly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'), **tl_models['W13'])
- daily = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'), **tl_models['D14'])
- hourly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Hourly'), **tl_models['H48'])
+ yearly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'),
+ **tl_models['Y6'],
+ )
+ quarterly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'),
+ **tl_models['Q8'],
+ )
+ monthly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'),
+ **tl_models['M18'],
+ )
+ weekly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'),
+ **tl_models['W13'],
+ )
+ daily = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'),
+ **tl_models['D14'],
+ )
+ hourly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Hourly'),
+ **tl_models['H48'],
+ )
pd.concat([yearly, quarterly, monthly, weekly, daily, hourly], sort=False).to_csv(
- os.path.join(os.path.join(path, 'M4.csv')))
+ os.path.join(os.path.join(path, 'M4.csv'))
+ )
# M3
target_input, _ = M3Dataset(M3Meta.dataset_path).standard_split()
- yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Year'), **tl_models['Y6'])
- quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Quart'), **tl_models['Q8'])
- monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Month'), **tl_models['M18'])
- others = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Other'), **tl_models['Q8'])
- pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv(os.path.join(os.path.join(path, 'M3.csv')))
+ yearly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Year'),
+ **tl_models['Y6'],
+ )
+ quarterly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Quart'),
+ **tl_models['Q8'],
+ )
+ monthly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Month'),
+ **tl_models['M18'],
+ )
+ others = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Other'),
+ **tl_models['Q8'],
+ )
+ pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv(
+ os.path.join(os.path.join(path, 'M3.csv'))
+ )
# Tourism
target_input, _ = TourismDataset(TourismMeta.dataset_path).standard_split()
- yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), **tl_models['Y4'])
- quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), **tl_models['Q8'])
- monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), **tl_models['M24'])
- pd.concat([yearly, quarterly, monthly], sort=False).to_csv(os.path.join(os.path.join(path, 'tourism.csv')))
+ yearly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'),
+ **tl_models['Y4'],
+ )
+ quarterly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'),
+ **tl_models['Q8'],
+ )
+ monthly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'),
+ **tl_models['M24'],
+ )
+ pd.concat([yearly, quarterly, monthly], sort=False).to_csv(
+ os.path.join(os.path.join(path, 'tourism.csv'))
+ )
# Electricity
- target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path). \
- load_cache().split(lambda ts: ts.split(-24 * 7))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'electricity_last_window.csv')))
-
- target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path).load_cache(). \
- split(lambda ts: ts.split_by_time(ElectricityMeta.deepar_split))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'electricity_deepar.csv')))
-
- target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path).load_cache(). \
- split(lambda ts: ts.split_by_time(ElectricityMeta.deepfact_split))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'electricity_deepfactors.csv')))
+ target_input, rolling_target_input = (
+ ElectricityDataset(ElectricityMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split(-24 * 7))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'electricity_last_window.csv')))
+
+ target_input, rolling_target_input = (
+ ElectricityDataset(ElectricityMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split_by_time(ElectricityMeta.deepar_split))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'electricity_deepar.csv')))
+
+ target_input, rolling_target_input = (
+ ElectricityDataset(ElectricityMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split_by_time(ElectricityMeta.deepfact_split))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'electricity_deepfactors.csv')))
# Traffic
- target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache().\
- split(lambda ts: ts.split(-24 * 7))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'traffic_last_window.csv')))
-
- target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache(). \
- split(lambda ts: ts.split_by_time(TrafficMeta.deepar_split))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'traffic_deepar.csv')))
-
- target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache(). \
- split(lambda ts: ts.split_by_time(TrafficMeta.deepfact_split))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'traffic_deepfactors.csv')))
+ target_input, rolling_target_input = (
+ TrafficDataset(TrafficMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split(-24 * 7))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'traffic_last_window.csv')))
+
+ target_input, rolling_target_input = (
+ TrafficDataset(TrafficMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split_by_time(TrafficMeta.deepar_split))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'traffic_deepar.csv')))
+
+ target_input, rolling_target_input = (
+ TrafficDataset(TrafficMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split_by_time(TrafficMeta.deepfact_split))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'traffic_deepfactors.csv')))
# FRED
target_input, _ = FredDataset(FredMeta.dataset_path).standard_split()
- yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), **tl_models['Y6'])
- quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), **tl_models['Q8'])
- monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), **tl_models['M18'])
- weekly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'), **tl_models['W13'])
- daily = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'), **tl_models['D14'])
- pd.concat([yearly, quarterly, monthly, weekly, daily]).to_csv(os.path.join(os.path.join(path, 'fred.csv')))
+ yearly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'),
+ **tl_models['Y6'],
+ )
+ quarterly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'),
+ **tl_models['Q8'],
+ )
+ monthly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'),
+ **tl_models['M18'],
+ )
+ weekly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'),
+ **tl_models['W13'],
+ )
+ daily = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'),
+ **tl_models['D14'],
+ )
+ pd.concat([yearly, quarterly, monthly, weekly, daily]).to_csv(
+ os.path.join(os.path.join(path, 'fred.csv'))
+ )
def evaluate(name: str, summary_filter: str, validation_mode: bool = False):
diff --git a/benchmark/metalearned/experiments/tl/parameters.py b/benchmark/metalearned/experiments/tl/parameters.py
index 789b624..6e70d26 100644
--- a/benchmark/metalearned/experiments/tl/parameters.py
+++ b/benchmark/metalearned/experiments/tl/parameters.py
@@ -1,31 +1,24 @@
common = {
'repeats': list(range(10)),
-
'lookback_period': list(range(2, 8)),
'loss_name': 'MASE',
'scaling': 'max',
'iterations': 15000,
'history_horizons': 10,
-
'batch_size': 1024,
'learning_rate': 0.001,
-
'mode': 'dress',
-
'width': 512,
'layers': 4,
'blocks': 10,
'stacks': 1,
-
# interpretable
'trend_blocks': 3,
'trend_fc_layers_size': 256,
'degree_of_polynomial': 3,
-
'seasonality_blocks': 3,
'seasonality_fc_layers_size': 2048,
'num_of_harmonics': 1,
-
'logging_frequency': 500,
'snapshot_frequency': 5000,
}
@@ -39,7 +32,7 @@
'blocks': 30,
'stacks': 1,
'iterations': [5000, 15000],
- 'loss_name': ['MASE', 'MAPE', 'SMAPE']
+ 'loss_name': ['MASE', 'MAPE', 'SMAPE'],
},
'shared_grid': {
**common,
@@ -76,5 +69,5 @@
'blocks': 1,
'stacks': 30,
'mode': 'dress',
- }
+ },
}
diff --git a/benchmark/metalearned/main.py b/benchmark/metalearned/main.py
index dd45654..db0a1eb 100644
--- a/benchmark/metalearned/main.py
+++ b/benchmark/metalearned/main.py
@@ -4,18 +4,21 @@
import numpy as np
import pandas as pd
import torch as t
-from fire import Fire
-from scipy.interpolate import interp1d
-from torch import optim
-
-from common.experiment import create_experiment
-from common.experiment import load_experiment_parameters
+from common.experiment import create_experiment, load_experiment_parameters
from common.samplers import UnivariateTimeseriesSampler
from common.settings import experiment_path
from common.timeseries import TimeseriesBundle
-from common.torch_utils import SnapshotManager, to_device, to_tensor, mase_loss, mape_loss, smape_2_loss
+from common.torch_utils import (
+ SnapshotManager,
+ mape_loss,
+ mase_loss,
+ smape_2_loss,
+ to_device,
+ to_tensor,
+)
from common.utils import get_module_path
from experiments.tl.parameters import parameters
+from fire import Fire
from models.nbeats_torch import nbeats_generic, nbeats_interpretable
from resources.electricity.dataset import ElectricityDataset, ElectricityMeta
from resources.fred.dataset import FredDataset, FredMeta
@@ -23,19 +26,27 @@
from resources.m4.dataset import M4Dataset, M4Meta
from resources.tourism.dataset import TourismDataset, TourismMeta
from resources.traffic.dataset import TrafficDataset, TrafficMeta
+from scipy.interpolate import interp1d
+from torch import optim
module_path = get_module_path()
def init(name: str):
- create_experiment(experiment_path=experiment_path(module_path, name),
- parameters=parameters[name],
- command=lambda path, params: f'python {module_path}/main.py run --path={path}')
+ create_experiment(
+ experiment_path=experiment_path(module_path, name),
+ parameters=parameters[name],
+ command=lambda path, params: f'python {module_path}/main.py run --path={path}',
+ )
def run(path: str):
experiment_parameters = load_experiment_parameters(path)
- source_dataset_name = experiment_parameters['source_dataset'] if 'source_dataset' in experiment_parameters else 'M4'
+ source_dataset_name = (
+ experiment_parameters['source_dataset']
+ if 'source_dataset' in experiment_parameters
+ else 'M4'
+ )
loss_name = experiment_parameters['loss_name']
model_horizons = {
@@ -95,67 +106,90 @@ def run(path: str):
tl_models = {}
for model_name, horizon in model_horizons.items():
sp = model_sps[model_name]
- training_subset = source_dataset.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
+ training_subset = source_dataset.filter(
+ lambda ts: ts.meta['seasonal_pattern'] == sp
+ )
training_values = np.array(training_subset.values())
if source_dataset_name == 'FRED': # interpolate monthly data
if model_name == 'H24':
training_values = []
for values in training_subset.values():
- interpolation_fn = interp1d(x=np.array(range(len(values))), y=values, kind='linear')
- training_values.append(interpolation_fn(np.arange(0, len(values) - 0.5, 0.5)))
+ interpolation_fn = interp1d(
+ x=np.array(range(len(values))), y=values, kind='linear'
+ )
+ training_values.append(
+ interpolation_fn(np.arange(0, len(values) - 0.5, 0.5))
+ )
training_values = np.array(training_values)
elif model_name == 'H48':
training_values = []
for values in training_subset.values():
- interpolation_fn = interp1d(x=np.array(range(len(values))), y=values, kind='linear')
- training_values.append(interpolation_fn(np.arange(0, len(values) - 0.75, 0.25)))
+ interpolation_fn = interp1d(
+ x=np.array(range(len(values))), y=values, kind='linear'
+ )
+ training_values.append(
+ interpolation_fn(np.arange(0, len(values) - 0.75, 0.25))
+ )
training_values = np.array(training_values)
input_size = experiment_parameters['lookback_period'] * horizon
- training_dataset = UnivariateTimeseriesSampler(timeseries=training_values,
- insample_size=input_size,
- outsample_size=horizon,
- window_sampling_limit=int(
- experiment_parameters['history_horizons'] * horizon),
- batch_size=experiment_parameters['batch_size'])
+ training_dataset = UnivariateTimeseriesSampler(
+ timeseries=training_values,
+ insample_size=input_size,
+ outsample_size=horizon,
+ window_sampling_limit=int(
+ experiment_parameters['history_horizons'] * horizon
+ ),
+ batch_size=experiment_parameters['batch_size'],
+ )
#
# Training
#
snapshot_dir = os.path.join(path, 'snapshots', model_name)
- snapshot_manager = SnapshotManager(snapshot_dir=snapshot_dir,
- logging_frequency=experiment_parameters['logging_frequency'],
- snapshot_frequency=experiment_parameters['snapshot_frequency'])
+ snapshot_manager = SnapshotManager(
+ snapshot_dir=snapshot_dir,
+ logging_frequency=experiment_parameters['logging_frequency'],
+ snapshot_frequency=experiment_parameters['snapshot_frequency'],
+ )
if experiment_parameters['model_type'] == 'generic':
- model = nbeats_generic(input_size=input_size,
- output_size=horizon,
- blocks=experiment_parameters['blocks'],
- stacks=experiment_parameters['stacks'],
- fc_layers=experiment_parameters['layers'],
- fc_layers_size=experiment_parameters['width'],
- scaling=experiment_parameters['scaling'],
- mode=experiment_parameters['mode'])
+ model = nbeats_generic(
+ input_size=input_size,
+ output_size=horizon,
+ blocks=experiment_parameters['blocks'],
+ stacks=experiment_parameters['stacks'],
+ fc_layers=experiment_parameters['layers'],
+ fc_layers_size=experiment_parameters['width'],
+ scaling=experiment_parameters['scaling'],
+ mode=experiment_parameters['mode'],
+ )
else:
- model = nbeats_interpretable(input_size=input_size,
- output_size=horizon,
- trend_blocks=experiment_parameters['trend_blocks'],
- trend_fc_layers=experiment_parameters['layers'],
- trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'],
- degree_of_polynomial=experiment_parameters['degree_of_polynomial'],
- seasonality_blocks=experiment_parameters['seasonality_blocks'],
- seasonality_fc_layers=experiment_parameters['layers'],
- seasonality_fc_layers_size=experiment_parameters['seasonality_fc_layers_size'],
- num_of_harmonics=experiment_parameters['num_of_harmonics'],
- scaling=experiment_parameters['scaling'],
- mode=experiment_parameters['mode'])
+ model = nbeats_interpretable(
+ input_size=input_size,
+ output_size=horizon,
+ trend_blocks=experiment_parameters['trend_blocks'],
+ trend_fc_layers=experiment_parameters['layers'],
+ trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'],
+ degree_of_polynomial=experiment_parameters['degree_of_polynomial'],
+ seasonality_blocks=experiment_parameters['seasonality_blocks'],
+ seasonality_fc_layers=experiment_parameters['layers'],
+ seasonality_fc_layers_size=experiment_parameters[
+ 'seasonality_fc_layers_size'
+ ],
+ num_of_harmonics=experiment_parameters['num_of_harmonics'],
+ scaling=experiment_parameters['scaling'],
+ mode=experiment_parameters['mode'],
+ )
model = to_device(model)
- optimizer = optim.Adam(model.parameters(),
- lr=experiment_parameters['learning_rate'],
- weight_decay=0.0)
+ optimizer = optim.Adam(
+ model.parameters(),
+ lr=experiment_parameters['learning_rate'],
+ weight_decay=0.0,
+ )
lr_decay_step = experiment_parameters['iterations'] // 3
if lr_decay_step == 0:
@@ -176,7 +210,9 @@ def run(path: str):
if loss_name == 'MAPE':
training_loss = mape_loss(forecast, y, y_mask)
elif loss_name == 'MASE':
- training_loss = mase_loss(x, training_subset.timeseries[0].period, forecast, y, y_mask)
+ training_loss = mase_loss(
+ x, training_subset.timeseries[0].period, forecast, y, y_mask
+ )
elif loss_name == 'SMAPE':
training_loss = smape_2_loss(forecast, y, y_mask)
else:
@@ -190,13 +226,22 @@ def run(path: str):
optimizer.step()
for param_group in optimizer.param_groups:
- param_group['lr'] = experiment_parameters['learning_rate'] * 0.5 ** (i // lr_decay_step)
-
- snapshot_manager.register(iteration=i,
- training_loss=float(training_loss),
- validation_loss=np.nan, model=model,
- optimizer=optimizer)
- tl_models[model_name] = {'p_model': model, 'p_input_size': input_size, 'p_horizon': horizon}
+ param_group['lr'] = experiment_parameters['learning_rate'] * 0.5 ** (
+ i // lr_decay_step
+ )
+
+ snapshot_manager.register(
+ iteration=i,
+ training_loss=float(training_loss),
+ validation_loss=np.nan,
+ model=model,
+ optimizer=optimizer,
+ )
+ tl_models[model_name] = {
+ 'p_model': model,
+ 'p_input_size': input_size,
+ 'p_horizon': horizon,
+ }
#
# Predictions
@@ -205,104 +250,207 @@ def run(path: str):
def forecast(bundle: TimeseriesBundle, p_model, p_input_size, p_horizon):
forecasts = []
input_set = np.array(bundle.values())
- input_set = UnivariateTimeseriesSampler(timeseries=input_set,
- insample_size=p_input_size,
- outsample_size=0,
- window_sampling_limit=1,
- batch_size=1)
+ input_set = UnivariateTimeseriesSampler(
+ timeseries=input_set,
+ insample_size=p_input_size,
+ outsample_size=0,
+ window_sampling_limit=1,
+ batch_size=1,
+ )
p_x, p_x_mask = map(to_tensor, input_set.sequential_latest_insamples())
p_model.eval()
with t.no_grad():
forecasts.extend(p_model(p_x, p_x_mask).cpu().detach().numpy())
- forecasts_df = pd.DataFrame(forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)])
+ forecasts_df = pd.DataFrame(
+ forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)]
+ )
forecasts_df.index = bundle.ids()
forecasts_df.index.name = 'id'
return forecasts_df
- def rolling_daily_forecast(base_insample: TimeseriesBundle, rolling_insample: TimeseriesBundle,
- p_model, p_input_size, p_horizon):
+ def rolling_daily_forecast(
+ base_insample: TimeseriesBundle,
+ rolling_insample: TimeseriesBundle,
+ p_model,
+ p_input_size,
+ p_horizon,
+ ):
forecasts = []
base_insample_values = np.array(base_insample.values())
rolling_insample_values = np.array(rolling_insample.values())
for window_id in range(7):
- insample = np.concatenate([base_insample_values, rolling_insample_values[:, :window_id * p_horizon]],
- axis=1)
- input_set = UnivariateTimeseriesSampler(timeseries=insample,
- insample_size=p_input_size,
- outsample_size=0,
- window_sampling_limit=1,
- batch_size=1)
+ insample = np.concatenate(
+ [
+ base_insample_values,
+ rolling_insample_values[:, : window_id * p_horizon],
+ ],
+ axis=1,
+ )
+ input_set = UnivariateTimeseriesSampler(
+ timeseries=insample,
+ insample_size=p_input_size,
+ outsample_size=0,
+ window_sampling_limit=1,
+ batch_size=1,
+ )
p_x, p_x_mask = map(to_tensor, input_set.sequential_latest_insamples())
p_model.eval()
with t.no_grad():
window_forecast = p_model(p_x, p_x_mask).cpu().detach().numpy()
- forecasts = window_forecast if len(forecasts) == 0 else np.concatenate([forecasts, window_forecast],
- axis=1)
-
- forecasts_df = pd.DataFrame(forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon * 7)])
+ forecasts = (
+ window_forecast
+ if len(forecasts) == 0
+ else np.concatenate([forecasts, window_forecast], axis=1)
+ )
+
+ forecasts_df = pd.DataFrame(
+ forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon * 7)]
+ )
forecasts_df.index = base_insample.ids()
forecasts_df.index.name = 'id'
- forecasts_df.columns = [f'V{i}' for i in range(1, len(forecasts_df.columns) + 1)]
+ forecasts_df.columns = [
+ f'V{i}' for i in range(1, len(forecasts_df.columns) + 1)
+ ]
return forecasts_df
# M4
target_input, _ = M4Dataset(M4Meta.dataset_path).standard_split()
- yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), **tl_models['Y6'])
- quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), **tl_models['Q8'])
- monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), **tl_models['M18'])
- weekly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'), **tl_models['W13'])
- daily = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'), **tl_models['D14'])
- hourly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Hourly'), **tl_models['H48'])
+ yearly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'),
+ **tl_models['Y6'],
+ )
+ quarterly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'),
+ **tl_models['Q8'],
+ )
+ monthly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'),
+ **tl_models['M18'],
+ )
+ weekly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'),
+ **tl_models['W13'],
+ )
+ daily = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'),
+ **tl_models['D14'],
+ )
+ hourly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Hourly'),
+ **tl_models['H48'],
+ )
pd.concat([yearly, quarterly, monthly, weekly, daily, hourly], sort=False).to_csv(
- os.path.join(os.path.join(path, 'M4.csv')))
+ os.path.join(os.path.join(path, 'M4.csv'))
+ )
# M3
target_input, _ = M3Dataset(M3Meta.dataset_path).standard_split()
- yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Year'), **tl_models['Y6'])
- quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Quart'), **tl_models['Q8'])
- monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Month'), **tl_models['M18'])
- others = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Other'), **tl_models['Q8'])
- pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv(os.path.join(os.path.join(path, 'M3.csv')))
+ yearly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Year'),
+ **tl_models['Y6'],
+ )
+ quarterly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Quart'),
+ **tl_models['Q8'],
+ )
+ monthly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Month'),
+ **tl_models['M18'],
+ )
+ others = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Other'),
+ **tl_models['Q8'],
+ )
+ pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv(
+ os.path.join(os.path.join(path, 'M3.csv'))
+ )
# Tourism
target_input, _ = TourismDataset(TourismMeta.dataset_path).standard_split()
- yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), **tl_models['Y4'])
- quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), **tl_models['Q8'])
- monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), **tl_models['M24'])
- pd.concat([yearly, quarterly, monthly], sort=False).to_csv(os.path.join(os.path.join(path, 'tourism.csv')))
+ yearly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'),
+ **tl_models['Y4'],
+ )
+ quarterly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'),
+ **tl_models['Q8'],
+ )
+ monthly = forecast(
+ target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'),
+ **tl_models['M24'],
+ )
+ pd.concat([yearly, quarterly, monthly], sort=False).to_csv(
+ os.path.join(os.path.join(path, 'tourism.csv'))
+ )
# Electricity
- target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path). \
- load_cache().split(lambda ts: ts.split(-24 * 7))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'electricity_last_window.csv')))
-
- target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path).load_cache(). \
- split(lambda ts: ts.split_by_time(ElectricityMeta.deepar_split))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'electricity_deepar.csv')))
-
- target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path).load_cache(). \
- split(lambda ts: ts.split_by_time(ElectricityMeta.deepfact_split))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'electricity_deepfactors.csv')))
+ target_input, rolling_target_input = (
+ ElectricityDataset(ElectricityMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split(-24 * 7))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'electricity_last_window.csv')))
+
+ target_input, rolling_target_input = (
+ ElectricityDataset(ElectricityMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split_by_time(ElectricityMeta.deepar_split))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'electricity_deepar.csv')))
+
+ target_input, rolling_target_input = (
+ ElectricityDataset(ElectricityMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split_by_time(ElectricityMeta.deepfact_split))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'electricity_deepfactors.csv')))
# Traffic
- target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache().\
- split(lambda ts: ts.split(-24 * 7))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'traffic_last_window.csv')))
-
- target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache(). \
- split(lambda ts: ts.split_by_time(TrafficMeta.deepar_split))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'traffic_deepar.csv')))
-
- target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache(). \
- split(lambda ts: ts.split_by_time(TrafficMeta.deepfact_split))
- rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \
- to_csv(os.path.join(os.path.join(path, 'traffic_deepfactors.csv')))
+ target_input, rolling_target_input = (
+ TrafficDataset(TrafficMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split(-24 * 7))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'traffic_last_window.csv')))
+
+ target_input, rolling_target_input = (
+ TrafficDataset(TrafficMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split_by_time(TrafficMeta.deepar_split))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'traffic_deepar.csv')))
+
+ target_input, rolling_target_input = (
+ TrafficDataset(TrafficMeta.dataset_path)
+ .load_cache()
+ .split(lambda ts: ts.split_by_time(TrafficMeta.deepfact_split))
+ )
+ rolling_daily_forecast(
+ base_insample=target_input,
+ rolling_insample=rolling_target_input,
+ **tl_models['H24'],
+ ).to_csv(os.path.join(os.path.join(path, 'traffic_deepfactors.csv')))
# FRED
# target_input, _ = FredDataset(FredMeta.dataset_path).standard_split()
diff --git a/benchmark/metalearned/main_ForecastPFN.py b/benchmark/metalearned/main_ForecastPFN.py
index 0bd365e..ad51509 100644
--- a/benchmark/metalearned/main_ForecastPFN.py
+++ b/benchmark/metalearned/main_ForecastPFN.py
@@ -1,42 +1,40 @@
-from tqdm import tqdm
import pathlib
import sys
+
+from tqdm import tqdm
+
sys.path.append('..')
import logging
import os
-import numpy as np
import pandas as pd
-import torch as t
import tensorflow as tf
-import tensorflow_io
-from fire import Fire
-from scipy.interpolate import interp1d
-from torch import optim
-
-from common.experiment import create_experiment
-from common.experiment import load_experiment_parameters
-from data_provider.UnivariateTimeseriesSampler_WithStamps import UnivariateTimeseriesSampler_WithStamps
-from exp.exp_ForecastPFN import Exp_ForecastPFN
+from common.experiment import create_experiment, load_experiment_parameters
+from common.metrics import smape
from common.settings import experiment_path
from common.timeseries import TimeseriesBundle
-from common.torch_utils import SnapshotManager, to_device, to_tensor, mase_loss, mape_loss, smape_2_loss
+from common.torch_utils import to_tensor
from common.utils import get_module_path
-from common.metrics import smape
from experiments.tl.parameters import parameters
-from models.nbeats_torch import nbeats_generic, nbeats_interpretable
+from fire import Fire
from resources.m3.dataset import M3Dataset, M3Meta
-from resources.m4.dataset import M4Dataset, M4Meta
from resources.tourism.dataset import TourismDataset, TourismMeta
+from data_provider.UnivariateTimeseriesSampler_WithStamps import (
+ UnivariateTimeseriesSampler_WithStamps,
+)
+from exp.exp_ForecastPFN import Exp_ForecastPFN
+
module_path = get_module_path()
def init(name: str):
- create_experiment(experiment_path=experiment_path(module_path, name),
- parameters=parameters[name],
- command=lambda path, params: f'python {module_path}/main.py run --path={path}')
+ create_experiment(
+ experiment_path=experiment_path(module_path, name),
+ parameters=parameters[name],
+ command=lambda path, params: f'python {module_path}/main.py run --path={path}',
+ )
def run(path: str):
@@ -59,65 +57,79 @@ def run(path: str):
input_size = experiment_parameters['lookback_period'] * horizon
model = Exp_ForecastPFN(None)
- tl_models[model_name] = {'p_model': model, 'p_input_size': input_size, 'p_horizon': horizon}
+ tl_models[model_name] = {
+ 'p_model': model,
+ 'p_input_size': input_size,
+ 'p_horizon': horizon,
+ }
#
# Predictions
#
- def forecast(in_bundle: TimeseriesBundle, out_bundle: TimeseriesBundle,
- sp: str,
- p_model, p_input_size, p_horizon):
+ def forecast(
+ in_bundle: TimeseriesBundle,
+ out_bundle: TimeseriesBundle,
+ sp: str,
+ p_model,
+ p_input_size,
+ p_horizon,
+ ):
forecasts = []
- in_bundle = in_bundle.filter(
- lambda ts: ts.meta['seasonal_pattern'] == sp)
- out_bundle = out_bundle.filter(
- lambda ts: ts.meta['seasonal_pattern'] == sp)
+ in_bundle = in_bundle.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
+ out_bundle = out_bundle.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
input_set = in_bundle.values()
input_timestamps = in_bundle.time_stamps()
- input_set = UnivariateTimeseriesSampler_WithStamps(timeseries=input_set,
- time_stamps=input_timestamps,
- insample_size=p_input_size,
- outsample_size=0,
- window_sampling_limit=1,
- batch_size=1,
- time_features=p_model._ForecastPFN_time_features,
- )
+ input_set = UnivariateTimeseriesSampler_WithStamps(
+ timeseries=input_set,
+ time_stamps=input_timestamps,
+ insample_size=p_input_size,
+ outsample_size=0,
+ window_sampling_limit=1,
+ batch_size=1,
+ time_features=p_model._ForecastPFN_time_features,
+ )
p_x, p_x_mask, p_x_timestamps = input_set.sequential_latest_insamples()
output_set = out_bundle.values()
output_timestamps = out_bundle.time_stamps()
- output_set = UnivariateTimeseriesSampler_WithStamps(timeseries=output_set,
- time_stamps=output_timestamps,
- insample_size=p_horizon,
- outsample_size=0,
- window_sampling_limit=1,
- batch_size=1,
- time_features=p_model._ForecastPFN_time_features,
- )
+ output_set = UnivariateTimeseriesSampler_WithStamps(
+ timeseries=output_set,
+ time_stamps=output_timestamps,
+ insample_size=p_horizon,
+ outsample_size=0,
+ window_sampling_limit=1,
+ batch_size=1,
+ time_features=p_model._ForecastPFN_time_features,
+ )
p_y, p_y_mask, p_y_timestamps = output_set.sequential_latest_insamples()
x, x_mark, y, y_mark = p_x, p_x_timestamps, p_y, p_y_timestamps
batch_x, batch_y = to_tensor(x)[:, :, None], to_tensor(y)[:, :, None]
- batch_x_mark, batch_y_mark = to_tensor(
- x_mark.astype(int)), to_tensor(y_mark.astype(int))
-
-
+ batch_x_mark, batch_y_mark = (
+ to_tensor(x_mark.astype(int)),
+ to_tensor(y_mark.astype(int)),
+ )
+
model = tf.keras.models.load_model(
- str(pathlib.Path(path).parent) + '/ckpts/', custom_objects={'smape': smape})
- for idx, (x, y, x_mark, y_mark) in tqdm(enumerate(zip(batch_x, batch_y, batch_x_mark, batch_y_mark))):
+ str(pathlib.Path(path).parent) + '/ckpts/', custom_objects={'smape': smape}
+ )
+ for idx, (x, y, x_mark, y_mark) in tqdm(
+ enumerate(zip(batch_x, batch_y, batch_x_mark, batch_y_mark))
+ ):
pred = p_model._process_tuple(x, x_mark, y_mark, model, p_horizon)
forecasts.extend(pred)
- forecasts_df = pd.DataFrame(forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)])
+ forecasts_df = pd.DataFrame(
+ forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)]
+ )
forecasts_df.index = in_bundle.ids()
forecasts_df.index.name = 'id'
return forecasts_df
-
# M4
# target_input, target_output = M4Dataset(
# M4Meta.dataset_path).standard_split()
@@ -131,20 +143,26 @@ def forecast(in_bundle: TimeseriesBundle, out_bundle: TimeseriesBundle,
# os.path.join(os.path.join(path, 'M4.csv')))
# M3
- target_input, target_output = M3Dataset(
- M3Meta.dataset_path).standard_split()
+ target_input, target_output = M3Dataset(M3Meta.dataset_path).standard_split()
yearly = forecast(target_input, target_output, 'M3Year', **tl_models['Y6'])
quarterly = forecast(target_input, target_output, 'M3Quart', **tl_models['Q8'])
monthly = forecast(target_input, target_output, 'M3Month', **tl_models['M18'])
others = forecast(target_input, target_output, 'M3Other', **tl_models['Q8'])
- pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv(os.path.join(os.path.join(path, 'M3.csv')))
+ pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv(
+ os.path.join(os.path.join(path, 'M3.csv'))
+ )
# Tourism
- target_input, target_output = TourismDataset(TourismMeta.dataset_path).standard_split()
+ target_input, target_output = TourismDataset(
+ TourismMeta.dataset_path
+ ).standard_split()
yearly = forecast(target_input, target_output, 'Yearly', **tl_models['Y4'])
quarterly = forecast(target_input, target_output, 'Quarterly', **tl_models['Q8'])
monthly = forecast(target_input, target_output, 'Monthly', **tl_models['M24'])
- pd.concat([yearly, quarterly, monthly], sort=False).to_csv(os.path.join(os.path.join(path, 'tourism.csv')))
+ pd.concat([yearly, quarterly, monthly], sort=False).to_csv(
+ os.path.join(os.path.join(path, 'tourism.csv'))
+ )
+
def evaluate(name: str, summary_filter: str, validation_mode: bool = False):
pass
diff --git a/benchmark/metalearned/models/nbeats_torch.py b/benchmark/metalearned/models/nbeats_torch.py
index 419b969..9569665 100644
--- a/benchmark/metalearned/models/nbeats_torch.py
+++ b/benchmark/metalearned/models/nbeats_torch.py
@@ -1,21 +1,21 @@
+from itertools import chain
from typing import Tuple
import numpy as np
import torch as t
-from itertools import chain
-
from common.torch_utils import div_no_nan
class NBeatsFC(t.nn.Module):
- def __init__(self,
- input_size: int,
- fc_layers: int,
- output_size: int):
+ def __init__(self, input_size: int, fc_layers: int, output_size: int):
super().__init__()
- self.fc_layers = t.nn.ModuleList([t.nn.Linear(in_features=input_size, out_features=output_size)] +
- [t.nn.Linear(in_features=output_size, out_features=output_size)
- for _ in range(fc_layers - 1)])
+ self.fc_layers = t.nn.ModuleList(
+ [t.nn.Linear(in_features=input_size, out_features=output_size)]
+ + [
+ t.nn.Linear(in_features=output_size, out_features=output_size)
+ for _ in range(fc_layers - 1)
+ ]
+ )
def forward(self, x: t.Tensor) -> t.Tensor:
output = x
@@ -25,16 +25,16 @@ def forward(self, x: t.Tensor) -> t.Tensor:
class NBeatsGenericBlock(t.nn.Module):
- def __init__(self,
- input_size: int,
- fc_layers: int,
- fc_layers_size: int,
- output_size: int):
+ def __init__(
+ self, input_size: int, fc_layers: int, fc_layers_size: int, output_size: int
+ ):
super().__init__()
- self.fc = NBeatsFC(input_size=input_size,
- fc_layers=fc_layers,
- output_size=fc_layers_size)
- self.basis = t.nn.Linear(in_features=fc_layers_size, out_features=input_size + output_size)
+ self.fc = NBeatsFC(
+ input_size=input_size, fc_layers=fc_layers, output_size=fc_layers_size
+ )
+ self.basis = t.nn.Linear(
+ in_features=fc_layers_size, out_features=input_size + output_size
+ )
self.output_size = output_size
self.backcast_dump = None
@@ -51,55 +51,95 @@ def forward(self, x: t.Tensor) -> Tuple[t.Tensor, t.Tensor]:
class NBeatsTrendBlock(t.nn.Module):
- def __init__(self,
- input_size: int,
- fc_layers: int,
- fc_layers_size: int,
- degree_of_polynomial: int,
- output_size: int):
+ def __init__(
+ self,
+ input_size: int,
+ fc_layers: int,
+ fc_layers_size: int,
+ degree_of_polynomial: int,
+ output_size: int,
+ ):
super().__init__()
- self.polynomial_size = degree_of_polynomial + 1 # degree of polynomial with constant term
- self.fc = NBeatsFC(input_size=input_size,
- fc_layers=fc_layers,
- output_size=fc_layers_size)
- self.basis = t.nn.Linear(in_features=fc_layers_size, out_features=2 * self.polynomial_size)
+ self.polynomial_size = (
+ degree_of_polynomial + 1
+ ) # degree of polynomial with constant term
+ self.fc = NBeatsFC(
+ input_size=input_size, fc_layers=fc_layers, output_size=fc_layers_size
+ )
+ self.basis = t.nn.Linear(
+ in_features=fc_layers_size, out_features=2 * self.polynomial_size
+ )
self.output_size = output_size
- self.backcast_time = np.concatenate([np.power(np.arange(input_size, dtype=np.float) / input_size, i)[None, :]
- for i in range(self.polynomial_size)])
- self.forecast_time = np.concatenate([np.power(np.arange(output_size, dtype=np.float) / output_size, i)[None, :]
- for i in range(self.polynomial_size)])
+ self.backcast_time = np.concatenate(
+ [
+                np.power(np.arange(input_size, dtype=np.float64) / input_size, i)[None, :]
+ for i in range(self.polynomial_size)
+ ]
+ )
+ self.forecast_time = np.concatenate(
+ [
+                np.power(np.arange(output_size, dtype=np.float64) / output_size, i)[
+ None, :
+ ]
+ for i in range(self.polynomial_size)
+ ]
+ )
self.backcast_dump = None
self.forecast_dump = None
def forward(self, x: t.Tensor):
thetas = self.basis(self.fc(x))
- backcast = t.einsum('bp,pt->bt', thetas[:, self.polynomial_size:], x.new(self.backcast_time))
- forecast = t.einsum('bp,pt->bt', thetas[:, :self.polynomial_size], x.new(self.forecast_time))
+ backcast = t.einsum(
+ 'bp,pt->bt', thetas[:, self.polynomial_size :], x.new(self.backcast_time)
+ )
+ forecast = t.einsum(
+ 'bp,pt->bt', thetas[:, : self.polynomial_size], x.new(self.forecast_time)
+ )
self.backcast_dump = backcast
self.forecast_dump = forecast
return backcast, forecast
class NBeatsSeasonalityBlock(t.nn.Module):
- def __init__(self,
- input_size: int,
- fc_layers: int,
- fc_layers_size: int,
- num_of_harmonics: int,
- output_size: int):
+ def __init__(
+ self,
+ input_size: int,
+ fc_layers: int,
+ fc_layers_size: int,
+ num_of_harmonics: int,
+ output_size: int,
+ ):
super().__init__()
- self.basis_parameters = int(np.ceil(num_of_harmonics / 2 * output_size) - (num_of_harmonics - 1))
-
- self.fc = NBeatsFC(input_size=input_size,
- fc_layers=fc_layers,
- output_size=fc_layers_size)
- self.basis = t.nn.Linear(in_features=fc_layers_size, out_features=4 * self.basis_parameters)
-
- frequency = np.append(np.zeros(1, dtype=np.float32),
- np.arange(num_of_harmonics, num_of_harmonics / 2 * output_size,
- dtype=np.float32) / num_of_harmonics)[None, :]
- backcast_grid = -2 * np.pi * (np.arange(input_size, dtype=np.float32)[:, None] / output_size) * frequency
- forecast_grid = 2 * np.pi * (np.arange(output_size, dtype=np.float32)[:, None] / output_size) * frequency
+ self.basis_parameters = int(
+ np.ceil(num_of_harmonics / 2 * output_size) - (num_of_harmonics - 1)
+ )
+
+ self.fc = NBeatsFC(
+ input_size=input_size, fc_layers=fc_layers, output_size=fc_layers_size
+ )
+ self.basis = t.nn.Linear(
+ in_features=fc_layers_size, out_features=4 * self.basis_parameters
+ )
+
+ frequency = np.append(
+ np.zeros(1, dtype=np.float32),
+ np.arange(
+ num_of_harmonics, num_of_harmonics / 2 * output_size, dtype=np.float32
+ )
+ / num_of_harmonics,
+ )[None, :]
+ backcast_grid = (
+ -2
+ * np.pi
+ * (np.arange(input_size, dtype=np.float32)[:, None] / output_size)
+ * frequency
+ )
+ forecast_grid = (
+ 2
+ * np.pi
+ * (np.arange(output_size, dtype=np.float32)[:, None] / output_size)
+ * frequency
+ )
self.backcast_cos_template = np.transpose(np.cos(backcast_grid))
self.backcast_sin_template = np.transpose(np.sin(backcast_grid))
self.forecast_cos_template = np.transpose(np.cos(forecast_grid))
@@ -111,20 +151,28 @@ def __init__(self,
def forward(self, x: t.Tensor):
harmonics_weights = self.basis(self.fc(x))
- backcast_harmonics_cos = t.einsum('bp,pt->bt',
- harmonics_weights[:, 2 * self.basis_parameters:3 * self.basis_parameters],
- x.new(self.backcast_cos_template))
- backcast_harmonics_sin = t.einsum('bp,pt->bt',
- harmonics_weights[:, 3 * self.basis_parameters:],
- x.new(self.backcast_sin_template))
+ backcast_harmonics_cos = t.einsum(
+ 'bp,pt->bt',
+ harmonics_weights[:, 2 * self.basis_parameters : 3 * self.basis_parameters],
+ x.new(self.backcast_cos_template),
+ )
+ backcast_harmonics_sin = t.einsum(
+ 'bp,pt->bt',
+ harmonics_weights[:, 3 * self.basis_parameters :],
+ x.new(self.backcast_sin_template),
+ )
backcast = backcast_harmonics_sin + backcast_harmonics_cos
- forecast_harmonics_cos = t.einsum('bp,pt->bt',
- harmonics_weights[:, :self.basis_parameters],
- x.new(self.forecast_cos_template))
- forecast_harmonics_sin = t.einsum('bp,pt->bt',
- harmonics_weights[:, self.basis_parameters:2 * self.basis_parameters],
- x.new(self.forecast_sin_template))
+ forecast_harmonics_cos = t.einsum(
+ 'bp,pt->bt',
+ harmonics_weights[:, : self.basis_parameters],
+ x.new(self.forecast_cos_template),
+ )
+ forecast_harmonics_sin = t.einsum(
+ 'bp,pt->bt',
+ harmonics_weights[:, self.basis_parameters : 2 * self.basis_parameters],
+ x.new(self.forecast_sin_template),
+ )
forecast = forecast_harmonics_sin + forecast_harmonics_cos
self.backcast_dump = backcast
@@ -189,41 +237,67 @@ def forward(self, x: t.Tensor, input_mask: t.Tensor) -> t.Tensor:
return forecast
-def nbeats_generic(input_size: int, output_size: int,
- blocks: int = 1, stacks: int = 30,
- fc_layers: int = 4, fc_layers_size: int = 512,
- scaling: str = None, mode: str = 'dress'):
- modules = [[NBeatsGenericBlock(input_size=input_size,
- fc_layers=fc_layers,
- fc_layers_size=fc_layers_size,
- output_size=output_size)] * blocks for _ in range(stacks)]
-
- return NBeats(t.nn.ModuleList(list(chain.from_iterable(modules))),
- scaling=scaling,
- mode=mode)
-
-
-def nbeats_interpretable(input_size: int, output_size: int,
- trend_blocks: int = 3,
- trend_fc_layers: int = 4,
- trend_fc_layers_size: int = 256,
- degree_of_polynomial: int = 3,
- seasonality_blocks: int = 3,
- seasonality_fc_layers: int = 4,
- seasonality_fc_layers_size: int = 2048,
- num_of_harmonics: int = 1,
- scaling: str = None,
- mode: str = 'dress'):
- trend_block = NBeatsTrendBlock(input_size=input_size,
- fc_layers=trend_fc_layers,
- fc_layers_size=trend_fc_layers_size,
- degree_of_polynomial=degree_of_polynomial,
- output_size=output_size)
- seasonality_block = NBeatsSeasonalityBlock(input_size=input_size,
- fc_layers=seasonality_fc_layers,
- fc_layers_size=seasonality_fc_layers_size,
- num_of_harmonics=num_of_harmonics,
- output_size=output_size)
- return NBeats(t.nn.ModuleList(
- [trend_block for _ in range(trend_blocks)] + [seasonality_block for _ in range(seasonality_blocks)]),
- scaling=scaling, mode=mode)
+def nbeats_generic(
+ input_size: int,
+ output_size: int,
+ blocks: int = 1,
+ stacks: int = 30,
+ fc_layers: int = 4,
+ fc_layers_size: int = 512,
+ scaling: str = None,
+ mode: str = 'dress',
+):
+ modules = [
+ [
+ NBeatsGenericBlock(
+ input_size=input_size,
+ fc_layers=fc_layers,
+ fc_layers_size=fc_layers_size,
+ output_size=output_size,
+ )
+ ]
+ * blocks
+ for _ in range(stacks)
+ ]
+
+ return NBeats(
+ t.nn.ModuleList(list(chain.from_iterable(modules))), scaling=scaling, mode=mode
+ )
+
+
+def nbeats_interpretable(
+ input_size: int,
+ output_size: int,
+ trend_blocks: int = 3,
+ trend_fc_layers: int = 4,
+ trend_fc_layers_size: int = 256,
+ degree_of_polynomial: int = 3,
+ seasonality_blocks: int = 3,
+ seasonality_fc_layers: int = 4,
+ seasonality_fc_layers_size: int = 2048,
+ num_of_harmonics: int = 1,
+ scaling: str = None,
+ mode: str = 'dress',
+):
+ trend_block = NBeatsTrendBlock(
+ input_size=input_size,
+ fc_layers=trend_fc_layers,
+ fc_layers_size=trend_fc_layers_size,
+ degree_of_polynomial=degree_of_polynomial,
+ output_size=output_size,
+ )
+ seasonality_block = NBeatsSeasonalityBlock(
+ input_size=input_size,
+ fc_layers=seasonality_fc_layers,
+ fc_layers_size=seasonality_fc_layers_size,
+ num_of_harmonics=num_of_harmonics,
+ output_size=output_size,
+ )
+ return NBeats(
+ t.nn.ModuleList(
+ [trend_block for _ in range(trend_blocks)]
+ + [seasonality_block for _ in range(seasonality_blocks)]
+ ),
+ scaling=scaling,
+ mode=mode,
+ )
diff --git a/benchmark/metalearned/resources/electricity/dataset.py b/benchmark/metalearned/resources/electricity/dataset.py
index f6b5282..7336021 100644
--- a/benchmark/metalearned/resources/electricity/dataset.py
+++ b/benchmark/metalearned/resources/electricity/dataset.py
@@ -5,11 +5,10 @@
import numpy as np
import patoolib
-from tqdm import tqdm
-
from common.settings import RESOURCES_DIR
-from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Hour
+from common.timeseries import Hour, Timeseries, TimeseriesBundle, TimeseriesLoader
from common.utils import download_url
+from tqdm import tqdm
"""
Hourly aggregated dataset from https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014
@@ -39,20 +38,29 @@ class ElectricityDataset(TimeseriesLoader):
def download(self) -> TimeseriesBundle:
archive_file = os.path.join(self.path, 'dataset.zip')
raw_file = os.path.join(self.path, 'LD2011_2014.txt')
- download_url('https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip',
- archive_file)
+ download_url(
+ 'https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip',
+ archive_file,
+ )
patoolib.extract_archive(archive_file, outdir=self.path)
with open(raw_file, 'r') as f:
raw = f.readlines()
- parsed_values = np.array(list(map(
- lambda raw_line: np.array(raw_line.replace(',', '.').strip().split(';')[1:]).astype(np.float), tqdm(raw[1:])
- )))
+ parsed_values = np.array(
+ list(
+ map(
+ lambda raw_line: np.array(
+ raw_line.replace(',', '.').strip().split(';')[1:]
+                    ).astype(np.float64),
+ tqdm(raw[1:]),
+ )
+ )
+ )
aggregated = []
for i in tqdm(range(0, parsed_values.shape[0], 4)):
- aggregated.append(parsed_values[i:i + 4, :].sum(axis=0))
+ aggregated.append(parsed_values[i : i + 4, :].sum(axis=0))
aggregated = np.array(aggregated)
# regarding time labels, in dataset description authors specify
@@ -62,19 +70,25 @@ def download(self) -> TimeseriesBundle:
# neither for "2012-03-25 01:45:00", thus it's not clear how to deal with daylight saving time change in this
# dataset. Taking into account this uncertainty the starting date is treated as UTC (without time changes).
- start_date = datetime(2011, 1, 1, 1, 0, 0) # aggregated towards next hour instead of current hour.
+ start_date = datetime(
+ 2011, 1, 1, 1, 0, 0
+ ) # aggregated towards next hour instead of current hour.
dataset = aggregated.T # use time step as second dimension.
timeseries = []
for i, values in enumerate(dataset):
- timeseries.append(Timeseries(id=str(i),
- start_date=start_date,
- time_unit=Hour(),
- frequency=1,
- period=ElectricityMeta.period,
- values=values,
- meta={}))
+ timeseries.append(
+ Timeseries(
+ id=str(i),
+ start_date=start_date,
+ time_unit=Hour(),
+ frequency=1,
+ period=ElectricityMeta.period,
+ values=values,
+ meta={},
+ )
+ )
return TimeseriesBundle(timeseries)
def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]:
diff --git a/benchmark/metalearned/resources/electricity/evaluator.py b/benchmark/metalearned/resources/electricity/evaluator.py
index 99e53c9..8f3561d 100644
--- a/benchmark/metalearned/resources/electricity/evaluator.py
+++ b/benchmark/metalearned/resources/electricity/evaluator.py
@@ -2,8 +2,7 @@
from typing import Callable
import numpy as np
-
-from common.evaluator import Evaluator, EvaluationResult
+from common.evaluator import EvaluationResult, Evaluator
from common.metrics import nd
from common.timeseries import TimeseriesBundle
from common.utils import round_half_up
@@ -15,6 +14,11 @@ class ElectricityEvaluator(Evaluator):
precision: int = 2
def evaluate(self, forecasts: TimeseriesBundle) -> EvaluationResult:
- return {'metric': round_half_up(self.metric_fn(np.array(forecasts.values()),
- np.array(self.test_set.values())),
- self.precision)}
+ return {
+ 'metric': round_half_up(
+ self.metric_fn(
+ np.array(forecasts.values()), np.array(self.test_set.values())
+ ),
+ self.precision,
+ )
+ }
diff --git a/benchmark/metalearned/resources/fred/api.py b/benchmark/metalearned/resources/fred/api.py
index d811291..cc654d0 100644
--- a/benchmark/metalearned/resources/fred/api.py
+++ b/benchmark/metalearned/resources/fred/api.py
@@ -30,8 +30,10 @@ def __init__(self, dataset_path):
self.dataset_path = dataset_path
if not os.path.isfile(key_path):
- raise Exception(f'Cannot find FRED key file. Create an API key and place it in {key_path}. '
- 'https://research.stlouisfed.org/docs/api/api_key.html')
+ raise Exception(
+ f'Cannot find FRED key file. Create an API key and place it in {key_path}. '
+ 'https://research.stlouisfed.org/docs/api/api_key.html'
+ )
with open(key_path, 'r') as f:
key = f.readline().strip()
@@ -42,7 +44,7 @@ def call(self, api_fn: Callable[[Fred], A], attempt=1) -> A:
raise Exception('Maximum retries exceeded')
try:
return api_fn(self.api)
- except Exception as e:
+ except Exception:
# logging.info(f'API Error: {str(e)}. Waiting {self.wait_delay} seconds to retry. Attempt: {attempt}')
time.sleep(self.wait_delay)
return self.call(api_fn=api_fn, attempt=attempt + 1)
@@ -57,14 +59,18 @@ def fetch_categories(self, parent=0):
def fetch_observation(self, timeseries_id: str):
try:
- values = self.api.series.observations(timeseries_id, params={'output_type': 1,
- 'realtime_start': '1776-07-04'})
+ values = self.api.series.observations(
+ timeseries_id, params={'output_type': 1, 'realtime_start': '1776-07-04'}
+ )
values = values.groupby('date').head(1)
values = values.set_index('date')['value']
except Exception as e:
- if 'The series does not exist in ALFRED but may exist in FRED' in str(e) \
- or 'this exceeds the maximum number of vintage dates allowed' in str(e).lower() \
- or 'bad request' in str(e).lower():
+ if (
+ 'The series does not exist in ALFRED but may exist in FRED' in str(e)
+ or 'this exceeds the maximum number of vintage dates allowed'
+ in str(e).lower()
+ or 'bad request' in str(e).lower()
+ ):
# There are a couple of situations where ALFRED (vintage data)
# would not work properly
values = self.api.series.observations(timeseries_id)
@@ -88,7 +94,7 @@ def fetch_all(self):
categories = pickle.load(f)
logging.info(f'Loaded {len(categories)} categories')
else:
- logging.info(f'Fetching categories')
+ logging.info('Fetching categories')
categories = self.fetch_categories()
logging.info(f'Fetched {len(categories)} categories')
with open(categories_cache_path, 'wb') as f:
@@ -97,7 +103,7 @@ def fetch_all(self):
#
# Fetch timeseries
#
- logging.info(f'Fetching timeseries')
+ logging.info('Fetching timeseries')
dataset_file_path = os.path.join(self.dataset_path, 'dataset.pickle')
dataset = {'processed_categories': [], 'data': {}}
@@ -105,38 +111,46 @@ def fetch_all(self):
with open(dataset_file_path, 'rb') as cache_file_name:
dataset = pickle.load(cache_file_name)
- categories_to_process = [c for c in categories if c not in dataset['processed_categories']]
+ categories_to_process = [
+ c for c in categories if c not in dataset['processed_categories']
+ ]
limit = 1000
for category_id in tqdm(categories_to_process):
offset = 0
while True:
- timeseries_meta = self.call(lambda api: api.category.series(category_id, params={'limit': limit,
- 'offset': offset}))
+ timeseries_meta = self.call(
+ lambda api: api.category.series(
+ category_id, params={'limit': limit, 'offset': offset}
+ )
+ )
if len(timeseries_meta) == 0:
break
for _, ts_meta in timeseries_meta.iterrows():
ts_id = str(ts_meta.id)
- start_date = datetime.datetime.strptime(str(ts_meta.observation_start), '%Y-%m-%d %H:%M:%S')
+ start_date = datetime.datetime.strptime(
+ str(ts_meta.observation_start), '%Y-%m-%d %H:%M:%S'
+ )
time_unit = str(ts_meta.frequency)
if ts_id not in dataset['data']:
dataset['data'][ts_id] = {
'start_date': start_date,
'time_unit': time_unit,
- 'meta': {
- 'categories': [category_id]
- },
- 'values': self.call(lambda api: self.fetch_observation(ts_id))
+ 'meta': {'categories': [category_id]},
+ 'values': self.call(
+ lambda api: self.fetch_observation(ts_id)
+ ),
}
else:
dataset['data'][ts_id]['meta']['categories'].append(category_id)
offset += 1
dataset['processed_categories'].append(category_id)
- temp_file = tempfile.NamedTemporaryFile(dir=self.dataset_path, delete=False, mode='wb')
+ temp_file = tempfile.NamedTemporaryFile(
+ dir=self.dataset_path, delete=False, mode='wb'
+ )
pickle.dump(dataset, temp_file, protocol=pickle.HIGHEST_PROTOCOL)
temp_file.flush()
os.fsync(temp_file.fileno())
os.rename(temp_file.name, dataset_file_path)
-
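`FredAPI.call` above wraps every request in a sleep-and-retry loop. A dependency-free sketch of the same idea, with illustrative `max_attempts` and `wait_delay` parameters (the real class keeps these as instance attributes):

```python
import time
from typing import Callable, TypeVar

A = TypeVar('A')


def call_with_retry(fn: Callable[[], A], max_attempts: int = 5, wait_delay: float = 1.0) -> A:
    # Same retry-and-sleep idea as FredAPI.call, written as a standalone helper.
    for attempt in range(1, max_attempts + 1):
        try:
            return fn()
        except Exception:
            if attempt == max_attempts:
                raise Exception('Maximum retries exceeded')
            time.sleep(wait_delay)
    raise Exception('Maximum retries exceeded')  # unreachable; satisfies type checkers


# Usage: wrap the flaky call in a zero-argument closure.
state = {'calls': 0}


def flaky() -> str:
    state['calls'] += 1
    if state['calls'] < 3:
        raise RuntimeError('transient error')
    return 'ok'


print(call_with_retry(flaky, wait_delay=0.0))  # 'ok' after two failed attempts
```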
diff --git a/benchmark/metalearned/resources/fred/dataset.py b/benchmark/metalearned/resources/fred/dataset.py
index f419063..1fbdda6 100644
--- a/benchmark/metalearned/resources/fred/dataset.py
+++ b/benchmark/metalearned/resources/fred/dataset.py
@@ -4,11 +4,17 @@
from dataclasses import dataclass
from typing import Tuple
-from tqdm import tqdm
-
from common.settings import RESOURCES_DIR
-from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Year, Month, Day
+from common.timeseries import (
+ Day,
+ Month,
+ Timeseries,
+ TimeseriesBundle,
+ TimeseriesLoader,
+ Year,
+)
from resources.fred.api import FredAPI
+from tqdm import tqdm
@dataclass(frozen=True)
@@ -36,7 +42,7 @@ def download(self) -> TimeseriesBundle:
'Quarterly': (Month(), 3),
'Monthly': (Month(), 1),
'Weekly': (Day(), 7),
- 'Daily': (Day(), 1)
+ 'Daily': (Day(), 1),
}
period_map = FredMeta().period_map()
@@ -44,26 +50,33 @@ def download(self) -> TimeseriesBundle:
timeseries = []
for ts_id, record in tqdm(raw_data.items()):
sp = record['time_unit']
- frequency = [frequency_map[s] for s in frequency_map.keys() if sp.startswith(s)]
+ frequency = [
+ frequency_map[s] for s in frequency_map.keys() if sp.startswith(s)
+ ]
period = [period_map[s] for s in period_map.keys() if sp.startswith(s)]
if len(frequency) > 0:
frequency = frequency[0]
else:
- raise Exception(f"Cannot match frequency for: {sp}")
+ raise Exception(f'Cannot match frequency for: {sp}')
if len(period) > 0:
period = period[0]
else:
- raise Exception(f"Cannot match frequency for: {sp}")
- timeseries.append(Timeseries(id=ts_id,
- start_date=record['start_date'],
- time_unit=frequency[0],
- frequency=frequency[1],
- period=period,
- values=record['values'],
- meta={'seasonal_pattern': sp}
- ))
- grouped_timeseries = [list(filter(lambda ts: ts.meta['seasonal_pattern'] == sp, timeseries))
- for sp in FredMeta.seasonal_patterns]
+ raise Exception(f'Cannot match frequency for: {sp}')
+ timeseries.append(
+ Timeseries(
+ id=ts_id,
+ start_date=record['start_date'],
+ time_unit=frequency[0],
+ frequency=frequency[1],
+ period=period,
+ values=record['values'],
+ meta={'seasonal_pattern': sp},
+ )
+ )
+ grouped_timeseries = [
+ list(filter(lambda ts: ts.meta['seasonal_pattern'] == sp, timeseries))
+ for sp in FredMeta.seasonal_patterns
+ ]
grouped_timeseries = [ts for sp_ts in grouped_timeseries for ts in sp_ts]
return TimeseriesBundle(grouped_timeseries)
@@ -71,7 +84,9 @@ def download(self) -> TimeseriesBundle:
def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]:
bundle = self.load_cache()
horizons_map = FredMeta().horizons_map()
- return bundle.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]))
+ return bundle.split(
+ lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])
+ )
if __name__ == '__main__':
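Every `standard_split` in these loaders follows the same convention: the last `horizon` observations of each series are held out as the test set, with the horizon chosen by the series' seasonal pattern. A plain-NumPy sketch with illustrative horizons (the real values live in the `*Meta` classes):

```python
import numpy as np

# Illustrative horizons per seasonal pattern.
horizons_map = {'Yearly': 6, 'Quarterly': 8, 'Monthly': 18}

series = {
    'Y1': ('Yearly', np.arange(20, dtype=float)),
    'M1': ('Monthly', np.arange(60, dtype=float)),
}

insample, outsample = {}, {}
for ts_id, (sp, values) in series.items():
    horizon = horizons_map[sp]
    insample[ts_id] = values[:-horizon]   # training part of the series
    outsample[ts_id] = values[-horizon:]  # held-out forecast horizon

print(len(insample['Y1']), len(outsample['Y1']))  # 14 6
print(len(insample['M1']), len(outsample['M1']))  # 42 18
```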
diff --git a/benchmark/metalearned/resources/fred/evaluator.py b/benchmark/metalearned/resources/fred/evaluator.py
index 7942721..69a48af 100644
--- a/benchmark/metalearned/resources/fred/evaluator.py
+++ b/benchmark/metalearned/resources/fred/evaluator.py
@@ -1,10 +1,8 @@
from collections import OrderedDict
-from collections import OrderedDict
from dataclasses import dataclass
import numpy as np
-
-from common.evaluator import Evaluator, EvaluationResult
+from common.evaluator import EvaluationResult, Evaluator
from common.metrics import smape_2
from common.timeseries import TimeseriesBundle
from common.utils import round_half_up
@@ -19,11 +17,19 @@ def evaluate(self, forecast: TimeseriesBundle) -> EvaluationResult:
insamples, _ = FredDataset(FredMeta.dataset_path).standard_split()
if self.validation:
horizons_map = FredMeta().horizons_map()
- insamples, _ = insamples.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]))
+ insamples, _ = insamples.split(
+ lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])
+ )
- grouped_smapes = {sp: np.mean(smape_2(forecast=np.array(self.filter_by_sp(forecast, sp).values()),
- target=np.array(self.filter_by_sp(self.test_set, sp).values())))
- for sp in FredMeta.seasonal_patterns}
+ grouped_smapes = {
+ sp: np.mean(
+ smape_2(
+ forecast=np.array(self.filter_by_sp(forecast, sp).values()),
+ target=np.array(self.filter_by_sp(self.test_set, sp).values()),
+ )
+ )
+ for sp in FredMeta.seasonal_patterns
+ }
grouped_smapes = self.summarize_groups(grouped_smapes)
@@ -34,7 +40,9 @@ def summarize_groups(self, scores):
weighted_score = {}
for sp in ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily']:
- weighted_score[sp] = scores[sp] * len(self.filter_by_sp(self.test_set, sp).timeseries)
+ weighted_score[sp] = scores[sp] * len(
+ self.filter_by_sp(self.test_set, sp).timeseries
+ )
scores_summary[sp] = scores[sp]
average = np.sum(list(weighted_score.values())) / len(self.test_set.timeseries)
@@ -43,7 +51,9 @@ def summarize_groups(self, scores):
return scores_summary
@staticmethod
- def filter_by_sp(bundle: TimeseriesBundle, seasonal_pattern: str) -> TimeseriesBundle:
+ def filter_by_sp(
+ bundle: TimeseriesBundle, seasonal_pattern: str
+ ) -> TimeseriesBundle:
return bundle.filter(lambda ts: ts.meta['seasonal_pattern'] == seasonal_pattern)
@staticmethod
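The `summarize_groups` method above weights each seasonal pattern's score by the number of series it contributes before dividing by the total series count. A small numeric sketch of that count-weighted average (the scores and counts are made up):

```python
import numpy as np

# Hypothetical per-pattern sMAPE scores and series counts.
scores = {'Yearly': 14.2, 'Quarterly': 10.1, 'Monthly': 12.7, 'Weekly': 8.3, 'Daily': 6.5}
counts = {'Yearly': 200, 'Quarterly': 150, 'Monthly': 500, 'Weekly': 50, 'Daily': 100}

weighted = {sp: scores[sp] * counts[sp] for sp in scores}
average = np.sum(list(weighted.values())) / np.sum(list(counts.values()))
print(round(average, 3))  # 11.77, the count-weighted average across patterns
```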
diff --git a/benchmark/metalearned/resources/m3/dataset.py b/benchmark/metalearned/resources/m3/dataset.py
index 0904d3c..24e104c 100644
--- a/benchmark/metalearned/resources/m3/dataset.py
+++ b/benchmark/metalearned/resources/m3/dataset.py
@@ -5,11 +5,17 @@
import numpy as np
import pandas as pd
-from tqdm import tqdm
-
from common.settings import RESOURCES_DIR
-from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Unknown, Year, Month
+from common.timeseries import (
+ Month,
+ Timeseries,
+ TimeseriesBundle,
+ TimeseriesLoader,
+ Unknown,
+ Year,
+)
from common.utils import download_url
+from tqdm import tqdm
@dataclass(frozen=True)
@@ -19,9 +25,30 @@ class M3Meta:
seasonal_patterns = ['M3Year', 'M3Quart', 'M3Month', 'M3Other']
horizons = [6, 8, 18, 8]
frequency = [1, 4, 12, 1]
- models = ['NAIVE2', 'SINGLE', 'HOLT', 'DAMPEN', 'WINTER', 'COMB S-H-D', 'B-J auto', 'AutoBox1', 'AutoBox2',
- 'AutoBox3', 'ROBUST-Trend', 'ARARMA', 'Auto-ANN', 'Flors-Pearc1', 'Flors-Pearc2', 'PP-Autocast',
- 'ForecastPro', 'SMARTFCS', 'THETAsm', 'THETA', 'RBF', 'ForcX']
+ models = [
+ 'NAIVE2',
+ 'SINGLE',
+ 'HOLT',
+ 'DAMPEN',
+ 'WINTER',
+ 'COMB S-H-D',
+ 'B-J auto',
+ 'AutoBox1',
+ 'AutoBox2',
+ 'AutoBox3',
+ 'ROBUST-Trend',
+ 'ARARMA',
+ 'Auto-ANN',
+ 'Flors-Pearc1',
+ 'Flors-Pearc2',
+ 'PP-Autocast',
+ 'ForecastPro',
+ 'SMARTFCS',
+ 'THETAsm',
+ 'THETA',
+ 'RBF',
+ 'ForcX',
+ ]
def horizons_map(self):
return dict(zip(self.seasonal_patterns, self.horizons))
@@ -67,37 +94,48 @@ def download(self) -> TimeseriesBundle:
time_unit = Unknown()
pass
- timeseries.append(Timeseries(id=str(row['Series']),
- start_date=starting_date,
- time_unit=time_unit,
- frequency=frequency,
- period=1,
- values=row.T[6:row.N + 6].values.astype(np.float32),
- meta={'seasonal_pattern': sp}
- ))
+ timeseries.append(
+ Timeseries(
+ id=str(row['Series']),
+ start_date=starting_date,
+ time_unit=time_unit,
+ frequency=frequency,
+ period=1,
+ values=row.T[6 : row.N + 6].values.astype(np.float32),
+ meta={'seasonal_pattern': sp},
+ )
+ )
return TimeseriesBundle(timeseries)
def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]:
bundle = self.load_cache()
horizons_map = M3Meta().horizons_map()
- return bundle.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]))
+ return bundle.split(
+ lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])
+ )
class M3Forecasts(TimeseriesLoader):
def download(self) -> TimeseriesBundle:
raw_file_path = os.path.join(M3Meta.forecasts_path, 'M3Forecast.xls')
- download_url('https://forecasters.org/data/m3comp/M3Forecast.xls', raw_file_path)
+ download_url(
+ 'https://forecasters.org/data/m3comp/M3Forecast.xls', raw_file_path
+ )
original_timeseries = M3Dataset(M3Meta().dataset_path).load_cache()
horizon_mapping = M3Meta().horizons_map()
- training_set, _ = original_timeseries.split(lambda t: t.split(-horizon_mapping[t.meta['seasonal_pattern']]))
+ training_set, _ = original_timeseries.split(
+ lambda t: t.split(-horizon_mapping[t.meta['seasonal_pattern']])
+ )
training_timeseries = training_set.timeseries
models_forecasts = []
for model_name in tqdm(M3Meta.models):
forecast = pd.read_excel(raw_file_path, sheet_name=model_name, header=None)
for i, row in forecast.iterrows():
- ts = training_timeseries[i].future_values(row.T[2:row[1] + 2].values.astype(np.float32))
+ ts = training_timeseries[i].future_values(
+ row.T[2 : row[1] + 2].values.astype(np.float32)
+ )
ts.meta = {**ts.meta, 'model': model_name}
models_forecasts.append(ts)
return TimeseriesBundle(models_forecasts)
diff --git a/benchmark/metalearned/resources/m3/evaluator.py b/benchmark/metalearned/resources/m3/evaluator.py
index 00cf895..f9d8243 100644
--- a/benchmark/metalearned/resources/m3/evaluator.py
+++ b/benchmark/metalearned/resources/m3/evaluator.py
@@ -2,8 +2,7 @@
from dataclasses import dataclass
import numpy as np
-
-from common.evaluator import Evaluator, EvaluationResult
+from common.evaluator import EvaluationResult, Evaluator
from common.metrics import smape_1, smape_2
from common.timeseries import TimeseriesBundle
from resources.m3.dataset import M3Meta
@@ -22,7 +21,9 @@ def evaluate(self, forecasts: TimeseriesBundle) -> EvaluationResult:
evaluation_function = smape_1 if self.smape_1 else smape_2
for sp in M3Meta.seasonal_patterns:
- target_sp = self.test_set.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
+ target_sp = self.test_set.filter(
+ lambda ts: ts.meta['seasonal_pattern'] == sp
+ )
forecast_sp = forecasts.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
target, forecast = target_sp.intersection_by_id(forecast_sp)
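This evaluator switches between `smape_1` and `smape_2`, which live in `common/metrics.py` and are not part of this diff. The sketch below shows the conventional definitions (absolute values in the denominator only for the second variant); treat it as an assumption about those helpers rather than their exact code:

```python
import numpy as np


def smape_1(forecast: np.ndarray, target: np.ndarray) -> np.ndarray:
    # Classic M3-style sMAPE: plain sum in the denominator.
    return 200 * np.abs(forecast - target) / (target + forecast)


def smape_2(forecast: np.ndarray, target: np.ndarray) -> np.ndarray:
    # Variant with absolute values in the denominator and a guard against zeros.
    denominator = np.abs(target) + np.abs(forecast)
    denominator[denominator == 0.0] = 1.0
    return 200 * np.abs(forecast - target) / denominator


forecast = np.array([102.0, 98.0, 110.0])
target = np.array([100.0, 100.0, 100.0])
print(np.mean(smape_1(forecast, target)), np.mean(smape_2(forecast, target)))
```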
diff --git a/benchmark/metalearned/resources/m3/metrics.py b/benchmark/metalearned/resources/m3/metrics.py
index edc7578..0419a62 100644
--- a/benchmark/metalearned/resources/m3/metrics.py
+++ b/benchmark/metalearned/resources/m3/metrics.py
@@ -32,7 +32,9 @@ def smape_m3_dataset_horizon(target_dataset, forecast_dataset, horizon):
i = 0
for prediction, target in zip(forecast_dataset.values, target_dataset.values):
if target_dataset.horizons[i] >= horizon:
- smape_cum += smape_m3(prediction[horizon-1], target[-len(prediction)+horizon-1]).sum()
+ smape_cum += smape_m3(
+ prediction[horizon - 1], target[-len(prediction) + horizon - 1]
+ ).sum()
smape_n_points += 1
i += 1
@@ -57,9 +59,12 @@ def smape_m3_dataset_horizon_avg(target_dataset, forecast_dataset, horizon):
for prediction, target in zip(forecast_dataset.values, target_dataset.values):
horizon_clamped = min(target_dataset.horizons[i], horizon)
if horizon_clamped == target_dataset.horizons[i]:
- target_clamped = target[-target_dataset.horizons[i]:]
+ target_clamped = target[-target_dataset.horizons[i] :]
else:
- target_clamped = target[-target_dataset.horizons[i]:-target_dataset.horizons[i]+horizon_clamped]
+ target_clamped = target[
+ -target_dataset.horizons[i] : -target_dataset.horizons[i]
+ + horizon_clamped
+ ]
smape_cum += smape_m3(prediction[:horizon_clamped], target_clamped).sum()
smape_n_points += len(target_clamped)
i += 1
@@ -107,7 +112,9 @@ def smape_m3_dataset_horizon(target_dataset, forecast_dataset, horizon):
i = 0
for prediction, target in zip(forecast_dataset.values, target_dataset.values):
if target_dataset.horizons[i] >= horizon:
- smape_cum += smape_m3(prediction[horizon-1], target[-len(prediction)+horizon-1]).sum()
+ smape_cum += smape_m3(
+ prediction[horizon - 1], target[-len(prediction) + horizon - 1]
+ ).sum()
smape_n_points += 1
i += 1
@@ -132,12 +139,14 @@ def smape_m3_dataset_horizon_avg(target_dataset, forecast_dataset, horizon):
for prediction, target in zip(forecast_dataset.values, target_dataset.values):
horizon_clamped = min(target_dataset.horizons[i], horizon)
if horizon_clamped == target_dataset.horizons[i]:
- target_clamped = target[-target_dataset.horizons[i]:]
+ target_clamped = target[-target_dataset.horizons[i] :]
else:
- target_clamped = target[-target_dataset.horizons[i]:-target_dataset.horizons[i]+horizon_clamped]
+ target_clamped = target[
+ -target_dataset.horizons[i] : -target_dataset.horizons[i]
+ + horizon_clamped
+ ]
smape_cum += smape_m3(prediction[:horizon_clamped], target_clamped).sum()
smape_n_points += len(target_clamped)
i += 1
return smape_cum / smape_n_points
-
diff --git a/benchmark/metalearned/resources/m4/dataset.py b/benchmark/metalearned/resources/m4/dataset.py
index a919b1d..199e82e 100644
--- a/benchmark/metalearned/resources/m4/dataset.py
+++ b/benchmark/metalearned/resources/m4/dataset.py
@@ -7,11 +7,18 @@
import numpy as np
import pandas as pd
import patoolib
-from tqdm import tqdm
-
from common.settings import RESOURCES_DIR
-from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Year, Month, Day, Hour
+from common.timeseries import (
+ Day,
+ Hour,
+ Month,
+ Timeseries,
+ TimeseriesBundle,
+ TimeseriesLoader,
+ Year,
+)
from common.utils import download_url
+from tqdm import tqdm
@dataclass(frozen=True)
@@ -31,29 +38,37 @@ def period_map(self):
class M4Dataset(TimeseriesLoader):
def download(self) -> TimeseriesBundle:
url_template = 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/{}/{}-{}.csv'
- m4_info_url = 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/M4-info.csv'
+ m4_info_url = (
+ 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/M4-info.csv'
+ )
m4_info_path = os.path.join(self.path, 'M4info.csv')
ssl._create_default_https_context = ssl._create_unverified_context
download_url(m4_info_url, m4_info_path)
for sp in M4Meta.seasonal_patterns:
- training_url = url_template.format("Train", sp, "train")
- download_url(training_url, os.path.join(M4Meta.dataset_path, f'{sp}-train.csv'))
- test_url = url_template.format("Test", sp, "test")
+ training_url = url_template.format('Train', sp, 'train')
+ download_url(
+ training_url, os.path.join(M4Meta.dataset_path, f'{sp}-train.csv')
+ )
+ test_url = url_template.format('Test', sp, 'test')
download_url(test_url, os.path.join(M4Meta.dataset_path, f'{sp}-test.csv'))
# Download naive2 forecasts, needed for OWA metric
m4_naive2_archive = os.path.join(self.path, 'naive2.rar')
- download_url('https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-Naive2.rar',
- m4_naive2_archive)
+ download_url(
+ 'https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-Naive2.rar',
+ m4_naive2_archive,
+ )
patoolib.extract_archive(m4_naive2_archive, outdir=self.path)
os.remove(m4_naive2_archive)
# Download m4 competition winner predictions, for summary testing purposes only
m4_winner_archive = os.path.join(self.path, 'submission-118.rar')
- download_url('https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-118.rar',
- m4_winner_archive)
+ download_url(
+ 'https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-118.rar',
+ m4_winner_archive,
+ )
patoolib.extract_archive(m4_winner_archive, outdir=self.path)
os.remove(m4_winner_archive)
@@ -66,12 +81,14 @@ def download(self) -> TimeseriesBundle:
'Monthly': (Month(), 1),
'Weekly': (Day(), 7),
'Daily': (Day(), 1),
- 'Hourly': (Hour(), 1)
+ 'Hourly': (Hour(), 1),
}
all_timeseries = []
for sp in M4Meta.seasonal_patterns:
- training_set = pd.read_csv(os.path.join(M4Meta.dataset_path, f'{sp}-train.csv'))
+ training_set = pd.read_csv(
+ os.path.join(M4Meta.dataset_path, f'{sp}-train.csv')
+ )
test_set = pd.read_csv(os.path.join(M4Meta.dataset_path, f'{sp}-test.csv'))
time_unit, frequency = time_units_mapping[sp]
@@ -89,23 +106,28 @@ def download(self) -> TimeseriesBundle:
parsed_date = None
for parsing_format in parsing_formats:
try:
- parsed_date = datetime.strptime(timeseries_info.StartingDate, parsing_format)
+ parsed_date = datetime.strptime(
+ timeseries_info.StartingDate, parsing_format
+ )
except Exception:
continue
if parsed_date is None:
- raise ValueError(f'Could not parse {timeseries_info.StartingDate} for {timeseries_id}')
+ raise ValueError(
+ f'Could not parse {timeseries_info.StartingDate} for {timeseries_id}'
+ )
# all M4 years are in the 1900s or 1800s
if parsed_date.year > 2000:
parsed_date = parsed_date.replace(year=parsed_date.year - 100)
- timeseries = Timeseries(id=timeseries_id,
- start_date=parsed_date,
- time_unit=time_unit,
- frequency=frequency,
- period=int(timeseries_info.Frequency),
- values=np.concatenate([training_values, test_values]),
- meta={'seasonal_pattern': sp}
- )
+ timeseries = Timeseries(
+ id=timeseries_id,
+ start_date=parsed_date,
+ time_unit=time_unit,
+ frequency=frequency,
+ period=int(timeseries_info.Frequency),
+ values=np.concatenate([training_values, test_values]),
+ meta={'seasonal_pattern': sp},
+ )
all_timeseries.append(timeseries)
return TimeseriesBundle(all_timeseries)
@@ -113,7 +135,9 @@ def download(self) -> TimeseriesBundle:
def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]:
bundle = self.load_cache()
horizons_map = M4Meta().horizons_map()
- return bundle.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]))
+ return bundle.split(
+ lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])
+ )
@staticmethod
def filter(bundle: TimeseriesBundle, seasonal_pattern: str) -> TimeseriesBundle:
diff --git a/benchmark/metalearned/resources/m4/evaluator.py b/benchmark/metalearned/resources/m4/evaluator.py
index 194f65f..7225056 100644
--- a/benchmark/metalearned/resources/m4/evaluator.py
+++ b/benchmark/metalearned/resources/m4/evaluator.py
@@ -4,8 +4,7 @@
import numpy as np
import pandas as pd
-
-from common.evaluator import Evaluator, EvaluationResult
+from common.evaluator import EvaluationResult, Evaluator
from common.metrics import mase, smape_2
from common.timeseries import TimeseriesBundle
from common.utils import clean_nans, round_half_up
@@ -21,11 +20,19 @@ def evaluate(self, forecast: TimeseriesBundle) -> EvaluationResult:
insamples, _ = M4Dataset(M4Meta.dataset_path).standard_split()
if self.validation:
horizons_map = M4Meta().horizons_map()
- insamples, _ = insamples.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]))
-
- grouped_smapes = {sp: np.mean(smape_2(forecast=np.array(M4Dataset.filter(forecast, sp).values()),
- target=np.array(M4Dataset.filter(self.test_set, sp).values())))
- for sp in M4Meta.seasonal_patterns}
+ insamples, _ = insamples.split(
+ lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])
+ )
+
+ grouped_smapes = {
+ sp: np.mean(
+ smape_2(
+ forecast=np.array(M4Dataset.filter(forecast, sp).values()),
+ target=np.array(M4Dataset.filter(self.test_set, sp).values()),
+ )
+ )
+ for sp in M4Meta.seasonal_patterns
+ }
grouped_smapes = self.summarize_groups(grouped_smapes)
@@ -33,7 +40,8 @@ def evaluate(self, forecast: TimeseriesBundle) -> EvaluationResult:
grouped_owa = OrderedDict()
if not self.validation:
naive2_forecasts = pd.read_csv(
- os.path.join(M4Meta.dataset_path, 'submission-Naive2.csv'))
+ os.path.join(M4Meta.dataset_path, 'submission-Naive2.csv')
+ )
naive2_forecasts.set_index(keys='id', inplace=True)
model_mases = {}
@@ -41,32 +49,56 @@ def evaluate(self, forecast: TimeseriesBundle) -> EvaluationResult:
naive2_mases = {}
for sp in M4Meta.seasonal_patterns:
model_forecasts = M4Dataset.filter(forecast, sp)
- naive2_forecast = clean_nans(naive2_forecasts.loc[model_forecasts.ids()].values)
+ naive2_forecast = clean_nans(
+ naive2_forecasts.loc[model_forecasts.ids()].values
+ )
model_forecast_values = model_forecasts.values()
- target = self.test_set.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
+ target = self.test_set.filter(
+ lambda ts: ts.meta['seasonal_pattern'] == sp
+ )
target_values = np.array(target.values())
# all timeseries within group have same frequency
period = target.period()[0]
insample = M4Dataset.filter(insamples, sp).values()
- model_mases[sp] = np.mean([mase(forecast=model_forecast_values[i],
- insample=insample[i],
- outsample=target_values[i],
- frequency=period) for i in range(len(model_forecast_values))])
- naive2_mases[sp] = np.mean([mase(forecast=naive2_forecast[i],
- insample=insample[i],
- outsample=target_values[i],
- frequency=period) for i in range(len(model_forecast_values))])
+ model_mases[sp] = np.mean(
+ [
+ mase(
+ forecast=model_forecast_values[i],
+ insample=insample[i],
+ outsample=target_values[i],
+ frequency=period,
+ )
+ for i in range(len(model_forecast_values))
+ ]
+ )
+ naive2_mases[sp] = np.mean(
+ [
+ mase(
+ forecast=naive2_forecast[i],
+ insample=insample[i],
+ outsample=target_values[i],
+ frequency=period,
+ )
+ for i in range(len(model_forecast_values))
+ ]
+ )
naive2_smapes[sp] = np.mean(smape_2(naive2_forecast, target_values))
grouped_model_mases = self.summarize_groups(model_mases)
grouped_naive2_smapes = self.summarize_groups(naive2_smapes)
grouped_naive2_mases = self.summarize_groups(naive2_mases)
for k in grouped_model_mases.keys():
- grouped_owa[k] = round_half_up((grouped_model_mases[k] / grouped_naive2_mases[k] +
- grouped_smapes[k] / grouped_naive2_smapes[k]) / 2, 3)
+ grouped_owa[k] = round_half_up(
+ (
+ grouped_model_mases[k] / grouped_naive2_mases[k]
+ + grouped_smapes[k] / grouped_naive2_smapes[k]
+ )
+ / 2,
+ 3,
+ )
return self.round_values(grouped_owa)
else:
return self.round_values(grouped_smapes)
@@ -76,7 +108,9 @@ def summarize_groups(self, scores):
weighted_score = {}
for sp in ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly']:
- weighted_score[sp] = scores[sp] * len(M4Dataset.filter(self.test_set, sp).timeseries)
+ weighted_score[sp] = scores[sp] * len(
+ M4Dataset.filter(self.test_set, sp).timeseries
+ )
scores_summary[sp] = scores[sp]
others_score = 0
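The grouped OWA computed above is the M4 overall weighted average: the model's MASE and sMAPE are each divided by Naive2's scores, and the two ratios are averaged. A numeric sketch with hypothetical per-group values (the repository uses `round_half_up` rather than Python's built-in `round`):

```python
def owa(model_mase: float, naive2_mase: float, model_smape: float, naive2_smape: float) -> float:
    # Overall Weighted Average relative to the Naive2 baseline, as in the M4 competition.
    return round((model_mase / naive2_mase + model_smape / naive2_smape) / 2, 3)


# Hypothetical per-group scores:
print(owa(model_mase=1.45, naive2_mase=1.91, model_smape=11.8, naive2_smape=13.5))  # 0.817
```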
diff --git a/benchmark/metalearned/resources/tourism/dataset.py b/benchmark/metalearned/resources/tourism/dataset.py
index 14864a9..f96fc00 100644
--- a/benchmark/metalearned/resources/tourism/dataset.py
+++ b/benchmark/metalearned/resources/tourism/dataset.py
@@ -6,9 +6,14 @@
import numpy as np
import pandas as pd
import patoolib
-
from common.settings import RESOURCES_DIR
-from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Year, Month
+from common.timeseries import (
+ Month,
+ Timeseries,
+ TimeseriesBundle,
+ TimeseriesLoader,
+ Year,
+)
from common.utils import download_url
@@ -29,74 +34,106 @@ def period_map(self):
class TourismDataset(TimeseriesLoader):
def download(self) -> TimeseriesBundle:
archive_file = os.path.join(self.path, 'm3.zip')
- download_url('https://robjhyndman.com/data/27-3-Athanasopoulos1.zip', archive_file)
+ download_url(
+ 'https://robjhyndman.com/data/27-3-Athanasopoulos1.zip', archive_file
+ )
patoolib.extract_archive(archive_file, outdir=self.path)
timeseries = []
# Yearly
- insample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'yearly_in.csv'), header=0)
- outsample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'yearly_oos.csv'), header=0)
+ insample = pd.read_csv(
+ os.path.join(TourismMeta.dataset_path, 'yearly_in.csv'), header=0
+ )
+ outsample = pd.read_csv(
+ os.path.join(TourismMeta.dataset_path, 'yearly_oos.csv'), header=0
+ )
outsampleT = outsample.T
for timeseries_id, ts_row in insample.T.iterrows():
outsample_row = outsampleT.loc[timeseries_id].values
start_date = datetime.strptime(str(int(ts_row[[1]])), '%Y')
- insample_values = ts_row.values[2:2 + int(ts_row[[0]])]
- outsample_values = outsample_row[2:2 + int(outsample_row[0])]
+ insample_values = ts_row.values[2 : 2 + int(ts_row[[0]])]
+ outsample_values = outsample_row[2 : 2 + int(outsample_row[0])]
values = np.concatenate([insample_values, outsample_values])
- timeseries.append(Timeseries(id=timeseries_id,
- start_date=start_date,
- time_unit=Year(),
- frequency=1,
- period=1,
- values=values,
- meta={'seasonal_pattern': 'Yearly'}))
+ timeseries.append(
+ Timeseries(
+ id=timeseries_id,
+ start_date=start_date,
+ time_unit=Year(),
+ frequency=1,
+ period=1,
+ values=values,
+ meta={'seasonal_pattern': 'Yearly'},
+ )
+ )
# Quarterly
- insample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'quarterly_in.csv'), header=0)
- outsample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'quarterly_oos.csv'), header=0)
+ insample = pd.read_csv(
+ os.path.join(TourismMeta.dataset_path, 'quarterly_in.csv'), header=0
+ )
+ outsample = pd.read_csv(
+ os.path.join(TourismMeta.dataset_path, 'quarterly_oos.csv'), header=0
+ )
outsampleT = outsample.T
for timeseries_id, ts_row in insample.T.iterrows():
outsample_row = outsampleT.loc[timeseries_id].values
- start_date = datetime.strptime(f'{str(int(ts_row[[1]]))}-{str((int(ts_row[[2]]) - 1) * 3)}', '%Y-%M')
- insample_values = ts_row.values[3:3 + int(ts_row[[0]])]
- outsample_values = outsample_row[3:3 + int(outsample_row[0])]
+ start_date = datetime.strptime(
+ f'{str(int(ts_row[[1]]))}-{str((int(ts_row[[2]]) - 1) * 3)}', '%Y-%M'
+ )
+ insample_values = ts_row.values[3 : 3 + int(ts_row[[0]])]
+ outsample_values = outsample_row[3 : 3 + int(outsample_row[0])]
values = np.concatenate([insample_values, outsample_values])
- timeseries.append(Timeseries(id=timeseries_id,
- start_date=start_date,
- time_unit=Month(),
- frequency=3,
- period=1,
- values=values,
- meta={'seasonal_pattern': 'Quarterly'}))
+ timeseries.append(
+ Timeseries(
+ id=timeseries_id,
+ start_date=start_date,
+ time_unit=Month(),
+ frequency=3,
+ period=1,
+ values=values,
+ meta={'seasonal_pattern': 'Quarterly'},
+ )
+ )
# Monthly
- insample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'monthly_in.csv'), header=0)
- outsample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'monthly_oos.csv'), header=0)
+ insample = pd.read_csv(
+ os.path.join(TourismMeta.dataset_path, 'monthly_in.csv'), header=0
+ )
+ outsample = pd.read_csv(
+ os.path.join(TourismMeta.dataset_path, 'monthly_oos.csv'), header=0
+ )
outsampleT = outsample.T
for timeseries_id, ts_row in insample.T.iterrows():
outsample_row = outsampleT.loc[timeseries_id].values
- start_date = datetime.strptime(f'{str(int(ts_row[[1]]))}-{str(int(ts_row[[2]]))}', '%Y-%M')
- insample_values = ts_row.values[3:3 + int(ts_row[[0]])]
- outsample_values = outsample_row[3:3 + int(outsample_row[0])]
+ start_date = datetime.strptime(
+ f'{str(int(ts_row[[1]]))}-{str(int(ts_row[[2]]))}', '%Y-%M'
+ )
+ insample_values = ts_row.values[3 : 3 + int(ts_row[[0]])]
+ outsample_values = outsample_row[3 : 3 + int(outsample_row[0])]
values = np.concatenate([insample_values, outsample_values])
- timeseries.append(Timeseries(id=timeseries_id,
- start_date=start_date,
- time_unit=Month(),
- frequency=1,
- period=1,
- values=values,
- meta={'seasonal_pattern': 'Monthly'}))
+ timeseries.append(
+ Timeseries(
+ id=timeseries_id,
+ start_date=start_date,
+ time_unit=Month(),
+ frequency=1,
+ period=1,
+ values=values,
+ meta={'seasonal_pattern': 'Monthly'},
+ )
+ )
return TimeseriesBundle(timeseries)
def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]:
bundle = self.load_cache()
horizons_map = TourismMeta().horizons_map()
- return bundle.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]))
+ return bundle.split(
+ lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])
+ )
if __name__ == '__main__':
diff --git a/benchmark/metalearned/resources/tourism/evaluator.py b/benchmark/metalearned/resources/tourism/evaluator.py
index 1f0df89..0763cbb 100644
--- a/benchmark/metalearned/resources/tourism/evaluator.py
+++ b/benchmark/metalearned/resources/tourism/evaluator.py
@@ -3,8 +3,7 @@
from typing import Callable
import numpy as np
-
-from common.evaluator import Evaluator, EvaluationResult
+from common.evaluator import EvaluationResult, Evaluator
from common.metrics import mape
from common.timeseries import TimeseriesBundle
from resources.tourism.dataset import TourismMeta
@@ -22,8 +21,12 @@ def evaluate(self, forecasts: TimeseriesBundle) -> EvaluationResult:
offset = 0
for sp in TourismMeta.seasonal_patterns:
- target_for_sp = self.test_set.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
- forecast_for_sp = forecasts.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
+ target_for_sp = self.test_set.filter(
+ lambda ts: ts.meta['seasonal_pattern'] == sp
+ )
+ forecast_for_sp = forecasts.filter(
+ lambda ts: ts.meta['seasonal_pattern'] == sp
+ )
target = np.array(target_for_sp.values())
forecast = np.array(forecast_for_sp.values())
@@ -39,5 +42,7 @@ def evaluate(self, forecasts: TimeseriesBundle) -> EvaluationResult:
results[sp] = round(float(np.mean(score)), self.precision)
offset += len(target)
- results['Average'] = round(cumulative_metrics / cumulative_points, self.precision)
+ results['Average'] = round(
+ cumulative_metrics / cumulative_points, self.precision
+ )
return results
diff --git a/benchmark/metalearned/resources/traffic/dataset.py b/benchmark/metalearned/resources/traffic/dataset.py
index 66709ab..1c4ad5b 100644
--- a/benchmark/metalearned/resources/traffic/dataset.py
+++ b/benchmark/metalearned/resources/traffic/dataset.py
@@ -5,12 +5,10 @@
import numpy as np
import patoolib
-from tqdm import tqdm
-
from common.settings import RESOURCES_DIR
-from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Hour
+from common.timeseries import Hour, Timeseries, TimeseriesBundle, TimeseriesLoader
from common.utils import download_url
-
+from tqdm import tqdm
"""
Hourly aggregated dataset from https://archive.ics.uci.edu/ml/datasets/PEMS-SF
@@ -19,6 +17,8 @@
Dataset was also compared with the one built by the TRMF paper's author:
https://github.com/rofuyu/exp-trmf-nips16/blob/master/python/exp-scripts/datasets/download-data.sh
"""
+
+
@dataclass(frozen=True)
class TrafficMeta:
dataset_path = os.path.join(RESOURCES_DIR, 'traffic')
@@ -38,8 +38,10 @@ def download(self) -> TimeseriesBundle:
train_raw_file = os.path.join(self.path, 'PEMS_train')
test_raw_file = os.path.join(self.path, 'PEMS_test')
perm_raw_file = os.path.join(self.path, 'randperm')
- download_url('https://archive.ics.uci.edu/ml/machine-learning-databases/00204/PEMS-SF.zip',
- archive_file)
+ download_url(
+ 'https://archive.ics.uci.edu/ml/machine-learning-databases/00204/PEMS-SF.zip',
+ archive_file,
+ )
patoolib.extract_archive(archive_file, outdir=self.path)
with open(train_raw_file, 'r') as f:
train_raw_data = f.readlines()
@@ -47,7 +49,9 @@ def download(self) -> TimeseriesBundle:
test_raw_data = f.readlines()
with open(perm_raw_file, 'r') as f:
permutations = f.readlines()
- permutations = np.array(permutations[0].rstrip()[1:-1].split(' ')).astype(np.int)
+ permutations = np.array(permutations[0].rstrip()[1:-1].split(' ')).astype(
+            int  # np.int was removed in NumPy 1.24; the builtin int matches the old alias
+ )
raw_data = train_raw_data + test_raw_data
@@ -77,7 +81,9 @@ def download(self) -> TimeseriesBundle:
# - Mar. 8, 2009 - Anomaly
# ------------------------------------------
# Thus 455 - 15 = 440 days from 2008-01-01 to 2008-03-30 (incl.)
- start_date = datetime.strptime('2008-01-02', '%Y-%m-%d') # 2008-01-01 is a holiday
+ start_date = datetime.strptime(
+ '2008-01-02', '%Y-%m-%d'
+ ) # 2008-01-01 is a holiday
current_date = start_date
excluded_dates = [
datetime.strptime('2008-01-21', '%Y-%m-%d'),
@@ -110,18 +116,27 @@ def download(self) -> TimeseriesBundle:
values = np.concatenate([values, daily], axis=1)
else: # should never be in the first 24*7 records.
# fill gaps with same day of previous week.
- values = np.concatenate([values, values[:, -24 * 7 * 6:-24 * 6 * 6]], axis=1)
+ values = np.concatenate(
+ [values, values[:, -24 * 7 * 6 : -24 * 6 * 6]], axis=1
+ )
current_date += timedelta(days=1)
# aggregate 10 minutes events to hourly
- hourly = np.array([list(map(np.mean, zip(*(iter(lane),) * 6))) for lane in tqdm(values)])
- timeseries = [Timeseries(id=str(i),
- start_date=start_date,
- time_unit=Hour(),
- frequency=1,
- period=24 * 7,
- values=values,
- meta={}) for i, values in enumerate(hourly)]
+ hourly = np.array(
+ [list(map(np.mean, zip(*(iter(lane),) * 6))) for lane in tqdm(values)]
+ )
+ timeseries = [
+ Timeseries(
+ id=str(i),
+ start_date=start_date,
+ time_unit=Hour(),
+ frequency=1,
+ period=24 * 7,
+ values=values,
+ meta={},
+ )
+ for i, values in enumerate(hourly)
+ ]
return TimeseriesBundle(timeseries=timeseries)
def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]:
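The one-liner `zip(*(iter(lane),) * 6)` in the traffic loader chunks each lane's 10-minute readings into groups of six and averages them into hourly values. The same idea written out two ways:

```python
import numpy as np

lane = np.arange(12, dtype=float)  # two hours of 10-minute readings

# The grouper trick from the diff: six references to one iterator yield chunks of six.
hourly_zip = [float(np.mean(chunk)) for chunk in zip(*(iter(lane),) * 6)]

# Equivalent reshape-based version, often easier to read.
hourly_reshape = lane.reshape(-1, 6).mean(axis=1).tolist()

print(hourly_zip)      # [2.5, 8.5]
print(hourly_reshape)  # [2.5, 8.5]
```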
diff --git a/benchmark/metalearned/resources/traffic/evaluator.py b/benchmark/metalearned/resources/traffic/evaluator.py
index 08e5ea8..f936f84 100644
--- a/benchmark/metalearned/resources/traffic/evaluator.py
+++ b/benchmark/metalearned/resources/traffic/evaluator.py
@@ -2,8 +2,7 @@
from typing import Callable
import numpy as np
-
-from common.evaluator import Evaluator, EvaluationResult
+from common.evaluator import EvaluationResult, Evaluator
from common.metrics import nd
from common.timeseries import TimeseriesBundle
from common.utils import round_half_up
@@ -15,6 +14,11 @@ class TrafficEvaluator(Evaluator):
precision: int = 2
def evaluate(self, forecasts: TimeseriesBundle) -> EvaluationResult:
- return {'metric': round_half_up(self.metric_fn(np.array(forecasts.values()),
- np.array(self.test_set.values())),
- self.precision)}
+ return {
+ 'metric': round_half_up(
+ self.metric_fn(
+ np.array(forecasts.values()), np.array(self.test_set.values())
+ ),
+ self.precision,
+ )
+ }
diff --git a/benchmark/run.py b/benchmark/run.py
index 9212084..9db5158 100644
--- a/benchmark/run.py
+++ b/benchmark/run.py
@@ -1,60 +1,89 @@
-import numpy as np
+import argparse
import random
-from exp.exp_resolver import resolve_experiment
+import sys
+
+import numpy as np
import torch
-import argparse
-from utils.arg_resolver import resolve_transformer_args, _model_is_transformer, setting_string, resolve_args
-import sys
-sys.path.append("metalearned")
+from exp.exp_resolver import resolve_experiment
+from utils.arg_resolver import (
+ _model_is_transformer,
+ resolve_args,
+ resolve_transformer_args,
+ setting_string,
+)
-def parse():
+sys.path.append('metalearned')
+
+def parse():
parser = argparse.ArgumentParser(
- description='Comparing performance of ForecastPFN to other Time Series Benchmarks')
+ description='Comparing performance of ForecastPFN to other Time Series Benchmarks'
+ )
parser.add_argument('--is_training', type=int, default=1, help='status')
parser.add_argument('--use_gpu', type=bool, default=True, help='status')
parser.add_argument('--itr', type=int, default=1, help='status')
# model settings
- parser.add_argument('--model', type=str, default='ForecastPFN',
- help='model name, options: [ForecastPFN, FEDformer, Autoformer, Informer, Transformer, Arima, Prophet]')
+ parser.add_argument(
+ '--model',
+ type=str,
+ default='ForecastPFN',
+ help='model name, options: [ForecastPFN, FEDformer, Autoformer, Informer, Transformer, Arima, Prophet]',
+ )
# forecasting task
- parser.add_argument('--seq_len', type=int, default=96,
- help='input sequence length')
- parser.add_argument('--label_len', type=int,
- default=48, help='start token length')
- parser.add_argument('--pred_len', type=int, default=96,
- help='prediction sequence length')
-
- parser.add_argument('--time_budget', type=int,
- help='amount of time budget to train the model')
- parser.add_argument('--train_budget', type=int,
- help='length of training sequence')
+ parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
+ parser.add_argument('--label_len', type=int, default=48, help='start token length')
+ parser.add_argument(
+ '--pred_len', type=int, default=96, help='prediction sequence length'
+ )
+
+ parser.add_argument(
+ '--time_budget', type=int, help='amount of time budget to train the model'
+ )
+ parser.add_argument('--train_budget', type=int, help='length of training sequence')
# data loader
- parser.add_argument('--data', type=str,
- default='ETTh1', help='dataset type')
- parser.add_argument('--root_path', type=str,
- default='./dataset/ETT/', help='root path of the data file')
- parser.add_argument('--data_path', type=str,
- default='ETTh1.csv', help='data file')
- parser.add_argument('--target', type=str,
- default='OT', help='name of target column')
- parser.add_argument('--scale', type=bool, default=True,
- help='scale the time series with sklearn.StandardScale()')
+ parser.add_argument('--data', type=str, default='ETTh1', help='dataset type')
+ parser.add_argument(
+ '--root_path',
+ type=str,
+ default='./dataset/ETT/',
+ help='root path of the data file',
+ )
+ parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
+ parser.add_argument(
+ '--target', type=str, default='OT', help='name of target column'
+ )
+ parser.add_argument(
+ '--scale',
+ type=bool,
+ default=True,
+        help='scale the time series with sklearn.preprocessing.StandardScaler()',
+ )
# ForecastPFN
- parser.add_argument('--model_path', type=str, default='s3://realityengines.datasets/forecasting/pretrained/gurnoor/models/20230202-025828/ckpts',
- help='encoder input size')
- parser.add_argument('--scaler', type=str, default='standard',
- help='scale the test series with sklearn.StandardScale()')
+ parser.add_argument(
+ '--model_path',
+ type=str,
+ default='s3://realityengines.datasets/forecasting/pretrained/gurnoor/models/20230202-025828/ckpts',
+        help='path to the pretrained ForecastPFN model checkpoint',
+ )
+ parser.add_argument(
+ '--scaler',
+ type=str,
+ default='standard',
+        help='scale the test series with sklearn.preprocessing.StandardScaler()',
+ )
# Metalearn
- parser.add_argument('--metalearn_freq', type=str,
- help='which type of model should be used for the Metalearn model. Typically M, W, or D.')
+ parser.add_argument(
+ '--metalearn_freq',
+ type=str,
+ help='which type of model should be used for the Metalearn model. Typically M, W, or D.',
+ )
return parser
@@ -71,7 +100,7 @@ def main():
args = resolve_args(args)
if _model_is_transformer(args.model):
args = resolve_transformer_args(args)
-
+
if args.model != 'ForecastPFN':
args.model_name = None
else:
@@ -85,7 +114,6 @@ def main():
args.device_ids = [int(id_) for id_ in device_ids]
args.gpu = args.device_ids[0]
-
print('Args in experiment:')
print(args)
@@ -96,10 +124,14 @@ def main():
# setting record of experiments
setting = setting_string(args, ii)
- print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
+ print(
+ '>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)
+ )
exp.train(setting)
- print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
+ print(
+ '>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)
+ )
exp.test(setting)
torch.cuda.empty_cache()
@@ -107,12 +139,11 @@ def main():
else:
ii = 0
setting = setting_string(args, ii)
-
print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
exp.test(setting, test=1)
torch.cuda.empty_cache()
-if __name__ == "__main__":
+if __name__ == '__main__':
main()
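The reorganised `parse()` keeps the same flag names as before. A trimmed-down, illustrative copy of the parser showing how a typical configuration would be parsed (only a subset of the real flags, with made-up values):

```python
import argparse

# Illustrative subset of the flags defined in parse(); defaults mirror the script.
parser = argparse.ArgumentParser(description='ForecastPFN benchmark (illustrative subset)')
parser.add_argument('--model', type=str, default='ForecastPFN')
parser.add_argument('--seq_len', type=int, default=96)
parser.add_argument('--label_len', type=int, default=48)
parser.add_argument('--pred_len', type=int, default=96)
parser.add_argument('--data', type=str, default='ETTh1')
parser.add_argument('--data_path', type=str, default='ETTh1.csv')

# Simulate a command line; the real script reads sys.argv instead.
args = parser.parse_args(
    ['--model', 'ForecastPFN', '--seq_len', '36', '--label_len', '18', '--pred_len', '14']
)
print(args.model, args.seq_len, args.label_len, args.pred_len)  # ForecastPFN 36 18 14
```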
diff --git a/benchmark/transformer_models/model_resolver.py b/benchmark/transformer_models/model_resolver.py
index bf5e9a2..4a6f4d3 100644
--- a/benchmark/transformer_models/model_resolver.py
+++ b/benchmark/transformer_models/model_resolver.py
@@ -1,17 +1,15 @@
-import pandas as pd
-import numpy as np
-import prophet
import pmdarima
+import prophet
-from transformer_models.models import FEDformer, Autoformer, Informer, Transformer
+from transformer_models.models import Autoformer, FEDformer, Informer, Transformer
-class Arima():
+class Arima:
def __init__(self) -> None:
self.model = pmdarima.auto_arima
-class Prophet():
+class Prophet:
def __init__(self) -> None:
self.model = prophet.Prophet()
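The `Arima` wrapper above only stores `pmdarima.auto_arima`; fitting and forecasting happen elsewhere in the experiment code. A minimal, standalone sketch of how that callable is typically used (requires `pmdarima`; the series and settings are illustrative):

```python
import numpy as np
import pmdarima

# A noisy sine wave standing in for a real benchmark series.
y = np.sin(np.linspace(0, 12 * np.pi, 120)) + np.random.normal(scale=0.1, size=120)

model = pmdarima.auto_arima(y, seasonal=False, suppress_warnings=True)
forecast = model.predict(n_periods=14)
print(len(forecast))  # 14 point forecasts
```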
diff --git a/benchmark/transformer_models/models/Autoformer.py b/benchmark/transformer_models/models/Autoformer.py
index 352ef14..2cc4f94 100644
--- a/benchmark/transformer_models/models/Autoformer.py
+++ b/benchmark/transformer_models/models/Autoformer.py
@@ -5,10 +5,17 @@
import torch
import torch.nn as nn
-import torch.nn.functional as F
-from layers.Embed import DataEmbedding, DataEmbedding_wo_pos
+
from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer
-from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp
+from layers.Autoformer_EncDec import (
+ Decoder,
+ DecoderLayer,
+ Encoder,
+ EncoderLayer,
+ my_Layernorm,
+ series_decomp,
+)
+from layers.Embed import DataEmbedding_wo_pos
class Model(nn.Module):
@@ -16,6 +23,7 @@ class Model(nn.Module):
Autoformer is the first method to achieve the series-wise connection,
with inherent O(LlogL) complexity
"""
+
def __init__(self, configs):
super(Model, self).__init__()
self.seq_len = configs.seq_len
@@ -30,40 +38,69 @@ def __init__(self, configs):
# Embedding
# The series-wise connection inherently contains the sequential information.
# Thus, we can discard the position embedding of transformers.
- self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq,
- configs.dropout)
- self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq,
- configs.dropout)
+ self.enc_embedding = DataEmbedding_wo_pos(
+ configs.enc_in,
+ configs.d_model,
+ configs.embed,
+ configs.freq,
+ configs.dropout,
+ )
+ self.dec_embedding = DataEmbedding_wo_pos(
+ configs.dec_in,
+ configs.d_model,
+ configs.embed,
+ configs.freq,
+ configs.dropout,
+ )
# Encoder
self.encoder = Encoder(
[
EncoderLayer(
AutoCorrelationLayer(
- AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout,
- output_attention=configs.output_attention),
- configs.d_model, configs.n_heads),
+ AutoCorrelation(
+ False,
+ configs.factor,
+ attention_dropout=configs.dropout,
+ output_attention=configs.output_attention,
+ ),
+ configs.d_model,
+ configs.n_heads,
+ ),
configs.d_model,
configs.d_ff,
moving_avg=configs.moving_avg,
dropout=configs.dropout,
- activation=configs.activation
- ) for l in range(configs.e_layers)
+ activation=configs.activation,
+ )
+ for l in range(configs.e_layers)
],
- norm_layer=my_Layernorm(configs.d_model)
+ norm_layer=my_Layernorm(configs.d_model),
)
# Decoder
self.decoder = Decoder(
[
DecoderLayer(
AutoCorrelationLayer(
- AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout,
- output_attention=False),
- configs.d_model, configs.n_heads),
+ AutoCorrelation(
+ True,
+ configs.factor,
+ attention_dropout=configs.dropout,
+ output_attention=False,
+ ),
+ configs.d_model,
+ configs.n_heads,
+ ),
AutoCorrelationLayer(
- AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout,
- output_attention=False),
- configs.d_model, configs.n_heads),
+ AutoCorrelation(
+ False,
+ configs.factor,
+ attention_dropout=configs.dropout,
+ output_attention=False,
+ ),
+ configs.d_model,
+ configs.n_heads,
+ ),
configs.d_model,
configs.c_out,
configs.d_ff,
@@ -74,29 +111,46 @@ def __init__(self, configs):
for l in range(configs.d_layers)
],
norm_layer=my_Layernorm(configs.d_model),
- projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
+ projection=nn.Linear(configs.d_model, configs.c_out, bias=True),
)
- def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
- enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
+ def forward(
+ self,
+ x_enc,
+ x_mark_enc,
+ x_dec,
+ x_mark_dec,
+ enc_self_mask=None,
+ dec_self_mask=None,
+ dec_enc_mask=None,
+ ):
# decomp init
mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
- zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device)
+ zeros = torch.zeros(
+ [x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device
+ )
seasonal_init, trend_init = self.decomp(x_enc)
# decoder input
- trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
- seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1)
+ trend_init = torch.cat([trend_init[:, -self.label_len :, :], mean], dim=1)
+ seasonal_init = torch.cat(
+ [seasonal_init[:, -self.label_len :, :], zeros], dim=1
+ )
# enc
enc_out = self.enc_embedding(x_enc, x_mark_enc)
enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
# dec
dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
- seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask,
- trend=trend_init)
+ seasonal_part, trend_part = self.decoder(
+ dec_out,
+ enc_out,
+ x_mask=dec_self_mask,
+ cross_mask=dec_enc_mask,
+ trend=trend_init,
+ )
# final
dec_out = trend_part + seasonal_part
if self.output_attention:
- return dec_out[:, -self.pred_len:, :], attns
+ return dec_out[:, -self.pred_len :, :], attns
else:
- return dec_out[:, -self.pred_len:, :]
\ No newline at end of file
+ return dec_out[:, -self.pred_len :, :]
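The reformatted `forward` still prepares the decoder input in two halves: the last `label_len` steps of the decomposed trend and seasonal components, extended with the input mean (trend) and zeros (seasonal) over the prediction horizon. A shape-only sketch, with a simple moving average standing in for `series_decomp`:

```python
import torch

batch, seq_len, label_len, pred_len, channels = 2, 36, 18, 14, 7
x_enc = torch.randn(batch, seq_len, channels)

# Moving-average decomposition standing in for layers.Autoformer_EncDec.series_decomp.
kernel = 25
front = x_enc[:, :1, :].repeat(1, (kernel - 1) // 2, 1)
back = x_enc[:, -1:, :].repeat(1, kernel // 2, 1)
padded = torch.cat([front, x_enc, back], dim=1)
trend = padded.unfold(dimension=1, size=kernel, step=1).mean(dim=-1)
seasonal = x_enc - trend

# Decoder initialisation as in Model.forward above.
mean = x_enc.mean(dim=1, keepdim=True).repeat(1, pred_len, 1)
zeros = torch.zeros(batch, pred_len, channels)
trend_init = torch.cat([trend[:, -label_len:, :], mean], dim=1)
seasonal_init = torch.cat([seasonal[:, -label_len:, :], zeros], dim=1)
print(trend_init.shape, seasonal_init.shape)  # both torch.Size([2, 32, 7])
```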
diff --git a/benchmark/transformer_models/models/FEDformer.py b/benchmark/transformer_models/models/FEDformer.py
index 21c11ce..baf281d 100644
--- a/benchmark/transformer_models/models/FEDformer.py
+++ b/benchmark/transformer_models/models/FEDformer.py
@@ -1,23 +1,29 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
-from layers.Embed import DataEmbedding, DataEmbedding_wo_pos
-from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer
+
+from layers.AutoCorrelation import AutoCorrelationLayer
+from layers.Autoformer_EncDec import (
+ Decoder,
+ DecoderLayer,
+ Encoder,
+ EncoderLayer,
+ my_Layernorm,
+ series_decomp,
+ series_decomp_multi,
+)
+from layers.Embed import DataEmbedding_wo_pos
from layers.FourierCorrelation import FourierBlock, FourierCrossAttention
from layers.MultiWaveletCorrelation import MultiWaveletCross, MultiWaveletTransform
-from layers.SelfAttention_Family import FullAttention, ProbAttention
-from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp, series_decomp_multi
-import math
-import numpy as np
-
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
class Model(nn.Module):
"""
     FEDformer performs the attention mechanism in the frequency domain and achieves O(N) complexity
"""
+
def __init__(self, configs):
super(Model, self).__init__()
self.version = configs.version
@@ -38,70 +44,94 @@ def __init__(self, configs):
# Embedding
# The series-wise connection inherently contains the sequential information.
# Thus, we can discard the position embedding of transformers.
- self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq,
- configs.dropout)
- self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq,
- configs.dropout)
+ self.enc_embedding = DataEmbedding_wo_pos(
+ configs.enc_in,
+ configs.d_model,
+ configs.embed,
+ configs.freq,
+ configs.dropout,
+ )
+ self.dec_embedding = DataEmbedding_wo_pos(
+ configs.dec_in,
+ configs.d_model,
+ configs.embed,
+ configs.freq,
+ configs.dropout,
+ )
if configs.version == 'Wavelets':
- encoder_self_att = MultiWaveletTransform(ich=configs.d_model, L=configs.L, base=configs.base)
- decoder_self_att = MultiWaveletTransform(ich=configs.d_model, L=configs.L, base=configs.base)
- decoder_cross_att = MultiWaveletCross(in_channels=configs.d_model,
- out_channels=configs.d_model,
- seq_len_q=self.seq_len // 2 + self.pred_len,
- seq_len_kv=self.seq_len,
- modes=configs.modes,
- ich=configs.d_model,
- base=configs.base,
- activation=configs.cross_activation)
+ encoder_self_att = MultiWaveletTransform(
+ ich=configs.d_model, L=configs.L, base=configs.base
+ )
+ decoder_self_att = MultiWaveletTransform(
+ ich=configs.d_model, L=configs.L, base=configs.base
+ )
+ decoder_cross_att = MultiWaveletCross(
+ in_channels=configs.d_model,
+ out_channels=configs.d_model,
+ seq_len_q=self.seq_len // 2 + self.pred_len,
+ seq_len_kv=self.seq_len,
+ modes=configs.modes,
+ ich=configs.d_model,
+ base=configs.base,
+ activation=configs.cross_activation,
+ )
else:
- encoder_self_att = FourierBlock(in_channels=configs.d_model,
- out_channels=configs.d_model,
- seq_len=self.seq_len,
- modes=configs.modes,
- mode_select_method=configs.mode_select)
- decoder_self_att = FourierBlock(in_channels=configs.d_model,
- out_channels=configs.d_model,
- seq_len=self.seq_len//2+self.pred_len,
- modes=configs.modes,
- mode_select_method=configs.mode_select)
- decoder_cross_att = FourierCrossAttention(in_channels=configs.d_model,
- out_channels=configs.d_model,
- seq_len_q=self.seq_len//2+self.pred_len,
- seq_len_kv=self.seq_len,
- modes=configs.modes,
- mode_select_method=configs.mode_select)
+ encoder_self_att = FourierBlock(
+ in_channels=configs.d_model,
+ out_channels=configs.d_model,
+ seq_len=self.seq_len,
+ modes=configs.modes,
+ mode_select_method=configs.mode_select,
+ )
+ decoder_self_att = FourierBlock(
+ in_channels=configs.d_model,
+ out_channels=configs.d_model,
+ seq_len=self.seq_len // 2 + self.pred_len,
+ modes=configs.modes,
+ mode_select_method=configs.mode_select,
+ )
+ decoder_cross_att = FourierCrossAttention(
+ in_channels=configs.d_model,
+ out_channels=configs.d_model,
+ seq_len_q=self.seq_len // 2 + self.pred_len,
+ seq_len_kv=self.seq_len,
+ modes=configs.modes,
+ mode_select_method=configs.mode_select,
+ )
# Encoder
- enc_modes = int(min(configs.modes, configs.seq_len//2))
- dec_modes = int(min(configs.modes, (configs.seq_len//2+configs.pred_len)//2))
+ enc_modes = int(min(configs.modes, configs.seq_len // 2))
+ dec_modes = int(
+ min(configs.modes, (configs.seq_len // 2 + configs.pred_len) // 2)
+ )
print('enc_modes: {}, dec_modes: {}'.format(enc_modes, dec_modes))
self.encoder = Encoder(
[
EncoderLayer(
AutoCorrelationLayer(
- encoder_self_att,
- configs.d_model, configs.n_heads),
-
+ encoder_self_att, configs.d_model, configs.n_heads
+ ),
configs.d_model,
configs.d_ff,
moving_avg=configs.moving_avg,
dropout=configs.dropout,
- activation=configs.activation
- ) for l in range(configs.e_layers)
+ activation=configs.activation,
+ )
+ for l in range(configs.e_layers)
],
- norm_layer=my_Layernorm(configs.d_model)
+ norm_layer=my_Layernorm(configs.d_model),
)
# Decoder
self.decoder = Decoder(
[
DecoderLayer(
AutoCorrelationLayer(
- decoder_self_att,
- configs.d_model, configs.n_heads),
+ decoder_self_att, configs.d_model, configs.n_heads
+ ),
AutoCorrelationLayer(
- decoder_cross_att,
- configs.d_model, configs.n_heads),
+ decoder_cross_att, configs.d_model, configs.n_heads
+ ),
configs.d_model,
configs.c_out,
configs.d_ff,
@@ -112,35 +142,53 @@ def __init__(self, configs):
for l in range(configs.d_layers)
],
norm_layer=my_Layernorm(configs.d_model),
- projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
+ projection=nn.Linear(configs.d_model, configs.c_out, bias=True),
)
- def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
- enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
+ def forward(
+ self,
+ x_enc,
+ x_mark_enc,
+ x_dec,
+ x_mark_dec,
+ enc_self_mask=None,
+ dec_self_mask=None,
+ dec_enc_mask=None,
+ ):
# decomp init
mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
- zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]]).to(device) # cuda()
+        # The `zeros` tensor created here in the original code was never used:
+        # the seasonal branch below pads with F.pad instead of concatenating
+        # zeros, so the dead allocation is dropped rather than left as a bare call.
seasonal_init, trend_init = self.decomp(x_enc)
# decoder input
- trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
- seasonal_init = F.pad(seasonal_init[:, -self.label_len:, :], (0, 0, 0, self.pred_len))
+ trend_init = torch.cat([trend_init[:, -self.label_len :, :], mean], dim=1)
+ seasonal_init = F.pad(
+ seasonal_init[:, -self.label_len :, :], (0, 0, 0, self.pred_len)
+ )
# enc
enc_out = self.enc_embedding(x_enc, x_mark_enc)
enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
# dec
dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
- seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask,
- trend=trend_init)
+ seasonal_part, trend_part = self.decoder(
+ dec_out,
+ enc_out,
+ x_mask=dec_self_mask,
+ cross_mask=dec_enc_mask,
+ trend=trend_init,
+ )
# final
dec_out = trend_part + seasonal_part
if self.output_attention:
- return dec_out[:, -self.pred_len:, :], attns
+ return dec_out[:, -self.pred_len :, :], attns
else:
- return dec_out[:, -self.pred_len:, :] # [B, L, D]
+ return dec_out[:, -self.pred_len :, :] # [B, L, D]
if __name__ == '__main__':
+
class Configs(object):
ab = 0
modes = 32
@@ -177,7 +225,7 @@ class Configs(object):
enc = torch.randn([3, configs.seq_len, 7])
enc_mark = torch.randn([3, configs.seq_len, 4])
- dec = torch.randn([3, configs.seq_len//2+configs.pred_len, 7])
- dec_mark = torch.randn([3, configs.seq_len//2+configs.pred_len, 4])
+ dec = torch.randn([3, configs.seq_len // 2 + configs.pred_len, 7])
+ dec_mark = torch.randn([3, configs.seq_len // 2 + configs.pred_len, 4])
out = model.forward(enc, enc_mark, dec, dec_mark)
print(out)
diff --git a/benchmark/transformer_models/models/Informer.py b/benchmark/transformer_models/models/Informer.py
index 9dfecfe..5d19385 100644
--- a/benchmark/transformer_models/models/Informer.py
+++ b/benchmark/transformer_models/models/Informer.py
@@ -1,59 +1,93 @@
import torch
import torch.nn as nn
-import torch.nn.functional as F
-from utils.masking import TriangularCausalMask, ProbMask
-from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
-from layers.SelfAttention_Family import FullAttention, ProbAttention, AttentionLayer
+
from layers.Embed import DataEmbedding
-import numpy as np
+from layers.SelfAttention_Family import AttentionLayer, ProbAttention
+from layers.Transformer_EncDec import (
+ ConvLayer,
+ Decoder,
+ DecoderLayer,
+ Encoder,
+ EncoderLayer,
+)
class Model(nn.Module):
"""
     Informer with ProbSparse attention in O(LlogL) complexity
"""
+
def __init__(self, configs):
super(Model, self).__init__()
self.pred_len = configs.pred_len
self.output_attention = configs.output_attention
# Embedding
- self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
- configs.dropout)
- self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq,
- configs.dropout)
+ self.enc_embedding = DataEmbedding(
+ configs.enc_in,
+ configs.d_model,
+ configs.embed,
+ configs.freq,
+ configs.dropout,
+ )
+ self.dec_embedding = DataEmbedding(
+ configs.dec_in,
+ configs.d_model,
+ configs.embed,
+ configs.freq,
+ configs.dropout,
+ )
# Encoder
self.encoder = Encoder(
[
EncoderLayer(
AttentionLayer(
- ProbAttention(False, configs.factor, attention_dropout=configs.dropout,
- output_attention=configs.output_attention),
- configs.d_model, configs.n_heads),
+ ProbAttention(
+ False,
+ configs.factor,
+ attention_dropout=configs.dropout,
+ output_attention=configs.output_attention,
+ ),
+ configs.d_model,
+ configs.n_heads,
+ ),
configs.d_model,
configs.d_ff,
dropout=configs.dropout,
- activation=configs.activation
- ) for l in range(configs.e_layers)
+ activation=configs.activation,
+ )
+ for l in range(configs.e_layers)
],
- [
- ConvLayer(
- configs.d_model
- ) for l in range(configs.e_layers - 1)
- ] if configs.distil else None,
- norm_layer=torch.nn.LayerNorm(configs.d_model)
+ [ConvLayer(configs.d_model) for l in range(configs.e_layers - 1)]
+ if configs.distil
+ else None,
+ norm_layer=torch.nn.LayerNorm(configs.d_model),
)
# Decoder
self.decoder = Decoder(
[
DecoderLayer(
AttentionLayer(
- ProbAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False),
- configs.d_model, configs.n_heads),
+ ProbAttention(
+ True,
+ configs.factor,
+ attention_dropout=configs.dropout,
+ output_attention=False,
+ ),
+ configs.d_model,
+ configs.n_heads,
+ ),
AttentionLayer(
- ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False),
- configs.d_model, configs.n_heads),
+ ProbAttention(
+ False,
+ configs.factor,
+ attention_dropout=configs.dropout,
+ output_attention=False,
+ ),
+ configs.d_model,
+ configs.n_heads,
+ ),
configs.d_model,
configs.d_ff,
dropout=configs.dropout,
@@ -62,19 +96,28 @@ def __init__(self, configs):
for l in range(configs.d_layers)
],
norm_layer=torch.nn.LayerNorm(configs.d_model),
- projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
+ projection=nn.Linear(configs.d_model, configs.c_out, bias=True),
)
- def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
- enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
-
+ def forward(
+ self,
+ x_enc,
+ x_mark_enc,
+ x_dec,
+ x_mark_dec,
+ enc_self_mask=None,
+ dec_self_mask=None,
+ dec_enc_mask=None,
+ ):
enc_out = self.enc_embedding(x_enc, x_mark_enc)
enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
dec_out = self.dec_embedding(x_dec, x_mark_dec)
- dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
+ dec_out = self.decoder(
+ dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask
+ )
if self.output_attention:
- return dec_out[:, -self.pred_len:, :], attns
+ return dec_out[:, -self.pred_len :, :], attns
else:
- return dec_out[:, -self.pred_len:, :]
+ return dec_out[:, -self.pred_len :, :]
diff --git a/benchmark/transformer_models/models/Transformer.py b/benchmark/transformer_models/models/Transformer.py
index 88c4a88..0d1a48c 100644
--- a/benchmark/transformer_models/models/Transformer.py
+++ b/benchmark/transformer_models/models/Transformer.py
@@ -1,50 +1,83 @@
import torch
import torch.nn as nn
-import torch.nn.functional as F
-from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
-from layers.SelfAttention_Family import FullAttention, AttentionLayer
+
from layers.Embed import DataEmbedding
+from layers.SelfAttention_Family import AttentionLayer, FullAttention
+from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer
class Model(nn.Module):
"""
Vanilla Transformer with O(L^2) complexity
"""
+
def __init__(self, configs):
super(Model, self).__init__()
self.pred_len = configs.pred_len
self.output_attention = configs.output_attention
# Embedding
- self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
- configs.dropout)
- self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq,
- configs.dropout)
+ self.enc_embedding = DataEmbedding(
+ configs.enc_in,
+ configs.d_model,
+ configs.embed,
+ configs.freq,
+ configs.dropout,
+ )
+ self.dec_embedding = DataEmbedding(
+ configs.dec_in,
+ configs.d_model,
+ configs.embed,
+ configs.freq,
+ configs.dropout,
+ )
# Encoder
self.encoder = Encoder(
[
EncoderLayer(
AttentionLayer(
- FullAttention(False, configs.factor, attention_dropout=configs.dropout,
- output_attention=configs.output_attention), configs.d_model, configs.n_heads),
+ FullAttention(
+ False,
+ configs.factor,
+ attention_dropout=configs.dropout,
+ output_attention=configs.output_attention,
+ ),
+ configs.d_model,
+ configs.n_heads,
+ ),
configs.d_model,
configs.d_ff,
dropout=configs.dropout,
- activation=configs.activation
- ) for l in range(configs.e_layers)
+ activation=configs.activation,
+ )
+ for l in range(configs.e_layers)
],
- norm_layer=torch.nn.LayerNorm(configs.d_model)
+ norm_layer=torch.nn.LayerNorm(configs.d_model),
)
# Decoder
self.decoder = Decoder(
[
DecoderLayer(
AttentionLayer(
- FullAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False),
- configs.d_model, configs.n_heads),
+ FullAttention(
+ True,
+ configs.factor,
+ attention_dropout=configs.dropout,
+ output_attention=False,
+ ),
+ configs.d_model,
+ configs.n_heads,
+ ),
AttentionLayer(
- FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False),
- configs.d_model, configs.n_heads),
+ FullAttention(
+ False,
+ configs.factor,
+ attention_dropout=configs.dropout,
+ output_attention=False,
+ ),
+ configs.d_model,
+ configs.n_heads,
+ ),
configs.d_model,
configs.d_ff,
dropout=configs.dropout,
@@ -53,19 +86,28 @@ def __init__(self, configs):
for l in range(configs.d_layers)
],
norm_layer=torch.nn.LayerNorm(configs.d_model),
- projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
+ projection=nn.Linear(configs.d_model, configs.c_out, bias=True),
)
- def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
- enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
-
+ def forward(
+ self,
+ x_enc,
+ x_mark_enc,
+ x_dec,
+ x_mark_dec,
+ enc_self_mask=None,
+ dec_self_mask=None,
+ dec_enc_mask=None,
+ ):
enc_out = self.enc_embedding(x_enc, x_mark_enc)
enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
dec_out = self.dec_embedding(x_dec, x_mark_dec)
- dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
+ dec_out = self.decoder(
+ dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask
+ )
if self.output_attention:
- return dec_out[:, -self.pred_len:, :], attns
+ return dec_out[:, -self.pred_len :, :], attns
else:
- return dec_out[:, -self.pred_len:, :]
+ return dec_out[:, -self.pred_len :, :]
diff --git a/benchmark/utils/arg_resolver.py b/benchmark/utils/arg_resolver.py
index 3b73ca1..32e1ad2 100644
--- a/benchmark/utils/arg_resolver.py
+++ b/benchmark/utils/arg_resolver.py
@@ -1,21 +1,34 @@
-from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+
def _model_is_transformer(model):
- if model in ['FEDformer', 'FEDformer-f', 'FEDformer-w', 'FEDformer_Meta', 'Autoformer', 'Informer', 'Transformer']:
+ if model in [
+ 'FEDformer',
+ 'FEDformer-f',
+ 'FEDformer-w',
+ 'FEDformer_Meta',
+ 'Autoformer',
+ 'Informer',
+ 'Transformer',
+ ]:
return True
return False
+
def setting_string(args, ii):
- setting = '{}_{}_sl{}_ll{}_pl{}_timebudget_{}_trainbudget_{}_model-path_{}_itr_{}'.format(
- args.model,
- args.data,
- args.seq_len,
- args.label_len,
- args.pred_len,
- args.time_budget,
- args.train_budget,
- args.model_name,
- ii)
+ setting = (
+ '{}_{}_sl{}_ll{}_pl{}_timebudget_{}_trainbudget_{}_model-path_{}_itr_{}'.format(
+ args.model,
+ args.data,
+ args.seq_len,
+ args.label_len,
+ args.pred_len,
+ args.time_budget,
+ args.train_budget,
+ args.model_name,
+ ii,
+ )
+ )
return setting
@@ -36,7 +49,6 @@ def resolve_args(args):
return args
-
def resolve_transformer_args(args):
args.mode_select = 'random'
args.modes = 64
diff --git a/benchmark/utils/masking.py b/benchmark/utils/masking.py
index 4f768bd..6d68561 100644
--- a/benchmark/utils/masking.py
+++ b/benchmark/utils/masking.py
@@ -1,39 +1,49 @@
-import torch
-import numpy as np
import math
-class TriangularCausalMask():
- def __init__(self, B, L, device="cpu"):
+import numpy as np
+import torch
+
+
+class TriangularCausalMask:
+ def __init__(self, B, L, device='cpu'):
mask_shape = [B, 1, L, L]
with torch.no_grad():
- self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
+ self._mask = torch.triu(
+ torch.ones(mask_shape, dtype=torch.bool), diagonal=1
+ ).to(device)
@property
def mask(self):
return self._mask
-class ProbMask():
- def __init__(self, B, H, L, index, scores, device="cpu"):
+class ProbMask:
+ def __init__(self, B, H, L, index, scores, device='cpu'):
_mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
_mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
- indicator = _mask_ex[torch.arange(B)[:, None, None],
- torch.arange(H)[None, :, None],
- index, :].to(device)
+ indicator = _mask_ex[
+ torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :
+ ].to(device)
self._mask = indicator.view(scores.shape).to(device)
@property
def mask(self):
return self._mask
-class LocalMask():
- def __init__(self, B, L,S,device="cpu"):
+
+class LocalMask:
+ def __init__(self, B, L, S, device='cpu'):
mask_shape = [B, 1, L, S]
with torch.no_grad():
self.len = math.ceil(np.log2(L))
- self._mask1 = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
- self._mask2 = ~torch.triu(torch.ones(mask_shape,dtype=torch.bool),diagonal=-self.len).to(device)
- self._mask = self._mask1+self._mask2
+ self._mask1 = torch.triu(
+ torch.ones(mask_shape, dtype=torch.bool), diagonal=1
+ ).to(device)
+ self._mask2 = ~torch.triu(
+ torch.ones(mask_shape, dtype=torch.bool), diagonal=-self.len
+ ).to(device)
+ self._mask = self._mask1 + self._mask2
+
@property
def mask(self):
- return self._mask
\ No newline at end of file
+ return self._mask
diff --git a/benchmark/utils/metrics.py b/benchmark/utils/metrics.py
index 3ab2e13..2befaa7 100644
--- a/benchmark/utils/metrics.py
+++ b/benchmark/utils/metrics.py
@@ -4,7 +4,9 @@
def RSE(pred, true):
- return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2))
+ return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(
+ np.sum((true - true.mean()) ** 2)
+ )
def CORR(pred, true):
@@ -42,20 +44,20 @@ def metric(pred, true):
return mae, mse, rmse, mape, mspe
+
def smape(y_true, y_pred):
- """ Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`.
- `loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred), axis=-1)`
- Args:
- y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
- y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
- Returns:
- Symmetric mean absolute percentage error values. shape = `[batch_size, d0, ..
- dN-1]`.
- """
+ """Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`.
+    `loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred)), axis=-1)`
+ Args:
+ y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
+ y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+ Returns:
+ Symmetric mean absolute percentage error values. shape = `[batch_size, d0, ..
+ dN-1]`.
+ """
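+    # e.g. y_true = 100, y_pred = 110: |y_true - y_pred| / (y_true + y_pred) = 10 / 210 ~ 0.048,
+    # so sMAPE ~ 200 * 0.048 ~ 9.5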
y_pred = tf.convert_to_tensor(y_pred)
y_true = tf.cast(y_true, y_pred.dtype)
diff = tf.abs(
- (y_true - y_pred) /
- backend.maximum(y_true + y_pred, backend.epsilon())
+ (y_true - y_pred) / backend.maximum(y_true + y_pred, backend.epsilon())
)
return 200.0 * backend.mean(diff, axis=-1)
diff --git a/benchmark/utils/timefeatures.py b/benchmark/utils/timefeatures.py
index 0e93870..165df2d 100644
--- a/benchmark/utils/timefeatures.py
+++ b/benchmark/utils/timefeatures.py
@@ -14,63 +14,63 @@ def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
pass
def __repr__(self):
- return self.__class__.__name__ + "()"
+ return self.__class__.__name__ + '()'
class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return [x.second / 59.0 - 0.5 if x!=0 else 0 for x in index]
+ return [x.second / 59.0 - 0.5 if x != 0 else 0 for x in index]
class MinuteOfHour(TimeFeature):
"""Minute of hour encoded as value between [-0.5, 0.5]"""
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return [x.minute / 59.0 - 0.5 if x!=0 else 0 for x in index]
+ return [x.minute / 59.0 - 0.5 if x != 0 else 0 for x in index]
class HourOfDay(TimeFeature):
"""Hour of day encoded as value between [-0.5, 0.5]"""
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return [x.hour / 23.0 - 0.5 if x!=0 else 0 for x in index]
+ return [x.hour / 23.0 - 0.5 if x != 0 else 0 for x in index]
class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return [x.dayofweek / 6.0 - 0.5 if x!=0 else 0 for x in index]
+ return [x.dayofweek / 6.0 - 0.5 if x != 0 else 0 for x in index]
class DayOfMonth(TimeFeature):
"""Day of month encoded as value between [-0.5, 0.5]"""
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return [(x.day - 1) / 30.0 - 0.5 if x!=0 else 0 for x in index]
+ return [(x.day - 1) / 30.0 - 0.5 if x != 0 else 0 for x in index]
class DayOfYear(TimeFeature):
"""Day of year encoded as value between [-0.5, 0.5]"""
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return [(x.dayofyear - 1) / 365.0 - 0.5 if x!=0 else 0 for x in index]
+ return [(x.dayofyear - 1) / 365.0 - 0.5 if x != 0 else 0 for x in index]
class MonthOfYear(TimeFeature):
"""Month of year encoded as value between [-0.5, 0.5]"""
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return [(x.month - 1) / 11.0 - 0.5 if x!=0 else 0 for x in index]
+ return [(x.month - 1) / 11.0 - 0.5 if x != 0 else 0 for x in index]
class WeekOfYear(TimeFeature):
"""Week of year encoded as value between [-0.5, 0.5]"""
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return [(x.isocalendar().week - 1) / 52.0 - 0.5 if x!=0 else 0 for x in index]
+ return [(x.isocalendar().week - 1) / 52.0 - 0.5 if x != 0 else 0 for x in index]
def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
diff --git a/benchmark/utils/tools.py b/benchmark/utils/tools.py
index e74b399..d3e018b 100644
--- a/benchmark/utils/tools.py
+++ b/benchmark/utils/tools.py
@@ -1,7 +1,8 @@
+import time
+
+import matplotlib.pyplot as plt
import numpy as np
import torch
-import matplotlib.pyplot as plt
-import time
plt.switch_backend('agg')
@@ -11,11 +12,8 @@ def adjust_learning_rate(optimizer, epoch, args):
if args.lradj == 'type1':
lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
elif args.lradj == 'type2':
- lr_adjust = {
- 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
- 10: 5e-7, 15: 1e-7, 20: 5e-8
- }
- elif args.lradj =='type3':
+ lr_adjust = {2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 10: 5e-7, 15: 1e-7, 20: 5e-8}
+ elif args.lradj == 'type3':
lr_adjust = {epoch: args.learning_rate}
elif args.lradj == 'type4':
lr_adjust = {epoch: args.learning_rate * (0.9 ** ((epoch - 1) // 1))}
@@ -53,13 +51,16 @@ def __call__(self, val_loss, model, path):
def save_checkpoint(self, val_loss, model, path, epoch=None):
if self.verbose:
- print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
+ print(
+ f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...'
+ )
if epoch:
torch.save(model.state_dict(), path + '/' + f'checkpoint_{epoch}.pth')
else:
torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
self.val_loss_min = val_loss
+
class TimeBudget:
def __init__(self, budget):
self.budget = budget
@@ -83,14 +84,16 @@ def step(self):
self.end_timer()
return
+
class dotdict(dict):
"""dot.notation access to dictionary attributes"""
+
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
-class StandardScaler():
+class StandardScaler:
def __init__(self, mean, std):
self.mean = mean
self.std = std
diff --git a/src/build_validation_dataset.py b/src/build_validation_dataset.py
index c365865..8e648ca 100644
--- a/src/build_validation_dataset.py
+++ b/src/build_validation_dataset.py
@@ -2,22 +2,27 @@
Module to transform different real-world datasets
into the format used for our synthetic dataset
"""
-import pandas as pd
-import numpy as np
-import tensorflow as tf
import csv
from datetime import datetime
+from functools import reduce
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
from dateutil.relativedelta import relativedelta
from tqdm import trange
-from functools import reduce
HISTORY = 100
HORIZON = 10
NUM_TASKS = 3
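+# Each series is cut into windows of HISTORY input points plus HORIZON target points,
+# and every window is converted into inputs for each of the NUM_TASKS prediction tasks.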
+
def compute_time_features(ts: np.ndarray):
ts = pd.to_datetime(ts)
- return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1)
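+    # shape (len(ts), 5): year, month, day, day_of_week (1-7), day_of_year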
+ return np.stack(
+ [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1
+ )
+
def build_input(ts, target_full, task=1):
# horizon should be fixed as defined in model
@@ -28,11 +33,12 @@ def build_input(ts, target_full, task=1):
target = target_full[:-HORIZON]
target_to_predict = target_full[-HORIZON:]
-
if task == 2:
target_to_predict = np.cumsum(target_to_predict) / (1 + np.arange(HORIZON))
elif task == 3:
- target_to_predict = [np.std(target_to_predict[:i+1]) for i in range(len(target_to_predict))]
+ target_to_predict = [
+ np.std(target_to_predict[: i + 1]) for i in range(len(target_to_predict))
+ ]
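+    # task 1 predicts the raw horizon values, task 2 their running mean,
+    # and task 3 their running standard deviation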
# this is the target value of the data before the horizon
target = tf.convert_to_tensor(target, dtype=tf.float32)
@@ -49,15 +55,20 @@ def build_input(ts, target_full, task=1):
return {
'ts': tf.repeat(tf.expand_dims(date_tensor, axis=0), [HORIZON], axis=0),
-
# repeat the before horizon values horizon number of times,
# so that for each of the predictions for each target_ts, you
# have an available set of features
'history': tf.repeat(tf.expand_dims(target, axis=0), [HORIZON], axis=0),
'target_ts': tf.expand_dims(target_dates, axis=1),
- 'task': tf.fill([HORIZON,], task)
+ 'task': tf.fill(
+ [
+ HORIZON,
+ ],
+ task,
+ ),
}, tf.expand_dims(tf.convert_to_tensor(target_to_predict, dtype=tf.float32), axis=1)
+
def read_timeseries_file(filename):
"""
Function to read the standard datasets for time series.
@@ -72,6 +83,7 @@ def read_timeseries_file(filename):
return lines
+
def get_dates(num_days, freq):
dates = []
@@ -84,14 +96,15 @@ def get_dates(num_days, freq):
for _ in range(num_days):
dates.append(pd.to_datetime(current_date))
if freq == 'daily':
- current_date += relativedelta(days = 1)
+ current_date += relativedelta(days=1)
elif freq == 'weekly':
- current_date += relativedelta(weeks = 1)
+ current_date += relativedelta(weeks=1)
elif freq == 'monthly':
- current_date += relativedelta(months = 1)
+ current_date += relativedelta(months=1)
return dates
+
def split_dataset(dataset):
"""
    If the size of the dataset is n * (HISTORY + HORIZON), we split it
@@ -105,8 +118,8 @@ def split_dataset(dataset):
# otherwise, slide a window starting from the first point
# with a stride of HISTORY // 3 until the elements in
# window are less than HISTORY + HORIZON
- while i == 0 or i+HISTORY+HORIZON < len(dataset):
- mini_datasets.append(dataset[i:i+HISTORY+HORIZON])
+ while i == 0 or i + HISTORY + HORIZON < len(dataset):
+ mini_datasets.append(dataset[i : i + HISTORY + HORIZON])
i += HISTORY // 3
return mini_datasets
@@ -122,11 +135,11 @@ def build_dataset(dataset, freq):
    # TODO: iterate over the full dataset (len(dataset)) instead of a fixed number of series
    # (limited to 100 here for testing, as dataset creation takes time)
for i in trange(100):
- # for i in trange(len(dataset)):
+ # for i in trange(len(dataset)):
for X in split_dataset(dataset[i]):
dates = get_dates(len(X), freq)
- for task in range(1, NUM_TASKS+1):
+ for task in range(1, NUM_TASKS + 1):
built_input, output = build_input(dates, X, task=task)
ts_list += [ts for ts in built_input['ts']]
@@ -136,15 +149,21 @@ def build_dataset(dataset, freq):
outputs += [y for y in output]
- dataset_frame = tf.data.Dataset.from_tensor_slices(({
- 'ts': ts_list,
- 'history': history_list,
- 'target_ts': target_ts_list,
- 'task': task_list
- }, outputs))
+ dataset_frame = tf.data.Dataset.from_tensor_slices(
+ (
+ {
+ 'ts': ts_list,
+ 'history': history_list,
+ 'target_ts': target_ts_list,
+ 'task': task_list,
+ },
+ outputs,
+ )
+ )
return dataset_frame
+
def construct_dataframe(train_dataset_and_freq):
"""
Function to construct the dataframe in accordance with the training format
@@ -155,22 +174,29 @@ def construct_dataframe(train_dataset_and_freq):
return reduce(lambda df1, df2: df1.concatenate(df2), dfs)
+
def get_validation_dataset():
"""
Function to read data from various sources and feed them as input to
build a dataframe for getting the validation dataset
"""
- wikiweb_train = read_timeseries_file("/home/ubuntu/notebooks/forecasting/pretraining/wikiweb_train.csv")
- tourism_train = read_timeseries_file("/home/ubuntu/notebooks/forecasting/pretraining/tourism_train.csv")
- exchange_rate_train = read_timeseries_file("/home/ubuntu/notebooks/forecasting/pretraining/exchange_rate_train.csv")
- m3_train = read_timeseries_file("/home/ubuntu/notebooks/forecasting/pretraining/m3_train.csv")
+ wikiweb_train = read_timeseries_file(
+ '/home/ubuntu/notebooks/forecasting/pretraining/wikiweb_train.csv'
+ )
+ tourism_train = read_timeseries_file(
+ '/home/ubuntu/notebooks/forecasting/pretraining/tourism_train.csv'
+ )
+ read_timeseries_file(
+ '/home/ubuntu/notebooks/forecasting/pretraining/exchange_rate_train.csv'
+ )
+ read_timeseries_file('/home/ubuntu/notebooks/forecasting/pretraining/m3_train.csv')
# add different datasets and their frequency here
# TODO: addition of monthly dataset shoots up
# validation loss to ~40k. Need to see how to fix that
train_dataset_and_freq = [
- (wikiweb_train, "daily"),
- (tourism_train, "monthly"),
+ (wikiweb_train, 'daily'),
+ (tourism_train, 'monthly'),
# (exchange_rate_train, "daily"),
# (m3_train, "monthly")
]
@@ -179,8 +205,10 @@ def get_validation_dataset():
# print(len(list(constructed_dataframe)))
return constructed_dataframe
+
def main():
get_validation_dataset()
+
if __name__ == '__main__':
main()
diff --git a/src/evaluate_public_datasets/evaluate.py b/src/evaluate_public_datasets/evaluate.py
index cf3055d..e7252d6 100644
--- a/src/evaluate_public_datasets/evaluate.py
+++ b/src/evaluate_public_datasets/evaluate.py
@@ -1,26 +1,29 @@
"""
Module to evaluate the model on real-world datasets
"""
-import yaml
import argparse
-import tensorflow as tf
-import tensorflow_io
-import pandas as pd
+
import numpy as np
+import pandas as pd
+import tensorflow as tf
+import yaml
from process_data import read_timeseries_file
-from tqdm import trange
from scipy.stats.mstats import winsorize
-from sklearn.metrics import mean_squared_error, mean_absolute_error
-from sklearn.preprocessing import StandardScaler, MinMaxScaler
-
+from sklearn.metrics import mean_absolute_error, mean_squared_error
+from sklearn.preprocessing import MinMaxScaler
+from tqdm import trange
HISTORY = 100
+
def compute_time_features(ts: np.ndarray):
ts = pd.to_datetime(ts)
- return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1)
+ return np.stack(
+ [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1
+ )
# return np.stack([ts.minute, ts.hour, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1)
+
def build_input(ts, target, task=1):
horizon = len(ts) - len(target)
all_dates = tf.numpy_function(compute_time_features, inp=[ts], Tout=tf.int64)
@@ -41,31 +44,39 @@ def build_input(ts, target, task=1):
target = target[-HISTORY:]
return {
'ts': tf.repeat(tf.expand_dims(date_tensor, axis=0), [horizon], axis=0),
-
# repeat the before horizon values horizon number of times,
# so that for each of the predictions for each target_ts, you
# have an available set of features
'history': tf.repeat(tf.expand_dims(target, axis=0), [horizon], axis=0),
'target_ts': tf.expand_dims(target_dates, axis=1),
- 'task': tf.fill([horizon,], task),
+ 'task': tf.fill(
+ [
+ horizon,
+ ],
+ task,
+ ),
}
+
def evaluate_model(config, train_data, test_data, freq, name):
pretrained = tf.keras.models.load_model(config['model_path'])
BATCH_SIZE = 100
- item_id, pred_start, actual, pred = [], [], [], []
+ _item_id, _pred_start, actual, pred = [], [], [], []
stds = []
wapes = []
for i in trange(0, len(train_data), BATCH_SIZE):
- test_points = train_data[i:(i+BATCH_SIZE)]
+ test_points = train_data[i : (i + BATCH_SIZE)]
for idx, current_point in enumerate(test_points):
-
# timestamps of history
- history_ts = pd.date_range(start='2010-01-01', periods=len(train_data[i+idx] + test_data[i+idx]), freq=freq)
+ history_ts = pd.date_range(
+ start='2010-01-01',
+ periods=len(train_data[i + idx] + test_data[i + idx]),
+ freq=freq,
+ )
# values of history
- history = train_data[i+idx]
+ history = train_data[i + idx]
# mean of history's last 6 values
history_mean = np.nanmean(history[-6:])
@@ -74,7 +85,7 @@ def evaluate_model(config, train_data, test_data, freq, name):
history_std = np.nanstd(history[-6:])
            # local scale used to normalize the history (recent mean + std, plus a small constant to avoid division by zero)
- local_scale = (history_mean + history_std + 1e-4)
+ local_scale = history_mean + history_std + 1e-4
# change history based on local scale, to normalize it between 0 and 1
history = np.clip(history / local_scale, a_min=0, a_max=1)
@@ -87,19 +98,26 @@ def evaluate_model(config, train_data, test_data, freq, name):
pred_vals = pretrained(build_input(history_ts, history, task=1))
# get scaled mean based on the given history
- scaled_vals = (pred_vals['result'].numpy().reshape(-1) * pred_vals['scale'].numpy().reshape(-1)) * local_scale
-
- if np.mean(np.array(test_data[i+idx])):
- wape = np.mean(np.abs(scaled_vals - np.array(test_data[i+idx]))) / np.mean(np.array(test_data[i+idx]))
+ scaled_vals = (
+ pred_vals['result'].numpy().reshape(-1)
+ * pred_vals['scale'].numpy().reshape(-1)
+ ) * local_scale
+
+ if np.mean(np.array(test_data[i + idx])):
+ wape = np.mean(
+ np.abs(scaled_vals - np.array(test_data[i + idx]))
+ ) / np.mean(np.array(test_data[i + idx]))
wapes.append(wape)
- assert len(scaled_vals) == len(test_data[i+idx])
+ assert len(scaled_vals) == len(test_data[i + idx])
scaler = MinMaxScaler()
- scaler.fit(np.array(train_data[i+idx]).reshape(-1, 1))
+ scaler.fit(np.array(train_data[i + idx]).reshape(-1, 1))
predicted_scaled = scaler.transform(np.array(scaled_vals).reshape(-1, 1))
- actual_scaled = scaler.transform(np.array(test_data[i+idx]).reshape(-1, 1))
+ actual_scaled = scaler.transform(
+ np.array(test_data[i + idx]).reshape(-1, 1)
+ )
stds.append(np.std(actual_scaled))
for pred_val, actual_val in zip(predicted_scaled, actual_scaled):
@@ -108,31 +126,29 @@ def evaluate_model(config, train_data, test_data, freq, name):
pred.append(pred_val)
actual.append(actual_val)
-
-
- eval_clipped_df = pd.DataFrame(dict(
- actual=actual,
- pred=pred
- ))
+ eval_clipped_df = pd.DataFrame(dict(actual=actual, pred=pred))
eval_clipped_df = eval_clipped_df.assign(
cmape=lambda df: np.abs(df.actual - df.pred) / df.actual
).assign(
winsorized_cmape=lambda df: winsorize(df.cmape, (0.01, 0.01)),
- squashed_cmape=lambda df: np.where(df.cmape > 1, 1 + np.log(df.cmape), df.cmape)
+ squashed_cmape=lambda df: np.where(
+ df.cmape > 1, 1 + np.log(df.cmape), df.cmape
+ ),
)
print(eval_clipped_df[(eval_clipped_df.actual > 0)].describe())
# print(wapes)
# print(np.nanmean(wapes))
- print("MAE:", mean_absolute_error(actual, pred))
- print("MSE:", mean_squared_error(actual, pred))
+ print('MAE:', mean_absolute_error(actual, pred))
+ print('MSE:', mean_squared_error(actual, pred))
print(np.mean(stds))
+
def main():
parser = argparse.ArgumentParser()
- parser.add_argument("-c", "--config", required=True, help="Path to config file")
+ parser.add_argument('-c', '--config', required=True, help='Path to config file')
args = parser.parse_args()
with open(args.config) as config_file:
@@ -141,10 +157,8 @@ def main():
train_data = read_timeseries_file(config['train_file'])
test_data = read_timeseries_file(config['test_file'])
-
evaluate_model(config, train_data, test_data, config['freq'], config['name'])
-
if __name__ == '__main__':
main()
diff --git a/src/evaluate_public_datasets/process_data.py b/src/evaluate_public_datasets/process_data.py
index 043c75d..fbf58b7 100644
--- a/src/evaluate_public_datasets/process_data.py
+++ b/src/evaluate_public_datasets/process_data.py
@@ -3,10 +3,10 @@
them as a tfrecords file
"""
import csv
-import tensorflow as tf
-import numpy as np
+
import pandas as pd
+
def read_timeseries_file(filename):
"""
Function to read the standard datasets for time series.
@@ -27,4 +27,3 @@ def generate_tf_test_examples(name, train_data, test_data, freq):
dates = pd.date_range(start='2010-01-01', periods=len_data, freq=freq)
return name, dates, train_data, test_data
-
diff --git a/src/evaluation/evaluate.py b/src/evaluation/evaluate.py
index b3158cd..d337d60 100644
--- a/src/evaluation/evaluate.py
+++ b/src/evaluation/evaluate.py
@@ -2,17 +2,17 @@
Module to evaluate on the customer dataset
"""
-import yaml
import argparse
-import pandas as pd
+
import numpy as np
+import pandas as pd
+import reainternal.mllibs.pipelinelib as PL
import tensorflow as tf
-import tensorflow_io
+import yaml
+from prepare_dataset import build_input
from reainternal import environment
-import reainternal.mllibs.pipelinelib as PL
from scipy.stats.mstats import winsorize
from tqdm import trange
-from prepare_dataset import build_input
def evaluate(config):
@@ -29,12 +29,19 @@ def evaluate(config):
BATCH_SIZE = 100
item_id, pred_start, actual, pred = [], [], [], []
for i in trange(0, len(record_index), BATCH_SIZE):
- test_points = list(model_info.prepared_dataset_instance.get_prediction_records(record_index[i:(i + BATCH_SIZE)]))
+ test_points = list(
+ model_info.prepared_dataset_instance.get_prediction_records(
+ record_index[i : (i + BATCH_SIZE)]
+ )
+ )
for current_point in test_points:
# contains the history of available values and the targets
- prediction_record, _ = model_info.serving_dataset_instance.dataset_class.prepare_data_for_prediction(
- model_info.serving_dataset_instance,
- current_point.model_input)
+ (
+ prediction_record,
+ _,
+ ) = model_info.serving_dataset_instance.dataset_class.prepare_data_for_prediction(
+ model_info.serving_dataset_instance, current_point.model_input
+ )
# timestamps of history
history_ts = prediction_record[ts_col]
@@ -49,7 +56,7 @@ def evaluate(config):
history_std = np.nanstd(history[-6:])
        # local scale used to normalize the history (recent mean + std, plus a small constant to avoid division by zero)
- local_scale = (history_mean + history_std + 1e-4)
+ local_scale = history_mean + history_std + 1e-4
# change history based on local scale, to normalize it between 0 and 1
history = np.clip(history / local_scale, a_min=0, a_max=1)
@@ -58,34 +65,38 @@ def evaluate(config):
pred_mean = pretrained(build_input(history_ts, history, task=2))
# get scaled mean based on the given history
- scaled_mean = (pred_mean['result'].numpy().reshape(-1) * pred_mean['scale'].numpy().reshape(-1)) * local_scale
+ scaled_mean = (
+ pred_mean['result'].numpy().reshape(-1)
+ * pred_mean['scale'].numpy().reshape(-1)
+ ) * local_scale
item_id.append(current_point.test_info[0])
pred_start.append(current_point.test_info[1])
actual.append(np.mean(current_point.actual[target_col]))
pred.append(scaled_mean[-1])
- eval_clipped_df = pd.DataFrame(dict(
- item_id=item_id,
- pred_start=pred_start,
- actual=actual,
- pred=pred
- ))
+ eval_clipped_df = pd.DataFrame(
+ dict(item_id=item_id, pred_start=pred_start, actual=actual, pred=pred)
+ )
eval_clipped_df = eval_clipped_df.assign(
cmape=lambda df: np.abs(df.actual - df.pred) / df.actual
).assign(
winsorized_cmape=lambda df: winsorize(df.cmape, (0.01, 0.01)),
- squashed_cmape=lambda df: np.where(df.cmape > 1, 1 + np.log(df.cmape), df.cmape)
+ squashed_cmape=lambda df: np.where(
+ df.cmape > 1, 1 + np.log(df.cmape), df.cmape
+ ),
)
- return eval_clipped_df[(eval_clipped_df.actual > 0) & (eval_clipped_df.pred_start == '2021-06-30T00:00:00')].describe()
-
+ return eval_clipped_df[
+ (eval_clipped_df.actual > 0)
+ & (eval_clipped_df.pred_start == '2021-06-30T00:00:00')
+ ].describe()
def main():
parser = argparse.ArgumentParser()
- parser.add_argument("-c", "--config", required=True, help="Path to config file")
+ parser.add_argument('-c', '--config', required=True, help='Path to config file')
args = parser.parse_args()
with open(args.config) as config_file:
@@ -96,5 +107,6 @@ def main():
results_df = evaluate(config)
print(results_df)
+
if __name__ == '__main__':
- main()
\ No newline at end of file
+ main()
diff --git a/src/evaluation/prepare_dataset.py b/src/evaluation/prepare_dataset.py
index e69f5e9..80de27f 100644
--- a/src/evaluation/prepare_dataset.py
+++ b/src/evaluation/prepare_dataset.py
@@ -1,16 +1,19 @@
"""
Module to prepare customer dataset for evaluation
"""
-import pandas as pd
import numpy as np
+import pandas as pd
import tensorflow as tf
-import tensorflow_io
HISTORY = 100
+
def compute_time_features(ts: np.ndarray):
ts = pd.to_datetime(ts)
- return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1)
+ return np.stack(
+ [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1
+ )
+
def build_input(ts, target, task=1):
horizon = len(ts) - len(target)
@@ -31,11 +34,15 @@ def build_input(ts, target, task=1):
date_tensor = date_tensor[-HISTORY:]
return {
'ts': tf.repeat(tf.expand_dims(date_tensor, axis=0), [horizon], axis=0),
-
# repeat the before horizon values horizon number of times,
# so that for each of the predictions for each target_ts, you
# have an available set of features
'history': tf.repeat(tf.expand_dims(target, axis=0), [horizon], axis=0),
'target_ts': tf.expand_dims(target_dates, axis=1),
- 'task': tf.fill([horizon,], task),
+ 'task': tf.fill(
+ [
+ horizon,
+ ],
+ task,
+ ),
}
diff --git a/src/prepare_public_datasets/constants.py b/src/prepare_public_datasets/constants.py
index c14c76e..d81da53 100644
--- a/src/prepare_public_datasets/constants.py
+++ b/src/prepare_public_datasets/constants.py
@@ -1,2 +1,2 @@
CONTEXT_LENGTH = 200
-WINDOW_STRIDE = 30
\ No newline at end of file
+WINDOW_STRIDE = 30
diff --git a/src/prepare_public_datasets/prepare.py b/src/prepare_public_datasets/prepare.py
index bca22d7..eb30b10 100644
--- a/src/prepare_public_datasets/prepare.py
+++ b/src/prepare_public_datasets/prepare.py
@@ -2,16 +2,18 @@
Module to prepare public datasets for training
"""
-import csv
-import yaml
import argparse
+import csv
+from tempfile import NamedTemporaryFile
+
import numpy as np
import pandas as pd
import tensorflow as tf
-from tempfile import NamedTemporaryFile
+import yaml
+from constants import CONTEXT_LENGTH, WINDOW_STRIDE
from reainternal.cloud import CloudLocation
from tqdm import tqdm
-from constants import *
+
def read_timeseries_file(filename):
"""
@@ -27,6 +29,7 @@ def read_timeseries_file(filename):
return lines
+
def generate_tf_train_examples(name, train_data_list, freq):
"""
Method to generate the examples from train data
@@ -36,7 +39,7 @@ def generate_tf_train_examples(name, train_data_list, freq):
i = len(train_data)
while i > 0:
- train_data = train_data[max(i-CONTEXT_LENGTH, 0):i]
+ train_data = train_data[max(i - CONTEXT_LENGTH, 0) : i]
if len(train_data) < CONTEXT_LENGTH:
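+                # left-pad short windows with zeros so every example has exactly CONTEXT_LENGTH points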
train_data = [0] * (CONTEXT_LENGTH - len(train_data)) + train_data
@@ -53,23 +56,24 @@ def generate_tf_train_examples(name, train_data_list, freq):
print(train_data)
yield tf.train.Example(
- features=tf.train.Features(
- feature={
- "id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[name.encode()])),
- "ts": tf.train.Feature(
- int64_list=tf.train.Int64List(
- value=dates.astype(np.int64)
- )
- ),
- "y": tf.train.Feature(
- float_list=tf.train.FloatList(value=train_data)
- ),
- "noise": tf.train.Feature(
- float_list=tf.train.FloatList(value=noise)
- ),
- }
- )
+ features=tf.train.Features(
+ feature={
+ 'id': tf.train.Feature(
+ bytes_list=tf.train.BytesList(value=[name.encode()])
+ ),
+ 'ts': tf.train.Feature(
+ int64_list=tf.train.Int64List(value=dates.astype(np.int64))
+ ),
+ 'y': tf.train.Feature(
+ float_list=tf.train.FloatList(value=train_data)
+ ),
+ 'noise': tf.train.Feature(
+ float_list=tf.train.FloatList(value=noise)
+ ),
+ }
)
+ )
+
def save_tf_records(prefix: str, dest: str, it):
"""
@@ -77,35 +81,40 @@ def save_tf_records(prefix: str, dest: str, it):
"""
with NamedTemporaryFile() as tfile:
with tf.io.TFRecordWriter(
- tfile.name, options=tf.io.TFRecordOptions(compression_type="GZIP")
+ tfile.name, options=tf.io.TFRecordOptions(compression_type='GZIP')
) as writer:
for record in tqdm(it):
writer.write(record.SerializeToString())
tfile.seek(0)
CloudLocation(prefix + dest).copy_from_file(tfile)
+
def save_tf_dataset(prefix: str, dataset_name: str, data: list, freq: str):
"""
Generate dataset and save as tf records
"""
save_tf_records(
prefix,
- f"{dataset_name}.tfrecords",
- generate_tf_train_examples(dataset_name, data, freq)
+ f'{dataset_name}.tfrecords',
+ generate_tf_train_examples(dataset_name, data, freq),
)
- print(f"Written to file {dataset_name}.tfrecords")
+ print(f'Written to file {dataset_name}.tfrecords')
+
def main():
parser = argparse.ArgumentParser()
- parser.add_argument("-c", "--config", required=True, help="Path to config file")
+ parser.add_argument('-c', '--config', required=True, help='Path to config file')
args = parser.parse_args()
with open(args.config) as config_file:
config = yaml.load(config_file, yaml.loader.SafeLoader)
train_data = read_timeseries_file(config['train_path'])
- save_tf_dataset(config['prefix'], config['dataset_name'], train_data, config['freq'])
+ save_tf_dataset(
+ config['prefix'], config['dataset_name'], train_data, config['freq']
+ )
+
if __name__ == '__main__':
- main()
\ No newline at end of file
+ main()
diff --git a/src/synthetic_generation/config_variables.py b/src/synthetic_generation/config_variables.py
index 9e8b280..55382d8 100644
--- a/src/synthetic_generation/config_variables.py
+++ b/src/synthetic_generation/config_variables.py
@@ -2,6 +2,7 @@
Module containing configuration settings for the script
"""
+
class Config:
frequencies = None
frequency_names = None
@@ -11,14 +12,35 @@ class Config:
@classmethod
def set_freq_variables(cls, is_sub_day):
if is_sub_day:
- cls.frequencies = [("min", 1/1440), ("H", 1/24), ("D", 1), ("W", 7), ("MS", 30), ("Y", 12)]
- cls.frequency_names = ["minute", "hourly", "daily", "weekly", "monthly", "yearly"]
- cls.freq_and_index = (("minute", 0), ("hourly", 1), ("daily", 2), ("weekly", 3), ("monthly", 4), ("yearly", 5))
+ cls.frequencies = [
+ ('min', 1 / 1440),
+ ('H', 1 / 24),
+ ('D', 1),
+ ('W', 7),
+ ('MS', 30),
+ ('Y', 12),
+ ]
+ cls.frequency_names = [
+ 'minute',
+ 'hourly',
+ 'daily',
+ 'weekly',
+ 'monthly',
+ 'yearly',
+ ]
+ cls.freq_and_index = (
+ ('minute', 0),
+ ('hourly', 1),
+ ('daily', 2),
+ ('weekly', 3),
+ ('monthly', 4),
+ ('yearly', 5),
+ )
else:
- cls.frequencies = [("D", 1), ("W", 7), ("MS", 30)]
- cls.frequency_names = ["daily", "weekly", "monthly"]
- cls.freq_and_index = (("daily", 0), ("weekly", 1), ("monthly", 2))
+ cls.frequencies = [('D', 1), ('W', 7), ('MS', 30)]
+ cls.frequency_names = ['daily', 'weekly', 'monthly']
+ cls.freq_and_index = (('daily', 0), ('weekly', 1), ('monthly', 2))
@classmethod
def set_transition(cls, transition):
- cls.transition = transition
\ No newline at end of file
+ cls.transition = transition
diff --git a/src/synthetic_generation/constants.py b/src/synthetic_generation/constants.py
index 505c9d7..1c9712e 100644
--- a/src/synthetic_generation/constants.py
+++ b/src/synthetic_generation/constants.py
@@ -3,28 +3,29 @@
"""
from datetime import date
+
import tensorflow as tf
-BASE_START = date.fromisoformat("1885-01-01").toordinal()
-BASE_END = date.fromisoformat("2023-12-31").toordinal() + 1
+BASE_START = date.fromisoformat('1885-01-01').toordinal()
+BASE_END = date.fromisoformat('2023-12-31').toordinal() + 1
PRODUCT_SCHEMA = {
- "doc": "Timeseries sample",
- "name": "TimeseriesSample",
- "type": "record",
- "fields": [
- {"name": "id", "type": "string"},
- {"name": "ts", "type": {"type": "int", "logicalType": "date"}},
- {"name": "y", "type": ["null", "float"]},
- {"name": "noise", "type": ["float"]}
+ 'doc': 'Timeseries sample',
+ 'name': 'TimeseriesSample',
+ 'type': 'record',
+ 'fields': [
+ {'name': 'id', 'type': 'string'},
+ {'name': 'ts', 'type': {'type': 'int', 'logicalType': 'date'}},
+ {'name': 'y', 'type': ['null', 'float']},
+ {'name': 'noise', 'type': ['float']},
],
}
CONTEXT_LENGTH = 1_000
TF_SCHEMA = {
- "id": tf.io.FixedLenFeature([], dtype=tf.string),
- "ts": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.int64),
- "y": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32),
- "noise": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32)
+ 'id': tf.io.FixedLenFeature([], dtype=tf.string),
+ 'ts': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.int64),
+ 'y': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32),
+ 'noise': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32),
}
diff --git a/src/synthetic_generation/generate_series.py b/src/synthetic_generation/generate_series.py
index 7f6b6f8..39cab33 100644
--- a/src/synthetic_generation/generate_series.py
+++ b/src/synthetic_generation/generate_series.py
@@ -2,19 +2,21 @@
Module to generate synthetic series
"""
+from datetime import date
+
import numpy as np
import pandas as pd
-from datetime import date
-from pandas.tseries.frequencies import to_offset
-from constants import *
from config_variables import Config
+from constants import BASE_END, BASE_START, CONTEXT_LENGTH
from generate_series_components import make_series
-from utils import sample_scale, get_transition_coefficients
-from series_config import ComponentScale, SeriesConfig, ComponentNoise
+from pandas.tseries.frequencies import to_offset
from scipy.stats import beta
+from series_config import ComponentNoise, ComponentScale, SeriesConfig
+from utils import get_transition_coefficients, sample_scale
+
def __generate(
- n = 100,
+ n=100,
freq_index: int = None,
start: pd.Timestamp = None,
options: dict = {},
@@ -31,22 +33,22 @@ def __generate(
# annual, monthly, weekly, hourly and minutely components
a, m, w, h, minute = 0.0, 0.0, 0.0, 0.0, 0.0
- if freq == "min":
+ if freq == 'min':
minute = np.random.uniform(0.0, 1.0)
h = np.random.uniform(0.0, 0.2)
- elif freq == "H":
+ elif freq == 'H':
minute = np.random.uniform(0.0, 0.2)
h = np.random.uniform(0.0, 1)
- elif freq == "D":
+ elif freq == 'D':
w = np.random.uniform(0.0, 1.0)
m = np.random.uniform(0.0, 0.2)
- elif freq == "W":
+ elif freq == 'W':
m = np.random.uniform(0.0, 0.3)
a = np.random.uniform(0.0, 0.3)
- elif freq == "MS":
+ elif freq == 'MS':
w = np.random.uniform(0.0, 0.1)
a = np.random.uniform(0.0, 0.5)
- elif freq == "Y":
+ elif freq == 'Y':
w = np.random.uniform(0.0, 0.2)
a = np.random.uniform(0.0, 1)
else:
@@ -54,7 +56,9 @@ def __generate(
if start is None:
# start = pd.Timestamp(date.fromordinal(np.random.randint(BASE_START, BASE_END)))
- start = pd.Timestamp(date.fromordinal(int((BASE_START - BASE_END)*beta.rvs(5,1)+BASE_START)))
+ start = pd.Timestamp(
+ date.fromordinal(int((BASE_START - BASE_END) * beta.rvs(5, 1) + BASE_START))
+ )
scale_config = ComponentScale(
1.0,
@@ -64,7 +68,7 @@ def __generate(
m=m,
w=w,
minute=minute,
- h=h
+ h=h,
)
offset_config = ComponentScale(
@@ -77,17 +81,16 @@ def __generate(
)
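+    # Weibull-distributed noise with shape k and median 1; scale controls the noise amplitude (0 = no noise)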
noise_config = ComponentNoise(
- k=np.random.uniform(1, 5),
- median=1,
- scale=sample_scale()
+ k=np.random.uniform(1, 5), median=1, scale=sample_scale()
)
cfg = SeriesConfig(scale_config, offset_config, noise_config)
return cfg, make_series(cfg, to_offset(freq), n, start, options, random_walk)
+
def generate(
- n = 100,
+ n=100,
freq_index: int = None,
start: pd.Timestamp = None,
options: dict = {},
@@ -106,10 +109,8 @@ def generate(
else:
values = series1['values']
- dataframe_data = {
- 'series_values': values,
- 'noise': series1['noise']
- }
-
- return cfg1, pd.DataFrame(data=dataframe_data, index=series1['dates'])#.clip(lower=0.0)
+ dataframe_data = {'series_values': values, 'noise': series1['noise']}
+ return cfg1, pd.DataFrame(
+ data=dataframe_data, index=series1['dates']
+ ) # .clip(lower=0.0)
diff --git a/src/synthetic_generation/generate_series_components.py b/src/synthetic_generation/generate_series_components.py
index 7bc88b2..900d5e2 100644
--- a/src/synthetic_generation/generate_series_components.py
+++ b/src/synthetic_generation/generate_series_components.py
@@ -1,12 +1,13 @@
"""
Module to generate trend and seasonal components of series
"""
+from collections import defaultdict
+
import numpy as np
import pandas as pd
-from constants import *
from series_config import SeriesConfig
-from utils import shift_axis, weibull_noise, get_random_walk_series
-from collections import defaultdict
+from utils import get_random_walk_series, shift_axis, weibull_noise
+
def make_series_trend(series: SeriesConfig, dates: pd.DatetimeIndex):
"""
@@ -25,6 +26,7 @@ def make_series_trend(series: SeriesConfig, dates: pd.DatetimeIndex):
return values
+
def get_freq_component(dates_feature: pd.Index, n_harmonics: int, n_total: int):
"""
Method to get systematic movement of values across time
@@ -38,7 +40,7 @@ def get_freq_component(dates_feature: pd.Index, n_harmonics: int, n_total: int):
:return: numpy array of shape dates_feature.shape containing
sinusoidal value for a given point in time
"""
- harmonics = list(range(1, n_harmonics+1))
+ harmonics = list(range(1, n_harmonics + 1))
# initialize sin and cosine coefficients with 0
sin_coef = np.zeros(n_harmonics)
@@ -46,8 +48,8 @@ def get_freq_component(dates_feature: pd.Index, n_harmonics: int, n_total: int):
# choose coefficients inversely proportional to the harmonic
for idx, harmonic in enumerate(harmonics):
- sin_coef[idx] = np.random.normal(scale = 1 / harmonic)
- cos_coef[idx] = np.random.normal(scale = 1 / harmonic)
+ sin_coef[idx] = np.random.normal(scale=1 / harmonic)
+ cos_coef[idx] = np.random.normal(scale=1 / harmonic)
# normalize the coefficients such that their sum of squares is 1
coef_sq_sum = np.sqrt(np.sum(np.square(sin_coef)) + np.sum(np.square(cos_coef)))
@@ -58,8 +60,12 @@ def get_freq_component(dates_feature: pd.Index, n_harmonics: int, n_total: int):
    # consists of patterns of varying frequency
return_val = 0
for idx, harmonic in enumerate(harmonics):
- return_val += sin_coef[idx] * np.sin(2 * np.pi * harmonic * dates_feature / n_total)
- return_val += cos_coef[idx] * np.cos(2 * np.pi * harmonic * dates_feature / n_total)
+ return_val += sin_coef[idx] * np.sin(
+ 2 * np.pi * harmonic * dates_feature / n_total
+ )
+ return_val += cos_coef[idx] * np.cos(
+ 2 * np.pi * harmonic * dates_feature / n_total
+ )
return return_val
@@ -75,24 +81,35 @@ def make_series_seasonal(series: SeriesConfig, dates: pd.DatetimeIndex):
seasonal_components = defaultdict(lambda: 1)
if series.scale.minute is not None:
- seasonal_components['minute'] = 1 + series.scale.minute * get_freq_component(dates.minute, 10, 60)
+ seasonal_components['minute'] = 1 + series.scale.minute * get_freq_component(
+ dates.minute, 10, 60
+ )
seasonal *= seasonal_components['minute']
if series.scale.h is not None:
- seasonal_components['h'] = 1 + series.scale.h * get_freq_component(dates.hour, 10, 24)
+ seasonal_components['h'] = 1 + series.scale.h * get_freq_component(
+ dates.hour, 10, 24
+ )
seasonal *= seasonal_components['h']
if series.scale.a is not None:
- seasonal_components['a'] = 1 + series.scale.a * get_freq_component(dates.month, 6, 12)
+ seasonal_components['a'] = 1 + series.scale.a * get_freq_component(
+ dates.month, 6, 12
+ )
seasonal *= seasonal_components['a']
if series.scale.m is not None:
- seasonal_components['m'] = 1 + series.scale.m * get_freq_component(dates.day, 10, 30.5)
+ seasonal_components['m'] = 1 + series.scale.m * get_freq_component(
+ dates.day, 10, 30.5
+ )
seasonal *= seasonal_components['m']
if series.scale.w is not None:
- seasonal_components['w'] = 1 + series.scale.w * get_freq_component(dates.dayofweek, 4, 7)
+ seasonal_components['w'] = 1 + series.scale.w * get_freq_component(
+ dates.dayofweek, 4, 7
+ )
seasonal *= seasonal_components['w']
seasonal_components['seasonal'] = seasonal
return seasonal_components
+
def make_series(
series: SeriesConfig,
freq: pd.DateOffset,
@@ -120,7 +137,7 @@ def make_series(
weibull_noise_term = weibull_noise(
k=series.noise_config.k,
median=series.noise_config.median,
- length=len(values)
+ length=len(values),
)
# approximating estimated value from median
@@ -128,13 +145,15 @@ def make_series(
# expected value of this term is 0
# for no noise, scale is set to 0
- scaled_noise_term = series.noise_config.scale * (weibull_noise_term - noise_expected_val)
+ scaled_noise_term = series.noise_config.scale * (
+ weibull_noise_term - noise_expected_val
+ )
dataframe_data = {
**values_seasonal,
'values': values,
'noise': 1 + scaled_noise_term,
- 'dates': dates
+ 'dates': dates,
}
return dataframe_data
diff --git a/src/synthetic_generation/main.py b/src/synthetic_generation/main.py
index df34c78..ee35e21 100644
--- a/src/synthetic_generation/main.py
+++ b/src/synthetic_generation/main.py
@@ -2,17 +2,17 @@
Module to generate a synthetic dataset for pre-training
a time series forecasting model
"""
-import yaml
import argparse
-import pandas as pd
+
+import yaml
+from config_variables import Config
from tf_generate_series import (
- save_tf_records,
- tf_generate_n,
convert_tf_to_rows,
- load_tf_dataset,
generate_product_input,
+ load_tf_dataset,
+ save_tf_records,
+ tf_generate_n,
)
-from config_variables import Config
def save_tf_dataset(prefix: str, version: str, options: dict, num_series: int = 10_000):
@@ -20,10 +20,10 @@ def save_tf_dataset(prefix: str, version: str, options: dict, num_series: int =
Generate dataset and save as tf records
"""
for freq, freq_index in Config.freq_and_index:
- print("Frequency: " + freq)
+ print('Frequency: ' + freq)
save_tf_records(
prefix,
- f"{version}/{freq}.tfrecords",
+ f'{version}/{freq}.tfrecords',
tf_generate_n(
N=num_series,
freq_index=freq_index,
@@ -38,32 +38,35 @@ def generate_product_input_dataset(prefix, version):
Load dataset from tf records and save as avro files
"""
for freq in Config.frequency_names:
- print("Frequency: " + freq)
+ print('Frequency: ' + freq)
generate_product_input(
prefix,
- f"{version}/{freq}.avro",
+ f'{version}/{freq}.avro',
convert_tf_to_rows(
- load_tf_dataset(prefix, f"{version}/{freq}.tfrecords").as_numpy_iterator()
+ load_tf_dataset(
+ prefix, f'{version}/{freq}.tfrecords'
+ ).as_numpy_iterator()
),
)
def main():
parser = argparse.ArgumentParser()
- parser.add_argument("-c", "--config", required=True, help="Path to config file")
+ parser.add_argument('-c', '--config', required=True, help='Path to config file')
args = parser.parse_args()
with open(args.config) as config_file:
config = yaml.load(config_file, yaml.loader.SafeLoader)
- Config.set_freq_variables(config["sub_day"])
- if "transition" in config:
- Config.set_transition(config["transition"])
-
+ Config.set_freq_variables(config['sub_day'])
+ if 'transition' in config:
+ Config.set_transition(config['transition'])
- save_tf_dataset(config["prefix"], config["version"], config["options"], config["num_series"])
- generate_product_input_dataset(config["prefix"], config["version"])
+ save_tf_dataset(
+ config['prefix'], config['version'], config['options'], config['num_series']
+ )
+ generate_product_input_dataset(config['prefix'], config['version'])
-if __name__ == "__main__":
+if __name__ == '__main__':
main()
diff --git a/src/synthetic_generation/series_config.py b/src/synthetic_generation/series_config.py
index 09c0d06..33b2691 100644
--- a/src/synthetic_generation/series_config.py
+++ b/src/synthetic_generation/series_config.py
@@ -1,9 +1,10 @@
"""
Module containing dataclasses for synthetic data generator
"""
-import numpy as np
from dataclasses import dataclass
+import numpy as np
+
@dataclass
class ComponentScale:
@@ -17,6 +18,7 @@ class ComponentScale:
h: np.ndarray = None
minute: np.ndarray = None
+
@dataclass
class ComponentNoise:
# shape parameter for the weibull distribution
@@ -28,6 +30,7 @@ class ComponentNoise:
# no noise can be represented by scale = 0
scale: float
+
@dataclass
class SeriesConfig:
scale: ComponentScale
@@ -35,4 +38,4 @@ class SeriesConfig:
noise_config: ComponentNoise
def __str__(self):
- return f"L{1000*self.scale.linear:+02.0f}E{10000*(self.scale.exp - 1):+02.0f}A{100*self.scale.a:02.0f}M{100*self.scale.m:02.0f}W{100*self.scale.w:02.0f}"
+ return f'L{1000*self.scale.linear:+02.0f}E{10000*(self.scale.exp - 1):+02.0f}A{100*self.scale.a:02.0f}M{100*self.scale.m:02.0f}W{100*self.scale.w:02.0f}'
diff --git a/src/synthetic_generation/tf_generate_series.py b/src/synthetic_generation/tf_generate_series.py
index b66e1aa..511847d 100644
--- a/src/synthetic_generation/tf_generate_series.py
+++ b/src/synthetic_generation/tf_generate_series.py
@@ -2,17 +2,17 @@
Module to process and convert synthetic series using TensorFlow
"""
+from datetime import date
+from tempfile import NamedTemporaryFile
+
import fastavro
-import tensorflow_io
import numpy as np
import pandas as pd
import tensorflow as tf
-from datetime import date
-from tempfile import NamedTemporaryFile
-from reainternal.cloud import CloudLocation
+from constants import CONTEXT_LENGTH
from generate_series import generate
-from constants import *
-from series_config import *
+from reainternal.cloud import CloudLocation
+from series_config import PRODUCT_SCHEMA, TF_SCHEMA
def tf_generate_n(
@@ -28,9 +28,9 @@ def tf_generate_n(
for i in range(N):
if i % 1000 == 0:
- print(f"Completed: {i}")
+ print(f'Completed: {i}')
- if i < N * options.get("linear_random_walk_frac", 0):
+ if i < N * options.get('linear_random_walk_frac', 0):
cfg, sample = generate(
size,
freq_index=freq_index,
@@ -43,7 +43,7 @@ def tf_generate_n(
size, freq_index=freq_index, start=start, options=options
)
# cfg is the name of the time series
- # sample is a pandas dataframe where
+ # sample is a pandas dataframe where
# the index is the datetime object
# columns `series_value` and `noise`
@@ -51,16 +51,16 @@ def tf_generate_n(
yield tf.train.Example(
features=tf.train.Features(
feature={
- "id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[id_])),
- "ts": tf.train.Feature(
+ 'id': tf.train.Feature(bytes_list=tf.train.BytesList(value=[id_])),
+ 'ts': tf.train.Feature(
int64_list=tf.train.Int64List(
value=sample.index.astype(np.int64)
)
),
- "y": tf.train.Feature(
+ 'y': tf.train.Feature(
float_list=tf.train.FloatList(value=sample.series_values.values)
),
- "noise": tf.train.Feature(
+ 'noise': tf.train.Feature(
float_list=tf.train.FloatList(value=sample.noise.values)
),
}
@@ -74,7 +74,7 @@ def save_tf_records(prefix: str, dest: str, it):
"""
with NamedTemporaryFile() as tfile:
with tf.io.TFRecordWriter(
- tfile.name, options=tf.io.TFRecordOptions(compression_type="GZIP")
+ tfile.name, options=tf.io.TFRecordOptions(compression_type='GZIP')
) as writer:
for record in it:
writer.write(record.SerializeToString())
@@ -87,27 +87,25 @@ def decode_fn(record_bytes):
def load_tf_dataset(prefix: str, src: str):
- return tf.data.TFRecordDataset(prefix + src, compression_type="GZIP").map(
- decode_fn
- )
+ return tf.data.TFRecordDataset(prefix + src, compression_type='GZIP').map(decode_fn)
def convert_tf_to_rows(records):
for i, r in enumerate(records):
if i % 1000 == 0:
- print(f"Completed: {i}")
- id_ = r["id"].decode()
+ print(f'Completed: {i}')
+ id_ = r['id'].decode()
for ts, y, noise in zip(
- (date.fromtimestamp(v / 1_000_000_000) for v in r["ts"]),
- (float(v) for v in r["y"]),
- (float(_noise) for _noise in r["noise"])
+ (date.fromtimestamp(v / 1_000_000_000) for v in r['ts']),
+ (float(v) for v in r['y']),
+ (float(_noise) for _noise in r['noise']),
):
- yield {"id": id_, "ts": ts, "y": y, "noise": noise}
+ yield {'id': id_, 'ts': ts, 'y': y, 'noise': noise}
def generate_product_input(prefix: str, dest: str, it):
"""
Write generated dataset into avro files
"""
- with CloudLocation(prefix + dest).open(mode="wb") as file:
- fastavro.writer(file, PRODUCT_SCHEMA, it, codec="deflate")
+ with CloudLocation(prefix + dest).open(mode='wb') as file:
+ fastavro.writer(file, PRODUCT_SCHEMA, it, codec='deflate')
diff --git a/src/synthetic_generation/utils.py b/src/synthetic_generation/utils.py
index 1cf00b2..d14fc27 100644
--- a/src/synthetic_generation/utils.py
+++ b/src/synthetic_generation/utils.py
@@ -66,5 +66,5 @@ def get_transition_coefficients(context_length):
m = (a + b) / 2
k = 1 / (a - m) * np.log(f_a / (1 - f_a))
- coeff = 1 / (1 + np.exp(-k * (np.arange(1, context_length+1) - m)))
+ coeff = 1 / (1 + np.exp(-k * (np.arange(1, context_length + 1) - m)))
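+    # logistic (sigmoid) weights over positions 1..context_length, centered at m with steepness k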
return coeff
diff --git a/src/training/config_variables.py b/src/training/config_variables.py
index 0140cd2..36841e0 100644
--- a/src/training/config_variables.py
+++ b/src/training/config_variables.py
@@ -2,6 +2,7 @@
Module containing configuration settings for the script
"""
+
class Config:
is_sub_day = False
diff --git a/src/training/constants.py b/src/training/constants.py
index e535604..6f7f730 100644
--- a/src/training/constants.py
+++ b/src/training/constants.py
@@ -21,14 +21,14 @@
CONTEXT_LENGTH = 500
TF_SCHEMA = {
- "id": tf.io.FixedLenFeature([], dtype=tf.string),
- "ts": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.int64),
- "y": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32),
- "noise": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32)
+ 'id': tf.io.FixedLenFeature([], dtype=tf.string),
+ 'ts': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.int64),
+ 'y': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32),
+ 'noise': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32),
}
# constant to reference where the academic_comparison and metalearning folders are
# will not be needed for training without validating on these datasets
ACADEMIC_HOME = '/home/ubuntu/ForecastPFN/academic_comparison/'
-METALEARNED_HOME = ACADEMIC_HOME + 'metalearned/'
\ No newline at end of file
+METALEARNED_HOME = ACADEMIC_HOME + 'metalearned/'
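
`TF_SCHEMA` above is the fixed-length parse spec that the training-side `decode_fn` (in `src/training/utils.py`, which imports it) presumably applies to each serialized record. A minimal read-side sketch, assuming a GZIP-compressed file written with the matching feature names:

```python
# Hedged sketch of the read path: parse GZIP TFRecords with the fixed-length schema.
import tensorflow as tf

CONTEXT_LENGTH = 500
TF_SCHEMA = {
    'id': tf.io.FixedLenFeature([], dtype=tf.string),
    'ts': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.int64),
    'y': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32),
    'noise': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32),
}

def decode(record_bytes):
    # One serialized tf.train.Example -> dict of dense tensors.
    return tf.io.parse_single_example(record_bytes, TF_SCHEMA)

ds = tf.data.TFRecordDataset('daily.tfrecords', compression_type='GZIP').map(decode)
for r in ds.take(1):
    print(r['id'].numpy(), r['y'].shape)  # e.g. b'series-0' (500,)
```
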
diff --git a/src/training/create_train_test_df.py b/src/training/create_train_test_df.py
index 52a53c0..79dac75 100644
--- a/src/training/create_train_test_df.py
+++ b/src/training/create_train_test_df.py
@@ -2,10 +2,16 @@
Module to create train and test dfs
"""
import tensorflow as tf
-import tensorflow_io
-from prepare_dataset import gen_random_single_point, gen_mean_to_random_date, \
- gen_std_to_random_date, filter_unusable_points, build_frames, gen_random_single_point_no_noise, \
- gen_mean_to_random_date_no_noise, gen_std_to_random_date_no_noise
+from prepare_dataset import (
+ build_frames,
+ filter_unusable_points,
+ gen_mean_to_random_date,
+ gen_mean_to_random_date_no_noise,
+ gen_random_single_point,
+ gen_random_single_point_no_noise,
+ gen_std_to_random_date,
+ gen_std_to_random_date_no_noise,
+)
def remove_noise(x, y):
@@ -20,10 +26,12 @@ def remove_noise(x, y):
'ts': x['ts'],
'history': x['history'],
'target_ts': x['target_ts'],
- 'task': x['task']
- }, y
+ 'task': x['task'],
+ },
+ y,
)
+
def create_train_test_df(combined_ds, test_noise=False):
"""
Method to create a train/test split from the combined_ds
@@ -36,20 +44,24 @@ def create_train_test_df(combined_ds, test_noise=False):
task_map = {
'point': gen_random_single_point,
'mean': gen_mean_to_random_date,
- 'stdev': gen_std_to_random_date
+ 'stdev': gen_std_to_random_date,
}
train_tasks_dfs = [
base_train_df.map(func, num_parallel_calls=tf.data.AUTOTUNE)
for func in task_map.values()
]
- train_df = tf.data.Dataset.choose_from_datasets(
- train_tasks_dfs, tf.data.Dataset.range(len(train_tasks_dfs)).repeat()
- ).unbatch().filter(filter_unusable_points)
+ train_df = (
+ tf.data.Dataset.choose_from_datasets(
+ train_tasks_dfs, tf.data.Dataset.range(len(train_tasks_dfs)).repeat()
+ )
+ .unbatch()
+ .filter(filter_unusable_points)
+ )
task_map_test = {
'point': gen_random_single_point_no_noise,
'mean': gen_mean_to_random_date_no_noise,
- 'stdev': gen_std_to_random_date_no_noise
+ 'stdev': gen_std_to_random_date_no_noise,
}
if test_noise:
@@ -63,9 +75,13 @@ def create_train_test_df(combined_ds, test_noise=False):
for func in task_map_test.values()
]
- test_df = tf.data.Dataset.choose_from_datasets(
- test_tasks_dfs, tf.data.Dataset.range(len(test_tasks_dfs)).repeat()
- ).unbatch().filter(filter_unusable_points)
+ test_df = (
+ tf.data.Dataset.choose_from_datasets(
+ test_tasks_dfs, tf.data.Dataset.range(len(test_tasks_dfs)).repeat()
+ )
+ .unbatch()
+ .filter(filter_unusable_points)
+ )
# remove noise and target_noise from train and test df as they are now useless
# train_df = train_df.map(remove_noise)
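
A note on the interleaving pattern restructured above: `choose_from_datasets` driven by a repeating `range(N)` cycles through the N per-task datasets one element at a time, after which the frames are unbatched and filtered. A toy sketch of the round-robin behaviour, with string elements standing in for the real framed examples:

```python
# Toy illustration of tf.data.Dataset.choose_from_datasets with a repeating choice dataset.
import tensorflow as tf

point = tf.data.Dataset.from_tensor_slices(['point'] * 3)
mean = tf.data.Dataset.from_tensor_slices(['mean'] * 3)
stdev = tf.data.Dataset.from_tensor_slices(['stdev'] * 3)
tasks = [point, mean, stdev]

choice = tf.data.Dataset.range(len(tasks)).repeat()
mixed = tf.data.Dataset.choose_from_datasets(tasks, choice)

print([x.numpy().decode() for x in mixed])
# ['point', 'mean', 'stdev', 'point', 'mean', 'stdev', 'point', 'mean', 'stdev']
```
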
diff --git a/src/training/metalearned_validation.py b/src/training/metalearned_validation.py
index 3c97e24..164027f 100644
--- a/src/training/metalearned_validation.py
+++ b/src/training/metalearned_validation.py
@@ -1,62 +1,69 @@
+import datetime
import sys
-ACADEMIC_HOME = '/home/ubuntu/notebooks/ForecastPFN/academic_comparison/'
-METALEARNED_HOME = ACADEMIC_HOME + 'metalearned/'
-sys.path.append(ACADEMIC_HOME)
-sys.path.append(METALEARNED_HOME)
-import datetime
import numpy as np
import pandas as pd
import tensorflow as tf
-from data_provider.UnivariateTimeseriesSampler_WithStamps import UnivariateTimeseriesSampler_WithStamps
-from resources.tourism.dataset import TourismDataset, TourismMeta
+from data_provider.UnivariateTimeseriesSampler_WithStamps import (
+ UnivariateTimeseriesSampler_WithStamps,
+)
from resources.m3.dataset import M3Dataset, M3Meta
+from resources.tourism.dataset import TourismDataset, TourismMeta
+
+ACADEMIC_HOME = '/home/ubuntu/notebooks/ForecastPFN/academic_comparison/'
+METALEARNED_HOME = ACADEMIC_HOME + 'metalearned/'
+sys.path.append(ACADEMIC_HOME)
+sys.path.append(METALEARNED_HOME)
+
def _ForecastPFN_time_features(ts: np.ndarray):
if type(ts[0]) == datetime.datetime:
year = [x.year for x in ts]
month = [x.month for x in ts]
day = [x.day for x in ts]
- day_of_week = [x.weekday()+1 for x in ts]
+ day_of_week = [x.weekday() + 1 for x in ts]
day_of_year = [x.timetuple().tm_yday for x in ts]
return np.stack([year, month, day, day_of_week, day_of_year], axis=-1)
ts = pd.to_datetime(ts)
- return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1)
-
+ return np.stack(
+ [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1
+ )
-def prepare_metalearned_test(metaleanredDataset, metalearnedMeta, sp, p_input_size, p_horizon) -> tf.data.Dataset:
+def prepare_metalearned_test(
+ metaleanredDataset, metalearnedMeta, sp, p_input_size, p_horizon
+) -> tf.data.Dataset:
target_input, target_output = metaleanredDataset(
- METALEARNED_HOME+metalearnedMeta.dataset_path).standard_split()
+ METALEARNED_HOME + metalearnedMeta.dataset_path
+ ).standard_split()
in_bundle, out_bundle, sp = target_input, target_output, sp
- in_bundle = in_bundle.filter(
- lambda ts: ts.meta['seasonal_pattern'] == sp)
- out_bundle = out_bundle.filter(
- lambda ts: ts.meta['seasonal_pattern'] == sp)
-
+ in_bundle = in_bundle.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
+ out_bundle = out_bundle.filter(lambda ts: ts.meta['seasonal_pattern'] == sp)
input_set = in_bundle.values()
input_timestamps = in_bundle.time_stamps()
- input_set = UnivariateTimeseriesSampler_WithStamps(timeseries=input_set,
- time_stamps=input_timestamps,
- insample_size=p_input_size,
- outsample_size=0,
- window_sampling_limit=1,
- batch_size=1,
- time_features=_ForecastPFN_time_features,
- )
+ input_set = UnivariateTimeseriesSampler_WithStamps(
+ timeseries=input_set,
+ time_stamps=input_timestamps,
+ insample_size=p_input_size,
+ outsample_size=0,
+ window_sampling_limit=1,
+ batch_size=1,
+ time_features=_ForecastPFN_time_features,
+ )
p_x, p_x_mask, p_x_timestamps = input_set.sequential_latest_insamples()
output_set = out_bundle.values()
output_timestamps = out_bundle.time_stamps()
- output_set = UnivariateTimeseriesSampler_WithStamps(timeseries=output_set,
- time_stamps=output_timestamps,
- insample_size=p_horizon,
- outsample_size=0,
- window_sampling_limit=1,
- batch_size=1,
- time_features=_ForecastPFN_time_features,
- )
+ output_set = UnivariateTimeseriesSampler_WithStamps(
+ timeseries=output_set,
+ time_stamps=output_timestamps,
+ insample_size=p_horizon,
+ outsample_size=0,
+ window_sampling_limit=1,
+ batch_size=1,
+ time_features=_ForecastPFN_time_features,
+ )
p_y, p_y_mask, p_y_timestamps = output_set.sequential_latest_insamples()
x, x_mark, y, y_mark = p_x, p_x_timestamps, p_y, p_y_timestamps
@@ -67,9 +74,11 @@ def prepare_metalearned_test(metaleanredDataset, metalearnedMeta, sp, p_input_si
task = []
y_out = []
for x, y, x_mark, y_mark in zip(p_x, p_y, p_x_timestamps, p_y_timestamps):
- for yi, yi_mark in zip(y,y_mark):
+ for yi, yi_mark in zip(y, y_mark):
if sum(yi_mark):
- ts.append(np.append(np.zeros((100 - x_mark.shape[0],5)), x_mark, axis=0))
+ ts.append(
+ np.append(np.zeros((100 - x_mark.shape[0], 5)), x_mark, axis=0)
+ )
history.append(np.append(np.zeros(100 - x_mark.shape[0]), x))
target_ts.append(np.array([yi_mark]))
task.append(1)
@@ -77,34 +86,30 @@ def prepare_metalearned_test(metaleanredDataset, metalearnedMeta, sp, p_input_si
ts = tf.convert_to_tensor(np.array(ts), dtype=np.int64, name='ts')
history = tf.convert_to_tensor(np.array(history), dtype=np.float32, name='history')
- target_ts = tf.convert_to_tensor(np.array(target_ts), dtype=np.int64, name='target_ts')
+ target_ts = tf.convert_to_tensor(
+ np.array(target_ts), dtype=np.int64, name='target_ts'
+ )
task = tf.convert_to_tensor(np.array(task), dtype=np.int64, name='task')
y = tf.convert_to_tensor(np.array(y_out), dtype=np.float32)
- ds = {
- 'ts': ts,
- 'history': history,
- 'target_ts': target_ts,
- 'task': task
- }, y
+ ds = {'ts': ts, 'history': history, 'target_ts': target_ts, 'task': task}, y
return tf.data.Dataset.from_tensor_slices(ds)
# Tourism
tourism_yearly_test_df = prepare_metalearned_test(
- TourismDataset, TourismMeta, 'Yearly', 8, 4)
+ TourismDataset, TourismMeta, 'Yearly', 8, 4
+)
tourism_quarterly_test_df = prepare_metalearned_test(
- TourismDataset, TourismMeta, 'Quarterly', 16, 8)
+ TourismDataset, TourismMeta, 'Quarterly', 16, 8
+)
tourism_monthly_test_df = prepare_metalearned_test(
- TourismDataset, TourismMeta, 'Monthly', 48, 24)
+ TourismDataset, TourismMeta, 'Monthly', 48, 24
+)
# M3
-m3_yearly_test_df = prepare_metalearned_test(
- M3Dataset, M3Meta, 'M3Year', 12, 6)
-m3_quarterly_test_df = prepare_metalearned_test(
- M3Dataset, M3Meta, 'M3Quart', 16, 8)
-m3_monthly_test_df = prepare_metalearned_test(
- M3Dataset, M3Meta, 'M3Month', 36, 18)
-m3_others_test_df = prepare_metalearned_test(
- M3Dataset, M3Meta, 'M3Other', 16, 8)
+m3_yearly_test_df = prepare_metalearned_test(M3Dataset, M3Meta, 'M3Year', 12, 6)
+m3_quarterly_test_df = prepare_metalearned_test(M3Dataset, M3Meta, 'M3Quart', 16, 8)
+m3_monthly_test_df = prepare_metalearned_test(M3Dataset, M3Meta, 'M3Month', 36, 18)
+m3_others_test_df = prepare_metalearned_test(M3Dataset, M3Meta, 'M3Other', 16, 8)
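
The calendar features produced by `_ForecastPFN_time_features` are one row of `[year, month, day, day_of_week + 1, day_of_year]` per timestamp. A small sketch on assumed example dates:

```python
# Sketch of the (n, 5) calendar-feature matrix built for ForecastPFN inputs.
import numpy as np
import pandas as pd

ts = pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03'])
feats = np.stack(
    [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1
)
print(feats.shape)  # (3, 5)
print(feats[0])     # [2023 1 1 7 1]; 2023-01-01 is a Sunday, so day_of_week 6 + 1 = 7
```
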
diff --git a/src/training/models.py b/src/training/models.py
index d45bc3e..4727df1 100644
--- a/src/training/models.py
+++ b/src/training/models.py
@@ -1,10 +1,11 @@
from typing import Dict
+
import tensorflow as tf
-import tensorflow_io
-from tensorflow.keras import layers, Model, Input
-from constants import *
+from constants import DAY, DOW, MONTH, NUM_TASKS, YEAR
from prepare_dataset import position_encoding
-from scalers import robust_scaler, max_scaling
+from scalers import max_scaling, robust_scaler
+from tensorflow.keras import layers
+
class CustomScaling(layers.Layer):
def __init__(self, name):
@@ -14,10 +15,10 @@ def __init__(self, name):
elif name == 'robust':
self.scaler = robust_scaler
-
def call(self, history_channels, epsilon):
return self.scaler(history_channels, epsilon)
+
class PositionExpansion(layers.Layer):
def __init__(self, periods: int, freqs: int, **kwargs):
super().__init__(**kwargs)
@@ -32,14 +33,23 @@ def call(self, tc):
out_shape = tf.shape(tc)
return tf.reshape(embedded, [out_shape[0], out_shape[1], self.channels])
+
class TransformerBlock(layers.Layer):
def __init__(self, key_dim, heads=4, value_dim=None, residual=False, **kwargs):
super().__init__(**kwargs)
self.attention = layers.MultiHeadAttention(
- num_heads=heads, key_dim=key_dim, value_dim=value_dim, name=f'{self.name}_attention')
+ num_heads=heads,
+ key_dim=key_dim,
+ value_dim=value_dim,
+ name=f'{self.name}_attention',
+ )
value_dim = value_dim or key_dim
- self.ff1 = layers.Dense(4 * heads * value_dim, activation='gelu', name=f'{self.name}_ff1')
- self.ff2 = layers.Dense(heads * value_dim, activation='gelu', name=f'{self.name}_ff2')
+ self.ff1 = layers.Dense(
+ 4 * heads * value_dim, activation='gelu', name=f'{self.name}_ff1'
+ )
+ self.ff2 = layers.Dense(
+ heads * value_dim, activation='gelu', name=f'{self.name}_ff2'
+ )
self.residual = residual
if self.residual:
self.attn_norm = layers.LayerNormalization(name=f'{self.name}_attn_norm')
@@ -52,8 +62,9 @@ def call(self, x, mask):
a = self.attention(x, x, attention_mask=mask)
a = self.ff1(a)
return self.ff2(a)
- #na = self.attn_norm(a + x)
- #return self.ff_norm(self.ff(na) + na)
+ # na = self.attn_norm(a + x)
+ # return self.ff_norm(self.ff(na) + na)
+
class BaseModel(tf.keras.Model):
def __init__(self, epsilon=1e-4, scaler='robust', **kwargs):
@@ -64,9 +75,16 @@ def __init__(self, epsilon=1e-4, scaler='robust', **kwargs):
self.pos_day = PositionExpansion(31, 6)
self.pos_dow = PositionExpansion(7, 4)
self.robust_scaler = CustomScaling(scaler)
- self.embed_size = sum(emb.channels for emb in (self.pos_year, self.pos_month, self.pos_day, self.pos_dow))
- self.expand_target_nopos = layers.Dense(self.embed_size, name='NoPosEnc', activation='relu')
- self.expand_target_forpos = layers.Dense(self.embed_size, name='ForPosEnc', activation='relu')
+ self.embed_size = sum(
+ emb.channels
+ for emb in (self.pos_year, self.pos_month, self.pos_day, self.pos_dow)
+ )
+ self.expand_target_nopos = layers.Dense(
+ self.embed_size, name='NoPosEnc', activation='relu'
+ )
+ self.expand_target_forpos = layers.Dense(
+ self.embed_size, name='ForPosEnc', activation='relu'
+ )
self.concat_pos = layers.Concatenate(axis=-1, name='ConcatPos')
self.concat_embed = layers.Concatenate(axis=-1, name='ConcatEmbed')
# Will be an embedding when we have different tasks.
@@ -83,32 +101,40 @@ def call(self, x: Dict[str, tf.Tensor]):
# Build position encodings
year = self.tc(ts, YEAR)
delta_year = tf.clip_by_value(year[:, -1:] - year, 0, self.pos_year.periods)
- pos_embedding = self.concat_pos([
- self.pos_year(delta_year),
- self.pos_month(self.tc(ts, MONTH)),
- self.pos_day(self.tc(ts, DAY)),
- self.pos_dow(self.tc(ts, DOW)),
- ])
+ pos_embedding = self.concat_pos(
+ [
+ self.pos_year(delta_year),
+ self.pos_month(self.tc(ts, MONTH)),
+ self.pos_day(self.tc(ts, DAY)),
+ self.pos_dow(self.tc(ts, DOW)),
+ ]
+ )
mask = year > 0
# Embed history
history_channels = tf.expand_dims(history, axis=-1)
-# scale = self.max_scaling(history_channels) + self.epsilon
-# scaled = history_channels / scale
+ # scale = self.max_scaling(history_channels) + self.epsilon
+ # scaled = history_channels / scale
scale, scaled = self.robust_scaler(history_channels, self.epsilon)
embed_nopos = self.expand_target_nopos(scaled)
embed_pos = self.expand_target_forpos(scaled) + pos_embedding
embedded = self.concat_embed([embed_nopos, embed_pos])
-
# Embed target
- target_year = tf.clip_by_value(year[:, -1:] - self.tc(target_ts, YEAR), 0, self.pos_year.periods)
- target_pos_embed = tf.squeeze(self.concat_pos([
- self.pos_year(target_year),
- self.pos_month(self.tc(target_ts, MONTH)),
- self.pos_day(self.tc(target_ts, DAY)),
- self.pos_dow(self.tc(target_ts, DOW)),
- ]), axis=1)
+ target_year = tf.clip_by_value(
+ year[:, -1:] - self.tc(target_ts, YEAR), 0, self.pos_year.periods
+ )
+ target_pos_embed = tf.squeeze(
+ self.concat_pos(
+ [
+ self.pos_year(target_year),
+ self.pos_month(self.tc(target_ts, MONTH)),
+ self.pos_day(self.tc(target_ts, DAY)),
+ self.pos_dow(self.tc(target_ts, DOW)),
+ ]
+ ),
+ axis=1,
+ )
task_embed = self.target_marker(task)
target = self.concat_embed([task_embed, task_embed + target_pos_embed])
@@ -120,11 +146,21 @@ def call(self, x: Dict[str, tf.Tensor]):
def compute_loss(self, x=None, y=None, y_pred=None, sample_weight=None):
# return super().compute_loss(x, y, y_pred['result'], sample_weight)
scale = y_pred['scale']
- return super().compute_loss(x, y / scale, y_pred['result'] / scale, sample_weight)
-
- def forecast(self, ts: tf.Tensor, mask: tf.Tensor, scale: tf.Tensor, embedded: tf.Tensor, target: tf.Tensor):
+ return super().compute_loss(
+ x, y / scale, y_pred['result'] / scale, sample_weight
+ )
+
+ def forecast(
+ self,
+ ts: tf.Tensor,
+ mask: tf.Tensor,
+ scale: tf.Tensor,
+ embedded: tf.Tensor,
+ target: tf.Tensor,
+ ):
return NotImplemented
+
class LSTMModel(BaseModel):
def __init__(self, unit=30, **kwargs):
super().__init__(**kwargs)
@@ -133,7 +169,14 @@ def __init__(self, unit=30, **kwargs):
self.combine_target = layers.Concatenate(name='AppendTarget', axis=-1)
self.cont_output = layers.Dense(1, name='Output', activation='relu')
- def forecast(self, ts: tf.Tensor, mask: tf.Tensor, scale: tf.Tensor, embedded: tf.Tensor, target: tf.Tensor):
+ def forecast(
+ self,
+ ts: tf.Tensor,
+ mask: tf.Tensor,
+ scale: tf.Tensor,
+ embedded: tf.Tensor,
+ target: tf.Tensor,
+ ):
lstm_out = self.lstm(embedded, mask=mask)
with_target = self.combine_target([lstm_out, target])
return self.cont_output(with_target)
@@ -160,13 +203,17 @@ def __init__(self, tx_layers=2, **kwargs):
# self.encoder2 = TransformerBlock(key_dim=(self.embed_size * 2))
# self.final_output = layers.Dense(1, name='FinalOutput', activation='relu')
- def forecast(self, ts: tf.Tensor, mask: tf.Tensor, scale: tf.Tensor, embedded: tf.Tensor, target: tf.Tensor):
+ def forecast(
+ self,
+ ts: tf.Tensor,
+ mask: tf.Tensor,
+ scale: tf.Tensor,
+ embedded: tf.Tensor,
+ target: tf.Tensor,
+ ):
mask = tf.pad(mask, [[0, 0], [0, 1]], constant_values=True)
mask = tf.math.logical_and(tf.expand_dims(mask, 1), tf.expand_dims(mask, -1))
- x = self.concat_target([
- embedded,
- tf.expand_dims(target, axis=1)
- ])
+ x = self.concat_target([embedded, tf.expand_dims(target, axis=1)])
x = self.encoder1(x, mask)
x = self.encoder2(x, mask)
# x = self.encoder3(x, mask)
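
One detail of `TransformerModel.forecast` that is easy to miss in the reformatting: the target token is appended to the embedded history, so the boolean padding mask receives one extra `True` entry before being expanded into a pairwise attention mask. A minimal sketch of that mask handling, with illustrative tensor sizes and contents:

```python
# Hedged sketch of the mask construction used before MultiHeadAttention.
import tensorflow as tf
from tensorflow.keras import layers

history_len, embed = 5, 16
x = tf.random.normal([2, history_len + 1, embed])            # history + appended target token
mask = tf.constant([[False, True, True, True, True],         # False marks padded timesteps
                    [True, True, True, True, True]])
mask = tf.pad(mask, [[0, 0], [0, 1]], constant_values=True)  # target token is always visible
attn_mask = tf.math.logical_and(tf.expand_dims(mask, 1), tf.expand_dims(mask, -1))

attention = layers.MultiHeadAttention(num_heads=4, key_dim=embed)
out = attention(x, x, attention_mask=attn_mask)
print(out.shape)  # (2, 6, 16)
```
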
diff --git a/src/training/noise_ablation.py b/src/training/noise_ablation.py
index b8692e3..eeb82fd 100644
--- a/src/training/noise_ablation.py
+++ b/src/training/noise_ablation.py
@@ -2,34 +2,43 @@
Module to train the model
"""
-from keras import backend
-import yaml
-import datetime
import argparse
-import tensorflow as tf
-from tensorflow import keras
+import datetime
+
import numpy as np
-import tensorflow_io
-from utils import load_tf_dataset
-from models import TransformerModel
-from create_train_test_df import create_train_test_df
+import tensorflow as tf
+import yaml
from config_variables import Config
+from create_train_test_df import create_train_test_df
+from keras import backend
+from metalearned_validation import (
+ m3_monthly_test_df,
+ m3_others_test_df,
+ m3_quarterly_test_df,
+ m3_yearly_test_df,
+ tourism_monthly_test_df,
+ tourism_quarterly_test_df,
+ tourism_yearly_test_df,
+)
+from models import TransformerModel
+from train import AdditionalValidationSets
+from utils import load_tf_dataset
def get_combined_ds(config):
- version = config["version"]
+ version = config['version']
# all the datasets we have. Ideally we use only 3 of these for training
# adjust the values in this list accordingly
datasets = [
# load_tf_dataset(config["prefix"] + f"{version}/minute.tfrecords"),
# load_tf_dataset(config["prefix"] + f"{version}/hourly.tfrecords"),
- load_tf_dataset(config["prefix"] + f"{version}/daily.tfrecords"),
- load_tf_dataset(config["prefix"] + f"{version}/weekly.tfrecords"),
- load_tf_dataset(config["prefix"] + f"{version}/monthly.tfrecords"),
+ load_tf_dataset(config['prefix'] + f'{version}/daily.tfrecords'),
+ load_tf_dataset(config['prefix'] + f'{version}/weekly.tfrecords'),
+ load_tf_dataset(config['prefix'] + f'{version}/monthly.tfrecords'),
]
- # # ucomment these lines to use the real world datasets in training
+ # # uncomment these lines to use the real world datasets in training
# tourism_ds = load_tf_dataset(config['prefix'] + 'tourism.tfrecords')
# wikiweb_ds = load_tf_dataset(config['prefix'] + 'wikiweb.tfrecords')
@@ -45,24 +54,21 @@ def main():
np.random.seed(42)
parser = argparse.ArgumentParser()
- parser.add_argument("-c", "--config", required=True, help="Path to config file")
+ parser.add_argument('-c', '--config', required=True, help='Path to config file')
args = parser.parse_args()
with open(args.config) as config_file:
config = yaml.load(config_file, yaml.loader.SafeLoader)
- Config.set_sub_day(config["sub_day"])
+ Config.set_sub_day(config['sub_day'])
combined_ds = get_combined_ds(config)
- train_df, test_df = create_train_test_df(combined_ds, config["test_noise"])
-
-
+ train_df, test_df = create_train_test_df(combined_ds, config['test_noise'])
model = TransformerModel(scaler=config['scaler'])
-
def smape(y_true, y_pred):
- """ Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`.
+ """Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`.
`loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred)), axis=-1)`
Args:
y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
@@ -84,55 +90,58 @@ def smape(y_true, y_pred):
# need these two lines, else fit gives error
batch_X, batch_y = next(iter(train_df.batch(2).take(1)))
- pred_y = model(batch_X)
-
+ model(batch_X)
model.compile(
optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
loss=tf.keras.losses.MeanSquaredError(),
- metrics=[tf.keras.metrics.MeanAbsolutePercentageError(name='mape'),
- tf.keras.metrics.MeanSquaredError(name='mse'),
- smape,
- ]
+ metrics=[
+ tf.keras.metrics.MeanAbsolutePercentageError(name='mape'),
+ tf.keras.metrics.MeanSquaredError(name='mse'),
+ smape,
+ ],
)
+ fit_id = '.'.join(
+ [config['model_save_name'], datetime.datetime.now().strftime('%Y%m%d-%H%M%S')]
+ )
- fit_id = '.'.join([config["model_save_name"],
- datetime.datetime.now().strftime("%Y%m%d-%H%M%S")])
-
- logdir = f"/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}"
+ logdir = f'/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}'
tbCallback = tf.keras.callbacks.TensorBoard(logdir)
- tbCallback._val_dir = logdir+'/validation'
+ tbCallback._val_dir = logdir + '/validation'
callbacks = tf.keras.callbacks.CallbackList(
callbacks=[
tf.keras.callbacks.ModelCheckpoint(
- config["prefix"] + f"models/{fit_id}/ckpts", monitor="loss", verbose=1
+ config['prefix'] + f'models/{fit_id}/ckpts', monitor='loss', verbose=1
),
tf.keras.callbacks.TensorBoard(
- f"/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}"
+ f'/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}'
),
# tf.keras.callbacks.LearningRateScheduler(
# lambda epoch, lr: min(0.001, lr * (epoch + 1))
# )
- AdditionalValidationSets([(tourism_yearly_test_df, 'tourism_yearly'),
- (tourism_quarterly_test_df,'tourism_quarterly'),
- (tourism_monthly_test_df,'tourism_monthly'),
- (m3_yearly_test_df, 'm3_yearly'),
- (m3_quarterly_test_df, 'm3_quarterly'),
- (m3_monthly_test_df, 'm3_monthly'),
- (m3_others_test_df, 'm3_others'),
- ],
- tbCallback)
+ AdditionalValidationSets(
+ [
+ (tourism_yearly_test_df, 'tourism_yearly'),
+ (tourism_quarterly_test_df, 'tourism_quarterly'),
+ (tourism_monthly_test_df, 'tourism_monthly'),
+ (m3_yearly_test_df, 'm3_yearly'),
+ (m3_quarterly_test_df, 'm3_quarterly'),
+ (m3_monthly_test_df, 'm3_monthly'),
+ (m3_others_test_df, 'm3_others'),
+ ],
+ tbCallback,
+ ),
],
add_history=True,
add_progbar=True,
model=model,
)
-
model.fit(
- train_df.shuffle(5_000_000, reshuffle_each_iteration=True).batch(
- 1024).prefetch(tf.data.AUTOTUNE),
+ train_df.shuffle(5_000_000, reshuffle_each_iteration=True)
+ .batch(1024)
+ .prefetch(tf.data.AUTOTUNE),
# train_df.take(1000_000).cache().shuffle(100_000).batch(1024).prefetch(tf.data.AUTOTUNE),
validation_data=test_df.batch(1024, drop_remainder=False).cache(),
epochs=700,
@@ -140,8 +149,8 @@ def smape(y_true, y_pred):
callbacks=callbacks,
)
- model.save(config["prefix"] + 'models/'+ config["model_save_name"])
+ model.save(config['prefix'] + 'models/' + config['model_save_name'])
-if __name__ == "__main__":
+if __name__ == '__main__':
main()
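
The `smape` metric defined inside `main` follows the docstring formula `200 * mean(abs(y_true - y_pred) / (y_true + y_pred), axis=-1)`. A standalone sketch of that definition; the small epsilon guarding against a zero denominator is an added assumption, not necessarily part of the repository's implementation:

```python
# Hedged sketch of Armstrong's sMAPE as described in the docstring above.
import tensorflow as tf

def smape(y_true, y_pred, epsilon=1e-8):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    ratio = tf.abs(y_true - y_pred) / (y_true + y_pred + epsilon)
    return 200.0 * tf.reduce_mean(ratio, axis=-1)

print(smape(tf.constant([[100.0, 200.0]]), tf.constant([[110.0, 180.0]])).numpy())
# [10.025...]  i.e. the mean of 200 * 10/210 and 200 * 20/380
```
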
diff --git a/src/training/prepare_dataset.py b/src/training/prepare_dataset.py
index 4d5aeba..d0fc867 100644
--- a/src/training/prepare_dataset.py
+++ b/src/training/prepare_dataset.py
@@ -4,13 +4,21 @@
"""
from typing import Dict
-import pandas as pd
+
import numpy as np
+import pandas as pd
import tensorflow as tf
-import tensorflow_io
from config_variables import Config
-from constants import PADDING, HISTORY_LEN, TARGET_LEN, TRIM_LEN, TARGET_INDEX, \
- SINGLE_POINT, MEAN_TO_DATE, STDEV_TO_DATE
+from constants import (
+ HISTORY_LEN,
+ MEAN_TO_DATE,
+ PADDING,
+ SINGLE_POINT,
+ STDEV_TO_DATE,
+ TARGET_INDEX,
+ TARGET_LEN,
+ TRIM_LEN,
+)
def compute_time_features(ts: np.ndarray):
@@ -21,21 +29,22 @@ def compute_time_features(ts: np.ndarray):
"""
ts = pd.to_datetime(ts)
if Config.is_sub_day:
- return np.stack([ts.minute, ts.hour, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1)
- return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1)
-
+ return np.stack(
+ [ts.minute, ts.hour, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1
+ )
+ return np.stack(
+ [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1
+ )
@tf.function
def build_frames(r: Dict[str, tf.Tensor]):
- raw_date_info = tf.numpy_function(compute_time_features, inp=[r['ts']], Tout=tf.int64)
+ raw_date_info = tf.numpy_function(
+ compute_time_features, inp=[r['ts']], Tout=tf.int64
+ )
date_info = tf.signal.frame(
- tf.pad(raw_date_info, [[PADDING, 0], [0, 0]]),
- HISTORY_LEN,
- 1,
- axis=0
- )
-
+ tf.pad(raw_date_info, [[PADDING, 0], [0, 0]]), HISTORY_LEN, 1, axis=0
+ )
history = tf.signal.frame(tf.pad(r['y'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1)
noise = tf.signal.frame(tf.pad(r['noise'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1)
@@ -45,180 +54,220 @@ def build_frames(r: Dict[str, tf.Tensor]):
target_noise = tf.signal.frame(r['noise'], TARGET_LEN, 1, axis=-1)
start_index = target_values.shape[0] - TRIM_LEN
- batch_size = start_index - TARGET_LEN
+ start_index - TARGET_LEN
return (
- date_info[-start_index:-TARGET_LEN],
- history[-start_index:-TARGET_LEN],
- noise[-start_index:-TARGET_LEN],
- target_dates[TARGET_INDEX:],
- target_values[TARGET_INDEX:],
- target_noise[TARGET_INDEX:]
- )
+ date_info[-start_index:-TARGET_LEN],
+ history[-start_index:-TARGET_LEN],
+ noise[-start_index:-TARGET_LEN],
+ target_dates[TARGET_INDEX:],
+ target_values[TARGET_INDEX:],
+ target_noise[TARGET_INDEX:],
+ )
@tf.function
def gen_random_single_point(
- date_info: tf.Tensor,
- history: tf.Tensor,
- noise: tf.Tensor,
- target_dates: tf.Tensor,
- target_values: tf.Tensor,
- target_noise: tf.Tensor
- ):
-
-
+ date_info: tf.Tensor,
+ history: tf.Tensor,
+ noise: tf.Tensor,
+ target_dates: tf.Tensor,
+ target_values: tf.Tensor,
+ target_noise: tf.Tensor,
+):
# To limit to a single date
batch_size = tf.shape(target_dates)[0]
- targets = tf.random.uniform(shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32)
+ targets = tf.random.uniform(
+ shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32
+ )
target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1)
target_value = tf.gather(target_values, targets, axis=1, batch_dims=1)
return dict(
ts=date_info,
- history=history*noise,
+ history=history * noise,
noise=noise,
target_ts=target_date,
- task=tf.fill([batch_size,], SINGLE_POINT),
- target_noise=target_noise
+ task=tf.fill(
+ [
+ batch_size,
+ ],
+ SINGLE_POINT,
+ ),
+ target_noise=target_noise,
), target_value
@tf.function
def gen_mean_to_random_date(
- date_info: tf.Tensor,
- history: tf.Tensor,
- noise: tf.Tensor,
- target_dates: tf.Tensor,
- target_values: tf.Tensor,
- target_noise: tf.Tensor
- ):
+ date_info: tf.Tensor,
+ history: tf.Tensor,
+ noise: tf.Tensor,
+ target_dates: tf.Tensor,
+ target_values: tf.Tensor,
+ target_noise: tf.Tensor,
+):
# To limit to a single date
batch_size = tf.shape(target_dates)[0]
- targets = tf.random.uniform(shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32)
+ targets = tf.random.uniform(
+ shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32
+ )
target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1)
target_value = tf.math.reduce_mean(
- tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)),
- keepdims=True,
- axis=-1
- )
+ tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)),
+ keepdims=True,
+ axis=-1,
+ )
return dict(
ts=date_info,
- history=history*noise*.75,
+ history=history * noise * 0.75,
noise=noise,
target_ts=target_date,
- task=tf.fill([batch_size,], MEAN_TO_DATE),
- target_noise=target_noise
+ task=tf.fill(
+ [
+ batch_size,
+ ],
+ MEAN_TO_DATE,
+ ),
+ target_noise=target_noise,
), target_value
@tf.function
def gen_std_to_random_date(
- date_info: tf.Tensor,
- history: tf.Tensor,
- noise: tf.Tensor,
- target_dates: tf.Tensor,
- target_values: tf.Tensor,
- target_noise: tf.Tensor
- ):
+ date_info: tf.Tensor,
+ history: tf.Tensor,
+ noise: tf.Tensor,
+ target_dates: tf.Tensor,
+ target_values: tf.Tensor,
+ target_noise: tf.Tensor,
+):
# To limit to a single date
batch_size = tf.shape(target_dates)[0]
- targets = tf.random.uniform(shape=[batch_size, 1], minval=(TARGET_LEN // 2), maxval=TARGET_LEN, dtype=tf.int32)
+ targets = tf.random.uniform(
+ shape=[batch_size, 1],
+ minval=(TARGET_LEN // 2),
+ maxval=TARGET_LEN,
+ dtype=tf.int32,
+ )
target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1)
target_value = tf.math.reduce_std(
- tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)),
- keepdims=True,
- axis=-1
- )
+ tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)),
+ keepdims=True,
+ axis=-1,
+ )
target_noise_std = tf.math.reduce_std(
- tf.RaggedTensor.from_tensor(target_noise, lengths=(targets[:, 0] + 1)),
- keepdims=True,
- axis=-1
- )
+ tf.RaggedTensor.from_tensor(target_noise, lengths=(targets[:, 0] + 1)),
+ keepdims=True,
+ axis=-1,
+ )
target_value = tf.math.sqrt(target_value**2 + target_noise_std**2)
return dict(
ts=date_info,
- history=history*noise,
+ history=history * noise,
noise=noise,
target_ts=target_date,
- task=tf.fill([batch_size,], STDEV_TO_DATE),
- target_noise=target_noise
+ task=tf.fill(
+ [
+ batch_size,
+ ],
+ STDEV_TO_DATE,
+ ),
+ target_noise=target_noise,
), target_value
+
@tf.function
def gen_random_single_point_no_noise(
- date_info: tf.Tensor,
- history: tf.Tensor,
- noise: tf.Tensor,
- target_dates: tf.Tensor,
- target_values: tf.Tensor,
- target_noise: tf.Tensor
- ):
-
-
+ date_info: tf.Tensor,
+ history: tf.Tensor,
+ noise: tf.Tensor,
+ target_dates: tf.Tensor,
+ target_values: tf.Tensor,
+ target_noise: tf.Tensor,
+):
# To limit to a single date
batch_size = tf.shape(target_dates)[0]
- targets = tf.random.uniform(shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32)
+ targets = tf.random.uniform(
+ shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32
+ )
target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1)
target_value = tf.gather(target_values, targets, axis=1, batch_dims=1)
return dict(
ts=date_info,
history=history,
target_ts=target_date,
- task=tf.fill([batch_size,], SINGLE_POINT),
+ task=tf.fill(
+ [
+ batch_size,
+ ],
+ SINGLE_POINT,
+ ),
), target_value
@tf.function
def gen_mean_to_random_date_no_noise(
- date_info: tf.Tensor,
- history: tf.Tensor,
- noise: tf.Tensor,
- target_dates: tf.Tensor,
- target_values: tf.Tensor,
- target_noise: tf.Tensor
- ):
+ date_info: tf.Tensor,
+ history: tf.Tensor,
+ noise: tf.Tensor,
+ target_dates: tf.Tensor,
+ target_values: tf.Tensor,
+ target_noise: tf.Tensor,
+):
# To limit to a single date
batch_size = tf.shape(target_dates)[0]
- targets = tf.random.uniform(shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32)
+ targets = tf.random.uniform(
+ shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32
+ )
target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1)
target_value = tf.math.reduce_mean(
- tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)),
- keepdims=True,
- axis=-1
- )
+ tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)),
+ keepdims=True,
+ axis=-1,
+ )
return dict(
ts=date_info,
history=history,
target_ts=target_date,
- task=tf.fill([batch_size,], MEAN_TO_DATE),
+ task=tf.fill(
+ [
+ batch_size,
+ ],
+ MEAN_TO_DATE,
+ ),
), target_value
@tf.function
def gen_std_to_random_date_no_noise(
- date_info: tf.Tensor,
- history: tf.Tensor,
- noise: tf.Tensor,
- target_dates: tf.Tensor,
- target_values: tf.Tensor,
- target_noise: tf.Tensor
- ):
+ date_info: tf.Tensor,
+ history: tf.Tensor,
+ noise: tf.Tensor,
+ target_dates: tf.Tensor,
+ target_values: tf.Tensor,
+ target_noise: tf.Tensor,
+):
# To limit to a single date
batch_size = tf.shape(target_dates)[0]
- targets = tf.random.uniform(shape=[batch_size, 1], minval=(TARGET_LEN // 2), maxval=TARGET_LEN, dtype=tf.int32)
+ targets = tf.random.uniform(
+ shape=[batch_size, 1],
+ minval=(TARGET_LEN // 2),
+ maxval=TARGET_LEN,
+ dtype=tf.int32,
+ )
target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1)
target_value = tf.math.reduce_std(
- tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)),
- keepdims=True,
- axis=-1
- )
+ tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)),
+ keepdims=True,
+ axis=-1,
+ )
target_noise_std = tf.math.reduce_std(
- tf.RaggedTensor.from_tensor(target_noise, lengths=(targets[:, 0] + 1)),
- keepdims=True,
- axis=-1
- )
+ tf.RaggedTensor.from_tensor(target_noise, lengths=(targets[:, 0] + 1)),
+ keepdims=True,
+ axis=-1,
+ )
target_value = tf.math.sqrt(target_value**2 + target_noise_std**2)
@@ -226,9 +275,15 @@ def gen_std_to_random_date_no_noise(
ts=date_info,
history=history,
target_ts=target_date,
- task=tf.fill([batch_size,], STDEV_TO_DATE),
+ task=tf.fill(
+ [
+ batch_size,
+ ],
+ STDEV_TO_DATE,
+ ),
), target_value
+
@tf.function
def filter_unusable_points(X: Dict[str, tf.Tensor], y: tf.Tensor):
"""
@@ -236,8 +291,17 @@ def filter_unusable_points(X: Dict[str, tf.Tensor], y: tf.Tensor):
"""
return tf.logical_and(tf.reduce_max(X['history']) > 0.1, tf.math.is_finite(y))[0]
+
def position_encoding(periods: int, freqs: int):
- return np.hstack([
- np.fromfunction(lambda i, j: np.sin(np.pi / periods * (2**j) * (i-1)), (periods + 1, freqs)),
- np.fromfunction(lambda i, j: np.cos(np.pi / periods * (2**j) * (i-1)), (periods + 1, freqs))
- ])
\ No newline at end of file
+ return np.hstack(
+ [
+ np.fromfunction(
+ lambda i, j: np.sin(np.pi / periods * (2**j) * (i - 1)),
+ (periods + 1, freqs),
+ ),
+ np.fromfunction(
+ lambda i, j: np.cos(np.pi / periods * (2**j) * (i - 1)),
+ (periods + 1, freqs),
+ ),
+ ]
+ )
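
`position_encoding` at the end of this file builds a fixed sin/cos lookup table with `periods + 1` rows and `2 * freqs` channels; `PositionExpansion` in `models.py` indexes it by calendar value (for example day-of-month). A small sketch showing the table shape for the day-of-month case:

```python
# Sketch of the sinusoidal lookup table; same formula as position_encoding above.
import numpy as np

def position_encoding(periods: int, freqs: int) -> np.ndarray:
    return np.hstack([
        np.fromfunction(lambda i, j: np.sin(np.pi / periods * (2**j) * (i - 1)), (periods + 1, freqs)),
        np.fromfunction(lambda i, j: np.cos(np.pi / periods * (2**j) * (i - 1)), (periods + 1, freqs)),
    ])

table = position_encoding(periods=31, freqs=6)  # day-of-month table, as in PositionExpansion(31, 6)
print(table.shape)                              # (32, 12): one row per index 0..31, 12 channels
```
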
diff --git a/src/training/scalers.py b/src/training/scalers.py
index 5eed7f0..ba5c510 100644
--- a/src/training/scalers.py
+++ b/src/training/scalers.py
@@ -2,9 +2,8 @@
Module containing different scaler functions
"""
import tensorflow as tf
-import tensorflow_io
from tensorflow.keras import layers
-import numpy as np
+
def robust_scaler(inputs, epsilon):
# inputs.shape = (batch_size, history_len, 1)
@@ -29,14 +28,10 @@ def robust_scaler(inputs, epsilon):
# calculate mean and std of clipped data
clipped_mean = tf.math.reduce_mean(
- clipped_and_masked,
- axis=1,
- keepdims=True
+ clipped_and_masked, axis=1, keepdims=True
).to_tensor()
clipped_std = tf.math.reduce_std(
- clipped_and_masked,
- axis=1,
- keepdims=True
+ clipped_and_masked, axis=1, keepdims=True
).to_tensor()
# scale is of shape (batch_size,1,1)
@@ -51,10 +46,8 @@ def robust_scaler(inputs, epsilon):
def max_scaling(inputs, epsilon):
-
scaler = layers.GlobalMaxPooling1D(name='MaxScaling', keepdims=1)
scale = scaler(inputs) + epsilon
output = inputs / scale
return scale, output
-
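
`max_scaling` above uses `GlobalMaxPooling1D` with `keepdims` to produce a per-series scale of shape `(batch, 1, 1)`, so the division broadcasts over the history axis. A small usage sketch with toy inputs:

```python
# Sketch of max_scaling on a (batch, history_len, 1) tensor.
import tensorflow as tf
from tensorflow.keras import layers

def max_scaling(inputs, epsilon):
    scaler = layers.GlobalMaxPooling1D(name='MaxScaling', keepdims=True)
    scale = scaler(inputs) + epsilon
    return scale, inputs / scale

history = tf.constant([[[1.0], [2.0], [4.0]],
                       [[10.0], [5.0], [2.5]]])   # (batch=2, history_len=3, 1)
scale, scaled = max_scaling(history, epsilon=1e-4)
print(scale.shape)              # (2, 1, 1)
print(scaled[0, :, 0].numpy())  # approximately [0.25 0.5 1.]
```
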
diff --git a/src/training/train.py b/src/training/train.py
index 3271f6e..eb46e7b 100644
--- a/src/training/train.py
+++ b/src/training/train.py
@@ -2,30 +2,28 @@
Module to train the model
"""
-from keras import backend
-import yaml
-import datetime
import argparse
-import tensorflow as tf
-from tensorflow import keras
+import datetime
+
import numpy as np
-import tensorflow_io
-from utils import load_tf_dataset
-from models import TransformerModel
-from create_train_test_df import create_train_test_df
+import tensorflow as tf
+import yaml
from config_variables import Config
-from metalearned_validation import prepare_metalearned_test
+from create_train_test_df import create_train_test_df
+from keras import backend
+from models import TransformerModel
+from utils import load_tf_dataset
def get_combined_ds(config):
- version = config["version"]
+ version = config['version']
# all the datasets we have. Ideally we use only 3 of these for training
# adjust the values in this list accordingly
datasets = [
- load_tf_dataset(config["prefix"] + f"{version}/daily.tfrecords"),
- load_tf_dataset(config["prefix"] + f"{version}/weekly.tfrecords"),
- load_tf_dataset(config["prefix"] + f"{version}/monthly.tfrecords"),
+ load_tf_dataset(config['prefix'] + f'{version}/daily.tfrecords'),
+ load_tf_dataset(config['prefix'] + f'{version}/weekly.tfrecords'),
+ load_tf_dataset(config['prefix'] + f'{version}/monthly.tfrecords'),
]
combined_ds = tf.data.Dataset.choose_from_datasets(
@@ -40,21 +38,21 @@ def main():
np.random.seed(42)
parser = argparse.ArgumentParser()
- parser.add_argument("-c", "--config", required=True, help="Path to config file")
+ parser.add_argument('-c', '--config', required=True, help='Path to config file')
args = parser.parse_args()
with open(args.config) as config_file:
config = yaml.load(config_file, yaml.loader.SafeLoader)
- Config.set_sub_day(config["sub_day"])
+ Config.set_sub_day(config['sub_day'])
combined_ds = get_combined_ds(config)
- train_df, test_df = create_train_test_df(combined_ds, config["test_noise"])
+ train_df, test_df = create_train_test_df(combined_ds, config['test_noise'])
model = TransformerModel(scaler=config['scaler'])
def smape(y_true, y_pred):
- """ Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`.
+ """Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`.
`loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred)), axis=-1)`
Args:
y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
@@ -72,20 +70,19 @@ def smape(y_true, y_pred):
# need these two lines, else fit gives error
batch_X, batch_y = next(iter(train_df.batch(2).take(1)))
- pred_y = model(batch_X)
-
+ model(batch_X)
model.compile(
optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
loss=tf.keras.losses.MeanSquaredError(),
- metrics=[tf.keras.metrics.MeanAbsolutePercentageError(name='mape'),
- tf.keras.metrics.MeanSquaredError(name='mse'),
- smape,
- ]
+ metrics=[
+ tf.keras.metrics.MeanAbsolutePercentageError(name='mape'),
+ tf.keras.metrics.MeanSquaredError(name='mse'),
+ smape,
+ ],
)
class AdditionalValidationSets(tf.keras.callbacks.Callback):
-
def __init__(self, validation_sets, tbCallback, verbose=1, batch_size=1):
"""
:param validation_sets:
@@ -126,35 +123,35 @@ def on_epoch_end(self, epoch, logs=None):
sample_weights = None
else:
raise ValueError()
-
+
print(validation_set_name)
- results = self.model.evaluate(x=validation_data,
- verbose=self.verbose,
- sample_weight=sample_weights,
- batch_size=self.batch_size)
+ results = self.model.evaluate(
+ x=validation_data,
+ verbose=self.verbose,
+ sample_weight=sample_weights,
+ batch_size=self.batch_size,
+ )
for metric, result in zip(self.model.metrics_names, results):
valuename = validation_set_name + '_' + metric
self.history.setdefault(valuename, []).append(result)
with self.tbCallback._val_writer.as_default(step=epoch):
- tf.summary.scalar(valuename, result)
-
-
+ tf.summary.scalar(valuename, result)
+ fit_id = '.'.join(
+ [config['model_save_name'], datetime.datetime.now().strftime('%Y%m%d-%H%M%S')]
+ )
- fit_id = '.'.join([config["model_save_name"],
- datetime.datetime.now().strftime("%Y%m%d-%H%M%S")])
-
- logdir = f"/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}"
+ logdir = f'/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}'
tbCallback = tf.keras.callbacks.TensorBoard(logdir)
- tbCallback._val_dir = logdir+'/validation'
+ tbCallback._val_dir = logdir + '/validation'
callbacks = tf.keras.callbacks.CallbackList(
callbacks=[
tf.keras.callbacks.ModelCheckpoint(
- config["prefix"] + f"models/{fit_id}/ckpts", monitor="loss", verbose=1
+ config['prefix'] + f'models/{fit_id}/ckpts', monitor='loss', verbose=1
),
tf.keras.callbacks.TensorBoard(
- f"/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}"
+ f'/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}'
),
],
add_history=True,
@@ -162,10 +159,10 @@ def on_epoch_end(self, epoch, logs=None):
model=model,
)
-
model.fit(
- train_df.shuffle(5_000, reshuffle_each_iteration=True).batch(
- 1024).prefetch(tf.data.AUTOTUNE),
+ train_df.shuffle(5_000, reshuffle_each_iteration=True)
+ .batch(1024)
+ .prefetch(tf.data.AUTOTUNE),
# train_df.take(1000_000).cache().shuffle(100_000).batch(1024).prefetch(tf.data.AUTOTUNE),
validation_data=test_df.batch(1024, drop_remainder=False).cache(),
epochs=700,
@@ -173,8 +170,8 @@ def on_epoch_end(self, epoch, logs=None):
callbacks=callbacks,
)
- model.save(config["prefix"] + 'models/'+ config["model_save_name"])
+ model.save(config['prefix'] + 'models/' + config['model_save_name'])
-if __name__ == "__main__":
+if __name__ == '__main__':
main()
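
The `model.fit` call above consumes a `tf.data` pipeline of the form shuffle → batch → prefetch. A toy sketch of the same pipeline shape, with a stand-in range dataset instead of the real framed training set:

```python
# Toy sketch of the training input pipeline used by model.fit.
import tensorflow as tf

train_df = tf.data.Dataset.range(10_000)                     # stand-in for the framed training set
pipeline = (
    train_df.shuffle(5_000, reshuffle_each_iteration=True)   # re-shuffle the buffer every epoch
    .batch(1024)                                             # batch size used in train.py
    .prefetch(tf.data.AUTOTUNE)                              # overlap input prep with training
)
for batch in pipeline.take(1):
    print(batch.shape)  # (1024,)
```
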
diff --git a/src/training/utils.py b/src/training/utils.py
index e34a4d7..3445d5b 100644
--- a/src/training/utils.py
+++ b/src/training/utils.py
@@ -2,9 +2,9 @@
Utility functions for training script
"""
import tensorflow as tf
-import tensorflow_io
from constants import TF_SCHEMA
+
def decode_fn(record_bytes):
"""
Method to process bytes from tfrecord files