diff --git a/README.md b/README.md index 78bec70..386c6b9 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ ForecastPFN is more accurate and faster compared to state-of-the-art forecasting The codebase has these parts: - `./src/` contains all code to replicate the ForecastPFN synthetic data generation and training procedure -- `./benchmark/` contains all the code to replicate the benchmark of ForecastPFN against the the other baselines. +- `./benchmark/` contains all the code to replicate the benchmark of ForecastPFN against the other baselines. # Table of contents 1. [Installation](#installation-) @@ -80,7 +80,7 @@ The arguments that are passed are: See how our model performs: ![alt text](img/fpfn_performance.png?raw=true) -The above figure shows analysis of performance vs. train budget, aggregated across datasets and prediction lengths. We plot the number of total MSE wins (left) where a higher value is better and mean MSE rank (right) where a lower values is better. Error bars show one standard deviation across training runs. ForecastPFN and Meta-N-BEATS are disadvantaged in these comparisons given that they see no training data for these series, only the length 36 input. +The above figure shows an analysis of performance vs. train budget, aggregated across datasets and prediction lengths. We plot the number of total MSE wins (left) where a higher value is better and mean MSE rank (right) where a lower value is better. Error bars show one standard deviation across training runs. ForecastPFN and Meta-N-BEATS are disadvantaged in these comparisons given that they see no training data for these series, only the length 36 input. # Synthetic Data Generation ForecastPFN is completely trained on synthetic data. diff --git a/benchmark/.DS_Store b/benchmark/.DS_Store deleted file mode 100644 index d6bded0..0000000 Binary files a/benchmark/.DS_Store and /dev/null differ diff --git a/benchmark/README.md b/benchmark/README.md index 7b051d3..6ec8da7 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -1,4 +1,4 @@ -This directory is for evaluation of ForecastPFN. We have evaluated ForecastPFN on seven real-world datasets which have been used in the literature. The datasets are in the `../academic_data` folder. The datasets include Illness, Exchange, ECL, ETTh1 and ETTh2, Weather and Traffic. +This directory is for the evaluation of ForecastPFN. We have evaluated ForecastPFN on seven real-world datasets that have been used in the literature. The datasets are in the `../academic_data` folder. The datasets include Illness, Exchange, ECL, ETTh1 and ETTh2, Weather and Traffic. The evaluation has been done against multiple baselines which include Arima, Prophet, Informer, Fedformer-w, Autoformer, Transformer and Metalearn, as well as more simple baselines Mean, Last, and NaiveSeasonal. @@ -24,12 +24,12 @@ The arguments that are passed are: - `root_path` : This denotes the parent directory which contains the required dataset. - `data_path` : This denotes the name of the file which contains the data. Look into the academic_data folder for information regarding other dataset files. - `model` : This is one of (ForecastPFN, Metalearn, Arima, Autoformer, Informer, Transformer, FEDformer-w, Prophet) -- `seq_len` : The length of input sequence to be used. In our default setting, we have this set to 96 for exchange and 36 for all other datasets. +- `seq_len` : The length of the input sequence to be used. In our default setting, we have this set to 96 for exchange and 36 for all other datasets. 
- `label_len` : In our default setting, we have this set to 48 for exchange and 18 for all other datasets. -- `pred_len` : This is the length of prediction to be made. We have evaluated our model with various prediction lengths. -- `train_budget` : This denotes the number of training examples that are available to the models which they can use for training. ForecastPFN and Metalearn use 0 examples since they are zero-shot. +- `pred_len` : This is the length of the prediction to be made. We have evaluated our model with various prediction lengths. +- `train_budget` : This denotes the number of training examples that are available to the models that they can use for training. ForecastPFN and Metalearn use 0 examples since they are zero-shot. - `itr` : Number of times evaluation should be repeated. This affects the transformer-based models since they are non-deterministic. All experiments that have been run for this paper can be found in `run.sh`. -Repliaction of the paper tables and plots can be found in the jupyter notebook `./analyze_results.ipynb`. \ No newline at end of file +Replication of the paper tables and plots can be found in the jupyter notebook `./analyze_results.ipynb`. \ No newline at end of file diff --git a/benchmark/data_provider/UnivariateTimeseriesSampler_WithStamps.py b/benchmark/data_provider/UnivariateTimeseriesSampler_WithStamps.py index cdbc5aa..e87ab68 100644 --- a/benchmark/data_provider/UnivariateTimeseriesSampler_WithStamps.py +++ b/benchmark/data_provider/UnivariateTimeseriesSampler_WithStamps.py @@ -1,17 +1,17 @@ import numpy as np -import pandas as pd -import datetime + class UnivariateTimeseriesSampler_WithStamps: - def __init__(self, - timeseries: np.ndarray, - time_stamps: np.ndarray, - insample_size: int, - outsample_size: int, - window_sampling_limit: int, - batch_size: int, - time_features, - ): + def __init__( + self, + timeseries: np.ndarray, + time_stamps: np.ndarray, + insample_size: int, + outsample_size: int, + window_sampling_limit: int, + batch_size: int, + time_features, + ): self.timeseries = [ts for ts in timeseries] self.time_stamps = [ts for ts in time_stamps] self.window_sampling_limit = window_sampling_limit @@ -20,7 +20,6 @@ def __init__(self, self.outsample_size = outsample_size self.time_features = time_features self.time_embedding_dim = self.time_features(self.time_stamps[0]).T.shape[0] - def __iter__(self): while True: @@ -28,47 +27,79 @@ def __iter__(self): insample_mask = np.zeros((self.batch_size, self.insample_size)) outsample = np.zeros((self.batch_size, self.outsample_size)) outsample_mask = np.zeros((self.batch_size, self.outsample_size)) - sampled_ts_indices = np.random.randint(len(self.timeseries), size=self.batch_size) + sampled_ts_indices = np.random.randint( + len(self.timeseries), size=self.batch_size + ) insample_time_stamps = np.zeros( - (self.batch_size, self.insample_size, self.time_embedding_dim), dtype=object) + (self.batch_size, self.insample_size, self.time_embedding_dim), + dtype=object, + ) outsample_time_stamps = np.zeros( - (self.batch_size, self.outsample_size, self.time_embedding_dim), dtype=object) + (self.batch_size, self.outsample_size, self.time_embedding_dim), + dtype=object, + ) for i, sampled_index in enumerate(sampled_ts_indices): sampled_timeseries = self.timeseries[sampled_index] - cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit), - high=len(sampled_timeseries), - size=1)[0] + cut_point = np.random.randint( + low=max(1, len(sampled_timeseries) - 
self.window_sampling_limit), + high=len(sampled_timeseries), + size=1, + )[0] - insample_window = sampled_timeseries[max(0, cut_point - self.insample_size):cut_point] - insample[i, -len(insample_window):] = insample_window - insample_mask[i, -len(insample_window):] = 1.0 + insample_window = sampled_timeseries[ + max(0, cut_point - self.insample_size) : cut_point + ] + insample[i, -len(insample_window) :] = insample_window + insample_mask[i, -len(insample_window) :] = 1.0 outsample_window = sampled_timeseries[ - cut_point:min(len(sampled_timeseries), cut_point + self.outsample_size)] - outsample[i, :len(outsample_window)] = outsample_window - outsample_mask[i, :len(outsample_window)] = 1.0 + cut_point : min( + len(sampled_timeseries), cut_point + self.outsample_size + ) + ] + outsample[i, : len(outsample_window)] = outsample_window + outsample_mask[i, : len(outsample_window)] = 1.0 sampled_timestamps = self.time_stamps[sampled_index] - insample_window_time_stamps = sampled_timestamps[max(0, cut_point - self.insample_size):cut_point] - insample_time_stamps[i, -len(insample_window_time_stamps):] = self.time_features(insample_window_time_stamps) + insample_window_time_stamps = sampled_timestamps[ + max(0, cut_point - self.insample_size) : cut_point + ] + insample_time_stamps[ + i, -len(insample_window_time_stamps) : + ] = self.time_features(insample_window_time_stamps) outsample_window_timestamps = sampled_timestamps[ - cut_point:min(len(sampled_timestamps), cut_point + self.outsample_size)] - outsample_time_stamps[i, :len(outsample_window_timestamps)] = self.time_features(outsample_window_timestamps) - yield insample, insample_mask, outsample, outsample_mask, insample_time_stamps, outsample_time_stamps + cut_point : min( + len(sampled_timestamps), cut_point + self.outsample_size + ) + ] + outsample_time_stamps[ + i, : len(outsample_window_timestamps) + ] = self.time_features(outsample_window_timestamps) + yield ( + insample, + insample_mask, + outsample, + outsample_mask, + insample_time_stamps, + outsample_time_stamps, + ) def sequential_latest_insamples(self): batch_size = len(self.timeseries) insample = np.zeros((batch_size, self.insample_size)) insample_mask = np.zeros((batch_size, self.insample_size)) insample_time_stamps = np.zeros( - (batch_size, self.insample_size, self.time_embedding_dim), dtype=object) + (batch_size, self.insample_size, self.time_embedding_dim), dtype=object + ) for i, (ts, time_stamp) in enumerate(zip(self.timeseries, self.time_stamps)): - ts_last_window = ts[-self.insample_size:] - insample[i, -len(ts):] = ts_last_window - insample_mask[i, -len(ts):] = 1.0 + ts_last_window = ts[-self.insample_size :] + insample[i, -len(ts) :] = ts_last_window + insample_mask[i, -len(ts) :] = 1.0 sampled_timestamps = time_stamp - insample_window_time_stamps = sampled_timestamps[-self.insample_size:] - insample_time_stamps[i, -len(insample_window_time_stamps):] = self.time_features(insample_window_time_stamps) + insample_window_time_stamps = sampled_timestamps[-self.insample_size :] + insample_time_stamps[ + i, -len(insample_window_time_stamps) : + ] = self.time_features(insample_window_time_stamps) return insample, insample_mask, insample_time_stamps diff --git a/benchmark/data_provider/data_factory.py b/benchmark/data_provider/data_factory.py index 42f96ff..6ed4bc4 100644 --- a/benchmark/data_provider/data_factory.py +++ b/benchmark/data_provider/data_factory.py @@ -1,5 +1,7 @@ -from data_provider.data_loader import Dataset_Custom from torch.utils.data import DataLoader + +from 
data_provider.data_loader import Dataset_Custom + # from metalearned.resources.electricity.dataset import ElectricityDataset, ElectricityMeta # from metalearned.resources.m3.dataset import M3Dataset, M3Meta # from metalearned.resources.m4.dataset import M4Dataset, M4Meta @@ -74,5 +76,6 @@ def data_provider(args, flag): batch_size=batch_size, shuffle=shuffle_flag, num_workers=args.num_workers, - drop_last=drop_last) + drop_last=drop_last, + ) return data_set, data_loader diff --git a/benchmark/data_provider/data_loader.py b/benchmark/data_provider/data_loader.py index 616590c..566b74c 100644 --- a/benchmark/data_provider/data_loader.py +++ b/benchmark/data_provider/data_loader.py @@ -1,23 +1,33 @@ import os -import numpy as np +import warnings + import pandas as pd -import os -import torch -from torch.utils.data import Dataset, DataLoader -from utils.timefeatures import time_features from sklearn.preprocessing import StandardScaler -import warnings +from torch.utils.data import Dataset + +from utils.timefeatures import time_features warnings.filterwarnings('ignore') + class Dataset_Custom(Dataset): - def __init__(self, root_path, flag='train', size=None, - features='S', data_path='ETTh1.csv', - target='OT', scale=True, timeenc=0, freq='h', - scaler=StandardScaler(), train_budget=None): + def __init__( + self, + root_path, + flag='train', + size=None, + features='S', + data_path='ETTh1.csv', + target='OT', + scale=True, + timeenc=0, + freq='h', + scaler=StandardScaler(), + train_budget=None, + ): # size [seq_len, label_len, pred_len] # info - if size == None: + if size is None: self.seq_len = 24 * 4 * 4 self.label_len = 24 * 4 self.pred_len = 24 * 4 @@ -43,12 +53,11 @@ def __init__(self, root_path, flag='train', size=None, self.__read_data__() def __read_data__(self): - df_raw = pd.read_csv(os.path.join(self.root_path, - self.data_path)) + df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path)) - ''' + """ df_raw.columns: ['date', ...(other features), target feature] - ''' + """ cols = list(df_raw.columns) cols.remove(self.target) cols.remove('date') @@ -60,10 +69,13 @@ def __read_data__(self): train_start = 0 if self.train_budget: - train_start = max(train_start, num_train - - self.seq_len - self.train_budget) + train_start = max(train_start, num_train - self.seq_len - self.train_budget) - border1s = [train_start, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] + border1s = [ + train_start, + num_train - self.seq_len, + len(df_raw) - num_test - self.seq_len, + ] border2s = [num_train, num_train + num_vali, len(df_raw)] border1 = border1s[self.set_type] border2 = border2s[self.set_type] @@ -75,7 +87,7 @@ def __read_data__(self): df_data = df_raw[[self.target]] if self.scale: - train_data = df_data[0:border2s[0]] + train_data = df_data[0 : border2s[0]] self.scaler.fit(train_data.values) data = self.scaler.transform(df_data.values) else: @@ -91,7 +103,9 @@ def __read_data__(self): df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) data_stamp = df_stamp.drop(['date'], 1).values elif self.timeenc == 1: - data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = time_features( + pd.to_datetime(df_stamp['date'].values), freq=self.freq + ) data_stamp = data_stamp.transpose(1, 0) self.data_x = data[border1:border2] @@ -108,10 +122,10 @@ def __getitem__(self, index): seq_y = self.data_y[r_begin:r_end] seq_x_mark = self.data_stamp[s_begin:s_end] seq_y_mark = self.data_stamp[r_begin:r_end] - seq_x_original = 
self.data_stamp_original['date'].values[s_begin:s_end] - seq_y_original = self.data_stamp_original['date'].values[r_begin:r_end] + # seq_x_original = self.data_stamp_original["date"].values[s_begin:s_end] + # seq_y_original = self.data_stamp_original["date"].values[r_begin:r_end] - return seq_x, seq_y, seq_x_mark, seq_y_mark#, seq_x_original, seq_y_original + return seq_x, seq_y, seq_x_mark, seq_y_mark # , seq_x_original, seq_y_original def __len__(self): return len(self.data_x) - self.seq_len - self.pred_len + 1 @@ -121,13 +135,24 @@ def inverse_transform(self, data): class Dataset_Pred(Dataset): - def __init__(self, root_path, flag='pred', size=None, - features='S', data_path='ETTh1.csv', - target='OT', scale=True, inverse=False, timeenc=0, freq='15min', cols=None, - scaler=StandardScaler()): + def __init__( + self, + root_path, + flag='pred', + size=None, + features='S', + data_path='ETTh1.csv', + target='OT', + scale=True, + inverse=False, + timeenc=0, + freq='15min', + cols=None, + scaler=StandardScaler(), + ): # size [seq_len, label_len, pred_len] # info - if size == None: + if size is None: self.seq_len = 24 * 4 * 4 self.label_len = 24 * 4 self.pred_len = 24 * 4 @@ -151,11 +176,10 @@ def __init__(self, root_path, flag='pred', size=None, self.__read_data__() def __read_data__(self): - df_raw = pd.read_csv(os.path.join(self.root_path, - self.data_path)) - ''' + df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path)) + """ df_raw.columns: ['date', ...(other features), target feature] - ''' + """ if self.cols: cols = self.cols.copy() cols.remove(self.target) @@ -181,7 +205,9 @@ def __read_data__(self): tmp_stamp = df_raw[['date']][border1:border2] tmp_stamp['date'] = pd.to_datetime(tmp_stamp.date) - pred_dates = pd.date_range(tmp_stamp.date.values[-1], periods=self.pred_len + 1, freq=self.freq) + pred_dates = pd.date_range( + tmp_stamp.date.values[-1], periods=self.pred_len + 1, freq=self.freq + ) df_stamp = pd.DataFrame(columns=['date']) df_stamp.date = list(tmp_stamp.date.values) + list(pred_dates[1:]) @@ -194,7 +220,9 @@ def __read_data__(self): df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15) data_stamp = df_stamp.drop(['date'], 1).values elif self.timeenc == 1: - data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = time_features( + pd.to_datetime(df_stamp['date'].values), freq=self.freq + ) data_stamp = data_stamp.transpose(1, 0) self.data_x = data[border1:border2] @@ -212,9 +240,9 @@ def __getitem__(self, index): seq_x = self.data_x[s_begin:s_end] if self.inverse: - seq_y = self.data_x[r_begin:r_begin + self.label_len] + seq_y = self.data_x[r_begin : r_begin + self.label_len] else: - seq_y = self.data_y[r_begin:r_begin + self.label_len] + seq_y = self.data_y[r_begin : r_begin + self.label_len] seq_x_mark = self.data_stamp[s_begin:s_end] seq_y_mark = self.data_stamp[r_begin:r_end] diff --git a/benchmark/exp/exp_ForecastPFN.py b/benchmark/exp/exp_ForecastPFN.py index 2e17d7a..b880da7 100644 --- a/benchmark/exp/exp_ForecastPFN.py +++ b/benchmark/exp/exp_ForecastPFN.py @@ -1,25 +1,26 @@ +import datetime import os +import time import warnings + import numpy as np -import torch -import torch.nn as nn import pandas as pd -import datetime -import time -from data_provider.data_factory import data_provider +import tensorflow as tf +import torch +from sklearn.preprocessing import StandardScaler + from exp.exp_basic import Exp_Basic -from utils.metrics import metric from utils.metrics import smape -import tensorflow as tf 
-import tensorflow_io -from sklearn.preprocessing import StandardScaler, MinMaxScaler + gpus = tf.config.experimental.list_physical_devices('GPU') if gpus: - try: - tf.config.experimental.set_virtual_device_configuration( - gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)]) - except RuntimeError as e: - print(e) + try: + tf.config.experimental.set_virtual_device_configuration( + gpus[0], + [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)], + ) + except RuntimeError as e: + print(e) warnings.filterwarnings('ignore') @@ -30,24 +31,25 @@ def __init__(self, args): super(Exp_ForecastPFN, self).__init__(args) def _build_model(self): - return + pass def train(self, setting): - return - + pass + def _ForecastPFN_time_features(self, ts: np.ndarray): if type(ts[0]) == datetime.datetime: year = [x.year for x in ts] month = [x.month for x in ts] day = [x.day for x in ts] - day_of_week = [x.weekday()+1 for x in ts] + day_of_week = [x.weekday() + 1 for x in ts] day_of_year = [x.timetuple().tm_yday for x in ts] return np.stack([year, month, day, day_of_week, day_of_year], axis=-1) ts = pd.to_datetime(ts) - return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1) + return np.stack( + [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1 + ) - def _process_tuple(self,x,x_mark,y_mark, - model, horizon): + def _process_tuple(self, x, x_mark, y_mark, model, horizon): """ x: tensor of shape (n, 1) x_mark: tensor of shape (n, d) @@ -74,7 +76,7 @@ def _process_tuple(self,x,x_mark,y_mark, history_std = np.nanstd(history[-6:]) # local scale, don't know why defined so - local_scale = (history_mean + history_std + 1e-4) + local_scale = history_mean + history_std + 1e-4 # change history based on local scale, to normalize it between 0 and 1 history = np.clip(history / local_scale, a_min=0, a_max=1) @@ -85,44 +87,60 @@ def _process_tuple(self,x,x_mark,y_mark, target = tf.convert_to_tensor(x_mark)[-100:, :] history = tf.convert_to_tensor(history)[-100:, :] else: - target = tf.pad(x_mark.cpu(), [[100-x.shape[0], 0], [0, 0]]) - history = tf.pad(history, [[100-x.shape[0], 0], [0, 0]]) + target = tf.pad(x_mark.cpu(), [[100 - x.shape[0], 0], [0, 0]]) + history = tf.pad(history, [[100 - x.shape[0], 0], [0, 0]]) - history = tf.repeat(tf.expand_dims(history, axis=0), [ - horizon], axis=0)[:, :, 0] - ts = tf.repeat(tf.expand_dims( - target, axis=0), [horizon], axis=0) + history = tf.repeat(tf.expand_dims(history, axis=0), [horizon], axis=0)[ + :, :, 0 + ] + ts = tf.repeat(tf.expand_dims(target, axis=0), [horizon], axis=0) else: - ts = tf.convert_to_tensor(x_mark.unsqueeze(0).repeat( - horizon, 1, 1), dtype=tf.int64) + ts = tf.convert_to_tensor( + x_mark.unsqueeze(0).repeat(horizon, 1, 1), dtype=tf.int64 + ) history = tf.convert_to_tensor(history, dtype=tf.float32) - task = tf.fill([horizon, ], 1) + task = tf.fill( + [ + horizon, + ], + 1, + ) target_ts = tf.convert_to_tensor( - y_mark.cpu()[-horizon:, :].unsqueeze(1), dtype=tf.int64) - - model_input = {'ts': ts, 'history': history, - 'target_ts': target_ts, 'task': task} + y_mark.cpu()[-horizon:, :].unsqueeze(1), dtype=tf.int64 + ) + + model_input = { + 'ts': ts, + 'history': history, + 'target_ts': target_ts, + 'task': task, + } t1 = time.time() pred_vals = model(model_input) time_diff = time.time() - t1 - scaled_vals = pred_vals['result'].numpy( - ).T.reshape(-1) * pred_vals['scale'].numpy().reshape(-1) + scaled_vals = pred_vals['result'].numpy().T.reshape(-1) * pred_vals[ + 
'scale' + ].numpy().reshape(-1) scaled_vals = scaler.inverse_transform([scaled_vals]) return scaled_vals, time_diff - - def _ForecastPFN_process_batch(self, model, batch_x, batch_y, batch_x_mark, batch_y_mark): + + def _ForecastPFN_process_batch( + self, model, batch_x, batch_y, batch_x_mark, batch_y_mark + ): preds = [] trues = [] - for idx, (x, y, x_mark, y_mark) in enumerate(zip(batch_x, batch_y, batch_x_mark, batch_y_mark)): - + for idx, (x, y, x_mark, y_mark) in enumerate( + zip(batch_x, batch_y, batch_x_mark, batch_y_mark) + ): pred, time_diff = self._process_tuple( - x, x_mark, y_mark, model, self.args.pred_len) + x, x_mark, y_mark, model, self.args.pred_len + ) - y = y[-self.args.pred_len:, :].to(self.device) + y = y[-self.args.pred_len :, :].to(self.device) true = y.detach().cpu().numpy() - + preds += [pred] trues += [true] return preds, trues, time_diff @@ -130,11 +148,13 @@ def _ForecastPFN_process_batch(self, model, batch_x, batch_y, batch_x_mark, batc def test(self, setting, test=0): test_data, test_loader = self._get_data(flag='test') test_data.data_stamp = self._ForecastPFN_time_features( - list(test_data.data_stamp_original['date'])) + list(test_data.data_stamp_original['date']) + ) if test: print('loading model') pretrained = tf.keras.models.load_model( - self.args.model_path, custom_objects={'smape': smape}) + self.args.model_path, custom_objects={'smape': smape} + ) preds = [] trues = [] @@ -145,7 +165,9 @@ def test(self, setting, test=0): self.test_timer.start_timer() timer = 0 with torch.no_grad(): - for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate( + test_loader + ): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float().to(self.device) @@ -153,9 +175,10 @@ def test(self, setting, test=0): batch_y_mark = batch_y_mark.float().to(self.device) pred, true, time = self._ForecastPFN_process_batch( - pretrained, batch_x, batch_y, batch_x_mark, batch_y_mark) + pretrained, batch_x, batch_y, batch_x_mark, batch_y_mark + ) timer += time - + preds.append(pred) trues.append(true) diff --git a/benchmark/exp/exp_arima.py b/benchmark/exp/exp_arima.py index d4e007b..03fdd97 100644 --- a/benchmark/exp/exp_arima.py +++ b/benchmark/exp/exp_arima.py @@ -1,15 +1,9 @@ -import os -import time import warnings -import numpy as np -import torch -import torch.nn as nn -from torch import optim -from data_provider.data_factory import data_provider -from exp.exp_basic import Exp_Basic -from utils.metrics import metric -import pmdarima + import pandas as pd +import pmdarima + +from exp.exp_basic import Exp_Basic warnings.filterwarnings('ignore') @@ -21,11 +15,14 @@ def __init__(self, args): def _build_model(self): return pmdarima.auto_arima - def train(self, setting): train_data, train_loader = self._get_data(flag='train') - train_df = pd.DataFrame({'y': train_data.data_y.T[0], 'ds': list( - pd.to_datetime(train_data.data_stamp_original['date']))}) + train_df = pd.DataFrame( + { + 'y': train_data.data_y.T[0], + 'ds': list(pd.to_datetime(train_data.data_stamp_original['date'])), + } + ) self.train_timer.start_timer() self.model = pmdarima.auto_arima(train_df.y.values) self.train_timer.end_timer() @@ -35,23 +32,29 @@ def test(self, setting, test=0): horizon = self.args.pred_len test_data, test_loader = self._get_data(flag='test') - test_df = pd.DataFrame({'y': test_data.data_y.T[0], 'ds': list( - pd.to_datetime(test_data.data_stamp_original['date']))}) - - cmp = pd.DataFrame({ - 'date': 
test_df['ds'].values, - 'y': test_df['y'].values, - 'yhat': self.model.predict(test_df.shape[0]) - }) + test_df = pd.DataFrame( + { + 'y': test_data.data_y.T[0], + 'ds': list(pd.to_datetime(test_data.data_stamp_original['date'])), + } + ) + + cmp = pd.DataFrame( + { + 'date': test_df['ds'].values, + 'y': test_df['y'].values, + 'yhat': self.model.predict(test_df.shape[0]), + } + ) preds, trues = [], [] self.test_timer.start_timer() - for i in range(self.args.seq_len, cmp.shape[0]-horizon+1): - pred = cmp[i:i+horizon]['yhat'].values - true = cmp[i:i+horizon]['y'].values + for i in range(self.args.seq_len, cmp.shape[0] - horizon + 1): + pred = cmp[i : i + horizon]['yhat'].values + true = cmp[i : i + horizon]['y'].values preds += [pred] trues += [true] self.test_timer.end_timer() - + return self._save_test_data(setting, preds, trues) diff --git a/benchmark/exp/exp_basic.py b/benchmark/exp/exp_basic.py index 81e526e..e9b2b09 100644 --- a/benchmark/exp/exp_basic.py +++ b/benchmark/exp/exp_basic.py @@ -1,9 +1,12 @@ import os -import torch + import numpy as np +import torch + from data_provider.data_factory import data_provider -from utils.tools import TimeBudget from utils.metrics import metric +from utils.tools import TimeBudget + class Exp_Basic(object): def __init__(self, args): @@ -20,8 +23,9 @@ def _build_model(self): def _acquire_device(self): if self.args.use_gpu: - os.environ["CUDA_VISIBLE_DEVICES"] = str( - self.args.gpu) if not self.args.use_multi_gpu else self.args.devices + os.environ['CUDA_VISIBLE_DEVICES'] = ( + str(self.args.gpu) if not self.args.use_multi_gpu else self.args.devices + ) device = torch.device('cuda:{}'.format(self.args.gpu)) print('Use GPU: cuda:{}'.format(self.args.gpu)) else: @@ -48,8 +52,8 @@ def _save_test_data(self, setting, preds, trues): mae, mse, rmse, mape, mspe = metric(preds, trues) print('mse:{}, mae:{}'.format(mse, mae)) - f = open("result.txt", 'a') - f.write(setting + " \n") + f = open('result.txt', 'a') + f.write(setting + ' \n') f.write('mse:{}, mae:{}'.format(mse, mae)) f.write('\n') f.write('\n') @@ -58,15 +62,15 @@ def _save_test_data(self, setting, preds, trues): output = { 'metrics': { 'mae': mae, - 'mse': mse, - 'rmse': rmse, - 'mape': mape, + 'mse': mse, + 'rmse': rmse, + 'mape': mape, 'mspe': mspe, }, 'train_timer': self.train_timer.total_time, 'vali_timer': self.vali_timer.total_time, 'test_timer': self.test_timer.total_time, - 'args': self.args + 'args': self.args, } print(output) diff --git a/benchmark/exp/exp_last.py b/benchmark/exp/exp_last.py index 8be4ec4..7d8311d 100644 --- a/benchmark/exp/exp_last.py +++ b/benchmark/exp/exp_last.py @@ -1,15 +1,9 @@ -import os -import time import warnings -import numpy as np + +import pmdarima import torch -import torch.nn as nn -from torch import optim -from data_provider.data_factory import data_provider + from exp.exp_basic import Exp_Basic -from utils.metrics import metric -import pmdarima -import pandas as pd warnings.filterwarnings('ignore') @@ -21,25 +15,30 @@ def __init__(self, args): def _build_model(self): return pmdarima.auto_arima - def train(self, setting): - return + pass def test(self, setting, test=0): - horizon = self.args.pred_len - test_data, test_loader = self._get_data(flag='test') preds, trues = [], [] self.test_timer.start_timer() with torch.no_grad(): - for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate( + test_loader + ): batch_x = batch_x.float().to(self.device) batch_y = 
batch_y.float().to(self.device) - true = batch_y[:, -self.args.pred_len:].detach().cpu().numpy() - pred = batch_x[:,-1,:].unsqueeze(1).repeat( - 1, true.shape[1], 1).detach().cpu().numpy() + true = batch_y[:, -self.args.pred_len :].detach().cpu().numpy() + pred = ( + batch_x[:, -1, :] + .unsqueeze(1) + .repeat(1, true.shape[1], 1) + .detach() + .cpu() + .numpy() + ) preds.append(pred) trues.append(true) diff --git a/benchmark/exp/exp_mean.py b/benchmark/exp/exp_mean.py index 326acd7..8546e13 100644 --- a/benchmark/exp/exp_mean.py +++ b/benchmark/exp/exp_mean.py @@ -1,15 +1,9 @@ -import os -import time import warnings -import numpy as np + +import pmdarima import torch -import torch.nn as nn -from torch import optim -from data_provider.data_factory import data_provider + from exp.exp_basic import Exp_Basic -from utils.metrics import metric -import pmdarima -import pandas as pd warnings.filterwarnings('ignore') @@ -21,25 +15,30 @@ def __init__(self, args): def _build_model(self): return pmdarima.auto_arima - def train(self, setting): - return + pass def test(self, setting, test=0): - horizon = self.args.pred_len - test_data, test_loader = self._get_data(flag='test') preds, trues = [], [] self.test_timer.start_timer() with torch.no_grad(): - for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate( + test_loader + ): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float().to(self.device) - true = batch_y[:, -self.args.pred_len:].detach().cpu().numpy() - pred = batch_x.mean(1).unsqueeze(1).repeat( - 1, true.shape[1], 1).detach().cpu().numpy() + true = batch_y[:, -self.args.pred_len :].detach().cpu().numpy() + pred = ( + batch_x.mean(1) + .unsqueeze(1) + .repeat(1, true.shape[1], 1) + .detach() + .cpu() + .numpy() + ) preds.append(pred) trues.append(true) diff --git a/benchmark/exp/exp_metalearn.py b/benchmark/exp/exp_metalearn.py index 075056e..bb317e1 100644 --- a/benchmark/exp/exp_metalearn.py +++ b/benchmark/exp/exp_metalearn.py @@ -1,36 +1,30 @@ import os +import time import warnings -import numpy as np + +import tensorflow as tf import torch -import torch.nn as nn -import pandas as pd -import datetime -from data_provider.data_factory import data_provider + from exp.exp_basic import Exp_Basic -from utils.metrics import metric -from utils.metrics import smape -import tensorflow as tf -import tensorflow_io -import time -from sklearn.preprocessing import StandardScaler, MinMaxScaler + gpus = tf.config.experimental.list_physical_devices('GPU') if gpus: - try: - tf.config.experimental.set_virtual_device_configuration( - gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)]) - except RuntimeError as e: - print(e) + try: + tf.config.experimental.set_virtual_device_configuration( + gpus[0], + [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)], + ) + except RuntimeError as e: + print(e) import sys + sys.path.append('metalearned') from metalearned.common.experiment import load_experiment_parameters -from metalearned.common.torch_utils import SnapshotManager, to_device, to_tensor, mase_loss, mape_loss, smape_2_loss +from metalearned.common.torch_utils import SnapshotManager from metalearned.models.nbeats_torch import nbeats_generic, nbeats_interpretable - - - warnings.filterwarnings('ignore') @@ -39,7 +33,6 @@ def __init__(self, args): super(Exp_Metalearn, self).__init__(args) def _build_model(self): - self.args.path = 
f'metalearned/experiments/tl/ForecastPFN/loss_name=MAPE,input_size={self.args.seq_len},horizon={self.args.pred_len}/' experiment_parameters = load_experiment_parameters(self.args.path) @@ -49,35 +42,39 @@ def _build_model(self): horizon = experiment_parameters['horizon'] if experiment_parameters['model_type'] == 'generic': - model = nbeats_generic(input_size=input_size, - output_size=horizon, - blocks=experiment_parameters['blocks'], - stacks=experiment_parameters['stacks'], - fc_layers=experiment_parameters['layers'], - fc_layers_size=experiment_parameters['width'], - scaling=experiment_parameters['scaling'], - mode=experiment_parameters['mode']) + model = nbeats_generic( + input_size=input_size, + output_size=horizon, + blocks=experiment_parameters['blocks'], + stacks=experiment_parameters['stacks'], + fc_layers=experiment_parameters['layers'], + fc_layers_size=experiment_parameters['width'], + scaling=experiment_parameters['scaling'], + mode=experiment_parameters['mode'], + ) else: - model = nbeats_interpretable(input_size=input_size, - output_size=horizon, - trend_blocks=experiment_parameters['trend_blocks'], - trend_fc_layers=experiment_parameters['layers'], - trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'], - degree_of_polynomial=experiment_parameters['degree_of_polynomial'], - seasonality_blocks=experiment_parameters['seasonality_blocks'], - seasonality_fc_layers=experiment_parameters['layers'], - seasonality_fc_layers_size=experiment_parameters[ - 'seasonality_fc_layers_size'], - num_of_harmonics=experiment_parameters['num_of_harmonics'], - scaling=experiment_parameters['scaling'], - mode=experiment_parameters['mode']) + model = nbeats_interpretable( + input_size=input_size, + output_size=horizon, + trend_blocks=experiment_parameters['trend_blocks'], + trend_fc_layers=experiment_parameters['layers'], + trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'], + degree_of_polynomial=experiment_parameters['degree_of_polynomial'], + seasonality_blocks=experiment_parameters['seasonality_blocks'], + seasonality_fc_layers=experiment_parameters['layers'], + seasonality_fc_layers_size=experiment_parameters[ + 'seasonality_fc_layers_size' + ], + num_of_harmonics=experiment_parameters['num_of_harmonics'], + scaling=experiment_parameters['scaling'], + mode=experiment_parameters['mode'], + ) return model.to(self.device) def train(self, setting): - return - - + pass + def test(self, setting, test=0): test_data, test_loader = self._get_data(flag='test') @@ -90,14 +87,15 @@ def test(self, setting, test=0): experiment_parameters = self.args.experiment_parameters snapshot_dir = os.path.join(path, 'snapshots', time_freq) - snapshot_manager = SnapshotManager(snapshot_dir=snapshot_dir, - logging_frequency=experiment_parameters['logging_frequency'], - snapshot_frequency=experiment_parameters['snapshot_frequency']) + snapshot_manager = SnapshotManager( + snapshot_dir=snapshot_dir, + logging_frequency=experiment_parameters['logging_frequency'], + snapshot_frequency=experiment_parameters['snapshot_frequency'], + ) self.model.load_state_dict(torch.load(snapshot_manager.model_snapshot_file)) self.model.to(self.device) - preds = [] trues = [] folder_path = './test_results/' + setting + '/' @@ -107,9 +105,11 @@ def test(self, setting, test=0): self.test_timer.start_timer() timer = 0 with torch.no_grad(): - for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): - batch_x = batch_x.float().to(self.device)[:,:,0] - batch_y = 
batch_y.float().to(self.device)[:,:,0] + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate( + test_loader + ): + batch_x = batch_x.float().to(self.device)[:, :, 0] + batch_y = batch_y.float().to(self.device)[:, :, 0] print(batch_x.shape, batch_y.shape) @@ -118,10 +118,10 @@ def test(self, setting, test=0): t1 = time.time() pred = self.model(batch_x, torch.ones(batch_x.shape).to(self.device)) - timer += time.time()-t1 + timer += time.time() - t1 pred = pred.detach().cpu().numpy() - true = batch_y[:,-self.args.pred_len:].detach().cpu().numpy() + true = batch_y[:, -self.args.pred_len :].detach().cpu().numpy() preds.append(pred) trues.append(true) diff --git a/benchmark/exp/exp_prophet.py b/benchmark/exp/exp_prophet.py index a7e30fb..b0710b6 100644 --- a/benchmark/exp/exp_prophet.py +++ b/benchmark/exp/exp_prophet.py @@ -1,13 +1,9 @@ -import os -import time import warnings -import numpy as np -import torch -import torch.nn as nn -from torch import optim -from exp.exp_basic import Exp_Basic -import prophet + import pandas as pd +import prophet + +from exp.exp_basic import Exp_Basic warnings.filterwarnings('ignore') @@ -21,8 +17,12 @@ def _build_model(self): def train(self, setting): train_data, train_loader = self._get_data(flag='train') - train_df = pd.DataFrame({'y': train_data.data_y.T[0], 'ds': list( - pd.to_datetime(train_data.data_stamp_original['date']))}) + train_df = pd.DataFrame( + { + 'y': train_data.data_y.T[0], + 'ds': list(pd.to_datetime(train_data.data_stamp_original['date'])), + } + ) self.train_timer.start_timer() self.model.fit(train_df) self.train_timer.end_timer() @@ -32,24 +32,29 @@ def test(self, setting, test=0): horizon = self.args.pred_len test_data, test_loader = self._get_data(flag='test') - test_df = pd.DataFrame({'y': test_data.data_y.T[0], 'ds': list( - pd.to_datetime(test_data.data_stamp_original['date']))}) - predict_frame = self.model.make_future_dataframe( - test_data.data_x.shape[0]) + test_df = pd.DataFrame( + { + 'y': test_data.data_y.T[0], + 'ds': list(pd.to_datetime(test_data.data_stamp_original['date'])), + } + ) + self.model.make_future_dataframe(test_data.data_x.shape[0]) forecast = self.model.predict(test_df) - cmp = pd.DataFrame({ - 'date': test_df['ds'].values, - 'ds': forecast.ds.values, - 'y': test_df['y'].values, - 'yhat': forecast.yhat.values - }) + cmp = pd.DataFrame( + { + 'date': test_df['ds'].values, + 'ds': forecast.ds.values, + 'y': test_df['y'].values, + 'yhat': forecast.yhat.values, + } + ) preds, trues = [], [] self.test_timer.start_timer() - for i in range(self.args.seq_len, cmp.shape[0]-horizon+1): - pred = cmp[i:i+horizon]['yhat'].values - true = cmp[i:i+horizon]['y'].values + for i in range(self.args.seq_len, cmp.shape[0] - horizon + 1): + pred = cmp[i : i + horizon]['yhat'].values + true = cmp[i : i + horizon]['y'].values preds += [pred] trues += [true] diff --git a/benchmark/exp/exp_resolver.py b/benchmark/exp/exp_resolver.py index 6af7dba..e761189 100644 --- a/benchmark/exp/exp_resolver.py +++ b/benchmark/exp/exp_resolver.py @@ -1,14 +1,14 @@ import warnings -from exp.exp_transformer import Exp_Transformer -from exp.exp_transformer_metalearn import Exp_Transformer_Meta -from exp.exp_ForecastPFN import Exp_ForecastPFN + from exp.exp_arima import Exp_Arima -from exp.exp_prophet import Exp_Prophet -from exp.exp_metalearn import Exp_Metalearn -from exp.exp_mean import Exp_Mean +from exp.exp_ForecastPFN import Exp_ForecastPFN from exp.exp_last import Exp_Last +from exp.exp_mean import Exp_Mean +from exp.exp_metalearn 
import Exp_Metalearn +from exp.exp_prophet import Exp_Prophet from exp.exp_seasonalNaive import Exp_SeasonalNaive - +from exp.exp_transformer import Exp_Transformer +from exp.exp_transformer_metalearn import Exp_Transformer_Meta warnings.filterwarnings('ignore') diff --git a/benchmark/exp/exp_seasonalNaive.py b/benchmark/exp/exp_seasonalNaive.py index 8a06136..4668e9f 100644 --- a/benchmark/exp/exp_seasonalNaive.py +++ b/benchmark/exp/exp_seasonalNaive.py @@ -1,15 +1,9 @@ -import os -import time import warnings -import numpy as np + +import pmdarima import torch -import torch.nn as nn -from torch import optim -from data_provider.data_factory import data_provider + from exp.exp_basic import Exp_Basic -from utils.metrics import metric -import pmdarima -import pandas as pd warnings.filterwarnings('ignore') @@ -21,25 +15,29 @@ def __init__(self, args): def _build_model(self): return pmdarima.auto_arima - def train(self, setting): - return + pass def test(self, setting, test=0): - horizon = self.args.pred_len - test_data, test_loader = self._get_data(flag='test') preds, trues = [], [] self.test_timer.start_timer() with torch.no_grad(): - for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate( + test_loader + ): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float().to(self.device) - true = batch_y[:, -self.args.pred_len:].detach().cpu().numpy() - pred = batch_x[:,-7:,:].repeat( - 1,int(true.shape[1]/7)+1,1)[:,:true.shape[1],:].detach().cpu().numpy() + true = batch_y[:, -self.args.pred_len :].detach().cpu().numpy() + pred = ( + batch_x[:, -7:, :] + .repeat(1, int(true.shape[1] / 7) + 1, 1)[:, : true.shape[1], :] + .detach() + .cpu() + .numpy() + ) preds.append(pred) trues.append(true) diff --git a/benchmark/exp/exp_transformer.py b/benchmark/exp/exp_transformer.py index 8281fb1..f53c411 100644 --- a/benchmark/exp/exp_transformer.py +++ b/benchmark/exp/exp_transformer.py @@ -1,16 +1,15 @@ import os import time import warnings + import numpy as np import torch import torch.nn as nn from torch import optim -from data_provider.data_factory import data_provider -from exp.exp_basic import Exp_Basic -from transformer_models.models import FEDformer, Autoformer, Informer, Transformer -from utils.tools import EarlyStopping, TimeBudget, adjust_learning_rate, visual -from utils.metrics import metric +from exp.exp_basic import Exp_Basic +from transformer_models.models import Autoformer, FEDformer, Informer, Transformer +from utils.tools import EarlyStopping, TimeBudget, adjust_learning_rate warnings.filterwarnings('ignore') @@ -38,8 +37,7 @@ def _build_model(self): return model.to(self.device) def _select_optimizer(self): - model_optim = optim.Adam( - self.model.parameters(), lr=self.args.learning_rate) + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) return model_optim def _select_criterion(self): @@ -51,7 +49,9 @@ def vali(self, vali_data, vali_loader, criterion): self.model.eval() self.vali_timer.start_timer() with torch.no_grad(): - for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate( + vali_loader + ): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float() @@ -59,29 +59,34 @@ def vali(self, vali_data, vali_loader, criterion): batch_y_mark = batch_y_mark.float().to(self.device) # decoder input - dec_inp = torch.zeros_like( - batch_y[:, -self.args.pred_len:, 
:]).float() - dec_inp = torch.cat( - [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float() + dec_inp = ( + torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1) + .float() + .to(self.device) + ) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) else: if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) f_dim = -1 if self.args.features == 'MS' else 0 - batch_y = batch_y[:, -self.args.pred_len:, - f_dim:].to(self.device) + batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device) pred = outputs.detach().cpu() true = batch_y.detach().cpu() @@ -106,11 +111,10 @@ def train(self, setting): else: raise NotImplementedError - time_now = time.time() + time.time() train_steps = len(train_loader) - early_stopping = EarlyStopping( - patience=self.args.patience, verbose=False) + early_stopping = EarlyStopping(patience=self.args.patience, verbose=False) model_optim = self._select_optimizer() criterion = self._select_criterion() @@ -126,7 +130,9 @@ def train(self, setting): self.model.train() epoch_time = time.time() - for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate( + train_loader + ): iter_count += 1 model_optim.zero_grad() batch_x = batch_x.float().to(self.device) @@ -136,37 +142,43 @@ def train(self, setting): batch_y_mark = batch_y_mark.float().to(self.device) # decoder input - dec_inp = torch.zeros_like( - batch_y[:, -self.args.pred_len:, :]).float() - dec_inp = torch.cat( - [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float() + dec_inp = ( + torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1) + .float() + .to(self.device) + ) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) f_dim = -1 if self.args.features == 'MS' else 0 - batch_y = batch_y[:, -self.args.pred_len:, - f_dim:].to(self.device) + batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to( + self.device + ) loss = criterion(outputs, batch_y) train_loss.append(loss.item()) else: if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) f_dim = -1 if self.args.features == 'MS' else 0 - batch_y = batch_y[:, -self.args.pred_len:, - f_dim:].to(self.device) + batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device) loss = criterion(outputs, 
batch_y) train_loss.append(loss.item()) @@ -190,17 +202,19 @@ def train(self, setting): return self.model - print("Epoch: {} cost time: {}".format( - epoch + 1, time.time() - epoch_time)) + print('Epoch: {} cost time: {}'.format(epoch + 1, time.time() - epoch_time)) train_loss = np.average(train_loss) vali_loss = self.vali(vali_data, vali_loader, criterion) test_loss = self.vali(test_data, test_loader, criterion) - print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( - epoch + 1, train_steps, train_loss, vali_loss, test_loss)) + print( + 'Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}'.format( + epoch + 1, train_steps, train_loss, vali_loss, test_loss + ) + ) early_stopping(vali_loss, self.model, path) if early_stopping.early_stop: - print("Early stopping") + print('Early stopping') break adjust_learning_rate(model_optim, epoch + 1, self.args) @@ -217,11 +231,18 @@ def test(self, setting, test=0): if test: print('loading model') if self.args.use_gpu: - self.model.load_state_dict(torch.load(os.path.join( - './checkpoints/' + setting, 'checkpoint.pth'))) + self.model.load_state_dict( + torch.load( + os.path.join('./checkpoints/' + setting, 'checkpoint.pth') + ) + ) else: - self.model.load_state_dict(torch.load(os.path.join( - './checkpoints/' + setting, 'checkpoint.pth'), map_location=torch.device('cpu'))) + self.model.load_state_dict( + torch.load( + os.path.join('./checkpoints/' + setting, 'checkpoint.pth'), + map_location=torch.device('cpu'), + ) + ) preds = [] trues = [] @@ -230,10 +251,11 @@ def test(self, setting, test=0): os.makedirs(folder_path) self.model.eval() - j = 0 self.test_timer.start_timer() with torch.no_grad(): - for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate( + test_loader + ): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float().to(self.device) @@ -241,32 +263,37 @@ def test(self, setting, test=0): batch_y_mark = batch_y_mark.float().to(self.device) # decoder input - dec_inp = torch.zeros_like( - batch_y[:, -self.args.pred_len:, :]).float() - dec_inp = torch.cat( - [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float() + dec_inp = ( + torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1) + .float() + .to(self.device) + ) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) else: if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) f_dim = -1 if self.args.features == 'MS' else 0 - batch_y = batch_y[:, -self.args.pred_len:, - f_dim:].to(self.device) + batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device) outputs = outputs.detach().cpu().numpy() batch_y = batch_y.detach().cpu().numpy() @@ -300,33 +327,41 @@ def predict(self, setting, load=False): self.model.eval() with torch.no_grad(): - for i, (batch_x, batch_y, batch_x_mark, 
batch_y_mark) in enumerate(pred_loader): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate( + pred_loader + ): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float() batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) # decoder input - dec_inp = torch.zeros_like( - batch_y[:, -self.args.pred_len:, :]).float() - dec_inp = torch.cat( - [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float() + dec_inp = ( + torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1) + .float() + .to(self.device) + ) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) else: if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) pred = outputs.detach().cpu().numpy() # .squeeze() preds.append(pred) diff --git a/benchmark/exp/exp_transformer_metalearn.py b/benchmark/exp/exp_transformer_metalearn.py index 6081d12..1a62e89 100644 --- a/benchmark/exp/exp_transformer_metalearn.py +++ b/benchmark/exp/exp_transformer_metalearn.py @@ -1,33 +1,32 @@ +import sys import time + import yaml -import sys + sys.path.append('/home/ubuntu/ForecastPFN/academic_comparison/') import os -import time import warnings +from typing import Dict + import numpy as np import pandas as pd import torch import torch.nn as nn -from tqdm import tqdm from torch import optim -from typing import Dict +from tqdm import tqdm + from data_provider.data_factory import data_provider from exp.exp_basic import Exp_Basic from exp.torch_utils import * -from transformer_models.models import FEDformer, Autoformer, Informer, Transformer -from utils.tools import EarlyStopping, TimeBudget, adjust_learning_rate, visual -from utils.metrics import metric +from transformer_models.models import Autoformer, FEDformer, Informer, Transformer +from utils.tools import EarlyStopping, TimeBudget, adjust_learning_rate sys.path.append('/home/ubuntu/ForecastPFN/src/') sys.path.append('/home/ubuntu/ForecastPFN/src/training/') -from training.create_train_test_df import create_train_test_df import tensorflow as tf - - from training.config_variables import Config -from training.constants import PADDING, HISTORY_LEN, TARGET_LEN, TRIM_LEN, TARGET_INDEX +from training.constants import HISTORY_LEN, PADDING from training.prepare_dataset import filter_unusable_points from training.utils import load_tf_dataset @@ -61,12 +60,9 @@ def _build_model(self): return model.to(self.device) def _get_data(self, flag): - - TARGET_LEN = self.args.label_len + self.args.pred_len TRIM_LEN = self.args.label_len + self.args.pred_len - TARGET_INDEX = 2*TRIM_LEN - + TARGET_INDEX = 2 * TRIM_LEN def compute_time_features(ts: np.ndarray): """ @@ -76,24 +72,26 @@ def compute_time_features(ts: np.ndarray): """ ts = pd.to_datetime(ts) if Config.is_sub_day: - return np.stack([ts.minute, ts.hour, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1) + return np.stack( + [ts.minute, ts.hour, ts.day, 
ts.day_of_week + 1, ts.day_of_year], + axis=-1, + ) return np.stack([ts.month, ts.day, ts.day_of_week, ts.hour], axis=-1) - def build_frames(r: Dict[str, tf.Tensor]): raw_date_info = tf.numpy_function( - compute_time_features, inp=[r['ts']], Tout=tf.int64) + compute_time_features, inp=[r['ts']], Tout=tf.int64 + ) date_info = tf.signal.frame( - tf.pad(raw_date_info, [[PADDING, 0], [0, 0]]), - HISTORY_LEN, - 1, - axis=0 + tf.pad(raw_date_info, [[PADDING, 0], [0, 0]]), HISTORY_LEN, 1, axis=0 ) history = tf.signal.frame( - tf.pad(r['y'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1) + tf.pad(r['y'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1 + ) noise = tf.signal.frame( - tf.pad(r['noise'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1) + tf.pad(r['noise'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1 + ) target_dates = tf.signal.frame(raw_date_info, TARGET_LEN, 1, axis=0) target_values = tf.signal.frame(r['y'], TARGET_LEN, 1, axis=-1) @@ -107,7 +105,7 @@ def build_frames(r: Dict[str, tf.Tensor]): noise[-start_index:-TARGET_LEN], target_dates[TARGET_INDEX:], target_values[TARGET_INDEX:], - target_noise[TARGET_INDEX:] + target_noise[TARGET_INDEX:], ) @tf.function @@ -117,15 +115,14 @@ def gen_random_single_point( noise: tf.Tensor, target_dates: tf.Tensor, target_values: tf.Tensor, - target_noise: tf.Tensor + target_noise: tf.Tensor, ): - return dict( ts=date_info, - history=history*noise, + history=history * noise, noise=noise, target_ts=target_dates, - target_noise=target_noise + target_noise=target_noise, ), target_values @tf.function @@ -135,25 +132,24 @@ def gen_random_single_point_no_noise( noise: tf.Tensor, target_dates: tf.Tensor, target_values: tf.Tensor, - target_noise: tf.Tensor + target_noise: tf.Tensor, ): - return dict( ts=date_info, history=history, noise=noise, target_ts=target_dates, - target_noise=target_noise + target_noise=target_noise, ), target_values - def remove_noise(x, y): return ( { 'ts': x['ts'], 'history': x['history'], 'target_ts': x['target_ts'], - }, y + }, + y, ) def create_train_test_df(combined_ds, test_noise=False): @@ -166,9 +162,14 @@ def create_train_test_df(combined_ds, test_noise=False): base_train_df.map(func, num_parallel_calls=tf.data.AUTOTUNE) for func in task_map.values() ] - train_df = tf.data.Dataset.choose_from_datasets( - train_tasks_dfs, tf.data.Dataset.range(len(train_tasks_dfs)).repeat() - ).unbatch().filter(filter_unusable_points) + train_df = ( + tf.data.Dataset.choose_from_datasets( + train_tasks_dfs, + tf.data.Dataset.range(len(train_tasks_dfs)).repeat(), + ) + .unbatch() + .filter(filter_unusable_points) + ) task_map_test = { 'point': gen_random_single_point_no_noise, @@ -185,21 +186,24 @@ def create_train_test_df(combined_ds, test_noise=False): for func in task_map_test.values() ] - test_df = tf.data.Dataset.choose_from_datasets( - test_tasks_dfs, tf.data.Dataset.range(len(test_tasks_dfs)).repeat() - ).unbatch().filter(filter_unusable_points) + test_df = ( + tf.data.Dataset.choose_from_datasets( + test_tasks_dfs, tf.data.Dataset.range(len(test_tasks_dfs)).repeat() + ) + .unbatch() + .filter(filter_unusable_points) + ) test_df = test_df.map(remove_noise) return train_df, test_df - def get_combined_ds(config): - version = config["version"] + version = config['version'] datasets = [ # load_tf_dataset(config["prefix"] + f"{version}/minute.tfrecords"), # load_tf_dataset(config["prefix"] + f"{version}/hourly.tfrecords"), - load_tf_dataset(config["prefix"] + f"{version}/daily.tfrecords"), + load_tf_dataset(config['prefix'] + f'{version}/daily.tfrecords'), # 
load_tf_dataset(config["prefix"] + f"{version}/weekly.tfrecords"), # load_tf_dataset(config["prefix"] + f"{version}/monthly.tfrecords"), ] @@ -209,36 +213,36 @@ def get_combined_ds(config): return combined_ds - - if flag == 'test': data_set, data_loader = data_provider(self.args, flag) elif flag == 'train': - with open('/home/ubuntu/ForecastPFN/src/training/config_mf_replicate_testnoiseF.yaml') as config_file: + with open( + '/home/ubuntu/ForecastPFN/src/training/config_mf_replicate_testnoiseF.yaml' + ) as config_file: config = yaml.load(config_file, yaml.loader.SafeLoader) combined_ds = get_combined_ds(config) - train_df, vali_df = create_train_test_df( - combined_ds, config["test_noise"]) + train_df, vali_df = create_train_test_df(combined_ds, config['test_noise']) data_loader = TFRecordDataLoader( - train_df, self.args.batch_size, True, 10_000) + train_df, self.args.batch_size, True, 10_000 + ) data_set = None elif flag == 'val': - with open('/home/ubuntu/ForecastPFN/src/training/config_mf_replicate_testnoiseF.yaml') as config_file: + with open( + '/home/ubuntu/ForecastPFN/src/training/config_mf_replicate_testnoiseF.yaml' + ) as config_file: config = yaml.load(config_file, yaml.loader.SafeLoader) combined_ds = get_combined_ds(config) - train_df, vali_df = create_train_test_df( - combined_ds, config["test_noise"]) + train_df, vali_df = create_train_test_df(combined_ds, config['test_noise']) data_set = None data_loader = TFRecordDataLoader( - vali_df, self.args.batch_size, True, 10_000) + vali_df, self.args.batch_size, True, 10_000 + ) return data_set, data_loader - def _select_optimizer(self): - model_optim = optim.Adam( - self.model.parameters(), lr=self.args.learning_rate) + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) return model_optim def _select_criterion(self): @@ -255,37 +259,41 @@ def vali(self, vali_data, vali_loader, criterion): X_batch = numpy_to_torch(batch_data[0], self.device) y_batch = torch.from_numpy(batch_data[1]).to(self.device) - batch_x = X_batch['history'].float().to( - self.device).unsqueeze(2) + batch_x = X_batch['history'].float().to(self.device).unsqueeze(2) batch_y = y_batch.float().to(self.device).unsqueeze(2) batch_x_mark = X_batch['ts'].float().to(self.device) batch_y_mark = X_batch['target_ts'].float().to(self.device) # decoder input - dec_inp = torch.zeros_like( - batch_y[:, -self.args.pred_len:, :]).float() - dec_inp = torch.cat( - [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float() + dec_inp = ( + torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1) + .float() + .to(self.device) + ) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) else: if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) f_dim = -1 if self.args.features == 'MS' else 0 - batch_y = batch_y[:, -self.args.pred_len:, - f_dim:].to(self.device) + batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device) 
pred = outputs.detach().cpu() true = batch_y.detach().cpu() @@ -304,7 +312,9 @@ def test(self, vali_data, vali_loader, criterion): self.model.eval() self.vali_timer.start_timer() with torch.no_grad(): - for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate( + vali_loader + ): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float() @@ -312,29 +322,34 @@ def test(self, vali_data, vali_loader, criterion): batch_y_mark = batch_y_mark.float().to(self.device) # decoder input - dec_inp = torch.zeros_like( - batch_y[:, -self.args.pred_len:, :]).float() - dec_inp = torch.cat( - [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float() + dec_inp = ( + torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1) + .float() + .to(self.device) + ) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) else: if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) f_dim = -1 if self.args.features == 'MS' else 0 - batch_y = batch_y[:, -self.args.pred_len:, - f_dim:].to(self.device) + batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device) pred = outputs.detach().cpu() true = batch_y.detach().cpu() @@ -346,7 +361,7 @@ def test(self, vali_data, vali_loader, criterion): self.model.train() self.vali_timer.end_timer() return total_loss - + def train(self, setting): print(setting) @@ -360,11 +375,10 @@ def train(self, setting): else: raise NotImplementedError - time_now = time.time() + time.time() train_steps = -1 - early_stopping = EarlyStopping( - patience=self.args.patience, verbose=False) + early_stopping = EarlyStopping(patience=self.args.patience, verbose=False) model_optim = self._select_optimizer() criterion = self._select_criterion() @@ -387,7 +401,7 @@ def train(self, setting): batch_x = X_batch['history'].float().to(self.device).unsqueeze(2) batch_y = y_batch.float().to(self.device).unsqueeze(2) - + batch_x_mark = X_batch['ts'].float().to(self.device) batch_y_mark = X_batch['target_ts'].float().to(self.device) @@ -395,37 +409,43 @@ def train(self, setting): model_optim.zero_grad() # decoder input - dec_inp = torch.zeros_like( - batch_y[:, -self.args.pred_len:, :]).float() - dec_inp = torch.cat( - [batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float() + dec_inp = ( + torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1) + .float() + .to(self.device) + ) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) f_dim = -1 if self.args.features == 'MS' else 0 
- batch_y = batch_y[:, -self.args.pred_len:, - f_dim:].to(self.device) + batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to( + self.device + ) loss = criterion(outputs, batch_y) train_loss.append(loss.item()) else: if self.args.output_attention: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + batch_x, batch_x_mark, dec_inp, batch_y_mark + )[0] else: outputs = self.model( - batch_x, batch_x_mark, dec_inp, batch_y_mark) + batch_x, batch_x_mark, dec_inp, batch_y_mark + ) f_dim = -1 if self.args.features == 'MS' else 0 - batch_y = batch_y[:, -self.args.pred_len:, - f_dim:].to(self.device) + batch_y = batch_y[:, -self.args.pred_len :, f_dim:].to(self.device) loss = criterion(outputs, batch_y) train_loss.append(loss.item()) @@ -448,18 +468,20 @@ def train(self, setting): self.model.load_state_dict(torch.load(best_model_path)) return self.model - + if batch_i >= 1_000: break - print("Epoch: {} cost time: {}".format( - epoch + 1, time.time() - epoch_time)) + print('Epoch: {} cost time: {}'.format(epoch + 1, time.time() - epoch_time)) train_loss = np.average(train_loss) vali_loss = self.vali(vali_data, vali_loader, criterion) test_loss = self.test(test_data, test_loader, criterion) - print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( - epoch + 1, train_steps, train_loss, vali_loss, test_loss)) + print( + 'Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}'.format( + epoch + 1, train_steps, train_loss, vali_loss, test_loss + ) + ) early_stopping(vali_loss, self.model, path) # if early_stopping.early_stop: # print("Early stopping") @@ -473,4 +495,3 @@ def train(self, setting): self.model.load_state_dict(torch.load(best_model_path)) return self.model - diff --git a/benchmark/exp/torch_utils.py b/benchmark/exp/torch_utils.py index ca30a23..aae5e6a 100644 --- a/benchmark/exp/torch_utils.py +++ b/benchmark/exp/torch_utils.py @@ -1,18 +1,21 @@ -import torch import os from collections import OrderedDict from functools import partial + +import tensorflow as tf +import tensorflow_datasets as tfds +import torch from torch.nn import MSELoss from torch.optim import Adam -import tensorflow_datasets as tfds -import tensorflow as tf from tqdm import tqdm DEFAULT_LOSS = MSELoss() DEFAULT_OPTIMIZER = partial(Adam, lr=0.001) -load_dir = 'tensorboard/mf_replicate_testnoiseT_shuffle5Millilon.20230714-133237/models/51' +load_dir = ( + 'tensorboard/mf_replicate_testnoiseT_shuffle5Millilon.20230714-133237/models/51' +) def numpy_to_torch(X, device): @@ -63,12 +66,16 @@ def __next__(self): class AdditionalValidationSets: - def __init__(self, validation_sets, batch_size=1, metrics=[], loss=DEFAULT_LOSS, device=None): + def __init__( + self, validation_sets, batch_size=1, metrics=[], loss=DEFAULT_LOSS, device=None + ): self.validation_sets = [] for validation_set in validation_sets: if len(validation_set) not in [2]: raise ValueError() - self.validation_sets.append([tfds.as_numpy(validation_set[0]), validation_set[1]]) + self.validation_sets.append( + [tfds.as_numpy(validation_set[0]), validation_set[1]] + ) self.epoch = [] self.metrics = metrics self.loss = loss @@ -88,7 +95,16 @@ def on_epoch_end(self, model, epoch, tbCallback=None): validation_data, validation_set_name = validation_set else: raise ValueError() - results = add_metrics_to_log(model, validation_data, self.loss, self.metrics, tbCallback, f'add_valid/{validation_set_name}/', epoch, self.device) + results = add_metrics_to_log( + model, + 
validation_data, + self.loss, + self.metrics, + tbCallback, + f'add_valid/{validation_set_name}/', + epoch, + self.device, + ) log.update(results) self.logs[epoch] = log return log @@ -106,13 +122,17 @@ def predict(model, data, device, steps_per_epoch=None): y_batch_pred = model(X_batch) y_batch_pred, y_batch = model.transform_output(y_batch_pred, y_batch) y_true = y_batch if y_true is None else torch.concat([y_true, y_batch]) - y_pred = y_batch_pred if y_pred is None else torch.concat([y_pred, y_batch_pred]) + y_pred = ( + y_batch_pred if y_pred is None else torch.concat([y_pred, y_batch_pred]) + ) if steps_per_epoch is not None and batch_i >= steps_per_epoch: break return y_true, y_pred -def add_metrics_to_log(model, data, loss, metrics, writer, prefix, epoch, device, steps_per_epoch=None): +def add_metrics_to_log( + model, data, loss, metrics, writer, prefix, epoch, device, steps_per_epoch=None +): with torch.no_grad(): y_true, y_pred = predict(model, data, device, steps_per_epoch) y_true = y_true.reshape(-1) @@ -130,23 +150,25 @@ def add_metrics_to_log(model, data, loss, metrics, writer, prefix, epoch, device return log -def fit(model, - train_df, - batch_size=1024, - epochs=1, - verbose=1, - valid_df=None, - shuffle=0, - initial_epoch=0, - seed=None, - loss=DEFAULT_LOSS, - optimizer=DEFAULT_OPTIMIZER, - metrics=None, - writer=None, - device='cpu', - steps_per_epoch=None, - logdir=None, - additional_validation_sets=[]): +def fit( + model, + train_df, + batch_size=1024, + epochs=1, + verbose=1, + valid_df=None, + shuffle=0, + initial_epoch=0, + seed=None, + loss=DEFAULT_LOSS, + optimizer=DEFAULT_OPTIMIZER, + metrics=None, + writer=None, + device='cpu', + steps_per_epoch=None, + logdir=None, + additional_validation_sets=[], +): """Trains the model similar to Keras' .fit(...) 
method # Arguments @@ -183,7 +205,9 @@ def fit(model, # Build DataLoaders valid_data = TFRecordDataLoader(valid_df, batch_size) - additional_valid_data = AdditionalValidationSets(additional_validation_sets, metrics=metrics, loss=loss, device=device) + additional_valid_data = AdditionalValidationSets( + additional_validation_sets, metrics=metrics, loss=loss, device=device + ) # Compile optimizer opt = optimizer(model.parameters()) # load = torch.load(load_dir) @@ -192,11 +216,11 @@ def fit(model, # Run training loop logs = [] for t in tqdm(range(initial_epoch, epochs)): - logfile.write(f"Epoch: {t+1}\n") + logfile.write(f'Epoch: {t+1}\n') train_data = TFRecordDataLoader(train_df, batch_size, True, shuffle) model.train() if verbose and t % 10 == 0: - print("Epoch {0} / {1}".format(t + 1, epochs)) + print('Epoch {0} / {1}'.format(t + 1, epochs)) log = OrderedDict() epoch_loss = 0.0 # Run batches @@ -222,18 +246,27 @@ def fit(model, # train_metric_log = add_metrics_to_log(model, train_data, loss, metrics, writer, prefix='train/metrics/', epoch=t, device=device, steps_per_epoch=steps_per_epoch) # log.update(train_metric_log) if valid_data is not None: - val_metric_log = add_metrics_to_log(model, valid_data, loss, metrics, writer, prefix='valid/metrics/', epoch=t, device=device) + val_metric_log = add_metrics_to_log( + model, + valid_data, + loss, + metrics, + writer, + prefix='valid/metrics/', + epoch=t, + device=device, + ) log.update(val_metric_log) # Additional validation set if t % 10 == 0: add_log = additional_valid_data.on_epoch_end(model, t, writer) - logfile.write(str(add_log)+'\n') + logfile.write(str(add_log) + '\n') to_save = { - "model": model.state_dict(), - "optimizer": opt.state_dict(), + 'model': model.state_dict(), + 'optimizer': opt.state_dict(), } torch.save(to_save, logdir + f'/models/{t+1}') - logfile.write(str(log)+'\n') + logfile.write(str(log) + '\n') logfile.flush() logs.append(log) diff --git a/benchmark/layers/AutoCorrelation.py b/benchmark/layers/AutoCorrelation.py index 2fda13a..6fb6ec6 100644 --- a/benchmark/layers/AutoCorrelation.py +++ b/benchmark/layers/AutoCorrelation.py @@ -1,8 +1,8 @@ +import math import time + import torch import torch.nn as nn -import numpy as np -import math from torch.nn.functional import interpolate @@ -13,6 +13,7 @@ def func2(*args, **kw): t = time.time() - now print('call <{}>, time={}'.format(func.__name__, t)) return y + return func2 @@ -23,7 +24,16 @@ class AutoCorrelation(nn.Module): (2) time delay aggregation This block can replace the self-attention family mechanism seamlessly. 
""" - def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False, configs=None): + + def __init__( + self, + mask_flag=True, + factor=1, + scale=None, + attention_dropout=0.1, + output_attention=False, + configs=None, + ): super(AutoCorrelation, self).__init__() print('Autocorrelation used !') self.factor = factor @@ -55,8 +65,13 @@ def time_delay_agg_training(self, values, corr): delays_agg = torch.zeros_like(values).float() for i in range(top_k): pattern = torch.roll(tmp_values, -int(index[i]), -1) - delays_agg = delays_agg + pattern * \ - (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) + delays_agg = delays_agg + pattern * ( + tmp_corr[:, i] + .unsqueeze(1) + .unsqueeze(1) + .unsqueeze(1) + .repeat(1, head, channel, length) + ) return delays_agg # size=[B, H, d, S] def time_delay_agg_inference(self, values, corr): @@ -69,7 +84,14 @@ def time_delay_agg_inference(self, values, corr): channel = values.shape[2] length = values.shape[3] # index init - init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda() + init_index = ( + torch.arange(length) + .unsqueeze(0) + .unsqueeze(0) + .unsqueeze(0) + .repeat(batch, head, channel, 1) + .cuda() + ) # find top k top_k = int(self.factor * math.log(length)) mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) @@ -81,10 +103,17 @@ def time_delay_agg_inference(self, values, corr): tmp_values = values.repeat(1, 1, 1, 2) delays_agg = torch.zeros_like(values).float() for i in range(top_k): - tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length) + tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze( + 1 + ).repeat(1, head, channel, length) pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) - delays_agg = delays_agg + pattern * \ - (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) + delays_agg = delays_agg + pattern * ( + tmp_corr[:, i] + .unsqueeze(1) + .unsqueeze(1) + .unsqueeze(1) + .repeat(1, head, channel, length) + ) return delays_agg def time_delay_agg_full(self, values, corr): @@ -96,7 +125,14 @@ def time_delay_agg_full(self, values, corr): channel = values.shape[2] length = values.shape[3] # index init - init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda() + init_index = ( + torch.arange(length) + .unsqueeze(0) + .unsqueeze(0) + .unsqueeze(0) + .repeat(batch, head, channel, 1) + .cuda() + ) # find top k top_k = int(self.factor * math.log(length)) weights = torch.topk(corr, top_k, dim=-1)[0] @@ -116,7 +152,7 @@ def forward(self, queries, keys, values, attn_mask): B, L, H, E = queries.shape _, S, _, D = values.shape if L > S: - zeros = torch.zeros_like(queries[:, :(L - S), :]).float() + zeros = torch.zeros_like(queries[:, : (L - S), :]).float() values = torch.cat([values, zeros], dim=1) keys = torch.cat([keys, zeros], dim=1) else: @@ -138,20 +174,34 @@ def forward(self, queries, keys, values, attn_mask): for q, k, j in zip(qs, ks, j_list): q_list += [interpolate(q, scale_factor=j, mode='linear')[:, :, -L:]] k_list += [interpolate(k, scale_factor=j, mode='linear')[:, :, -L:]] - queries = torch.stack([i.reshape([B, H, E, L]) for i in q_list], dim=3).reshape([B, H, -1, L]).permute(0, 3, 1, 2) - keys = torch.stack([i.reshape([B, H, E, L]) for i in k_list], dim=3).reshape([B, H, -1, L]).permute(0, 3, 1, 2) + queries = ( + 
torch.stack([i.reshape([B, H, E, L]) for i in q_list], dim=3) + .reshape([B, H, -1, L]) + .permute(0, 3, 1, 2) + ) + keys = ( + torch.stack([i.reshape([B, H, E, L]) for i in k_list], dim=3) + .reshape([B, H, -1, L]) + .permute(0, 3, 1, 2) + ) else: pass - q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1) # size=[B, H, E, L] + q_fft = torch.fft.rfft( + queries.permute(0, 2, 3, 1).contiguous(), dim=-1 + ) # size=[B, H, E, L] k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1) res = q_fft * torch.conj(k_fft) - corr = torch.fft.irfft(res, dim=-1) # size=[B, H, E, L] + corr = torch.fft.irfft(res, dim=-1) # size=[B, H, E, L] # time delay agg if self.training: - V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) # [B, L, H, E], [B, H, E, L] -> [B, L, H, E] + V = self.time_delay_agg_training( + values.permute(0, 2, 3, 1).contiguous(), corr + ).permute(0, 3, 1, 2) # [B, L, H, E], [B, H, E, L] -> [B, L, H, E] else: - V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) + V = self.time_delay_agg_inference( + values.permute(0, 2, 3, 1).contiguous(), corr + ).permute(0, 3, 1, 2) else: V_list = [] queries = queries.reshape([B, L, -1]) @@ -172,12 +222,16 @@ def forward(self, queries, keys, values, attn_mask): res = q_fft * torch.conj(k_fft) corr = torch.fft.irfft(res, dim=-1) # [B, H, E, L] if self.training: - V = self.time_delay_agg_training(v.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) + V = self.time_delay_agg_training( + v.permute(0, 2, 3, 1).contiguous(), corr + ).permute(0, 3, 1, 2) else: - V = self.time_delay_agg_inference(v.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) + V = self.time_delay_agg_inference( + v.permute(0, 2, 3, 1).contiguous(), corr + ).permute(0, 3, 1, 2) V_list += [V] - Vl = V_list[-1].reshape([B, -1, H*E]).transpose(1, 2) - Vh_list = [i.reshape([B, -1, H*E]).transpose(1, 2) for i in V_list[:-1]] + Vl = V_list[-1].reshape([B, -1, H * E]).transpose(1, 2) + Vh_list = [i.reshape([B, -1, H * E]).transpose(1, 2) for i in V_list[:-1]] V = self.dwt1div((Vl, Vh_list)).reshape([B, H, E, -1]).permute(0, 3, 1, 2) # corr = self.dwt1div((V_list[-1], V_list[:-1])) @@ -188,8 +242,7 @@ def forward(self, queries, keys, values, attn_mask): class AutoCorrelationLayer(nn.Module): - def __init__(self, correlation, d_model, n_heads, d_keys=None, - d_values=None): + def __init__(self, correlation, d_model, n_heads, d_keys=None, d_values=None): super(AutoCorrelationLayer, self).__init__() d_keys = d_keys or (d_model // n_heads) @@ -211,12 +264,7 @@ def forward(self, queries, keys, values, attn_mask): keys = self.key_projection(keys).view(B, S, H, -1) values = self.value_projection(values).view(B, S, H, -1) - out, attn = self.inner_correlation( - queries, - keys, - values, - attn_mask - ) + out, attn = self.inner_correlation(queries, keys, values, attn_mask) out = out.view(B, L, -1) - return self.out_projection(out), attn \ No newline at end of file + return self.out_projection(out), attn diff --git a/benchmark/layers/Autoformer_EncDec.py b/benchmark/layers/Autoformer_EncDec.py index 5bcae4e..d97c916 100644 --- a/benchmark/layers/Autoformer_EncDec.py +++ b/benchmark/layers/Autoformer_EncDec.py @@ -1,14 +1,15 @@ +import math + import torch import torch.nn as nn import torch.nn.functional as F -import math -from layers.SelfAttention_Family import FullAttention class my_Layernorm(nn.Module): """ Special designed layernorm for the seasonal 
part """ + def __init__(self, channels): super(my_Layernorm, self).__init__() self.layernorm = nn.LayerNorm(channels) @@ -23,6 +24,7 @@ class moving_avg(nn.Module): """ Moving average block to highlight the trend of time series """ + def __init__(self, kernel_size, stride): super(moving_avg, self).__init__() if type(kernel_size) == list: @@ -32,7 +34,9 @@ def __init__(self, kernel_size, stride): def forward(self, x): # padding on the both ends of time series - front = x[:, 0:1, :].repeat(1, self.kernel_size - 1-math.floor((self.kernel_size - 1) // 2), 1) + front = x[:, 0:1, :].repeat( + 1, self.kernel_size - 1 - math.floor((self.kernel_size - 1) // 2), 1 + ) end = x[:, -1:, :].repeat(1, math.floor((self.kernel_size - 1) // 2), 1) x = torch.cat([front, x, end], dim=1) x = self.avg(x.permute(0, 2, 1)) @@ -44,6 +48,7 @@ class series_decomp(nn.Module): """ Series decomposition block """ + def __init__(self, kernel_size): super(series_decomp, self).__init__() self.moving_avg = moving_avg(kernel_size, stride=1) @@ -58,20 +63,23 @@ class series_decomp_multi(nn.Module): """ Series decomposition block """ + def __init__(self, kernel_size): super(series_decomp_multi, self).__init__() self.moving_avg = [moving_avg(kernel, stride=1) for kernel in kernel_size] self.layer = torch.nn.Linear(1, len(kernel_size)) def forward(self, x): - moving_mean=[] + moving_mean = [] for func in self.moving_avg: moving_avg = func(x) moving_mean.append(moving_avg.unsqueeze(-1)) - moving_mean=torch.cat(moving_mean,dim=-1) - moving_mean = torch.sum(moving_mean*nn.Softmax(-1)(self.layer(x.unsqueeze(-1))),dim=-1) + moving_mean = torch.cat(moving_mean, dim=-1) + moving_mean = torch.sum( + moving_mean * nn.Softmax(-1)(self.layer(x.unsqueeze(-1))), dim=-1 + ) res = x - moving_mean - return res, moving_mean + return res, moving_mean class FourierDecomp(nn.Module): @@ -80,19 +88,32 @@ def __init__(self): pass def forward(self, x): - x_ft = torch.fft.rfft(x, dim=-1) + torch.fft.rfft(x, dim=-1) class EncoderLayer(nn.Module): """ Autoformer encoder layer with the progressive decomposition architecture """ - def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"): + + def __init__( + self, + attention, + d_model, + d_ff=None, + moving_avg=25, + dropout=0.1, + activation='relu', + ): super(EncoderLayer, self).__init__() d_ff = d_ff or 4 * d_model self.attention = attention - self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) - self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) + self.conv1 = nn.Conv1d( + in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False + ) + self.conv2 = nn.Conv1d( + in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False + ) if isinstance(moving_avg, list): self.decomp1 = series_decomp_multi(moving_avg) @@ -102,13 +123,10 @@ def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, ac self.decomp2 = series_decomp(moving_avg) self.dropout = nn.Dropout(dropout) - self.activation = F.relu if activation == "relu" else F.gelu + self.activation = F.relu if activation == 'relu' else F.gelu def forward(self, x, attn_mask=None): - new_x, attn = self.attention( - x, x, x, - attn_mask=attn_mask - ) + new_x, attn = self.attention(x, x, x, attn_mask=attn_mask) x = x + self.dropout(new_x) x, _ = self.decomp1(x) y = x @@ -122,10 +140,13 @@ class Encoder(nn.Module): """ Autoformer encoder """ + def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 
super(Encoder, self).__init__() self.attn_layers = nn.ModuleList(attn_layers) - self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None + self.conv_layers = ( + nn.ModuleList(conv_layers) if conv_layers is not None else None + ) self.norm = norm_layer def forward(self, x, attn_mask=None): @@ -152,14 +173,28 @@ class DecoderLayer(nn.Module): """ Autoformer decoder layer with the progressive decomposition architecture """ - def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None, - moving_avg=25, dropout=0.1, activation="relu"): + + def __init__( + self, + self_attention, + cross_attention, + d_model, + c_out, + d_ff=None, + moving_avg=25, + dropout=0.1, + activation='relu', + ): super(DecoderLayer, self).__init__() d_ff = d_ff or 4 * d_model self.self_attention = self_attention self.cross_attention = cross_attention - self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) - self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) + self.conv1 = nn.Conv1d( + in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False + ) + self.conv2 = nn.Conv1d( + in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False + ) if isinstance(moving_avg, list): self.decomp1 = series_decomp_multi(moving_avg) @@ -171,21 +206,24 @@ def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None, self.decomp3 = series_decomp(moving_avg) self.dropout = nn.Dropout(dropout) - self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1, - padding_mode='circular', bias=False) - self.activation = F.relu if activation == "relu" else F.gelu + self.projection = nn.Conv1d( + in_channels=d_model, + out_channels=c_out, + kernel_size=3, + stride=1, + padding=1, + padding_mode='circular', + bias=False, + ) + self.activation = F.relu if activation == 'relu' else F.gelu def forward(self, x, cross, x_mask=None, cross_mask=None): - x = x + self.dropout(self.self_attention( - x, x, x, - attn_mask=x_mask - )[0]) + x = x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0]) x, trend1 = self.decomp1(x) - x = x + self.dropout(self.cross_attention( - x, cross, cross, - attn_mask=cross_mask - )[0]) + x = x + self.dropout( + self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0] + ) x, trend2 = self.decomp2(x) y = x @@ -194,7 +232,9 @@ def forward(self, x, cross, x_mask=None, cross_mask=None): x, trend3 = self.decomp3(x + y) residual_trend = trend1 + trend2 + trend3 - residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2) + residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose( + 1, 2 + ) return x, residual_trend @@ -202,6 +242,7 @@ class Decoder(nn.Module): """ Autoformer encoder """ + def __init__(self, layers, norm_layer=None, projection=None): super(Decoder, self).__init__() self.layers = nn.ModuleList(layers) diff --git a/benchmark/layers/Embed.py b/benchmark/layers/Embed.py index 1cc5034..1921d66 100644 --- a/benchmark/layers/Embed.py +++ b/benchmark/layers/Embed.py @@ -1,8 +1,7 @@ +import math + import torch import torch.nn as nn -import torch.nn.functional as F -from torch.nn.utils import weight_norm -import math class PositionalEmbedding(nn.Module): @@ -13,7 +12,9 @@ def __init__(self, d_model, max_len=5000): pe.require_grad = False position = torch.arange(0, max_len).float().unsqueeze(1) - div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() + 
div_term = ( + torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model) + ).exp() pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) @@ -22,18 +23,26 @@ def __init__(self, d_model, max_len=5000): self.register_buffer('pe', pe) def forward(self, x): - return self.pe[:, :x.size(1)] + return self.pe[:, : x.size(1)] class TokenEmbedding(nn.Module): def __init__(self, c_in, d_model): super(TokenEmbedding, self).__init__() padding = 1 if torch.__version__ >= '1.5.0' else 2 - self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, - kernel_size=3, padding=padding, padding_mode='circular', bias=False) + self.tokenConv = nn.Conv1d( + in_channels=c_in, + out_channels=d_model, + kernel_size=3, + padding=padding, + padding_mode='circular', + bias=False, + ) for m in self.modules(): if isinstance(m, nn.Conv1d): - nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu') + nn.init.kaiming_normal_( + m.weight, mode='fan_in', nonlinearity='leaky_relu' + ) def forward(self, x): x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) @@ -48,7 +57,9 @@ def __init__(self, c_in, d_model): w.require_grad = False position = torch.arange(0, c_in).float().unsqueeze(1) - div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() + div_term = ( + torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model) + ).exp() w[:, 0::2] = torch.sin(position * div_term) w[:, 1::2] = torch.cos(position * div_term) @@ -81,7 +92,9 @@ def __init__(self, d_model, embed_type='fixed', freq='h'): def forward(self, x): x = x.long() - minute_x = self.minute_embed(x[:, :, 4]) if hasattr(self, 'minute_embed') else 0. + minute_x = ( + self.minute_embed(x[:, :, 4]) if hasattr(self, 'minute_embed') else 0.0 + ) hour_x = self.hour_embed(x[:, :, 3]) weekday_x = self.weekday_embed(x[:, :, 2]) day_x = self.day_embed(x[:, :, 1]) @@ -108,15 +121,22 @@ def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) self.position_embedding = PositionalEmbedding(d_model=d_model) - self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, - freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( - d_model=d_model, embed_type=embed_type, freq=freq) + self.temporal_embedding = ( + TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) + if embed_type != 'timeF' + else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) + ) self.dropout = nn.Dropout(p=dropout) def forward(self, x, x_mark): - x = self.value_embedding(x) + self.temporal_embedding(x_mark) + self.position_embedding(x) + x = ( + self.value_embedding(x) + + self.temporal_embedding(x_mark) + + self.position_embedding(x) + ) return self.dropout(x) + class DataEmbedding_onlypos(nn.Module): def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): super(DataEmbedding_onlypos, self).__init__() @@ -128,16 +148,19 @@ def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): def forward(self, x, x_mark): x = self.value_embedding(x) + self.position_embedding(x) return self.dropout(x) - + + class DataEmbedding_wo_pos(nn.Module): def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): super(DataEmbedding_wo_pos, self).__init__() self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) self.position_embedding = PositionalEmbedding(d_model=d_model) - self.temporal_embedding = 
TemporalEmbedding(d_model=d_model, embed_type=embed_type, - freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( - d_model=d_model, embed_type=embed_type, freq=freq) + self.temporal_embedding = ( + TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) + if embed_type != 'timeF' + else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) + ) self.dropout = nn.Dropout(p=dropout) def forward(self, x, x_mark): diff --git a/benchmark/layers/FourierCorrelation.py b/benchmark/layers/FourierCorrelation.py index 12892cd..9567d55 100644 --- a/benchmark/layers/FourierCorrelation.py +++ b/benchmark/layers/FourierCorrelation.py @@ -13,7 +13,7 @@ def get_frequency_modes(seq_len, modes=64, mode_select_method='random'): 'random' means sampling randomly; 'else' means sampling the lowest modes; """ - modes = min(modes, seq_len//2) + modes = min(modes, seq_len // 2) if mode_select_method == 'random': index = list(range(0, seq_len // 2)) np.random.shuffle(index) @@ -26,7 +26,9 @@ def get_frequency_modes(seq_len, modes=64, mode_select_method='random'): # ########## fourier layer ############# class FourierBlock(nn.Module): - def __init__(self, in_channels, out_channels, seq_len, modes=0, mode_select_method='random'): + def __init__( + self, in_channels, out_channels, seq_len, modes=0, mode_select_method='random' + ): super(FourierBlock, self).__init__() print('fourier enhanced block used!') """ @@ -34,17 +36,27 @@ def __init__(self, in_channels, out_channels, seq_len, modes=0, mode_select_meth it does FFT, linear transform, and Inverse FFT. """ # get modes on frequency domain - self.index = get_frequency_modes(seq_len, modes=modes, mode_select_method=mode_select_method) + self.index = get_frequency_modes( + seq_len, modes=modes, mode_select_method=mode_select_method + ) print('modes={}, index={}'.format(modes, self.index)) - self.scale = (1 / (in_channels * out_channels)) + self.scale = 1 / (in_channels * out_channels) self.weights1 = nn.Parameter( - self.scale * torch.rand(8, in_channels // 8, out_channels // 8, len(self.index), dtype=torch.cfloat)) + self.scale + * torch.rand( + 8, + in_channels // 8, + out_channels // 8, + len(self.index), + dtype=torch.cfloat, + ) + ) # Complex multiplication def compl_mul1d(self, input, weights): # (batch, in_channel, x ), (in_channel, out_channel, x) -> (batch, out_channel, x) - return torch.einsum("bhi,hio->bho", input, weights) + return torch.einsum('bhi,hio->bho', input, weights) def forward(self, q, k, v, mask): # size = [B, L, H, E] @@ -55,7 +67,9 @@ def forward(self, q, k, v, mask): # Perform Fourier neural operations out_ft = torch.zeros(B, H, E, L // 2 + 1, device=x.device, dtype=torch.cfloat) for wi, i in enumerate(self.index): - out_ft[:, :, :, wi] = self.compl_mul1d(x_ft[:, :, :, i], self.weights1[:, :, :, wi]) + out_ft[:, :, :, wi] = self.compl_mul1d( + x_ft[:, :, :, i], self.weights1[:, :, :, wi] + ) # Return to time domain x = torch.fft.irfft(out_ft, n=x.size(-1)) return (x, None) @@ -63,8 +77,17 @@ def forward(self, q, k, v, mask): # ########## Fourier Cross Former #################### class FourierCrossAttention(nn.Module): - def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, mode_select_method='random', - activation='tanh', policy=0): + def __init__( + self, + in_channels, + out_channels, + seq_len_q, + seq_len_kv, + modes=64, + mode_select_method='random', + activation='tanh', + policy=0, + ): super(FourierCrossAttention, self).__init__() print(' fourier enhanced cross attention 
used!') """ @@ -74,56 +97,72 @@ def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, m self.in_channels = in_channels self.out_channels = out_channels # get modes for queries and keys (& values) on frequency domain - self.index_q = get_frequency_modes(seq_len_q, modes=modes, mode_select_method=mode_select_method) - self.index_kv = get_frequency_modes(seq_len_kv, modes=modes, mode_select_method=mode_select_method) + self.index_q = get_frequency_modes( + seq_len_q, modes=modes, mode_select_method=mode_select_method + ) + self.index_kv = get_frequency_modes( + seq_len_kv, modes=modes, mode_select_method=mode_select_method + ) print('modes_q={}, index_q={}'.format(len(self.index_q), self.index_q)) print('modes_kv={}, index_kv={}'.format(len(self.index_kv), self.index_kv)) - self.scale = (1 / (in_channels * out_channels)) + self.scale = 1 / (in_channels * out_channels) self.weights1 = nn.Parameter( - self.scale * torch.rand(8, in_channels // 8, out_channels // 8, len(self.index_q), dtype=torch.cfloat)) + self.scale + * torch.rand( + 8, + in_channels // 8, + out_channels // 8, + len(self.index_q), + dtype=torch.cfloat, + ) + ) # Complex multiplication def compl_mul1d(self, input, weights): # (batch, in_channel, x ), (in_channel, out_channel, x) -> (batch, out_channel, x) - return torch.einsum("bhi,hio->bho", input, weights) + return torch.einsum('bhi,hio->bho', input, weights) def forward(self, q, k, v, mask): # size = [B, L, H, E] B, L, H, E = q.shape xq = q.permute(0, 2, 3, 1) # size = [B, H, E, L] xk = k.permute(0, 2, 3, 1) - xv = v.permute(0, 2, 3, 1) + v.permute(0, 2, 3, 1) # Compute Fourier coefficients - xq_ft_ = torch.zeros(B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat) + xq_ft_ = torch.zeros( + B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat + ) xq_ft = torch.fft.rfft(xq, dim=-1) for i, j in enumerate(self.index_q): xq_ft_[:, :, :, i] = xq_ft[:, :, :, j] - xk_ft_ = torch.zeros(B, H, E, len(self.index_kv), device=xq.device, dtype=torch.cfloat) + xk_ft_ = torch.zeros( + B, H, E, len(self.index_kv), device=xq.device, dtype=torch.cfloat + ) xk_ft = torch.fft.rfft(xk, dim=-1) for i, j in enumerate(self.index_kv): xk_ft_[:, :, :, i] = xk_ft[:, :, :, j] # perform attention mechanism on frequency domain - xqk_ft = (torch.einsum("bhex,bhey->bhxy", xq_ft_, xk_ft_)) + xqk_ft = torch.einsum('bhex,bhey->bhxy', xq_ft_, xk_ft_) if self.activation == 'tanh': xqk_ft = xqk_ft.tanh() elif self.activation == 'softmax': xqk_ft = torch.softmax(abs(xqk_ft), dim=-1) xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft)) else: - raise Exception('{} actiation function is not implemented'.format(self.activation)) - xqkv_ft = torch.einsum("bhxy,bhey->bhex", xqk_ft, xk_ft_) - xqkvw = torch.einsum("bhex,heox->bhox", xqkv_ft, self.weights1) + raise Exception( + '{} actiation function is not implemented'.format(self.activation) + ) + xqkv_ft = torch.einsum('bhxy,bhey->bhex', xqk_ft, xk_ft_) + xqkvw = torch.einsum('bhex,heox->bhox', xqkv_ft, self.weights1) out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat) for i, j in enumerate(self.index_q): out_ft[:, :, :, j] = xqkvw[:, :, :, i] # Return to time domain - out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1)) + out = torch.fft.irfft( + out_ft / self.in_channels / self.out_channels, n=xq.size(-1) + ) return (out, None) - - - - diff --git a/benchmark/layers/MultiWaveletCorrelation.py b/benchmark/layers/MultiWaveletCorrelation.py index 
5d8d7fc..71bfde2 100644 --- a/benchmark/layers/MultiWaveletCorrelation.py +++ b/benchmark/layers/MultiWaveletCorrelation.py @@ -1,20 +1,15 @@ -import torch +import math +from typing import List, Tuple + import numpy as np +import torch import torch.nn as nn import torch.nn.functional as F from torch import Tensor -from typing import List, Tuple -import math -from functools import partial -from einops import rearrange, reduce, repeat -from torch import nn, einsum, diagonal -from math import log2, ceil -import pdb -from utils.masking import LocalMask from layers.utils import get_filter -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') class MultiWaveletTransform(nn.Module): @@ -22,8 +17,17 @@ class MultiWaveletTransform(nn.Module): 1D multiwavelet block. """ - def __init__(self, ich=1, k=8, alpha=16, c=128, - nCZ=1, L=0, base='legendre', attention_dropout=0.1): + def __init__( + self, + ich=1, + k=8, + alpha=16, + c=128, + nCZ=1, + L=0, + base='legendre', + attention_dropout=0.1, + ): super(MultiWaveletTransform, self).__init__() print('base', base) self.k = k @@ -39,7 +43,7 @@ def forward(self, queries, keys, values, attn_mask): B, L, H, E = queries.shape _, S, _, D = values.shape if L > S: - zeros = torch.zeros_like(queries[:, :(L - S), :]).float() + zeros = torch.zeros_like(queries[:, : (L - S), :]).float() values = torch.cat([values, zeros], dim=1) keys = torch.cat([keys, zeros], dim=1) else: @@ -63,13 +67,23 @@ class MultiWaveletCross(nn.Module): 1D Multiwavelet Cross Attention layer. """ - def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes, c=64, - k=8, ich=512, - L=0, - base='legendre', - mode_select_method='random', - initializer=None, activation='tanh', - **kwargs): + def __init__( + self, + in_channels, + out_channels, + seq_len_q, + seq_len_kv, + modes, + c=64, + k=8, + ich=512, + L=0, + base='legendre', + mode_select_method='random', + initializer=None, + activation='tanh', + **kwargs, + ): super(MultiWaveletCross, self).__init__() print('base', base) @@ -88,28 +102,48 @@ def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes, c=64 G1r[np.abs(G1r) < 1e-8] = 0 self.max_item = 3 - self.attn1 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, - seq_len_kv=seq_len_kv, modes=modes, activation=activation, - mode_select_method=mode_select_method) - self.attn2 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, - seq_len_kv=seq_len_kv, modes=modes, activation=activation, - mode_select_method=mode_select_method) - self.attn3 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, - seq_len_kv=seq_len_kv, modes=modes, activation=activation, - mode_select_method=mode_select_method) - self.attn4 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, - seq_len_kv=seq_len_kv, modes=modes, activation=activation, - mode_select_method=mode_select_method) + self.attn1 = FourierCrossAttentionW( + in_channels=in_channels, + out_channels=out_channels, + seq_len_q=seq_len_q, + seq_len_kv=seq_len_kv, + modes=modes, + activation=activation, + mode_select_method=mode_select_method, + ) + self.attn2 = FourierCrossAttentionW( + in_channels=in_channels, + out_channels=out_channels, + seq_len_q=seq_len_q, + seq_len_kv=seq_len_kv, + modes=modes, + activation=activation, + 
mode_select_method=mode_select_method, + ) + self.attn3 = FourierCrossAttentionW( + in_channels=in_channels, + out_channels=out_channels, + seq_len_q=seq_len_q, + seq_len_kv=seq_len_kv, + modes=modes, + activation=activation, + mode_select_method=mode_select_method, + ) + self.attn4 = FourierCrossAttentionW( + in_channels=in_channels, + out_channels=out_channels, + seq_len_q=seq_len_q, + seq_len_kv=seq_len_kv, + modes=modes, + activation=activation, + mode_select_method=mode_select_method, + ) self.T0 = nn.Linear(k, k) - self.register_buffer('ec_s', torch.Tensor( - np.concatenate((H0.T, H1.T), axis=0))) - self.register_buffer('ec_d', torch.Tensor( - np.concatenate((G0.T, G1.T), axis=0))) + self.register_buffer('ec_s', torch.Tensor(np.concatenate((H0.T, H1.T), axis=0))) + self.register_buffer('ec_d', torch.Tensor(np.concatenate((G0.T, G1.T), axis=0))) - self.register_buffer('rc_e', torch.Tensor( - np.concatenate((H0r, G0r), axis=0))) - self.register_buffer('rc_o', torch.Tensor( - np.concatenate((H1r, G1r), axis=0))) + self.register_buffer('rc_e', torch.Tensor(np.concatenate((H0r, G0r), axis=0))) + self.register_buffer('rc_o', torch.Tensor(np.concatenate((H1r, G1r), axis=0))) self.Lk = nn.Linear(ich, c * k) self.Lq = nn.Linear(ich, c * k) @@ -132,7 +166,7 @@ def forward(self, q, k, v, mask=None): v = v.view(v.shape[0], v.shape[1], self.c, self.k) if N > S: - zeros = torch.zeros_like(q[:, :(N - S), :]).float() + zeros = torch.zeros_like(q[:, : (N - S), :]).float() v = torch.cat([v, zeros], dim=1) k = torch.cat([k, zeros], dim=1) else: @@ -141,9 +175,9 @@ def forward(self, q, k, v, mask=None): ns = math.floor(np.log2(N)) nl = pow(2, math.ceil(np.log2(N))) - extra_q = q[:, 0:nl - N, :, :] - extra_k = k[:, 0:nl - N, :, :] - extra_v = v[:, 0:nl - N, :, :] + extra_q = q[:, 0 : nl - N, :, :] + extra_k = k[:, 0 : nl - N, :, :] + extra_v = v[:, 0 : nl - N, :, :] q = torch.cat([q, extra_q], 1) k = torch.cat([k, extra_k], 1) v = torch.cat([v, extra_v], 1) @@ -177,7 +211,10 @@ def forward(self, q, k, v, mask=None): dk, sk = Ud_k[i], Us_k[i] dq, sq = Ud_q[i], Us_q[i] dv, sv = Ud_v[i], Us_v[i] - Ud += [self.attn1(dq[0], dk[0], dv[0], mask)[0] + self.attn2(dq[1], dk[1], dv[1], mask)[0]] + Ud += [ + self.attn1(dq[0], dk[0], dv[0], mask)[0] + + self.attn2(dq[1], dk[1], dv[1], mask)[0] + ] Us += [self.attn3(sq, sk, sv, mask)[0]] v = self.attn4(q, k, v, mask)[0] @@ -190,9 +227,13 @@ def forward(self, q, k, v, mask=None): return (v.contiguous(), None) def wavelet_transform(self, x): - xa = torch.cat([x[:, ::2, :, :], - x[:, 1::2, :, :], - ], -1) + xa = torch.cat( + [ + x[:, ::2, :, :], + x[:, 1::2, :, :], + ], + -1, + ) d = torch.matmul(xa, self.ec_d) s = torch.matmul(xa, self.ec_s) return d, s @@ -203,16 +244,23 @@ def evenOdd(self, x): x_e = torch.matmul(x, self.rc_e) x_o = torch.matmul(x, self.rc_o) - x = torch.zeros(B, N * 2, c, self.k, - device=x.device) + x = torch.zeros(B, N * 2, c, self.k, device=x.device) x[..., ::2, :, :] = x_e x[..., 1::2, :, :] = x_o return x class FourierCrossAttentionW(nn.Module): - def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=16, activation='tanh', - mode_select_method='random'): + def __init__( + self, + in_channels, + out_channels, + seq_len_q, + seq_len_kv, + modes=16, + activation='tanh', + mode_select_method='random', + ): super(FourierCrossAttentionW, self).__init__() print('corss fourier correlation used!') self.in_channels = in_channels @@ -230,52 +278,58 @@ def forward(self, q, k, v, mask): self.index_k_v = list(range(0, min(int(xv.shape[3] 
// 2), self.modes1))) # Compute Fourier coefficients - xq_ft_ = torch.zeros(B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat) + xq_ft_ = torch.zeros( + B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat + ) xq_ft = torch.fft.rfft(xq, dim=-1) for i, j in enumerate(self.index_q): xq_ft_[:, :, :, i] = xq_ft[:, :, :, j] - xk_ft_ = torch.zeros(B, H, E, len(self.index_k_v), device=xq.device, dtype=torch.cfloat) + xk_ft_ = torch.zeros( + B, H, E, len(self.index_k_v), device=xq.device, dtype=torch.cfloat + ) xk_ft = torch.fft.rfft(xk, dim=-1) for i, j in enumerate(self.index_k_v): xk_ft_[:, :, :, i] = xk_ft[:, :, :, j] - xqk_ft = (torch.einsum("bhex,bhey->bhxy", xq_ft_, xk_ft_)) + xqk_ft = torch.einsum('bhex,bhey->bhxy', xq_ft_, xk_ft_) if self.activation == 'tanh': xqk_ft = xqk_ft.tanh() elif self.activation == 'softmax': xqk_ft = torch.softmax(abs(xqk_ft), dim=-1) xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft)) else: - raise Exception('{} actiation function is not implemented'.format(self.activation)) - xqkv_ft = torch.einsum("bhxy,bhey->bhex", xqk_ft, xk_ft_) + raise Exception( + '{} actiation function is not implemented'.format(self.activation) + ) + xqkv_ft = torch.einsum('bhxy,bhey->bhex', xqk_ft, xk_ft_) xqkvw = xqkv_ft out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat) for i, j in enumerate(self.index_q): out_ft[:, :, :, j] = xqkvw[:, :, :, i] - out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1)).permute(0, 3, 2, 1) + out = torch.fft.irfft( + out_ft / self.in_channels / self.out_channels, n=xq.size(-1) + ).permute(0, 3, 2, 1) # size = [B, L, H, E] return (out, None) class sparseKernelFT1d(nn.Module): - def __init__(self, - k, alpha, c=1, - nl=1, - initializer=None, - **kwargs): + def __init__(self, k, alpha, c=1, nl=1, initializer=None, **kwargs): super(sparseKernelFT1d, self).__init__() self.modes1 = alpha - self.scale = (1 / (c * k * c * k)) - self.weights1 = nn.Parameter(self.scale * torch.rand(c * k, c * k, self.modes1, dtype=torch.cfloat)) + self.scale = 1 / (c * k * c * k) + self.weights1 = nn.Parameter( + self.scale * torch.rand(c * k, c * k, self.modes1, dtype=torch.cfloat) + ) self.weights1.requires_grad = True self.k = k def compl_mul1d(self, x, weights): # (batch, in_channel, x ), (in_channel, out_channel, x) -> (batch, out_channel, x) - return torch.einsum("bix,iox->box", x, weights) + return torch.einsum('bix,iox->box', x, weights) def forward(self, x): B, N, c, k = x.shape # (B, N, c, k) @@ -295,12 +349,9 @@ def forward(self, x): # ## class MWT_CZ1d(nn.Module): - def __init__(self, - k=3, alpha=64, - L=0, c=1, - base='legendre', - initializer=None, - **kwargs): + def __init__( + self, k=3, alpha=64, L=0, c=1, base='legendre', initializer=None, **kwargs + ): super(MWT_CZ1d, self).__init__() self.k = k @@ -323,21 +374,17 @@ def __init__(self, self.T0 = nn.Linear(k, k) - self.register_buffer('ec_s', torch.Tensor( - np.concatenate((H0.T, H1.T), axis=0))) - self.register_buffer('ec_d', torch.Tensor( - np.concatenate((G0.T, G1.T), axis=0))) + self.register_buffer('ec_s', torch.Tensor(np.concatenate((H0.T, H1.T), axis=0))) + self.register_buffer('ec_d', torch.Tensor(np.concatenate((G0.T, G1.T), axis=0))) - self.register_buffer('rc_e', torch.Tensor( - np.concatenate((H0r, G0r), axis=0))) - self.register_buffer('rc_o', torch.Tensor( - np.concatenate((H1r, G1r), axis=0))) + self.register_buffer('rc_e', torch.Tensor(np.concatenate((H0r, G0r), axis=0))) + self.register_buffer('rc_o', 
torch.Tensor(np.concatenate((H1r, G1r), axis=0))) def forward(self, x): B, N, c, k = x.shape # (B, N, k) ns = math.floor(np.log2(N)) nl = pow(2, math.ceil(np.log2(N))) - extra_x = x[:, 0:nl - N, :, :] + extra_x = x[:, 0 : nl - N, :, :] x = torch.cat([x, extra_x], 1) Ud = torch.jit.annotate(List[Tensor], []) Us = torch.jit.annotate(List[Tensor], []) @@ -359,22 +406,24 @@ def forward(self, x): return x def wavelet_transform(self, x): - xa = torch.cat([x[:, ::2, :, :], - x[:, 1::2, :, :], - ], -1) + xa = torch.cat( + [ + x[:, ::2, :, :], + x[:, 1::2, :, :], + ], + -1, + ) d = torch.matmul(xa, self.ec_d) s = torch.matmul(xa, self.ec_s) return d, s def evenOdd(self, x): - B, N, c, ich = x.shape # (B, N, c, k) assert ich == 2 * self.k x_e = torch.matmul(x, self.rc_e) x_o = torch.matmul(x, self.rc_o) - x = torch.zeros(B, N * 2, c, self.k, - device=x.device) + x = torch.zeros(B, N * 2, c, self.k, device=x.device) x[..., ::2, :, :] = x_e x[..., 1::2, :, :] = x_o return x diff --git a/benchmark/layers/SelfAttention_Family.py b/benchmark/layers/SelfAttention_Family.py index c8138e2..8e1c820 100644 --- a/benchmark/layers/SelfAttention_Family.py +++ b/benchmark/layers/SelfAttention_Family.py @@ -1,18 +1,21 @@ +from math import sqrt + +import numpy as np import torch import torch.nn as nn -import torch.nn.functional as F - -import matplotlib.pyplot as plt -import numpy as np -import math -from math import sqrt -from utils.masking import TriangularCausalMask, ProbMask -import os +from utils.masking import ProbMask, TriangularCausalMask class FullAttention(nn.Module): - def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + def __init__( + self, + mask_flag=True, + factor=5, + scale=None, + attention_dropout=0.1, + output_attention=False, + ): super(FullAttention, self).__init__() self.scale = scale self.mask_flag = mask_flag @@ -22,9 +25,9 @@ def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, def forward(self, queries, keys, values, attn_mask): B, L, H, E = queries.shape _, S, _, D = values.shape - scale = self.scale or 1. 
/ sqrt(E) + scale = self.scale or 1.0 / sqrt(E) - scores = torch.einsum("blhe,bshe->bhls", queries, keys) + scores = torch.einsum('blhe,bshe->bhls', queries, keys) if self.mask_flag: if attn_mask is None: @@ -33,7 +36,7 @@ def forward(self, queries, keys, values, attn_mask): scores.masked_fill_(attn_mask.mask, -np.inf) A = self.dropout(torch.softmax(scale * scores, dim=-1)) - V = torch.einsum("bhls,bshd->blhd", A, values) + V = torch.einsum('bhls,bshd->blhd', A, values) if self.output_attention: return (V.contiguous(), A) @@ -42,7 +45,14 @@ def forward(self, queries, keys, values, attn_mask): class ProbAttention(nn.Module): - def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + def __init__( + self, + mask_flag=True, + factor=5, + scale=None, + attention_dropout=0.1, + output_attention=False, + ): super(ProbAttention, self).__init__() self.factor = factor self.scale = scale @@ -57,18 +67,20 @@ def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) # calculate the sampled Q_K K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) - index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q + index_sample = torch.randint( + L_K, (L_Q, sample_k) + ) # real U = U_part(factor*ln(L_k))*L_q K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :] Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() - # find the Top_k query with sparisty measurement + # find the Top_k query with sparsity measurement M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) M_top = M.topk(n_top, sorted=False)[1] # use the reduced Q to calculate Q_K - Q_reduce = Q[torch.arange(B)[:, None, None], - torch.arange(H)[None, :, None], - M_top, :] # factor*ln(L_q) + Q_reduce = Q[ + torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], M_top, : + ] # factor*ln(L_q) Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k return Q_K, M_top @@ -80,7 +92,7 @@ def _get_initial_context(self, V, L_Q): V_sum = V.mean(dim=-2) contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone() else: # use mask - assert (L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only + assert L_Q == L_V # requires that L_Q == L_V, i.e. for self-attention only contex = V.cumsum(dim=-2) return contex @@ -93,12 +105,14 @@ def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) - context_in[torch.arange(B)[:, None, None], - torch.arange(H)[None, :, None], - index, :] = torch.matmul(attn, V).type_as(context_in) + context_in[ + torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, : + ] = torch.matmul(attn, V).type_as(context_in) if self.output_attention: attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device) - attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn + attns[ + torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, : + ] = attn return (context_in, attns) else: return (context_in, None) @@ -120,20 +134,21 @@ def forward(self, queries, keys, values, attn_mask): scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u) # add scale factor - scale = self.scale or 1. 
/ sqrt(D) + scale = self.scale or 1.0 / sqrt(D) if scale is not None: scores_top = scores_top * scale # get the context context = self._get_initial_context(values, L_Q) # update the context with selected top_k queries - context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask) + context, attn = self._update_context( + context, values, scores_top, index, L_Q, attn_mask + ) return context.contiguous(), attn class AttentionLayer(nn.Module): - def __init__(self, attention, d_model, n_heads, d_keys=None, - d_values=None): + def __init__(self, attention, d_model, n_heads, d_keys=None, d_values=None): super(AttentionLayer, self).__init__() d_keys = d_keys or (d_model // n_heads) @@ -155,12 +170,7 @@ def forward(self, queries, keys, values, attn_mask): keys = self.key_projection(keys).view(B, S, H, -1) values = self.value_projection(values).view(B, S, H, -1) - out, attn = self.inner_attention( - queries, - keys, - values, - attn_mask - ) + out, attn = self.inner_attention(queries, keys, values, attn_mask) out = out.view(B, L, -1) return self.out_projection(out), attn diff --git a/benchmark/layers/Transformer_EncDec.py b/benchmark/layers/Transformer_EncDec.py index c0c5789..2e614b5 100644 --- a/benchmark/layers/Transformer_EncDec.py +++ b/benchmark/layers/Transformer_EncDec.py @@ -1,4 +1,3 @@ -import torch import torch.nn as nn import torch.nn.functional as F @@ -6,11 +5,13 @@ class ConvLayer(nn.Module): def __init__(self, c_in): super(ConvLayer, self).__init__() - self.downConv = nn.Conv1d(in_channels=c_in, - out_channels=c_in, - kernel_size=3, - padding=2, - padding_mode='circular') + self.downConv = nn.Conv1d( + in_channels=c_in, + out_channels=c_in, + kernel_size=3, + padding=2, + padding_mode='circular', + ) self.norm = nn.BatchNorm1d(c_in) self.activation = nn.ELU() self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) @@ -25,7 +26,7 @@ def forward(self, x): class EncoderLayer(nn.Module): - def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): + def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation='relu'): super(EncoderLayer, self).__init__() d_ff = d_ff or 4 * d_model self.attention = attention @@ -34,13 +35,10 @@ def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu" self.norm1 = nn.LayerNorm(d_model) self.norm2 = nn.LayerNorm(d_model) self.dropout = nn.Dropout(dropout) - self.activation = F.relu if activation == "relu" else F.gelu + self.activation = F.relu if activation == 'relu' else F.gelu def forward(self, x, attn_mask=None): - new_x, attn = self.attention( - x, x, x, - attn_mask=attn_mask - ) + new_x, attn = self.attention(x, x, x, attn_mask=attn_mask) x = x + self.dropout(new_x) y = x = self.norm1(x) @@ -54,7 +52,9 @@ class Encoder(nn.Module): def __init__(self, attn_layers, conv_layers=None, norm_layer=None): super(Encoder, self).__init__() self.attn_layers = nn.ModuleList(attn_layers) - self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None + self.conv_layers = ( + nn.ModuleList(conv_layers) if conv_layers is not None else None + ) self.norm = norm_layer def forward(self, x, attn_mask=None): @@ -79,8 +79,15 @@ def forward(self, x, attn_mask=None): class DecoderLayer(nn.Module): - def __init__(self, self_attention, cross_attention, d_model, d_ff=None, - dropout=0.1, activation="relu"): + def __init__( + self, + self_attention, + cross_attention, + d_model, + d_ff=None, + dropout=0.1, + activation='relu', + ): super(DecoderLayer, 
self).__init__() d_ff = d_ff or 4 * d_model self.self_attention = self_attention @@ -91,19 +98,15 @@ def __init__(self, self_attention, cross_attention, d_model, d_ff=None, self.norm2 = nn.LayerNorm(d_model) self.norm3 = nn.LayerNorm(d_model) self.dropout = nn.Dropout(dropout) - self.activation = F.relu if activation == "relu" else F.gelu + self.activation = F.relu if activation == 'relu' else F.gelu def forward(self, x, cross, x_mask=None, cross_mask=None): - x = x + self.dropout(self.self_attention( - x, x, x, - attn_mask=x_mask - )[0]) + x = x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0]) x = self.norm1(x) - x = x + self.dropout(self.cross_attention( - x, cross, cross, - attn_mask=cross_mask - )[0]) + x = x + self.dropout( + self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0] + ) y = x = self.norm2(x) y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) diff --git a/benchmark/layers/utils.py b/benchmark/layers/utils.py index abad383..4c8673c 100644 --- a/benchmark/layers/utils.py +++ b/benchmark/layers/utils.py @@ -1,100 +1,130 @@ -import torch -import torch.nn as nn - -import numpy as np from functools import partial +import numpy as np +import torch +import torch.nn as nn from scipy.special import eval_legendre -from sympy import Poly, legendre, Symbol, chebyshevt +from sympy import Poly, Symbol, chebyshevt, legendre def legendreDer(k, x): def _legendre(k, x): - return (2*k+1) * eval_legendre(k, x) + return (2 * k + 1) * eval_legendre(k, x) + out = 0 - for i in np.arange(k-1,-1,-2): + for i in np.arange(k - 1, -1, -2): out += _legendre(i, x) return out -def phi_(phi_c, x, lb = 0, ub = 1): - mask = np.logical_or(xub) * 1.0 - return np.polynomial.polynomial.Polynomial(phi_c)(x) * (1-mask) +def phi_(phi_c, x, lb=0, ub=1): + mask = np.logical_or(x < lb, x > ub) * 1.0 + return np.polynomial.polynomial.Polynomial(phi_c)(x) * (1 - mask) def get_phi_psi(k, base): - x = Symbol('x') - phi_coeff = np.zeros((k,k)) - phi_2x_coeff = np.zeros((k,k)) + phi_coeff = np.zeros((k, k)) + phi_2x_coeff = np.zeros((k, k)) if base == 'legendre': for ki in range(k): - coeff_ = Poly(legendre(ki, 2*x-1), x).all_coeffs() - phi_coeff[ki,:ki+1] = np.flip(np.sqrt(2*ki+1) * np.array(coeff_).astype(np.float64)) - coeff_ = Poly(legendre(ki, 4*x-1), x).all_coeffs() - phi_2x_coeff[ki,:ki+1] = np.flip(np.sqrt(2) * np.sqrt(2*ki+1) * np.array(coeff_).astype(np.float64)) - + coeff_ = Poly(legendre(ki, 2 * x - 1), x).all_coeffs() + phi_coeff[ki, : ki + 1] = np.flip( + np.sqrt(2 * ki + 1) * np.array(coeff_).astype(np.float64) + ) + coeff_ = Poly(legendre(ki, 4 * x - 1), x).all_coeffs() + phi_2x_coeff[ki, : ki + 1] = np.flip( + np.sqrt(2) * np.sqrt(2 * ki + 1) * np.array(coeff_).astype(np.float64) + ) + psi1_coeff = np.zeros((k, k)) psi2_coeff = np.zeros((k, k)) for ki in range(k): - psi1_coeff[ki,:] = phi_2x_coeff[ki,:] + psi1_coeff[ki, :] = phi_2x_coeff[ki, :] for i in range(k): - a = phi_2x_coeff[ki,:ki+1] - b = phi_coeff[i, :i+1] + a = phi_2x_coeff[ki, : ki + 1] + b = phi_coeff[i, : i + 1] prod_ = np.convolve(a, b) - prod_[np.abs(prod_)<1e-8] = 0 - proj_ = (prod_ * 1/(np.arange(len(prod_))+1) * np.power(0.5, 1+np.arange(len(prod_)))).sum() - psi1_coeff[ki,:] -= proj_ * phi_coeff[i,:] - psi2_coeff[ki,:] -= proj_ * phi_coeff[i,:] + prod_[np.abs(prod_) < 1e-8] = 0 + proj_ = ( + prod_ + * 1 + / (np.arange(len(prod_)) + 1) + * np.power(0.5, 1 + np.arange(len(prod_))) + ).sum() + psi1_coeff[ki, :] -= proj_ * phi_coeff[i, :] + psi2_coeff[ki, :] -= proj_ * phi_coeff[i, :] for j in 
range(ki): - a = phi_2x_coeff[ki,:ki+1] + a = phi_2x_coeff[ki, : ki + 1] b = psi1_coeff[j, :] prod_ = np.convolve(a, b) - prod_[np.abs(prod_)<1e-8] = 0 - proj_ = (prod_ * 1/(np.arange(len(prod_))+1) * np.power(0.5, 1+np.arange(len(prod_)))).sum() - psi1_coeff[ki,:] -= proj_ * psi1_coeff[j,:] - psi2_coeff[ki,:] -= proj_ * psi2_coeff[j,:] - - a = psi1_coeff[ki,:] + prod_[np.abs(prod_) < 1e-8] = 0 + proj_ = ( + prod_ + * 1 + / (np.arange(len(prod_)) + 1) + * np.power(0.5, 1 + np.arange(len(prod_))) + ).sum() + psi1_coeff[ki, :] -= proj_ * psi1_coeff[j, :] + psi2_coeff[ki, :] -= proj_ * psi2_coeff[j, :] + + a = psi1_coeff[ki, :] prod_ = np.convolve(a, a) - prod_[np.abs(prod_)<1e-8] = 0 - norm1 = (prod_ * 1/(np.arange(len(prod_))+1) * np.power(0.5, 1+np.arange(len(prod_)))).sum() - - a = psi2_coeff[ki,:] + prod_[np.abs(prod_) < 1e-8] = 0 + norm1 = ( + prod_ + * 1 + / (np.arange(len(prod_)) + 1) + * np.power(0.5, 1 + np.arange(len(prod_))) + ).sum() + + a = psi2_coeff[ki, :] prod_ = np.convolve(a, a) - prod_[np.abs(prod_)<1e-8] = 0 - norm2 = (prod_ * 1/(np.arange(len(prod_))+1) * (1-np.power(0.5, 1+np.arange(len(prod_))))).sum() + prod_[np.abs(prod_) < 1e-8] = 0 + norm2 = ( + prod_ + * 1 + / (np.arange(len(prod_)) + 1) + * (1 - np.power(0.5, 1 + np.arange(len(prod_)))) + ).sum() norm_ = np.sqrt(norm1 + norm2) - psi1_coeff[ki,:] /= norm_ - psi2_coeff[ki,:] /= norm_ - psi1_coeff[np.abs(psi1_coeff)<1e-8] = 0 - psi2_coeff[np.abs(psi2_coeff)<1e-8] = 0 - - phi = [np.poly1d(np.flip(phi_coeff[i,:])) for i in range(k)] - psi1 = [np.poly1d(np.flip(psi1_coeff[i,:])) for i in range(k)] - psi2 = [np.poly1d(np.flip(psi2_coeff[i,:])) for i in range(k)] - + psi1_coeff[ki, :] /= norm_ + psi2_coeff[ki, :] /= norm_ + psi1_coeff[np.abs(psi1_coeff) < 1e-8] = 0 + psi2_coeff[np.abs(psi2_coeff) < 1e-8] = 0 + + phi = [np.poly1d(np.flip(phi_coeff[i, :])) for i in range(k)] + psi1 = [np.poly1d(np.flip(psi1_coeff[i, :])) for i in range(k)] + psi2 = [np.poly1d(np.flip(psi2_coeff[i, :])) for i in range(k)] + elif base == 'chebyshev': for ki in range(k): if ki == 0: - phi_coeff[ki,:ki+1] = np.sqrt(2/np.pi) - phi_2x_coeff[ki,:ki+1] = np.sqrt(2/np.pi) * np.sqrt(2) + phi_coeff[ki, : ki + 1] = np.sqrt(2 / np.pi) + phi_2x_coeff[ki, : ki + 1] = np.sqrt(2 / np.pi) * np.sqrt(2) else: - coeff_ = Poly(chebyshevt(ki, 2*x-1), x).all_coeffs() - phi_coeff[ki,:ki+1] = np.flip(2/np.sqrt(np.pi) * np.array(coeff_).astype(np.float64)) - coeff_ = Poly(chebyshevt(ki, 4*x-1), x).all_coeffs() - phi_2x_coeff[ki,:ki+1] = np.flip(np.sqrt(2) * 2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64)) - - phi = [partial(phi_, phi_coeff[i,:]) for i in range(k)] - + coeff_ = Poly(chebyshevt(ki, 2 * x - 1), x).all_coeffs() + phi_coeff[ki, : ki + 1] = np.flip( + 2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64) + ) + coeff_ = Poly(chebyshevt(ki, 4 * x - 1), x).all_coeffs() + phi_2x_coeff[ki, : ki + 1] = np.flip( + np.sqrt(2) + * 2 + / np.sqrt(np.pi) + * np.array(coeff_).astype(np.float64) + ) + + phi = [partial(phi_, phi_coeff[i, :]) for i in range(k)] + x = Symbol('x') - kUse = 2*k - roots = Poly(chebyshevt(kUse, 2*x-1)).all_roots() + kUse = 2 * k + roots = Poly(chebyshevt(kUse, 2 * x - 1)).all_roots() x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64) # x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1) # not needed for our purpose here, we use even k always to avoid wm = np.pi / kUse / 2 - + psi1_coeff = np.zeros((k, k)) psi2_coeff = np.zeros((k, k)) @@ -102,71 +132,82 @@ 
def get_phi_psi(k, base): psi2 = [[] for _ in range(k)] for ki in range(k): - psi1_coeff[ki,:] = phi_2x_coeff[ki,:] + psi1_coeff[ki, :] = phi_2x_coeff[ki, :] for i in range(k): - proj_ = (wm * phi[i](x_m) * np.sqrt(2)* phi[ki](2*x_m)).sum() - psi1_coeff[ki,:] -= proj_ * phi_coeff[i,:] - psi2_coeff[ki,:] -= proj_ * phi_coeff[i,:] + proj_ = (wm * phi[i](x_m) * np.sqrt(2) * phi[ki](2 * x_m)).sum() + psi1_coeff[ki, :] -= proj_ * phi_coeff[i, :] + psi2_coeff[ki, :] -= proj_ * phi_coeff[i, :] for j in range(ki): - proj_ = (wm * psi1[j](x_m) * np.sqrt(2) * phi[ki](2*x_m)).sum() - psi1_coeff[ki,:] -= proj_ * psi1_coeff[j,:] - psi2_coeff[ki,:] -= proj_ * psi2_coeff[j,:] + proj_ = (wm * psi1[j](x_m) * np.sqrt(2) * phi[ki](2 * x_m)).sum() + psi1_coeff[ki, :] -= proj_ * psi1_coeff[j, :] + psi2_coeff[ki, :] -= proj_ * psi2_coeff[j, :] - psi1[ki] = partial(phi_, psi1_coeff[ki,:], lb = 0, ub = 0.5) - psi2[ki] = partial(phi_, psi2_coeff[ki,:], lb = 0.5, ub = 1) + psi1[ki] = partial(phi_, psi1_coeff[ki, :], lb=0, ub=0.5) + psi2[ki] = partial(phi_, psi2_coeff[ki, :], lb=0.5, ub=1) norm1 = (wm * psi1[ki](x_m) * psi1[ki](x_m)).sum() norm2 = (wm * psi2[ki](x_m) * psi2[ki](x_m)).sum() norm_ = np.sqrt(norm1 + norm2) - psi1_coeff[ki,:] /= norm_ - psi2_coeff[ki,:] /= norm_ - psi1_coeff[np.abs(psi1_coeff)<1e-8] = 0 - psi2_coeff[np.abs(psi2_coeff)<1e-8] = 0 - - psi1[ki] = partial(phi_, psi1_coeff[ki,:], lb = 0, ub = 0.5+1e-16) - psi2[ki] = partial(phi_, psi2_coeff[ki,:], lb = 0.5+1e-16, ub = 1) - + psi1_coeff[ki, :] /= norm_ + psi2_coeff[ki, :] /= norm_ + psi1_coeff[np.abs(psi1_coeff) < 1e-8] = 0 + psi2_coeff[np.abs(psi2_coeff) < 1e-8] = 0 + + psi1[ki] = partial(phi_, psi1_coeff[ki, :], lb=0, ub=0.5 + 1e-16) + psi2[ki] = partial(phi_, psi2_coeff[ki, :], lb=0.5 + 1e-16, ub=1) + return phi, psi1, psi2 def get_filter(base, k): - def psi(psi1, psi2, i, inp): - mask = (inp<=0.5) * 1.0 - return psi1[i](inp) * mask + psi2[i](inp) * (1-mask) - + mask = (inp <= 0.5) * 1.0 + return psi1[i](inp) * mask + psi2[i](inp) * (1 - mask) + if base not in ['legendre', 'chebyshev']: raise Exception('Base not supported') - + x = Symbol('x') - H0 = np.zeros((k,k)) - H1 = np.zeros((k,k)) - G0 = np.zeros((k,k)) - G1 = np.zeros((k,k)) - PHI0 = np.zeros((k,k)) - PHI1 = np.zeros((k,k)) + H0 = np.zeros((k, k)) + H1 = np.zeros((k, k)) + G0 = np.zeros((k, k)) + G1 = np.zeros((k, k)) + PHI0 = np.zeros((k, k)) + PHI1 = np.zeros((k, k)) phi, psi1, psi2 = get_phi_psi(k, base) if base == 'legendre': - roots = Poly(legendre(k, 2*x-1)).all_roots() + roots = Poly(legendre(k, 2 * x - 1)).all_roots() x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64) - wm = 1/k/legendreDer(k,2*x_m-1)/eval_legendre(k-1,2*x_m-1) - + wm = 1 / k / legendreDer(k, 2 * x_m - 1) / eval_legendre(k - 1, 2 * x_m - 1) + for ki in range(k): for kpi in range(k): - H0[ki, kpi] = 1/np.sqrt(2) * (wm * phi[ki](x_m/2) * phi[kpi](x_m)).sum() - G0[ki, kpi] = 1/np.sqrt(2) * (wm * psi(psi1, psi2, ki, x_m/2) * phi[kpi](x_m)).sum() - H1[ki, kpi] = 1/np.sqrt(2) * (wm * phi[ki]((x_m+1)/2) * phi[kpi](x_m)).sum() - G1[ki, kpi] = 1/np.sqrt(2) * (wm * psi(psi1, psi2, ki, (x_m+1)/2) * phi[kpi](x_m)).sum() - + H0[ki, kpi] = ( + 1 / np.sqrt(2) * (wm * phi[ki](x_m / 2) * phi[kpi](x_m)).sum() + ) + G0[ki, kpi] = ( + 1 + / np.sqrt(2) + * (wm * psi(psi1, psi2, ki, x_m / 2) * phi[kpi](x_m)).sum() + ) + H1[ki, kpi] = ( + 1 / np.sqrt(2) * (wm * phi[ki]((x_m + 1) / 2) * phi[kpi](x_m)).sum() + ) + G1[ki, kpi] = ( + 1 + / np.sqrt(2) + * (wm * psi(psi1, psi2, ki, (x_m + 1) / 2) * phi[kpi](x_m)).sum() + 
) + PHI0 = np.eye(k) PHI1 = np.eye(k) - + elif base == 'chebyshev': x = Symbol('x') - kUse = 2*k - roots = Poly(chebyshevt(kUse, 2*x-1)).all_roots() + kUse = 2 * k + roots = Poly(chebyshevt(kUse, 2 * x - 1)).all_roots() x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64) # x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1) # not needed for our purpose here, we use even k always to avoid @@ -174,72 +215,97 @@ def psi(psi1, psi2, i, inp): for ki in range(k): for kpi in range(k): - H0[ki, kpi] = 1/np.sqrt(2) * (wm * phi[ki](x_m/2) * phi[kpi](x_m)).sum() - G0[ki, kpi] = 1/np.sqrt(2) * (wm * psi(psi1, psi2, ki, x_m/2) * phi[kpi](x_m)).sum() - H1[ki, kpi] = 1/np.sqrt(2) * (wm * phi[ki]((x_m+1)/2) * phi[kpi](x_m)).sum() - G1[ki, kpi] = 1/np.sqrt(2) * (wm * psi(psi1, psi2, ki, (x_m+1)/2) * phi[kpi](x_m)).sum() - - PHI0[ki, kpi] = (wm * phi[ki](2*x_m) * phi[kpi](2*x_m)).sum() * 2 - PHI1[ki, kpi] = (wm * phi[ki](2*x_m-1) * phi[kpi](2*x_m-1)).sum() * 2 - - PHI0[np.abs(PHI0)<1e-8] = 0 - PHI1[np.abs(PHI1)<1e-8] = 0 - - H0[np.abs(H0)<1e-8] = 0 - H1[np.abs(H1)<1e-8] = 0 - G0[np.abs(G0)<1e-8] = 0 - G1[np.abs(G1)<1e-8] = 0 - + H0[ki, kpi] = ( + 1 / np.sqrt(2) * (wm * phi[ki](x_m / 2) * phi[kpi](x_m)).sum() + ) + G0[ki, kpi] = ( + 1 + / np.sqrt(2) + * (wm * psi(psi1, psi2, ki, x_m / 2) * phi[kpi](x_m)).sum() + ) + H1[ki, kpi] = ( + 1 / np.sqrt(2) * (wm * phi[ki]((x_m + 1) / 2) * phi[kpi](x_m)).sum() + ) + G1[ki, kpi] = ( + 1 + / np.sqrt(2) + * (wm * psi(psi1, psi2, ki, (x_m + 1) / 2) * phi[kpi](x_m)).sum() + ) + + PHI0[ki, kpi] = (wm * phi[ki](2 * x_m) * phi[kpi](2 * x_m)).sum() * 2 + PHI1[ki, kpi] = ( + wm * phi[ki](2 * x_m - 1) * phi[kpi](2 * x_m - 1) + ).sum() * 2 + + PHI0[np.abs(PHI0) < 1e-8] = 0 + PHI1[np.abs(PHI1) < 1e-8] = 0 + + H0[np.abs(H0) < 1e-8] = 0 + H1[np.abs(H1) < 1e-8] = 0 + G0[np.abs(G0) < 1e-8] = 0 + G1[np.abs(G1) < 1e-8] = 0 + return H0, H1, G0, G1, PHI0, PHI1 -def train(model, train_loader, optimizer, epoch, device, verbose = 0, - lossFn = None, lr_schedule=None, - post_proc = lambda args: args): - +def train( + model, + train_loader, + optimizer, + epoch, + device, + verbose=0, + lossFn=None, + lr_schedule=None, + post_proc=lambda args: args, +): if lossFn is None: lossFn = nn.MSELoss() model.train() - - total_loss = 0. + + total_loss = 0.0 for batch_idx, (data, target) in enumerate(train_loader): - bs = len(data) data, target = data.to(device), target.to(device) optimizer.zero_grad() - + output = model(data) - + target = post_proc(target) output = post_proc(output) loss = lossFn(output.view(bs, -1), target.view(bs, -1)) - + loss.backward() optimizer.step() total_loss += loss.sum().item() - if lr_schedule is not None: lr_schedule.step() - - if verbose>0: - print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( - epoch, batch_idx * len(data), len(train_loader.dataset), - 100. 
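A note on the multiwavelet helpers being reformatted here: `get_filter(base, k)` assembles the six `k × k` filter matrices (`H0`, `H1`, `G0`, `G1`, `PHI0`, `PHI1`) from the Legendre or Chebyshev basis built by `get_phi_psi`. One caveat: this hunk appears to drop the `x = Symbol('x')` line at the top of `get_phi_psi` while both basis branches still reference `x`, which would raise a `NameError`; the sketch below assumes that symbol definition is retained in the final file. The import path is likewise an assumption about how `benchmark/layers/utils.py` sits on `sys.path`.

```python
# Illustrative sketch only, under the assumptions stated above.
import numpy as np
from layers.utils import get_filter  # assumed import path for benchmark/layers/utils.py

# k x k analysis filters for a Legendre multiwavelet basis.
H0, H1, G0, G1, PHI0, PHI1 = get_filter('legendre', k=4)

for name, mat in [('H0', H0), ('H1', H1), ('G0', G0), ('G1', G1)]:
    print(name, mat.shape)  # each is (4, 4); entries below ~1e-8 are zeroed inside get_filter

print(np.allclose(PHI0, np.eye(4)))  # True -- PHI0/PHI1 are identity for the Legendre base
```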
* batch_idx / len(train_loader), loss.item())) - - return total_loss/len(train_loader.dataset) - - -def test(model, test_loader, device, verbose=0, lossFn=None, - post_proc = lambda args: args): - + if lr_schedule is not None: + lr_schedule.step() + + if verbose > 0: + print( + 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, + batch_idx * len(data), + len(train_loader.dataset), + 100.0 * batch_idx / len(train_loader), + loss.item(), + ) + ) + + return total_loss / len(train_loader.dataset) + + +def test( + model, test_loader, device, verbose=0, lossFn=None, post_proc=lambda args: args +): model.eval() if lossFn is None: lossFn = nn.MSELoss() - - - total_loss = 0. - predictions = [] - + + total_loss = 0.0 + with torch.no_grad(): for data, target in test_loader: bs = len(data) @@ -247,17 +313,18 @@ def test(model, test_loader, device, verbose=0, lossFn=None, data, target = data.to(device), target.to(device) output = model(data) output = post_proc(output) - + loss = lossFn(output.view(bs, -1), target.view(bs, -1)) total_loss += loss.sum().item() - - return total_loss/len(test_loader.dataset) + + return total_loss / len(test_loader.dataset) # Till EoF # taken from FNO paper: # https://github.com/zongyi-li/fourier_neural_operator + # normalization, pointwise gaussian class UnitGaussianNormalizer(object): def __init__(self, x, eps=0.00001): @@ -274,15 +341,15 @@ def encode(self, x): def decode(self, x, sample_idx=None): if sample_idx is None: - std = self.std + self.eps # n + std = self.std + self.eps # n mean = self.mean else: if len(self.mean.shape) == len(sample_idx[0].shape): std = self.std[sample_idx] + self.eps # batch*n mean = self.mean[sample_idx] if len(self.mean.shape) > len(sample_idx[0].shape): - std = self.std[:,sample_idx]+ self.eps # T*batch*n - mean = self.mean[:,sample_idx] + std = self.std[:, sample_idx] + self.eps # T*batch*n + mean = self.mean[:, sample_idx] # x is in shape of batch*n or T*batch*n x = (x * std) + mean @@ -330,28 +397,29 @@ def __init__(self, x, low=0.0, high=1.0): mymin = torch.min(x, 0)[0].view(-1) mymax = torch.max(x, 0)[0].view(-1) - self.a = (high - low)/(mymax - mymin) - self.b = -self.a*mymax + high + self.a = (high - low) / (mymax - mymin) + self.b = -self.a * mymax + high def encode(self, x): s = x.size() x = x.view(s[0], -1) - x = self.a*x + self.b + x = self.a * x + self.b x = x.view(s) return x def decode(self, x): s = x.size() x = x.view(s[0], -1) - x = (x - self.b)/self.a + x = (x - self.b) / self.a x = x.view(s) return x - + + class LpLoss(object): def __init__(self, d=2, p=2, size_average=True, reduction=True): super(LpLoss, self).__init__() - #Dimension and Lp-norm type are postive + # Dimension and Lp-norm type are postive assert d > 0 and p > 0 self.d = d @@ -365,7 +433,9 @@ def abs(self, x, y): # Assume uniform mesh h = 1.0 / (x.size()[1] - 1.0) - all_norms = (h**(self.d/self.p))*torch.norm(x.view(num_examples,-1) - y.view(num_examples,-1), self.p, 1) + all_norms = (h ** (self.d / self.p)) * torch.norm( + x.view(num_examples, -1) - y.view(num_examples, -1), self.p, 1 + ) if self.reduction: if self.size_average: @@ -378,16 +448,18 @@ def abs(self, x, y): def rel(self, x, y): num_examples = x.size()[0] - diff_norms = torch.norm(x.reshape(num_examples,-1) - y.reshape(num_examples,-1), self.p, 1) - y_norms = torch.norm(y.reshape(num_examples,-1), self.p, 1) + diff_norms = torch.norm( + x.reshape(num_examples, -1) - y.reshape(num_examples, -1), self.p, 1 + ) + y_norms = torch.norm(y.reshape(num_examples, -1), self.p, 1) if 
self.reduction: if self.size_average: - return torch.mean(diff_norms/y_norms) + return torch.mean(diff_norms / y_norms) else: - return torch.sum(diff_norms/y_norms) + return torch.sum(diff_norms / y_norms) - return diff_norms/y_norms + return diff_norms / y_norms def __call__(self, x, y): - return self.rel(x, y) \ No newline at end of file + return self.rel(x, y) diff --git a/benchmark/metalearned/common/evaluator.py b/benchmark/metalearned/common/evaluator.py index 6ed209e..bb092b0 100644 --- a/benchmark/metalearned/common/evaluator.py +++ b/benchmark/metalearned/common/evaluator.py @@ -1,10 +1,13 @@ from dataclasses import dataclass + from common.timeseries import TimeseriesBundle + @dataclass class Evaluator: test_set: TimeseriesBundle + @dataclass class EvaluationResult: - test_set: TimeseriesBundle \ No newline at end of file + test_set: TimeseriesBundle diff --git a/benchmark/metalearned/common/experiment.py b/benchmark/metalearned/common/experiment.py index 21c28ff..80235d7 100644 --- a/benchmark/metalearned/common/experiment.py +++ b/benchmark/metalearned/common/experiment.py @@ -12,10 +12,12 @@ command_file_name = 'experiment.cmd' -def create_experiment(experiment_path: str, - parameters: Dict, - command: Callable[[str, Dict], str], - callback: Callable[[str, Dict], None] = lambda path, params: None) -> None: +def create_experiment( + experiment_path: str, + parameters: Dict, + command: Callable[[str, Dict], str], + callback: Callable[[str, Dict], None] = lambda path, params: None, +) -> None: """ Create experiment. If parameters contain keys with multiple values, then multiple sub-experiments will be created. @@ -40,8 +42,13 @@ def create_experiment(experiment_path: str, logging.info('Generating experiments ...') for variables_instance in tqdm(product(*experiment_variables)): sub_experiment_name = ','.join( - ['%s=%.4g' % (name, value) if isinstance(value, float) else '%s=%s' % (name, str(value).replace(' ', '_')) - for name, value in dict(variables_instance).items()]) + [ + '%s=%.4g' % (name, value) + if isinstance(value, float) + else '%s=%s' % (name, str(value).replace(' ', '_')) + for name, value in dict(variables_instance).items() + ] + ) sub_experiment_path = os.path.join(experiment_path, sub_experiment_name) Path(sub_experiment_path).mkdir(parents=True, exist_ok=False) @@ -51,7 +58,9 @@ def create_experiment(experiment_path: str, # write command file with open(os.path.join(sub_experiment_path, command_file_name), 'w') as f: f.write(command(sub_experiment_path, dict(variables_instance))) - callback(sub_experiment_path, dict(**{**parameters, **dict(variables_instance)})) + callback( + sub_experiment_path, dict(**{**parameters, **dict(variables_instance)}) + ) def load_experiment_parameters(experiment_path: str) -> Dict: diff --git a/benchmark/metalearned/common/metrics.py b/benchmark/metalearned/common/metrics.py index 93402b5..c6e7d37 100644 --- a/benchmark/metalearned/common/metrics.py +++ b/benchmark/metalearned/common/metrics.py @@ -4,7 +4,9 @@ Target = np.ndarray -def mase(forecast: Forecast, insample: np.ndarray, outsample: Target, frequency: int) -> np.ndarray: +def mase( + forecast: Forecast, insample: np.ndarray, outsample: Target, frequency: int +) -> np.ndarray: """ Calculate MASE of each point for each timeseries. 
https://en.wikipedia.org/wiki/Mean_absolute_scaled_error @@ -15,7 +17,9 @@ def mase(forecast: Forecast, insample: np.ndarray, outsample: Target, frequency: :param frequency: :return: """ - return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:])) + return np.mean(np.abs(forecast - outsample)) / np.mean( + np.abs(insample[:-frequency] - insample[frequency:]) + ) def nd(forecast: Forecast, target: Target) -> float: @@ -37,7 +41,9 @@ def nrmse(forecast: Forecast, target: Target) -> float: :param target: :return: """ - return np.sqrt(np.mean(np.power((forecast - target), 2))) / (np.mean(np.abs(target))) + return np.sqrt(np.mean(np.power((forecast - target), 2))) / ( + np.mean(np.abs(target)) + ) def mape(forecast: Forecast, target: Target) -> np.ndarray: @@ -77,30 +83,29 @@ def smape_2(forecast: Forecast, target: Target) -> np.ndarray: :return: Same shape array with sMAPE calculated for each time step of each timeseries. """ denom = np.abs(target) + np.abs(forecast) - denom[denom == 0.0] = 1.0 # divide by 1.0 instead of 0.0, in case when denom is zero the enum will be 0.0 anyways. + denom[ + denom == 0.0 + ] = 1.0 # divide by 1.0 instead of 0.0, in case when denom is zero the enum will be 0.0 anyways. return 200 * np.abs(forecast - target) / denom - - import tensorflow as tf from keras import backend def smape(y_true, y_pred): - """ Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`. - `loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred), axis=-1)` - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - Returns: - Symmetric mean absolute percentage error values. shape = `[batch_size, d0, .. - dN-1]`. - """ + """Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`. + `loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred), axis=-1)` + Args: + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + Returns: + Symmetric mean absolute percentage error values. shape = `[batch_size, d0, .. + dN-1]`. 
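To make the metric definitions in `common/metrics.py` concrete: MASE divides the forecast MAE by the in-sample MAE of a seasonal naive forecast at lag `frequency`, and `smape_2` guards its denominator against zeros before scaling by 200. A small NumPy sketch with made-up numbers (the arrays are purely illustrative):

```python
# Toy re-derivation of the MASE and smape_2 formulas shown above.
import numpy as np

insample  = np.array([10., 12., 11., 13., 12., 14.])  # history seen by the model
outsample = np.array([13., 15.])                       # ground truth over the horizon
forecast  = np.array([12., 16.])                       # model output
frequency = 1                                          # seasonal lag used by MASE

mase = np.mean(np.abs(forecast - outsample)) / np.mean(
    np.abs(insample[:-frequency] - insample[frequency:])
)

denom = np.abs(outsample) + np.abs(forecast)
denom[denom == 0.0] = 1.0            # numerator is 0 wherever the denominator was 0
smape_2 = 200 * np.abs(forecast - outsample) / denom

print(round(float(mase), 3))         # 0.625
print(np.round(smape_2, 2))          # [8.   6.45]
```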
+ """ y_pred = tf.convert_to_tensor(y_pred) y_true = tf.cast(y_true, y_pred.dtype) diff = tf.abs( - (y_true - y_pred) / - backend.maximum(y_true + y_pred, backend.epsilon()) + (y_true - y_pred) / backend.maximum(y_true + y_pred, backend.epsilon()) ) return 200.0 * backend.mean(diff, axis=-1) diff --git a/benchmark/metalearned/common/samplers.py b/benchmark/metalearned/common/samplers.py index ebb2306..498a62f 100644 --- a/benchmark/metalearned/common/samplers.py +++ b/benchmark/metalearned/common/samplers.py @@ -2,12 +2,14 @@ class UnivariateTimeseriesSampler: - def __init__(self, - timeseries: np.ndarray, - insample_size: int, - outsample_size: int, - window_sampling_limit: int, - batch_size: int): + def __init__( + self, + timeseries: np.ndarray, + insample_size: int, + outsample_size: int, + window_sampling_limit: int, + batch_size: int, + ): self.timeseries = [ts for ts in timeseries] self.window_sampling_limit = window_sampling_limit self.batch_size = batch_size @@ -20,20 +22,29 @@ def __iter__(self): insample_mask = np.zeros((self.batch_size, self.insample_size)) outsample = np.zeros((self.batch_size, self.outsample_size)) outsample_mask = np.zeros((self.batch_size, self.outsample_size)) - sampled_ts_indices = np.random.randint(len(self.timeseries), size=self.batch_size) + sampled_ts_indices = np.random.randint( + len(self.timeseries), size=self.batch_size + ) for i, sampled_index in enumerate(sampled_ts_indices): sampled_timeseries = self.timeseries[sampled_index] - cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit), - high=len(sampled_timeseries), - size=1)[0] + cut_point = np.random.randint( + low=max(1, len(sampled_timeseries) - self.window_sampling_limit), + high=len(sampled_timeseries), + size=1, + )[0] - insample_window = sampled_timeseries[max(0, cut_point - self.insample_size):cut_point] - insample[i, -len(insample_window):] = insample_window - insample_mask[i, -len(insample_window):] = 1.0 + insample_window = sampled_timeseries[ + max(0, cut_point - self.insample_size) : cut_point + ] + insample[i, -len(insample_window) :] = insample_window + insample_mask[i, -len(insample_window) :] = 1.0 outsample_window = sampled_timeseries[ - cut_point:min(len(sampled_timeseries), cut_point + self.outsample_size)] - outsample[i, :len(outsample_window)] = outsample_window - outsample_mask[i, :len(outsample_window)] = 1.0 + cut_point : min( + len(sampled_timeseries), cut_point + self.outsample_size + ) + ] + outsample[i, : len(outsample_window)] = outsample_window + outsample_mask[i, : len(outsample_window)] = 1.0 yield insample, insample_mask, outsample, outsample_mask def sequential_latest_insamples(self): @@ -41,7 +52,7 @@ def sequential_latest_insamples(self): insample = np.zeros((batch_size, self.insample_size)) insample_mask = np.zeros((batch_size, self.insample_size)) for i, ts in enumerate(self.timeseries): - ts_last_window = ts[-self.insample_size:] - insample[i, -len(ts):] = ts_last_window - insample_mask[i, -len(ts):] = 1.0 + ts_last_window = ts[-self.insample_size :] + insample[i, -len(ts) :] = ts_last_window + insample_mask[i, -len(ts) :] = 1.0 return insample, insample_mask diff --git a/benchmark/metalearned/common/summary_utils.py b/benchmark/metalearned/common/summary_utils.py index 704f16b..a784ad3 100644 --- a/benchmark/metalearned/common/summary_utils.py +++ b/benchmark/metalearned/common/summary_utils.py @@ -42,40 +42,69 @@ def __init__(self, filter_path: str, evaluator): self.groups[parameter_key] = {} if parameter_value not 
in self.groups[parameter_key]: self.groups[parameter_key][parameter_value] = [] - self.groups[parameter_key][parameter_value].append(len(self.predictions) - 1) + self.groups[parameter_key][parameter_value].append( + len(self.predictions) - 1 + ) self.group_names = ', '.join(self.groups.keys()) logging.debug(f'Loaded {len(self.predictions)} predictions') logging.debug(f'Parameters: {self.group_names}') - def bootstrap(self, - ensemble_keys: List[str], - bootstrap_key: str, - bootstrap_size: int, - number_of_samples: int): + def bootstrap( + self, + ensemble_keys: List[str], + bootstrap_key: str, + bootstrap_size: int, + number_of_samples: int, + ): group_keys = self.groups.keys() - set(ensemble_keys) - group_values = list(itertools.product(*map(lambda g: self.groups[g].keys(), group_keys))) + group_values = list( + itertools.product(*map(lambda g: self.groups[g].keys(), group_keys)) + ) results = [] for group_instance in tqdm(group_values): - group_ids = [set(self.groups[group_key][group_value]) for group_key, group_value in - list(zip(group_keys, group_instance))] + group_ids = [ + set(self.groups[group_key][group_value]) + for group_key, group_value in list(zip(group_keys, group_instance)) + ] group_filter = set.intersection(*group_ids) if len(group_ids) > 0 else None - if group_instance != () and (group_filter is None or len(group_filter) == 0): + if group_instance != () and ( + group_filter is None or len(group_filter) == 0 + ): continue for _ in range(number_of_samples): sampled_ids = set( - itertools.chain(*random.sample(list(self.groups[bootstrap_key].values()), k=bootstrap_size))) - ensemble_ids = sampled_ids.intersection(group_filter) if group_filter is not None else sampled_ids + itertools.chain( + *random.sample( + list(self.groups[bootstrap_key].values()), k=bootstrap_size + ) + ) + ) + ensemble_ids = ( + sampled_ids.intersection(group_filter) + if group_filter is not None + else sampled_ids + ) if ensemble_ids is None or len(ensemble_ids) == 0: continue - ensemble_predictions = pd.concat([self.predictions[i] - for i in ensemble_ids], - sort=False).groupby(level='id', sort=False).median() + ensemble_predictions = ( + pd.concat([self.predictions[i] for i in ensemble_ids], sort=False) + .groupby(level='id', sort=False) + .median() + ) group_columns = dict(zip(group_keys, group_instance)) - evaluation_results = self.evaluator.evaluate(ensemble_predictions.values) + evaluation_results = self.evaluator.evaluate( + ensemble_predictions.values + ) for evaluation_key, evaluation_value in evaluation_results.items(): - results.append(pd.DataFrame({ - 'metric': evaluation_value, - 'evaluation_key': evaluation_key, - **group_columns}, index=[0])) + results.append( + pd.DataFrame( + { + 'metric': evaluation_value, + 'evaluation_key': evaluation_key, + **group_columns, + }, + index=[0], + ) + ) return pd.concat(results, sort=False).reset_index() diff --git a/benchmark/metalearned/common/timeseries.py b/benchmark/metalearned/common/timeseries.py index 33428e4..e65c907 100644 --- a/benchmark/metalearned/common/timeseries.py +++ b/benchmark/metalearned/common/timeseries.py @@ -5,9 +5,9 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Tuple +import dill import numpy as np from dateutil.relativedelta import relativedelta -import dill class TimeUnit(ABC): @@ -98,25 +98,33 @@ class Timeseries: meta: Dict[str, Any] def copy(self, start_date: datetime, values: np.ndarray) -> 'Timeseries': - return Timeseries(id=self.id, - start_date=start_date, - time_unit=self.time_unit, - 
frequency=self.frequency, - period=self.period, - values=values, - meta=self.meta) + return Timeseries( + id=self.id, + start_date=start_date, + time_unit=self.time_unit, + frequency=self.frequency, + period=self.period, + values=values, + meta=self.meta, + ) def future_values(self, values: np.ndarray) -> 'Timeseries': - return self.copy(start_date=self.time_unit.add(self.start_date, len(self.values)), values=values) + return self.copy( + start_date=self.time_unit.add(self.start_date, len(self.values)), + values=values, + ) def split(self, n: int) -> TimeseriesSplit: time_shift = n if n >= 0 else len(self.values) + n split_time = self.time_unit.add(self.start_date, time_shift * self.frequency) - return self.copy(start_date=self.start_date, values=self.values[:n]), self.copy(start_date=split_time, - values=self.values[n:]) + return self.copy(start_date=self.start_date, values=self.values[:n]), self.copy( + start_date=split_time, values=self.values[n:] + ) def split_by_time(self, split_date: datetime) -> TimeseriesSplit: - points_to_include = int(self.time_unit.delta(split_date, self.start_date) // self.frequency) + points_to_include = int( + self.time_unit.delta(split_date, self.start_date) // self.frequency + ) if points_to_include < 0: before = self.copy(split_date, np.empty(0)) on_and_after = self @@ -135,8 +143,12 @@ def values(self) -> List[np.ndarray]: def time_stamps(self) -> List[np.ndarray]: def _make_time_stamps(ts): - return np.array([ts.time_unit.add(ts.start_date, ts.frequency*i) - for i in range(len(ts.values))]) + return np.array( + [ + ts.time_unit.add(ts.start_date, ts.frequency * i) + for i in range(len(ts.values)) + ] + ) return list(map(_make_time_stamps, self.timeseries)) @@ -152,7 +164,9 @@ def filter(self, f: Callable[[Timeseries], bool]) -> 'TimeseriesBundle': def map(self, f: Callable[[Timeseries], Timeseries]) -> 'TimeseriesBundle': return TimeseriesBundle(list(map(f, self.timeseries))) - def split(self, f: Callable[[Timeseries], TimeseriesSplit]) -> Tuple['TimeseriesBundle', 'TimeseriesBundle']: + def split( + self, f: Callable[[Timeseries], TimeseriesSplit] + ) -> Tuple['TimeseriesBundle', 'TimeseriesBundle']: bucket_1 = [] bucket_2 = [] for timeseries in self.timeseries: @@ -161,14 +175,20 @@ def split(self, f: Callable[[Timeseries], TimeseriesSplit]) -> Tuple['Timeseries bucket_2.append(part_2) return TimeseriesBundle(bucket_1), TimeseriesBundle(bucket_2) - def intersection_by_id(self, bundle: 'TimeseriesBundle') -> Tuple['TimeseriesBundle', 'TimeseriesBundle']: + def intersection_by_id( + self, bundle: 'TimeseriesBundle' + ) -> Tuple['TimeseriesBundle', 'TimeseriesBundle']: bundle_ids = bundle.ids() ids = [ts_id for ts_id in self.ids() if ts_id in bundle_ids] - return self.filter(lambda ts: ts.id in ids), bundle.filter(lambda ts: ts.id in ids) + return self.filter(lambda ts: ts.id in ids), bundle.filter( + lambda ts: ts.id in ids + ) def future_values(self, values: np.array) -> 'TimeseriesBundle': assert len(values) == len(self.timeseries) - return TimeseriesBundle([ts.future_values(values[i]) for i, ts in enumerate(self.timeseries)]) + return TimeseriesBundle( + [ts.future_values(values[i]) for i, ts in enumerate(self.timeseries)] + ) class TimeseriesLoader(ABC): @@ -192,4 +212,3 @@ def download(self) -> TimeseriesBundle: :return: Training and test splits. 
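For the `Timeseries` container in `common/timeseries.py`, `split(n)` cuts the value array at position `n` and shifts the start date of the second piece by the corresponding number of time units, which is how a trailing horizon is held out. A toy example (the series itself is invented):

```python
# Hypothetical daily series illustrating Timeseries.split.
from datetime import datetime
import numpy as np
from common.timeseries import Day, Timeseries

ts = Timeseries(
    id='toy-series',
    start_date=datetime(2020, 1, 1),
    time_unit=Day(),
    frequency=1,
    period=7,                      # weekly seasonality
    values=np.arange(30, dtype=float),
    meta={},
)

history, holdout = ts.split(-7)    # hold out the last 7 observations
print(len(history.values), len(holdout.values))  # 23 7
print(holdout.start_date)                        # 2020-01-24 00:00:00
```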
""" pass - diff --git a/benchmark/metalearned/common/torch_utils.py b/benchmark/metalearned/common/torch_utils.py index cdaf534..1ed055e 100644 --- a/benchmark/metalearned/common/torch_utils.py +++ b/benchmark/metalearned/common/torch_utils.py @@ -22,8 +22,8 @@ def to_device(module: t.nn.Module, use_cuda: bool = True): def div_no_nan(a, b): result = a / b - result[result != result] = .0 - result[result == np.inf] = .0 + result[result != result] = 0.0 + result[result == np.inf] = 0.0 return result @@ -33,11 +33,16 @@ def mape_loss(forecast, target, mask): def smape_1_loss(forecast, target, mask): - return 200 * t.mean(div_no_nan(t.abs(forecast - target), forecast.data + target.data) * mask) + return 200 * t.mean( + div_no_nan(t.abs(forecast - target), forecast.data + target.data) * mask + ) def smape_2_loss(forecast, target, mask): - return 200 * t.mean(div_no_nan(t.abs(forecast - target), t.abs(forecast.data) + t.abs(target.data)) * mask) + return 200 * t.mean( + div_no_nan(t.abs(forecast - target), t.abs(forecast.data) + t.abs(target.data)) + * mask + ) def mase_loss(insample, freq, forecast, target, mask): @@ -47,7 +52,9 @@ def mase_loss(insample, freq, forecast, target, mask): class SnapshotManager: - def __init__(self, snapshot_dir: str, logging_frequency: int, snapshot_frequency: int): + def __init__( + self, snapshot_dir: str, logging_frequency: int, snapshot_frequency: int + ): self.model_snapshot_file = os.path.join(snapshot_dir, 'model') self.optimizer_snapshot_file = os.path.join(snapshot_dir, 'optimizer') self.losses_file = os.path.join(snapshot_dir, 'losses') @@ -59,16 +66,26 @@ def __init__(self, snapshot_dir: str, logging_frequency: int, snapshot_frequency self.losses = {'training': {}, 'validation': {}} self.time_track = {} - def restore(self, model: Optional[t.nn.Module], optimizer: Optional[t.optim.Optimizer]) -> int: + def restore( + self, model: Optional[t.nn.Module], optimizer: Optional[t.optim.Optimizer] + ) -> int: if model is not None and os.path.isfile(self.model_snapshot_file): model.load_state_dict(t.load(self.model_snapshot_file)) if optimizer is not None and os.path.isfile(self.optimizer_snapshot_file): optimizer.load_state_dict(t.load(self.optimizer_snapshot_file)) - iteration = t.load(self.iteration_file)['iteration'] if os.path.isfile(self.iteration_file) else 0 + iteration = ( + t.load(self.iteration_file)['iteration'] + if os.path.isfile(self.iteration_file) + else 0 + ) if os.path.isfile(self.losses_file): losses = t.load(self.losses_file) - training_losses = {k: v for k, v in losses['training'].items() if k <= iteration} - validation_losses = {k: v for k, v in losses['validation'].items() if k <= iteration} + training_losses = { + k: v for k, v in losses['training'].items() if k <= iteration + } + validation_losses = { + k: v for k, v in losses['validation'].items() if k <= iteration + } # when restoring remove losses which were after the last snapshot self.losses = {'training': training_losses, 'validation': validation_losses} self.snapshot(self.losses_file, self.losses) @@ -86,12 +103,14 @@ def load_training_losses(self) -> pd.DataFrame: def enable_time_tracking(self): self.start_time = time.time() - def register(self, - iteration: int, - training_loss: float, - validation_loss: float, - model: t.nn.Module, - optimizer: Optional[t.optim.Optimizer]) -> None: + def register( + self, + iteration: int, + training_loss: float, + validation_loss: float, + model: t.nn.Module, + optimizer: Optional[t.optim.Optimizer], + ) -> None: if iteration == 1 or iteration 
% self.logging_frequency == 0: self.losses['training'][iteration] = training_loss self.losses['validation'][iteration] = validation_loss diff --git a/benchmark/metalearned/common/utils.py b/benchmark/metalearned/common/utils.py index c456c46..2da7bdf 100644 --- a/benchmark/metalearned/common/utils.py +++ b/benchmark/metalearned/common/utils.py @@ -4,15 +4,15 @@ import pathlib import sys import urllib -from decimal import Decimal, ROUND_HALF_UP +from decimal import ROUND_HALF_UP, Decimal from glob import glob from itertools import dropwhile, takewhile +from math import pow from typing import Any, Callable, List from urllib import request import numpy as np import pandas as pd -from math import pow from tqdm import tqdm @@ -24,17 +24,32 @@ def get_module_path(): def round_half_up(n, precision): - return int(Decimal(n * pow(10, precision)).to_integral_value(rounding=ROUND_HALF_UP)) / pow(10, precision) - - -def median_ensemble(experiment_path: str, - summary_filter: str = '**', - forecast_file: str = 'forecast.csv', - group_by: str = 'id'): - return pd.concat([pd.read_csv(file) - for file in - tqdm(glob(os.path.join(experiment_path, summary_filter, forecast_file)))], sort=False) \ - .set_index(group_by).groupby(level=group_by, sort=False).median().values + return int( + Decimal(n * pow(10, precision)).to_integral_value(rounding=ROUND_HALF_UP) + ) / pow(10, precision) + + +def median_ensemble( + experiment_path: str, + summary_filter: str = '**', + forecast_file: str = 'forecast.csv', + group_by: str = 'id', +): + return ( + pd.concat( + [ + pd.read_csv(file) + for file in tqdm( + glob(os.path.join(experiment_path, summary_filter, forecast_file)) + ) + ], + sort=False, + ) + .set_index(group_by) + .groupby(level=group_by, sort=False) + .median() + .values + ) def group_values(values: np.ndarray, groups: np.ndarray, group_name: str): @@ -50,8 +65,11 @@ def download_url(url: str, file_path: str) -> None: """ def progress(count, block_size, total_size): - sys.stdout.write('\rDownloading {} from {} {:.1f}%'.format(file_path, url, float(count * block_size) / float( - total_size) * 100.0)) + sys.stdout.write( + '\rDownloading {} from {} {:.1f}%'.format( + file_path, url, float(count * block_size) / float(total_size) * 100.0 + ) + ) sys.stdout.flush() if not os.path.isfile(file_path): @@ -63,7 +81,9 @@ def progress(count, block_size, total_size): sys.stdout.write('\n') sys.stdout.flush() file_info = os.stat(f) - logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.') + logging.info( + f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.' + ) else: file_info = os.stat(file_path) logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.') @@ -107,5 +127,8 @@ def ordered_insert(ordered_stack: List, value, f: Callable[[Any, Any], bool]): (and truncated if necessary). :return: New instance of stack with inserted element. 
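One detail worth keeping in mind for `common/torch_utils.py` above: the sMAPE losses route their division through `div_no_nan`, which zeroes out NaN and inf results so that all-zero targets or empty mask positions cannot poison the batch loss. A quick behaviour check (toy tensors, import path assumed):

```python
# div_no_nan turns 0/0 and x/0 into 0.0 instead of nan/inf.
import torch as t
from common.torch_utils import div_no_nan

a = t.tensor([1.0, 0.0, 2.0])
b = t.tensor([0.0, 0.0, 4.0])
print(div_no_nan(a, b))   # tensor([0.0000, 0.0000, 0.5000])
```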
""" - return (list(takewhile(lambda x: f(x, value), ordered_stack)) + [value] + - list(dropwhile(lambda x: f(x, value), ordered_stack)))[:len(ordered_stack)] + return ( + list(takewhile(lambda x: f(x, value), ordered_stack)) + + [value] + + list(dropwhile(lambda x: f(x, value), ordered_stack)) + )[: len(ordered_stack)] diff --git a/benchmark/metalearned/dataset.py b/benchmark/metalearned/dataset.py index 6b41842..d3aad4c 100644 --- a/benchmark/metalearned/dataset.py +++ b/benchmark/metalearned/dataset.py @@ -7,11 +7,18 @@ import numpy as np import pandas as pd import patoolib -from tqdm import tqdm - from common.settings import RESOURCES_DIR -from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Year, Month, Day, Hour +from common.timeseries import ( + Day, + Hour, + Month, + Timeseries, + TimeseriesBundle, + TimeseriesLoader, + Year, +) from common.utils import download_url +from tqdm import tqdm @dataclass(frozen=True) @@ -31,29 +38,37 @@ def period_map(self): class M4Dataset(TimeseriesLoader): def download(self) -> TimeseriesBundle: url_template = 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/{}/{}-{}.csv' - m4_info_url = 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/M4-info.csv' + m4_info_url = ( + 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/M4-info.csv' + ) m4_info_path = os.path.join(self.path, 'M4info.csv') ssl._create_default_https_context = ssl._create_unverified_context download_url(m4_info_url, m4_info_path) for sp in M4Meta.seasonal_patterns: - training_url = url_template.format("Train", sp, "train") - download_url(training_url, os.path.join(M4Meta.dataset_path, f'{sp}-train.csv')) - test_url = url_template.format("Test", sp, "test") + training_url = url_template.format('Train', sp, 'train') + download_url( + training_url, os.path.join(M4Meta.dataset_path, f'{sp}-train.csv') + ) + test_url = url_template.format('Test', sp, 'test') download_url(test_url, os.path.join(M4Meta.dataset_path, f'{sp}-test.csv')) # Download naive2 forecasts, needed for OWA metric m4_naive2_archive = os.path.join(self.path, 'naive2.rar') - download_url('https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-Naive2.rar', - m4_naive2_archive) + download_url( + 'https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-Naive2.rar', + m4_naive2_archive, + ) patoolib.extract_archive(m4_naive2_archive, outdir=self.path) os.remove(m4_naive2_archive) # Download m4 competition winner predictions, for summary testing purposes only m4_winner_archive = os.path.join(self.path, 'submission-118.rar') - download_url('https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-118.rar', - m4_winner_archive) + download_url( + 'https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-118.rar', + m4_winner_archive, + ) patoolib.extract_archive(m4_winner_archive, outdir=self.path) os.remove(m4_winner_archive) @@ -66,12 +81,14 @@ def download(self) -> TimeseriesBundle: 'Monthly': (Month(), 1), 'Weekly': (Day(), 7), 'Daily': (Day(), 1), - 'Hourly': (Hour(), 1) + 'Hourly': (Hour(), 1), } all_timeseries = [] for sp in M4Meta.seasonal_patterns: - training_set = pd.read_csv(os.path.join(M4Meta.dataset_path, f'{sp}-train.csv')) + training_set = pd.read_csv( + os.path.join(M4Meta.dataset_path, f'{sp}-train.csv') + ) test_set = pd.read_csv(os.path.join(M4Meta.dataset_path, f'{sp}-test.csv')) time_unit, frequency = time_units_mapping[sp] @@ -89,11 
+106,15 @@ def download(self) -> TimeseriesBundle: parsed_date = None for parsing_format in parsing_formats: try: - parsed_date = datetime.strptime(timeseries_info.StartingDate, parsing_format) + parsed_date = datetime.strptime( + timeseries_info.StartingDate, parsing_format + ) except Exception: continue if parsed_date is None: - raise ValueError(f'Could not parse {timeseries_info.StartingDate} for {timeseries_id}') + raise ValueError( + f'Could not parse {timeseries_info.StartingDate} for {timeseries_id}' + ) # all M4 years are in the 1900s or 1800s if parsed_date.year > 2000: parsed_date = parsed_date.replace(year=parsed_date.year - 100) @@ -101,14 +122,15 @@ def download(self) -> TimeseriesBundle: if parsed_date.year > 2000: print('over') - timeseries = Timeseries(id=timeseries_id, - start_date=parsed_date, - time_unit=time_unit, - frequency=frequency, - period=int(timeseries_info.Frequency), - values=np.concatenate([training_values, test_values]), - meta={'seasonal_pattern': sp} - ) + timeseries = Timeseries( + id=timeseries_id, + start_date=parsed_date, + time_unit=time_unit, + frequency=frequency, + period=int(timeseries_info.Frequency), + values=np.concatenate([training_values, test_values]), + meta={'seasonal_pattern': sp}, + ) all_timeseries.append(timeseries) return TimeseriesBundle(all_timeseries) @@ -116,7 +138,9 @@ def download(self) -> TimeseriesBundle: def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]: bundle = self.load_cache() horizons_map = M4Meta().horizons_map() - return bundle.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])) + return bundle.split( + lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]) + ) @staticmethod def filter(bundle: TimeseriesBundle, seasonal_pattern: str) -> TimeseriesBundle: diff --git a/benchmark/metalearned/experiments/tl/main.py b/benchmark/metalearned/experiments/tl/main.py index 04173ec..1249558 100644 --- a/benchmark/metalearned/experiments/tl/main.py +++ b/benchmark/metalearned/experiments/tl/main.py @@ -4,18 +4,21 @@ import numpy as np import pandas as pd import torch as t -from fire import Fire -from scipy.interpolate import interp1d -from torch import optim - -from common.experiment import create_experiment -from common.experiment import load_experiment_parameters +from common.experiment import create_experiment, load_experiment_parameters from common.samplers import UnivariateTimeseriesSampler from common.settings import experiment_path from common.timeseries import TimeseriesBundle -from common.torch_utils import SnapshotManager, to_device, to_tensor, mase_loss, mape_loss, smape_2_loss +from common.torch_utils import ( + SnapshotManager, + mape_loss, + mase_loss, + smape_2_loss, + to_device, + to_tensor, +) from common.utils import get_module_path from experiments.tl.parameters import parameters +from fire import Fire from models.nbeats_torch import nbeats_generic, nbeats_interpretable from resources.electricity.dataset import ElectricityDataset, ElectricityMeta from resources.fred.dataset import FredDataset, FredMeta @@ -23,19 +26,27 @@ from resources.m4.dataset import M4Dataset, M4Meta from resources.tourism.dataset import TourismDataset, TourismMeta from resources.traffic.dataset import TrafficDataset, TrafficMeta +from scipy.interpolate import interp1d +from torch import optim module_path = get_module_path() def init(name: str): - create_experiment(experiment_path=experiment_path(module_path, name), - parameters=parameters[name], - command=lambda path, params: f'python 
{module_path}/main.py run --path={path}') + create_experiment( + experiment_path=experiment_path(module_path, name), + parameters=parameters[name], + command=lambda path, params: f'python {module_path}/main.py run --path={path}', + ) def run(path: str): experiment_parameters = load_experiment_parameters(path) - source_dataset_name = experiment_parameters['source_dataset'] if 'source_dataset' in experiment_parameters else 'M4' + source_dataset_name = ( + experiment_parameters['source_dataset'] + if 'source_dataset' in experiment_parameters + else 'M4' + ) loss_name = experiment_parameters['loss_name'] model_horizons = { @@ -95,67 +106,90 @@ def run(path: str): tl_models = {} for model_name, horizon in model_horizons.items(): sp = model_sps[model_name] - training_subset = source_dataset.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) + training_subset = source_dataset.filter( + lambda ts: ts.meta['seasonal_pattern'] == sp + ) training_values = np.array(training_subset.values()) if source_dataset_name == 'FRED': # interpolate monthly data if model_name == 'H24': training_values = [] for values in training_subset.values(): - interpolation_fn = interp1d(x=np.array(range(len(values))), y=values, kind='linear') - training_values.append(interpolation_fn(np.arange(0, len(values) - 0.5, 0.5))) + interpolation_fn = interp1d( + x=np.array(range(len(values))), y=values, kind='linear' + ) + training_values.append( + interpolation_fn(np.arange(0, len(values) - 0.5, 0.5)) + ) training_values = np.array(training_values) elif model_name == 'H48': training_values = [] for values in training_subset.values(): - interpolation_fn = interp1d(x=np.array(range(len(values))), y=values, kind='linear') - training_values.append(interpolation_fn(np.arange(0, len(values) - 0.75, 0.25))) + interpolation_fn = interp1d( + x=np.array(range(len(values))), y=values, kind='linear' + ) + training_values.append( + interpolation_fn(np.arange(0, len(values) - 0.75, 0.25)) + ) training_values = np.array(training_values) input_size = experiment_parameters['lookback_period'] * horizon - training_dataset = UnivariateTimeseriesSampler(timeseries=training_values, - insample_size=input_size, - outsample_size=horizon, - window_sampling_limit=int( - experiment_parameters['history_horizons'] * horizon), - batch_size=experiment_parameters['batch_size']) + training_dataset = UnivariateTimeseriesSampler( + timeseries=training_values, + insample_size=input_size, + outsample_size=horizon, + window_sampling_limit=int( + experiment_parameters['history_horizons'] * horizon + ), + batch_size=experiment_parameters['batch_size'], + ) # # Training # snapshot_dir = os.path.join(path, 'snapshots', model_name) - snapshot_manager = SnapshotManager(snapshot_dir=snapshot_dir, - logging_frequency=experiment_parameters['logging_frequency'], - snapshot_frequency=experiment_parameters['snapshot_frequency']) + snapshot_manager = SnapshotManager( + snapshot_dir=snapshot_dir, + logging_frequency=experiment_parameters['logging_frequency'], + snapshot_frequency=experiment_parameters['snapshot_frequency'], + ) if experiment_parameters['model_type'] == 'generic': - model = nbeats_generic(input_size=input_size, - output_size=horizon, - blocks=experiment_parameters['blocks'], - stacks=experiment_parameters['stacks'], - fc_layers=experiment_parameters['layers'], - fc_layers_size=experiment_parameters['width'], - scaling=experiment_parameters['scaling'], - mode=experiment_parameters['mode']) + model = nbeats_generic( + input_size=input_size, + output_size=horizon, + 
blocks=experiment_parameters['blocks'], + stacks=experiment_parameters['stacks'], + fc_layers=experiment_parameters['layers'], + fc_layers_size=experiment_parameters['width'], + scaling=experiment_parameters['scaling'], + mode=experiment_parameters['mode'], + ) else: - model = nbeats_interpretable(input_size=input_size, - output_size=horizon, - trend_blocks=experiment_parameters['trend_blocks'], - trend_fc_layers=experiment_parameters['layers'], - trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'], - degree_of_polynomial=experiment_parameters['degree_of_polynomial'], - seasonality_blocks=experiment_parameters['seasonality_blocks'], - seasonality_fc_layers=experiment_parameters['layers'], - seasonality_fc_layers_size=experiment_parameters['seasonality_fc_layers_size'], - num_of_harmonics=experiment_parameters['num_of_harmonics'], - scaling=experiment_parameters['scaling'], - mode=experiment_parameters['mode']) + model = nbeats_interpretable( + input_size=input_size, + output_size=horizon, + trend_blocks=experiment_parameters['trend_blocks'], + trend_fc_layers=experiment_parameters['layers'], + trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'], + degree_of_polynomial=experiment_parameters['degree_of_polynomial'], + seasonality_blocks=experiment_parameters['seasonality_blocks'], + seasonality_fc_layers=experiment_parameters['layers'], + seasonality_fc_layers_size=experiment_parameters[ + 'seasonality_fc_layers_size' + ], + num_of_harmonics=experiment_parameters['num_of_harmonics'], + scaling=experiment_parameters['scaling'], + mode=experiment_parameters['mode'], + ) model = to_device(model) - optimizer = optim.Adam(model.parameters(), - lr=experiment_parameters['learning_rate'], - weight_decay=0.0) + optimizer = optim.Adam( + model.parameters(), + lr=experiment_parameters['learning_rate'], + weight_decay=0.0, + ) lr_decay_step = experiment_parameters['iterations'] // 3 if lr_decay_step == 0: @@ -176,7 +210,9 @@ def run(path: str): if loss_name == 'MAPE': training_loss = mape_loss(forecast, y, y_mask) elif loss_name == 'MASE': - training_loss = mase_loss(x, training_subset.timeseries[0].period, forecast, y, y_mask) + training_loss = mase_loss( + x, training_subset.timeseries[0].period, forecast, y, y_mask + ) elif loss_name == 'SMAPE': training_loss = smape_2_loss(forecast, y, y_mask) else: @@ -190,13 +226,22 @@ def run(path: str): optimizer.step() for param_group in optimizer.param_groups: - param_group['lr'] = experiment_parameters['learning_rate'] * 0.5 ** (i // lr_decay_step) - - snapshot_manager.register(iteration=i, - training_loss=float(training_loss), - validation_loss=np.nan, model=model, - optimizer=optimizer) - tl_models[model_name] = {'p_model': model, 'p_input_size': input_size, 'p_horizon': horizon} + param_group['lr'] = experiment_parameters['learning_rate'] * 0.5 ** ( + i // lr_decay_step + ) + + snapshot_manager.register( + iteration=i, + training_loss=float(training_loss), + validation_loss=np.nan, + model=model, + optimizer=optimizer, + ) + tl_models[model_name] = { + 'p_model': model, + 'p_input_size': input_size, + 'p_horizon': horizon, + } # # Predictions @@ -205,113 +250,233 @@ def run(path: str): def forecast(bundle: TimeseriesBundle, p_model, p_input_size, p_horizon): forecasts = [] input_set = np.array(bundle.values()) - input_set = UnivariateTimeseriesSampler(timeseries=input_set, - insample_size=p_input_size, - outsample_size=0, - window_sampling_limit=1, - batch_size=1) + input_set = UnivariateTimeseriesSampler( + 
timeseries=input_set, + insample_size=p_input_size, + outsample_size=0, + window_sampling_limit=1, + batch_size=1, + ) p_x, p_x_mask = map(to_tensor, input_set.sequential_latest_insamples()) p_model.eval() with t.no_grad(): forecasts.extend(p_model(p_x, p_x_mask).cpu().detach().numpy()) - forecasts_df = pd.DataFrame(forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)]) + forecasts_df = pd.DataFrame( + forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)] + ) forecasts_df.index = bundle.ids() forecasts_df.index.name = 'id' return forecasts_df - def rolling_daily_forecast(base_insample: TimeseriesBundle, rolling_insample: TimeseriesBundle, - p_model, p_input_size, p_horizon): + def rolling_daily_forecast( + base_insample: TimeseriesBundle, + rolling_insample: TimeseriesBundle, + p_model, + p_input_size, + p_horizon, + ): forecasts = [] base_insample_values = np.array(base_insample.values()) rolling_insample_values = np.array(rolling_insample.values()) for window_id in range(7): - insample = np.concatenate([base_insample_values, rolling_insample_values[:, :window_id * p_horizon]], - axis=1) - input_set = UnivariateTimeseriesSampler(timeseries=insample, - insample_size=p_input_size, - outsample_size=0, - window_sampling_limit=1, - batch_size=1) + insample = np.concatenate( + [ + base_insample_values, + rolling_insample_values[:, : window_id * p_horizon], + ], + axis=1, + ) + input_set = UnivariateTimeseriesSampler( + timeseries=insample, + insample_size=p_input_size, + outsample_size=0, + window_sampling_limit=1, + batch_size=1, + ) p_x, p_x_mask = map(to_tensor, input_set.sequential_latest_insamples()) p_model.eval() with t.no_grad(): window_forecast = p_model(p_x, p_x_mask).cpu().detach().numpy() - forecasts = window_forecast if len(forecasts) == 0 else np.concatenate([forecasts, window_forecast], - axis=1) - - forecasts_df = pd.DataFrame(forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon * 7)]) + forecasts = ( + window_forecast + if len(forecasts) == 0 + else np.concatenate([forecasts, window_forecast], axis=1) + ) + + forecasts_df = pd.DataFrame( + forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon * 7)] + ) forecasts_df.index = base_insample.ids() forecasts_df.index.name = 'id' - forecasts_df.columns = [f'V{i}' for i in range(1, len(forecasts_df.columns) + 1)] + forecasts_df.columns = [ + f'V{i}' for i in range(1, len(forecasts_df.columns) + 1) + ] return forecasts_df # M4 target_input, _ = M4Dataset(M4Meta.dataset_path).standard_split() - yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), **tl_models['Y6']) - quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), **tl_models['Q8']) - monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), **tl_models['M18']) - weekly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'), **tl_models['W13']) - daily = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'), **tl_models['D14']) - hourly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Hourly'), **tl_models['H48']) + yearly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), + **tl_models['Y6'], + ) + quarterly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), + **tl_models['Q8'], + ) + monthly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), + 
**tl_models['M18'], + ) + weekly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'), + **tl_models['W13'], + ) + daily = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'), + **tl_models['D14'], + ) + hourly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Hourly'), + **tl_models['H48'], + ) pd.concat([yearly, quarterly, monthly, weekly, daily, hourly], sort=False).to_csv( - os.path.join(os.path.join(path, 'M4.csv'))) + os.path.join(os.path.join(path, 'M4.csv')) + ) # M3 target_input, _ = M3Dataset(M3Meta.dataset_path).standard_split() - yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Year'), **tl_models['Y6']) - quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Quart'), **tl_models['Q8']) - monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Month'), **tl_models['M18']) - others = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Other'), **tl_models['Q8']) - pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv(os.path.join(os.path.join(path, 'M3.csv'))) + yearly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Year'), + **tl_models['Y6'], + ) + quarterly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Quart'), + **tl_models['Q8'], + ) + monthly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Month'), + **tl_models['M18'], + ) + others = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Other'), + **tl_models['Q8'], + ) + pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv( + os.path.join(os.path.join(path, 'M3.csv')) + ) # Tourism target_input, _ = TourismDataset(TourismMeta.dataset_path).standard_split() - yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), **tl_models['Y4']) - quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), **tl_models['Q8']) - monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), **tl_models['M24']) - pd.concat([yearly, quarterly, monthly], sort=False).to_csv(os.path.join(os.path.join(path, 'tourism.csv'))) + yearly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), + **tl_models['Y4'], + ) + quarterly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), + **tl_models['Q8'], + ) + monthly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), + **tl_models['M24'], + ) + pd.concat([yearly, quarterly, monthly], sort=False).to_csv( + os.path.join(os.path.join(path, 'tourism.csv')) + ) # Electricity - target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path). \ - load_cache().split(lambda ts: ts.split(-24 * 7)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \ - to_csv(os.path.join(os.path.join(path, 'electricity_last_window.csv'))) - - target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path).load_cache(). \ - split(lambda ts: ts.split_by_time(ElectricityMeta.deepar_split)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). 
\ - to_csv(os.path.join(os.path.join(path, 'electricity_deepar.csv'))) - - target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path).load_cache(). \ - split(lambda ts: ts.split_by_time(ElectricityMeta.deepfact_split)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \ - to_csv(os.path.join(os.path.join(path, 'electricity_deepfactors.csv'))) + target_input, rolling_target_input = ( + ElectricityDataset(ElectricityMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split(-24 * 7)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'electricity_last_window.csv'))) + + target_input, rolling_target_input = ( + ElectricityDataset(ElectricityMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split_by_time(ElectricityMeta.deepar_split)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'electricity_deepar.csv'))) + + target_input, rolling_target_input = ( + ElectricityDataset(ElectricityMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split_by_time(ElectricityMeta.deepfact_split)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'electricity_deepfactors.csv'))) # Traffic - target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache().\ - split(lambda ts: ts.split(-24 * 7)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \ - to_csv(os.path.join(os.path.join(path, 'traffic_last_window.csv'))) - - target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache(). \ - split(lambda ts: ts.split_by_time(TrafficMeta.deepar_split)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \ - to_csv(os.path.join(os.path.join(path, 'traffic_deepar.csv'))) - - target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache(). \ - split(lambda ts: ts.split_by_time(TrafficMeta.deepfact_split)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). 
\ - to_csv(os.path.join(os.path.join(path, 'traffic_deepfactors.csv'))) + target_input, rolling_target_input = ( + TrafficDataset(TrafficMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split(-24 * 7)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'traffic_last_window.csv'))) + + target_input, rolling_target_input = ( + TrafficDataset(TrafficMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split_by_time(TrafficMeta.deepar_split)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'traffic_deepar.csv'))) + + target_input, rolling_target_input = ( + TrafficDataset(TrafficMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split_by_time(TrafficMeta.deepfact_split)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'traffic_deepfactors.csv'))) # FRED target_input, _ = FredDataset(FredMeta.dataset_path).standard_split() - yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), **tl_models['Y6']) - quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), **tl_models['Q8']) - monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), **tl_models['M18']) - weekly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'), **tl_models['W13']) - daily = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'), **tl_models['D14']) - pd.concat([yearly, quarterly, monthly, weekly, daily]).to_csv(os.path.join(os.path.join(path, 'fred.csv'))) + yearly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), + **tl_models['Y6'], + ) + quarterly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), + **tl_models['Q8'], + ) + monthly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), + **tl_models['M18'], + ) + weekly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'), + **tl_models['W13'], + ) + daily = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'), + **tl_models['D14'], + ) + pd.concat([yearly, quarterly, monthly, weekly, daily]).to_csv( + os.path.join(os.path.join(path, 'fred.csv')) + ) def evaluate(name: str, summary_filter: str, validation_mode: bool = False): diff --git a/benchmark/metalearned/experiments/tl/parameters.py b/benchmark/metalearned/experiments/tl/parameters.py index 789b624..6e70d26 100644 --- a/benchmark/metalearned/experiments/tl/parameters.py +++ b/benchmark/metalearned/experiments/tl/parameters.py @@ -1,31 +1,24 @@ common = { 'repeats': list(range(10)), - 'lookback_period': list(range(2, 8)), 'loss_name': 'MASE', 'scaling': 'max', 'iterations': 15000, 'history_horizons': 10, - 'batch_size': 1024, 'learning_rate': 0.001, - 'mode': 'dress', - 'width': 512, 'layers': 4, 'blocks': 10, 'stacks': 1, - # interpretable 'trend_blocks': 3, 'trend_fc_layers_size': 256, 'degree_of_polynomial': 3, - 'seasonality_blocks': 3, 'seasonality_fc_layers_size': 2048, 'num_of_harmonics': 1, - 'logging_frequency': 500, 'snapshot_frequency': 5000, } @@ -39,7 +32,7 @@ 'blocks': 30, 'stacks': 1, 'iterations': [5000, 
15000], - 'loss_name': ['MASE', 'MAPE', 'SMAPE'] + 'loss_name': ['MASE', 'MAPE', 'SMAPE'], }, 'shared_grid': { **common, @@ -76,5 +69,5 @@ 'blocks': 1, 'stacks': 30, 'mode': 'dress', - } + }, } diff --git a/benchmark/metalearned/main.py b/benchmark/metalearned/main.py index dd45654..db0a1eb 100644 --- a/benchmark/metalearned/main.py +++ b/benchmark/metalearned/main.py @@ -4,18 +4,21 @@ import numpy as np import pandas as pd import torch as t -from fire import Fire -from scipy.interpolate import interp1d -from torch import optim - -from common.experiment import create_experiment -from common.experiment import load_experiment_parameters +from common.experiment import create_experiment, load_experiment_parameters from common.samplers import UnivariateTimeseriesSampler from common.settings import experiment_path from common.timeseries import TimeseriesBundle -from common.torch_utils import SnapshotManager, to_device, to_tensor, mase_loss, mape_loss, smape_2_loss +from common.torch_utils import ( + SnapshotManager, + mape_loss, + mase_loss, + smape_2_loss, + to_device, + to_tensor, +) from common.utils import get_module_path from experiments.tl.parameters import parameters +from fire import Fire from models.nbeats_torch import nbeats_generic, nbeats_interpretable from resources.electricity.dataset import ElectricityDataset, ElectricityMeta from resources.fred.dataset import FredDataset, FredMeta @@ -23,19 +26,27 @@ from resources.m4.dataset import M4Dataset, M4Meta from resources.tourism.dataset import TourismDataset, TourismMeta from resources.traffic.dataset import TrafficDataset, TrafficMeta +from scipy.interpolate import interp1d +from torch import optim module_path = get_module_path() def init(name: str): - create_experiment(experiment_path=experiment_path(module_path, name), - parameters=parameters[name], - command=lambda path, params: f'python {module_path}/main.py run --path={path}') + create_experiment( + experiment_path=experiment_path(module_path, name), + parameters=parameters[name], + command=lambda path, params: f'python {module_path}/main.py run --path={path}', + ) def run(path: str): experiment_parameters = load_experiment_parameters(path) - source_dataset_name = experiment_parameters['source_dataset'] if 'source_dataset' in experiment_parameters else 'M4' + source_dataset_name = ( + experiment_parameters['source_dataset'] + if 'source_dataset' in experiment_parameters + else 'M4' + ) loss_name = experiment_parameters['loss_name'] model_horizons = { @@ -95,67 +106,90 @@ def run(path: str): tl_models = {} for model_name, horizon in model_horizons.items(): sp = model_sps[model_name] - training_subset = source_dataset.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) + training_subset = source_dataset.filter( + lambda ts: ts.meta['seasonal_pattern'] == sp + ) training_values = np.array(training_subset.values()) if source_dataset_name == 'FRED': # interpolate monthly data if model_name == 'H24': training_values = [] for values in training_subset.values(): - interpolation_fn = interp1d(x=np.array(range(len(values))), y=values, kind='linear') - training_values.append(interpolation_fn(np.arange(0, len(values) - 0.5, 0.5))) + interpolation_fn = interp1d( + x=np.array(range(len(values))), y=values, kind='linear' + ) + training_values.append( + interpolation_fn(np.arange(0, len(values) - 0.5, 0.5)) + ) training_values = np.array(training_values) elif model_name == 'H48': training_values = [] for values in training_subset.values(): - interpolation_fn = 
interp1d(x=np.array(range(len(values))), y=values, kind='linear') - training_values.append(interpolation_fn(np.arange(0, len(values) - 0.75, 0.25))) + interpolation_fn = interp1d( + x=np.array(range(len(values))), y=values, kind='linear' + ) + training_values.append( + interpolation_fn(np.arange(0, len(values) - 0.75, 0.25)) + ) training_values = np.array(training_values) input_size = experiment_parameters['lookback_period'] * horizon - training_dataset = UnivariateTimeseriesSampler(timeseries=training_values, - insample_size=input_size, - outsample_size=horizon, - window_sampling_limit=int( - experiment_parameters['history_horizons'] * horizon), - batch_size=experiment_parameters['batch_size']) + training_dataset = UnivariateTimeseriesSampler( + timeseries=training_values, + insample_size=input_size, + outsample_size=horizon, + window_sampling_limit=int( + experiment_parameters['history_horizons'] * horizon + ), + batch_size=experiment_parameters['batch_size'], + ) # # Training # snapshot_dir = os.path.join(path, 'snapshots', model_name) - snapshot_manager = SnapshotManager(snapshot_dir=snapshot_dir, - logging_frequency=experiment_parameters['logging_frequency'], - snapshot_frequency=experiment_parameters['snapshot_frequency']) + snapshot_manager = SnapshotManager( + snapshot_dir=snapshot_dir, + logging_frequency=experiment_parameters['logging_frequency'], + snapshot_frequency=experiment_parameters['snapshot_frequency'], + ) if experiment_parameters['model_type'] == 'generic': - model = nbeats_generic(input_size=input_size, - output_size=horizon, - blocks=experiment_parameters['blocks'], - stacks=experiment_parameters['stacks'], - fc_layers=experiment_parameters['layers'], - fc_layers_size=experiment_parameters['width'], - scaling=experiment_parameters['scaling'], - mode=experiment_parameters['mode']) + model = nbeats_generic( + input_size=input_size, + output_size=horizon, + blocks=experiment_parameters['blocks'], + stacks=experiment_parameters['stacks'], + fc_layers=experiment_parameters['layers'], + fc_layers_size=experiment_parameters['width'], + scaling=experiment_parameters['scaling'], + mode=experiment_parameters['mode'], + ) else: - model = nbeats_interpretable(input_size=input_size, - output_size=horizon, - trend_blocks=experiment_parameters['trend_blocks'], - trend_fc_layers=experiment_parameters['layers'], - trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'], - degree_of_polynomial=experiment_parameters['degree_of_polynomial'], - seasonality_blocks=experiment_parameters['seasonality_blocks'], - seasonality_fc_layers=experiment_parameters['layers'], - seasonality_fc_layers_size=experiment_parameters['seasonality_fc_layers_size'], - num_of_harmonics=experiment_parameters['num_of_harmonics'], - scaling=experiment_parameters['scaling'], - mode=experiment_parameters['mode']) + model = nbeats_interpretable( + input_size=input_size, + output_size=horizon, + trend_blocks=experiment_parameters['trend_blocks'], + trend_fc_layers=experiment_parameters['layers'], + trend_fc_layers_size=experiment_parameters['trend_fc_layers_size'], + degree_of_polynomial=experiment_parameters['degree_of_polynomial'], + seasonality_blocks=experiment_parameters['seasonality_blocks'], + seasonality_fc_layers=experiment_parameters['layers'], + seasonality_fc_layers_size=experiment_parameters[ + 'seasonality_fc_layers_size' + ], + num_of_harmonics=experiment_parameters['num_of_harmonics'], + scaling=experiment_parameters['scaling'], + mode=experiment_parameters['mode'], + ) model = 
to_device(model) - optimizer = optim.Adam(model.parameters(), - lr=experiment_parameters['learning_rate'], - weight_decay=0.0) + optimizer = optim.Adam( + model.parameters(), + lr=experiment_parameters['learning_rate'], + weight_decay=0.0, + ) lr_decay_step = experiment_parameters['iterations'] // 3 if lr_decay_step == 0: @@ -176,7 +210,9 @@ def run(path: str): if loss_name == 'MAPE': training_loss = mape_loss(forecast, y, y_mask) elif loss_name == 'MASE': - training_loss = mase_loss(x, training_subset.timeseries[0].period, forecast, y, y_mask) + training_loss = mase_loss( + x, training_subset.timeseries[0].period, forecast, y, y_mask + ) elif loss_name == 'SMAPE': training_loss = smape_2_loss(forecast, y, y_mask) else: @@ -190,13 +226,22 @@ def run(path: str): optimizer.step() for param_group in optimizer.param_groups: - param_group['lr'] = experiment_parameters['learning_rate'] * 0.5 ** (i // lr_decay_step) - - snapshot_manager.register(iteration=i, - training_loss=float(training_loss), - validation_loss=np.nan, model=model, - optimizer=optimizer) - tl_models[model_name] = {'p_model': model, 'p_input_size': input_size, 'p_horizon': horizon} + param_group['lr'] = experiment_parameters['learning_rate'] * 0.5 ** ( + i // lr_decay_step + ) + + snapshot_manager.register( + iteration=i, + training_loss=float(training_loss), + validation_loss=np.nan, + model=model, + optimizer=optimizer, + ) + tl_models[model_name] = { + 'p_model': model, + 'p_input_size': input_size, + 'p_horizon': horizon, + } # # Predictions @@ -205,104 +250,207 @@ def run(path: str): def forecast(bundle: TimeseriesBundle, p_model, p_input_size, p_horizon): forecasts = [] input_set = np.array(bundle.values()) - input_set = UnivariateTimeseriesSampler(timeseries=input_set, - insample_size=p_input_size, - outsample_size=0, - window_sampling_limit=1, - batch_size=1) + input_set = UnivariateTimeseriesSampler( + timeseries=input_set, + insample_size=p_input_size, + outsample_size=0, + window_sampling_limit=1, + batch_size=1, + ) p_x, p_x_mask = map(to_tensor, input_set.sequential_latest_insamples()) p_model.eval() with t.no_grad(): forecasts.extend(p_model(p_x, p_x_mask).cpu().detach().numpy()) - forecasts_df = pd.DataFrame(forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)]) + forecasts_df = pd.DataFrame( + forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)] + ) forecasts_df.index = bundle.ids() forecasts_df.index.name = 'id' return forecasts_df - def rolling_daily_forecast(base_insample: TimeseriesBundle, rolling_insample: TimeseriesBundle, - p_model, p_input_size, p_horizon): + def rolling_daily_forecast( + base_insample: TimeseriesBundle, + rolling_insample: TimeseriesBundle, + p_model, + p_input_size, + p_horizon, + ): forecasts = [] base_insample_values = np.array(base_insample.values()) rolling_insample_values = np.array(rolling_insample.values()) for window_id in range(7): - insample = np.concatenate([base_insample_values, rolling_insample_values[:, :window_id * p_horizon]], - axis=1) - input_set = UnivariateTimeseriesSampler(timeseries=insample, - insample_size=p_input_size, - outsample_size=0, - window_sampling_limit=1, - batch_size=1) + insample = np.concatenate( + [ + base_insample_values, + rolling_insample_values[:, : window_id * p_horizon], + ], + axis=1, + ) + input_set = UnivariateTimeseriesSampler( + timeseries=insample, + insample_size=p_input_size, + outsample_size=0, + window_sampling_limit=1, + batch_size=1, + ) p_x, p_x_mask = map(to_tensor, input_set.sequential_latest_insamples()) 
p_model.eval() with t.no_grad(): window_forecast = p_model(p_x, p_x_mask).cpu().detach().numpy() - forecasts = window_forecast if len(forecasts) == 0 else np.concatenate([forecasts, window_forecast], - axis=1) - - forecasts_df = pd.DataFrame(forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon * 7)]) + forecasts = ( + window_forecast + if len(forecasts) == 0 + else np.concatenate([forecasts, window_forecast], axis=1) + ) + + forecasts_df = pd.DataFrame( + forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon * 7)] + ) forecasts_df.index = base_insample.ids() forecasts_df.index.name = 'id' - forecasts_df.columns = [f'V{i}' for i in range(1, len(forecasts_df.columns) + 1)] + forecasts_df.columns = [ + f'V{i}' for i in range(1, len(forecasts_df.columns) + 1) + ] return forecasts_df # M4 target_input, _ = M4Dataset(M4Meta.dataset_path).standard_split() - yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), **tl_models['Y6']) - quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), **tl_models['Q8']) - monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), **tl_models['M18']) - weekly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'), **tl_models['W13']) - daily = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'), **tl_models['D14']) - hourly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Hourly'), **tl_models['H48']) + yearly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), + **tl_models['Y6'], + ) + quarterly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), + **tl_models['Q8'], + ) + monthly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), + **tl_models['M18'], + ) + weekly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Weekly'), + **tl_models['W13'], + ) + daily = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Daily'), + **tl_models['D14'], + ) + hourly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Hourly'), + **tl_models['H48'], + ) pd.concat([yearly, quarterly, monthly, weekly, daily, hourly], sort=False).to_csv( - os.path.join(os.path.join(path, 'M4.csv'))) + os.path.join(os.path.join(path, 'M4.csv')) + ) # M3 target_input, _ = M3Dataset(M3Meta.dataset_path).standard_split() - yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Year'), **tl_models['Y6']) - quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Quart'), **tl_models['Q8']) - monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Month'), **tl_models['M18']) - others = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Other'), **tl_models['Q8']) - pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv(os.path.join(os.path.join(path, 'M3.csv'))) + yearly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Year'), + **tl_models['Y6'], + ) + quarterly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Quart'), + **tl_models['Q8'], + ) + monthly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'M3Month'), + **tl_models['M18'], + ) + others = forecast( + target_input.filter(lambda ts: 
ts.meta['seasonal_pattern'] == 'M3Other'), + **tl_models['Q8'], + ) + pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv( + os.path.join(os.path.join(path, 'M3.csv')) + ) # Tourism target_input, _ = TourismDataset(TourismMeta.dataset_path).standard_split() - yearly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), **tl_models['Y4']) - quarterly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), **tl_models['Q8']) - monthly = forecast(target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), **tl_models['M24']) - pd.concat([yearly, quarterly, monthly], sort=False).to_csv(os.path.join(os.path.join(path, 'tourism.csv'))) + yearly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Yearly'), + **tl_models['Y4'], + ) + quarterly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Quarterly'), + **tl_models['Q8'], + ) + monthly = forecast( + target_input.filter(lambda ts: ts.meta['seasonal_pattern'] == 'Monthly'), + **tl_models['M24'], + ) + pd.concat([yearly, quarterly, monthly], sort=False).to_csv( + os.path.join(os.path.join(path, 'tourism.csv')) + ) # Electricity - target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path). \ - load_cache().split(lambda ts: ts.split(-24 * 7)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \ - to_csv(os.path.join(os.path.join(path, 'electricity_last_window.csv'))) - - target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path).load_cache(). \ - split(lambda ts: ts.split_by_time(ElectricityMeta.deepar_split)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \ - to_csv(os.path.join(os.path.join(path, 'electricity_deepar.csv'))) - - target_input, rolling_target_input = ElectricityDataset(ElectricityMeta.dataset_path).load_cache(). \ - split(lambda ts: ts.split_by_time(ElectricityMeta.deepfact_split)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). 
\ - to_csv(os.path.join(os.path.join(path, 'electricity_deepfactors.csv'))) + target_input, rolling_target_input = ( + ElectricityDataset(ElectricityMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split(-24 * 7)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'electricity_last_window.csv'))) + + target_input, rolling_target_input = ( + ElectricityDataset(ElectricityMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split_by_time(ElectricityMeta.deepar_split)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'electricity_deepar.csv'))) + + target_input, rolling_target_input = ( + ElectricityDataset(ElectricityMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split_by_time(ElectricityMeta.deepfact_split)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'electricity_deepfactors.csv'))) # Traffic - target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache().\ - split(lambda ts: ts.split(-24 * 7)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \ - to_csv(os.path.join(os.path.join(path, 'traffic_last_window.csv'))) - - target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache(). \ - split(lambda ts: ts.split_by_time(TrafficMeta.deepar_split)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). \ - to_csv(os.path.join(os.path.join(path, 'traffic_deepar.csv'))) - - target_input, rolling_target_input = TrafficDataset(TrafficMeta.dataset_path).load_cache(). \ - split(lambda ts: ts.split_by_time(TrafficMeta.deepfact_split)) - rolling_daily_forecast(base_insample=target_input, rolling_insample=rolling_target_input, **tl_models['H24']). 
\ - to_csv(os.path.join(os.path.join(path, 'traffic_deepfactors.csv'))) + target_input, rolling_target_input = ( + TrafficDataset(TrafficMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split(-24 * 7)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'traffic_last_window.csv'))) + + target_input, rolling_target_input = ( + TrafficDataset(TrafficMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split_by_time(TrafficMeta.deepar_split)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'traffic_deepar.csv'))) + + target_input, rolling_target_input = ( + TrafficDataset(TrafficMeta.dataset_path) + .load_cache() + .split(lambda ts: ts.split_by_time(TrafficMeta.deepfact_split)) + ) + rolling_daily_forecast( + base_insample=target_input, + rolling_insample=rolling_target_input, + **tl_models['H24'], + ).to_csv(os.path.join(os.path.join(path, 'traffic_deepfactors.csv'))) # FRED # target_input, _ = FredDataset(FredMeta.dataset_path).standard_split() diff --git a/benchmark/metalearned/main_ForecastPFN.py b/benchmark/metalearned/main_ForecastPFN.py index 0bd365e..ad51509 100644 --- a/benchmark/metalearned/main_ForecastPFN.py +++ b/benchmark/metalearned/main_ForecastPFN.py @@ -1,42 +1,40 @@ -from tqdm import tqdm import pathlib import sys + +from tqdm import tqdm + sys.path.append('..') import logging import os -import numpy as np import pandas as pd -import torch as t import tensorflow as tf -import tensorflow_io -from fire import Fire -from scipy.interpolate import interp1d -from torch import optim - -from common.experiment import create_experiment -from common.experiment import load_experiment_parameters -from data_provider.UnivariateTimeseriesSampler_WithStamps import UnivariateTimeseriesSampler_WithStamps -from exp.exp_ForecastPFN import Exp_ForecastPFN +from common.experiment import create_experiment, load_experiment_parameters +from common.metrics import smape from common.settings import experiment_path from common.timeseries import TimeseriesBundle -from common.torch_utils import SnapshotManager, to_device, to_tensor, mase_loss, mape_loss, smape_2_loss +from common.torch_utils import to_tensor from common.utils import get_module_path -from common.metrics import smape from experiments.tl.parameters import parameters -from models.nbeats_torch import nbeats_generic, nbeats_interpretable +from fire import Fire from resources.m3.dataset import M3Dataset, M3Meta -from resources.m4.dataset import M4Dataset, M4Meta from resources.tourism.dataset import TourismDataset, TourismMeta +from data_provider.UnivariateTimeseriesSampler_WithStamps import ( + UnivariateTimeseriesSampler_WithStamps, +) +from exp.exp_ForecastPFN import Exp_ForecastPFN + module_path = get_module_path() def init(name: str): - create_experiment(experiment_path=experiment_path(module_path, name), - parameters=parameters[name], - command=lambda path, params: f'python {module_path}/main.py run --path={path}') + create_experiment( + experiment_path=experiment_path(module_path, name), + parameters=parameters[name], + command=lambda path, params: f'python {module_path}/main.py run --path={path}', + ) def run(path: str): @@ -59,65 +57,79 @@ def run(path: str): input_size = experiment_parameters['lookback_period'] * horizon model = Exp_ForecastPFN(None) - tl_models[model_name] = {'p_model': model, 
'p_input_size': input_size, 'p_horizon': horizon} + tl_models[model_name] = { + 'p_model': model, + 'p_input_size': input_size, + 'p_horizon': horizon, + } # # Predictions # - def forecast(in_bundle: TimeseriesBundle, out_bundle: TimeseriesBundle, - sp: str, - p_model, p_input_size, p_horizon): + def forecast( + in_bundle: TimeseriesBundle, + out_bundle: TimeseriesBundle, + sp: str, + p_model, + p_input_size, + p_horizon, + ): forecasts = [] - in_bundle = in_bundle.filter( - lambda ts: ts.meta['seasonal_pattern'] == sp) - out_bundle = out_bundle.filter( - lambda ts: ts.meta['seasonal_pattern'] == sp) + in_bundle = in_bundle.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) + out_bundle = out_bundle.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) input_set = in_bundle.values() input_timestamps = in_bundle.time_stamps() - input_set = UnivariateTimeseriesSampler_WithStamps(timeseries=input_set, - time_stamps=input_timestamps, - insample_size=p_input_size, - outsample_size=0, - window_sampling_limit=1, - batch_size=1, - time_features=p_model._ForecastPFN_time_features, - ) + input_set = UnivariateTimeseriesSampler_WithStamps( + timeseries=input_set, + time_stamps=input_timestamps, + insample_size=p_input_size, + outsample_size=0, + window_sampling_limit=1, + batch_size=1, + time_features=p_model._ForecastPFN_time_features, + ) p_x, p_x_mask, p_x_timestamps = input_set.sequential_latest_insamples() output_set = out_bundle.values() output_timestamps = out_bundle.time_stamps() - output_set = UnivariateTimeseriesSampler_WithStamps(timeseries=output_set, - time_stamps=output_timestamps, - insample_size=p_horizon, - outsample_size=0, - window_sampling_limit=1, - batch_size=1, - time_features=p_model._ForecastPFN_time_features, - ) + output_set = UnivariateTimeseriesSampler_WithStamps( + timeseries=output_set, + time_stamps=output_timestamps, + insample_size=p_horizon, + outsample_size=0, + window_sampling_limit=1, + batch_size=1, + time_features=p_model._ForecastPFN_time_features, + ) p_y, p_y_mask, p_y_timestamps = output_set.sequential_latest_insamples() x, x_mark, y, y_mark = p_x, p_x_timestamps, p_y, p_y_timestamps batch_x, batch_y = to_tensor(x)[:, :, None], to_tensor(y)[:, :, None] - batch_x_mark, batch_y_mark = to_tensor( - x_mark.astype(int)), to_tensor(y_mark.astype(int)) - - + batch_x_mark, batch_y_mark = ( + to_tensor(x_mark.astype(int)), + to_tensor(y_mark.astype(int)), + ) + model = tf.keras.models.load_model( - str(pathlib.Path(path).parent) + '/ckpts/', custom_objects={'smape': smape}) - for idx, (x, y, x_mark, y_mark) in tqdm(enumerate(zip(batch_x, batch_y, batch_x_mark, batch_y_mark))): + str(pathlib.Path(path).parent) + '/ckpts/', custom_objects={'smape': smape} + ) + for idx, (x, y, x_mark, y_mark) in tqdm( + enumerate(zip(batch_x, batch_y, batch_x_mark, batch_y_mark)) + ): pred = p_model._process_tuple(x, x_mark, y_mark, model, p_horizon) forecasts.extend(pred) - forecasts_df = pd.DataFrame(forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)]) + forecasts_df = pd.DataFrame( + forecasts, columns=[f'V{idx + 1}' for idx in range(p_horizon)] + ) forecasts_df.index = in_bundle.ids() forecasts_df.index.name = 'id' return forecasts_df - # M4 # target_input, target_output = M4Dataset( # M4Meta.dataset_path).standard_split() @@ -131,20 +143,26 @@ def forecast(in_bundle: TimeseriesBundle, out_bundle: TimeseriesBundle, # os.path.join(os.path.join(path, 'M4.csv'))) # M3 - target_input, target_output = M3Dataset( - M3Meta.dataset_path).standard_split() + target_input, 
target_output = M3Dataset(M3Meta.dataset_path).standard_split() yearly = forecast(target_input, target_output, 'M3Year', **tl_models['Y6']) quarterly = forecast(target_input, target_output, 'M3Quart', **tl_models['Q8']) monthly = forecast(target_input, target_output, 'M3Month', **tl_models['M18']) others = forecast(target_input, target_output, 'M3Other', **tl_models['Q8']) - pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv(os.path.join(os.path.join(path, 'M3.csv'))) + pd.concat([yearly, quarterly, monthly, others], sort=False).to_csv( + os.path.join(os.path.join(path, 'M3.csv')) + ) # Tourism - target_input, target_output = TourismDataset(TourismMeta.dataset_path).standard_split() + target_input, target_output = TourismDataset( + TourismMeta.dataset_path + ).standard_split() yearly = forecast(target_input, target_output, 'Yearly', **tl_models['Y4']) quarterly = forecast(target_input, target_output, 'Quarterly', **tl_models['Q8']) monthly = forecast(target_input, target_output, 'Monthly', **tl_models['M24']) - pd.concat([yearly, quarterly, monthly], sort=False).to_csv(os.path.join(os.path.join(path, 'tourism.csv'))) + pd.concat([yearly, quarterly, monthly], sort=False).to_csv( + os.path.join(os.path.join(path, 'tourism.csv')) + ) + def evaluate(name: str, summary_filter: str, validation_mode: bool = False): pass diff --git a/benchmark/metalearned/models/nbeats_torch.py b/benchmark/metalearned/models/nbeats_torch.py index 419b969..9569665 100644 --- a/benchmark/metalearned/models/nbeats_torch.py +++ b/benchmark/metalearned/models/nbeats_torch.py @@ -1,21 +1,21 @@ +from itertools import chain from typing import Tuple import numpy as np import torch as t -from itertools import chain - from common.torch_utils import div_no_nan class NBeatsFC(t.nn.Module): - def __init__(self, - input_size: int, - fc_layers: int, - output_size: int): + def __init__(self, input_size: int, fc_layers: int, output_size: int): super().__init__() - self.fc_layers = t.nn.ModuleList([t.nn.Linear(in_features=input_size, out_features=output_size)] + - [t.nn.Linear(in_features=output_size, out_features=output_size) - for _ in range(fc_layers - 1)]) + self.fc_layers = t.nn.ModuleList( + [t.nn.Linear(in_features=input_size, out_features=output_size)] + + [ + t.nn.Linear(in_features=output_size, out_features=output_size) + for _ in range(fc_layers - 1) + ] + ) def forward(self, x: t.Tensor) -> t.Tensor: output = x @@ -25,16 +25,16 @@ def forward(self, x: t.Tensor) -> t.Tensor: class NBeatsGenericBlock(t.nn.Module): - def __init__(self, - input_size: int, - fc_layers: int, - fc_layers_size: int, - output_size: int): + def __init__( + self, input_size: int, fc_layers: int, fc_layers_size: int, output_size: int + ): super().__init__() - self.fc = NBeatsFC(input_size=input_size, - fc_layers=fc_layers, - output_size=fc_layers_size) - self.basis = t.nn.Linear(in_features=fc_layers_size, out_features=input_size + output_size) + self.fc = NBeatsFC( + input_size=input_size, fc_layers=fc_layers, output_size=fc_layers_size + ) + self.basis = t.nn.Linear( + in_features=fc_layers_size, out_features=input_size + output_size + ) self.output_size = output_size self.backcast_dump = None @@ -51,55 +51,95 @@ def forward(self, x: t.Tensor) -> Tuple[t.Tensor, t.Tensor]: class NBeatsTrendBlock(t.nn.Module): - def __init__(self, - input_size: int, - fc_layers: int, - fc_layers_size: int, - degree_of_polynomial: int, - output_size: int): + def __init__( + self, + input_size: int, + fc_layers: int, + fc_layers_size: int, + 
degree_of_polynomial: int, + output_size: int, + ): super().__init__() - self.polynomial_size = degree_of_polynomial + 1 # degree of polynomial with constant term - self.fc = NBeatsFC(input_size=input_size, - fc_layers=fc_layers, - output_size=fc_layers_size) - self.basis = t.nn.Linear(in_features=fc_layers_size, out_features=2 * self.polynomial_size) + self.polynomial_size = ( + degree_of_polynomial + 1 + ) # degree of polynomial with constant term + self.fc = NBeatsFC( + input_size=input_size, fc_layers=fc_layers, output_size=fc_layers_size + ) + self.basis = t.nn.Linear( + in_features=fc_layers_size, out_features=2 * self.polynomial_size + ) self.output_size = output_size - self.backcast_time = np.concatenate([np.power(np.arange(input_size, dtype=np.float) / input_size, i)[None, :] - for i in range(self.polynomial_size)]) - self.forecast_time = np.concatenate([np.power(np.arange(output_size, dtype=np.float) / output_size, i)[None, :] - for i in range(self.polynomial_size)]) + self.backcast_time = np.concatenate( + [ + np.power(np.arange(input_size, dtype=np.float) / input_size, i)[None, :] + for i in range(self.polynomial_size) + ] + ) + self.forecast_time = np.concatenate( + [ + np.power(np.arange(output_size, dtype=np.float) / output_size, i)[ + None, : + ] + for i in range(self.polynomial_size) + ] + ) self.backcast_dump = None self.forecast_dump = None def forward(self, x: t.Tensor): thetas = self.basis(self.fc(x)) - backcast = t.einsum('bp,pt->bt', thetas[:, self.polynomial_size:], x.new(self.backcast_time)) - forecast = t.einsum('bp,pt->bt', thetas[:, :self.polynomial_size], x.new(self.forecast_time)) + backcast = t.einsum( + 'bp,pt->bt', thetas[:, self.polynomial_size :], x.new(self.backcast_time) + ) + forecast = t.einsum( + 'bp,pt->bt', thetas[:, : self.polynomial_size], x.new(self.forecast_time) + ) self.backcast_dump = backcast self.forecast_dump = forecast return backcast, forecast class NBeatsSeasonalityBlock(t.nn.Module): - def __init__(self, - input_size: int, - fc_layers: int, - fc_layers_size: int, - num_of_harmonics: int, - output_size: int): + def __init__( + self, + input_size: int, + fc_layers: int, + fc_layers_size: int, + num_of_harmonics: int, + output_size: int, + ): super().__init__() - self.basis_parameters = int(np.ceil(num_of_harmonics / 2 * output_size) - (num_of_harmonics - 1)) - - self.fc = NBeatsFC(input_size=input_size, - fc_layers=fc_layers, - output_size=fc_layers_size) - self.basis = t.nn.Linear(in_features=fc_layers_size, out_features=4 * self.basis_parameters) - - frequency = np.append(np.zeros(1, dtype=np.float32), - np.arange(num_of_harmonics, num_of_harmonics / 2 * output_size, - dtype=np.float32) / num_of_harmonics)[None, :] - backcast_grid = -2 * np.pi * (np.arange(input_size, dtype=np.float32)[:, None] / output_size) * frequency - forecast_grid = 2 * np.pi * (np.arange(output_size, dtype=np.float32)[:, None] / output_size) * frequency + self.basis_parameters = int( + np.ceil(num_of_harmonics / 2 * output_size) - (num_of_harmonics - 1) + ) + + self.fc = NBeatsFC( + input_size=input_size, fc_layers=fc_layers, output_size=fc_layers_size + ) + self.basis = t.nn.Linear( + in_features=fc_layers_size, out_features=4 * self.basis_parameters + ) + + frequency = np.append( + np.zeros(1, dtype=np.float32), + np.arange( + num_of_harmonics, num_of_harmonics / 2 * output_size, dtype=np.float32 + ) + / num_of_harmonics, + )[None, :] + backcast_grid = ( + -2 + * np.pi + * (np.arange(input_size, dtype=np.float32)[:, None] / output_size) + * frequency + ) + 
forecast_grid = ( + 2 + * np.pi + * (np.arange(output_size, dtype=np.float32)[:, None] / output_size) + * frequency + ) self.backcast_cos_template = np.transpose(np.cos(backcast_grid)) self.backcast_sin_template = np.transpose(np.sin(backcast_grid)) self.forecast_cos_template = np.transpose(np.cos(forecast_grid)) @@ -111,20 +151,28 @@ def __init__(self, def forward(self, x: t.Tensor): harmonics_weights = self.basis(self.fc(x)) - backcast_harmonics_cos = t.einsum('bp,pt->bt', - harmonics_weights[:, 2 * self.basis_parameters:3 * self.basis_parameters], - x.new(self.backcast_cos_template)) - backcast_harmonics_sin = t.einsum('bp,pt->bt', - harmonics_weights[:, 3 * self.basis_parameters:], - x.new(self.backcast_sin_template)) + backcast_harmonics_cos = t.einsum( + 'bp,pt->bt', + harmonics_weights[:, 2 * self.basis_parameters : 3 * self.basis_parameters], + x.new(self.backcast_cos_template), + ) + backcast_harmonics_sin = t.einsum( + 'bp,pt->bt', + harmonics_weights[:, 3 * self.basis_parameters :], + x.new(self.backcast_sin_template), + ) backcast = backcast_harmonics_sin + backcast_harmonics_cos - forecast_harmonics_cos = t.einsum('bp,pt->bt', - harmonics_weights[:, :self.basis_parameters], - x.new(self.forecast_cos_template)) - forecast_harmonics_sin = t.einsum('bp,pt->bt', - harmonics_weights[:, self.basis_parameters:2 * self.basis_parameters], - x.new(self.forecast_sin_template)) + forecast_harmonics_cos = t.einsum( + 'bp,pt->bt', + harmonics_weights[:, : self.basis_parameters], + x.new(self.forecast_cos_template), + ) + forecast_harmonics_sin = t.einsum( + 'bp,pt->bt', + harmonics_weights[:, self.basis_parameters : 2 * self.basis_parameters], + x.new(self.forecast_sin_template), + ) forecast = forecast_harmonics_sin + forecast_harmonics_cos self.backcast_dump = backcast @@ -189,41 +237,67 @@ def forward(self, x: t.Tensor, input_mask: t.Tensor) -> t.Tensor: return forecast -def nbeats_generic(input_size: int, output_size: int, - blocks: int = 1, stacks: int = 30, - fc_layers: int = 4, fc_layers_size: int = 512, - scaling: str = None, mode: str = 'dress'): - modules = [[NBeatsGenericBlock(input_size=input_size, - fc_layers=fc_layers, - fc_layers_size=fc_layers_size, - output_size=output_size)] * blocks for _ in range(stacks)] - - return NBeats(t.nn.ModuleList(list(chain.from_iterable(modules))), - scaling=scaling, - mode=mode) - - -def nbeats_interpretable(input_size: int, output_size: int, - trend_blocks: int = 3, - trend_fc_layers: int = 4, - trend_fc_layers_size: int = 256, - degree_of_polynomial: int = 3, - seasonality_blocks: int = 3, - seasonality_fc_layers: int = 4, - seasonality_fc_layers_size: int = 2048, - num_of_harmonics: int = 1, - scaling: str = None, - mode: str = 'dress'): - trend_block = NBeatsTrendBlock(input_size=input_size, - fc_layers=trend_fc_layers, - fc_layers_size=trend_fc_layers_size, - degree_of_polynomial=degree_of_polynomial, - output_size=output_size) - seasonality_block = NBeatsSeasonalityBlock(input_size=input_size, - fc_layers=seasonality_fc_layers, - fc_layers_size=seasonality_fc_layers_size, - num_of_harmonics=num_of_harmonics, - output_size=output_size) - return NBeats(t.nn.ModuleList( - [trend_block for _ in range(trend_blocks)] + [seasonality_block for _ in range(seasonality_blocks)]), - scaling=scaling, mode=mode) +def nbeats_generic( + input_size: int, + output_size: int, + blocks: int = 1, + stacks: int = 30, + fc_layers: int = 4, + fc_layers_size: int = 512, + scaling: str = None, + mode: str = 'dress', +): + modules = [ + [ + NBeatsGenericBlock( + 
input_size=input_size, + fc_layers=fc_layers, + fc_layers_size=fc_layers_size, + output_size=output_size, + ) + ] + * blocks + for _ in range(stacks) + ] + + return NBeats( + t.nn.ModuleList(list(chain.from_iterable(modules))), scaling=scaling, mode=mode + ) + + +def nbeats_interpretable( + input_size: int, + output_size: int, + trend_blocks: int = 3, + trend_fc_layers: int = 4, + trend_fc_layers_size: int = 256, + degree_of_polynomial: int = 3, + seasonality_blocks: int = 3, + seasonality_fc_layers: int = 4, + seasonality_fc_layers_size: int = 2048, + num_of_harmonics: int = 1, + scaling: str = None, + mode: str = 'dress', +): + trend_block = NBeatsTrendBlock( + input_size=input_size, + fc_layers=trend_fc_layers, + fc_layers_size=trend_fc_layers_size, + degree_of_polynomial=degree_of_polynomial, + output_size=output_size, + ) + seasonality_block = NBeatsSeasonalityBlock( + input_size=input_size, + fc_layers=seasonality_fc_layers, + fc_layers_size=seasonality_fc_layers_size, + num_of_harmonics=num_of_harmonics, + output_size=output_size, + ) + return NBeats( + t.nn.ModuleList( + [trend_block for _ in range(trend_blocks)] + + [seasonality_block for _ in range(seasonality_blocks)] + ), + scaling=scaling, + mode=mode, + ) diff --git a/benchmark/metalearned/resources/electricity/dataset.py b/benchmark/metalearned/resources/electricity/dataset.py index f6b5282..7336021 100644 --- a/benchmark/metalearned/resources/electricity/dataset.py +++ b/benchmark/metalearned/resources/electricity/dataset.py @@ -5,11 +5,10 @@ import numpy as np import patoolib -from tqdm import tqdm - from common.settings import RESOURCES_DIR -from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Hour +from common.timeseries import Hour, Timeseries, TimeseriesBundle, TimeseriesLoader from common.utils import download_url +from tqdm import tqdm """ Hourly aggregated dataset from https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014 @@ -39,20 +38,29 @@ class ElectricityDataset(TimeseriesLoader): def download(self) -> TimeseriesBundle: archive_file = os.path.join(self.path, 'dataset.zip') raw_file = os.path.join(self.path, 'LD2011_2014.txt') - download_url('https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip', - archive_file) + download_url( + 'https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip', + archive_file, + ) patoolib.extract_archive(archive_file, outdir=self.path) with open(raw_file, 'r') as f: raw = f.readlines() - parsed_values = np.array(list(map( - lambda raw_line: np.array(raw_line.replace(',', '.').strip().split(';')[1:]).astype(np.float), tqdm(raw[1:]) - ))) + parsed_values = np.array( + list( + map( + lambda raw_line: np.array( + raw_line.replace(',', '.').strip().split(';')[1:] + ).astype(np.float), + tqdm(raw[1:]), + ) + ) + ) aggregated = [] for i in tqdm(range(0, parsed_values.shape[0], 4)): - aggregated.append(parsed_values[i:i + 4, :].sum(axis=0)) + aggregated.append(parsed_values[i : i + 4, :].sum(axis=0)) aggregated = np.array(aggregated) # regarding time labels, in dataset description authors specify @@ -62,19 +70,25 @@ def download(self) -> TimeseriesBundle: # neither for "2012-03-25 01:45:00", thus it's not clear how to deal with daylight saving time change in this # dataset. Taking into account this uncertainty the starting date is treated as UTC (without time changes). - start_date = datetime(2011, 1, 1, 1, 0, 0) # aggregated towards next hour instead of current hour. 
+ start_date = datetime( + 2011, 1, 1, 1, 0, 0 + ) # aggregated towards next hour instead of current hour. dataset = aggregated.T # use time step as second dimension. timeseries = [] for i, values in enumerate(dataset): - timeseries.append(Timeseries(id=str(i), - start_date=start_date, - time_unit=Hour(), - frequency=1, - period=ElectricityMeta.period, - values=values, - meta={})) + timeseries.append( + Timeseries( + id=str(i), + start_date=start_date, + time_unit=Hour(), + frequency=1, + period=ElectricityMeta.period, + values=values, + meta={}, + ) + ) return TimeseriesBundle(timeseries) def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]: diff --git a/benchmark/metalearned/resources/electricity/evaluator.py b/benchmark/metalearned/resources/electricity/evaluator.py index 99e53c9..8f3561d 100644 --- a/benchmark/metalearned/resources/electricity/evaluator.py +++ b/benchmark/metalearned/resources/electricity/evaluator.py @@ -2,8 +2,7 @@ from typing import Callable import numpy as np - -from common.evaluator import Evaluator, EvaluationResult +from common.evaluator import EvaluationResult, Evaluator from common.metrics import nd from common.timeseries import TimeseriesBundle from common.utils import round_half_up @@ -15,6 +14,11 @@ class ElectricityEvaluator(Evaluator): precision: int = 2 def evaluate(self, forecasts: TimeseriesBundle) -> EvaluationResult: - return {'metric': round_half_up(self.metric_fn(np.array(forecasts.values()), - np.array(self.test_set.values())), - self.precision)} + return { + 'metric': round_half_up( + self.metric_fn( + np.array(forecasts.values()), np.array(self.test_set.values()) + ), + self.precision, + ) + } diff --git a/benchmark/metalearned/resources/fred/api.py b/benchmark/metalearned/resources/fred/api.py index d811291..cc654d0 100644 --- a/benchmark/metalearned/resources/fred/api.py +++ b/benchmark/metalearned/resources/fred/api.py @@ -30,8 +30,10 @@ def __init__(self, dataset_path): self.dataset_path = dataset_path if not os.path.isfile(key_path): - raise Exception(f'Cannot find FRED key file. Create an API key and place it in {key_path}. ' - 'https://research.stlouisfed.org/docs/api/api_key.html') + raise Exception( + f'Cannot find FRED key file. Create an API key and place it in {key_path}. ' + 'https://research.stlouisfed.org/docs/api/api_key.html' + ) with open(key_path, 'r') as f: key = f.readline().strip() @@ -42,7 +44,7 @@ def call(self, api_fn: Callable[[Fred], A], attempt=1) -> A: raise Exception('Maximum retries exceeded') try: return api_fn(self.api) - except Exception as e: + except Exception: # logging.info(f'API Error: {str(e)}. Waiting {self.wait_delay} seconds to retry. 
Attempt: {attempt}') time.sleep(self.wait_delay) return self.call(api_fn=api_fn, attempt=attempt + 1) @@ -57,14 +59,18 @@ def fetch_categories(self, parent=0): def fetch_observation(self, timeseries_id: str): try: - values = self.api.series.observations(timeseries_id, params={'output_type': 1, - 'realtime_start': '1776-07-04'}) + values = self.api.series.observations( + timeseries_id, params={'output_type': 1, 'realtime_start': '1776-07-04'} + ) values = values.groupby('date').head(1) values = values.set_index('date')['value'] except Exception as e: - if 'The series does not exist in ALFRED but may exist in FRED' in str(e) \ - or 'this exceeds the maximum number of vintage dates allowed' in str(e).lower() \ - or 'bad request' in str(e).lower(): + if ( + 'The series does not exist in ALFRED but may exist in FRED' in str(e) + or 'this exceeds the maximum number of vintage dates allowed' + in str(e).lower() + or 'bad request' in str(e).lower() + ): # There are a couple of situations where ALFRED (vintage data) # would not work properly values = self.api.series.observations(timeseries_id) @@ -88,7 +94,7 @@ def fetch_all(self): categories = pickle.load(f) logging.info(f'Loaded {len(categories)} categories') else: - logging.info(f'Fetching categories') + logging.info('Fetching categories') categories = self.fetch_categories() logging.info(f'Fetched {len(categories)} categories') with open(categories_cache_path, 'wb') as f: @@ -97,7 +103,7 @@ def fetch_all(self): # # Fetch timeseries # - logging.info(f'Fetching timeseries') + logging.info('Fetching timeseries') dataset_file_path = os.path.join(self.dataset_path, 'dataset.pickle') dataset = {'processed_categories': [], 'data': {}} @@ -105,38 +111,46 @@ def fetch_all(self): with open(dataset_file_path, 'rb') as cache_file_name: dataset = pickle.load(cache_file_name) - categories_to_process = [c for c in categories if c not in dataset['processed_categories']] + categories_to_process = [ + c for c in categories if c not in dataset['processed_categories'] + ] limit = 1000 for category_id in tqdm(categories_to_process): offset = 0 while True: - timeseries_meta = self.call(lambda api: api.category.series(category_id, params={'limit': limit, - 'offset': offset})) + timeseries_meta = self.call( + lambda api: api.category.series( + category_id, params={'limit': limit, 'offset': offset} + ) + ) if len(timeseries_meta) == 0: break for _, ts_meta in timeseries_meta.iterrows(): ts_id = str(ts_meta.id) - start_date = datetime.datetime.strptime(str(ts_meta.observation_start), '%Y-%m-%d %H:%M:%S') + start_date = datetime.datetime.strptime( + str(ts_meta.observation_start), '%Y-%m-%d %H:%M:%S' + ) time_unit = str(ts_meta.frequency) if ts_id not in dataset['data']: dataset['data'][ts_id] = { 'start_date': start_date, 'time_unit': time_unit, - 'meta': { - 'categories': [category_id] - }, - 'values': self.call(lambda api: self.fetch_observation(ts_id)) + 'meta': {'categories': [category_id]}, + 'values': self.call( + lambda api: self.fetch_observation(ts_id) + ), } else: dataset['data'][ts_id]['meta']['categories'].append(category_id) offset += 1 dataset['processed_categories'].append(category_id) - temp_file = tempfile.NamedTemporaryFile(dir=self.dataset_path, delete=False, mode='wb') + temp_file = tempfile.NamedTemporaryFile( + dir=self.dataset_path, delete=False, mode='wb' + ) pickle.dump(dataset, temp_file, protocol=pickle.HIGHEST_PROTOCOL) temp_file.flush() os.fsync(temp_file.fileno()) os.rename(temp_file.name, dataset_file_path) - diff --git 
a/benchmark/metalearned/resources/fred/dataset.py b/benchmark/metalearned/resources/fred/dataset.py index f419063..1fbdda6 100644 --- a/benchmark/metalearned/resources/fred/dataset.py +++ b/benchmark/metalearned/resources/fred/dataset.py @@ -4,11 +4,17 @@ from dataclasses import dataclass from typing import Tuple -from tqdm import tqdm - from common.settings import RESOURCES_DIR -from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Year, Month, Day +from common.timeseries import ( + Day, + Month, + Timeseries, + TimeseriesBundle, + TimeseriesLoader, + Year, +) from resources.fred.api import FredAPI +from tqdm import tqdm @dataclass(frozen=True) @@ -36,7 +42,7 @@ def download(self) -> TimeseriesBundle: 'Quarterly': (Month(), 3), 'Monthly': (Month(), 1), 'Weekly': (Day(), 7), - 'Daily': (Day(), 1) + 'Daily': (Day(), 1), } period_map = FredMeta().period_map() @@ -44,26 +50,33 @@ def download(self) -> TimeseriesBundle: timeseries = [] for ts_id, record in tqdm(raw_data.items()): sp = record['time_unit'] - frequency = [frequency_map[s] for s in frequency_map.keys() if sp.startswith(s)] + frequency = [ + frequency_map[s] for s in frequency_map.keys() if sp.startswith(s) + ] period = [period_map[s] for s in period_map.keys() if sp.startswith(s)] if len(frequency) > 0: frequency = frequency[0] else: - raise Exception(f"Cannot match frequency for: {sp}") + raise Exception(f'Cannot match frequency for: {sp}') if len(period) > 0: period = period[0] else: - raise Exception(f"Cannot match frequency for: {sp}") - timeseries.append(Timeseries(id=ts_id, - start_date=record['start_date'], - time_unit=frequency[0], - frequency=frequency[1], - period=period, - values=record['values'], - meta={'seasonal_pattern': sp} - )) - grouped_timeseries = [list(filter(lambda ts: ts.meta['seasonal_pattern'] == sp, timeseries)) - for sp in FredMeta.seasonal_patterns] + raise Exception(f'Cannot match frequency for: {sp}') + timeseries.append( + Timeseries( + id=ts_id, + start_date=record['start_date'], + time_unit=frequency[0], + frequency=frequency[1], + period=period, + values=record['values'], + meta={'seasonal_pattern': sp}, + ) + ) + grouped_timeseries = [ + list(filter(lambda ts: ts.meta['seasonal_pattern'] == sp, timeseries)) + for sp in FredMeta.seasonal_patterns + ] grouped_timeseries = [ts for sp_ts in grouped_timeseries for ts in sp_ts] return TimeseriesBundle(grouped_timeseries) @@ -71,7 +84,9 @@ def download(self) -> TimeseriesBundle: def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]: bundle = self.load_cache() horizons_map = FredMeta().horizons_map() - return bundle.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])) + return bundle.split( + lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]) + ) if __name__ == '__main__': diff --git a/benchmark/metalearned/resources/fred/evaluator.py b/benchmark/metalearned/resources/fred/evaluator.py index 7942721..69a48af 100644 --- a/benchmark/metalearned/resources/fred/evaluator.py +++ b/benchmark/metalearned/resources/fred/evaluator.py @@ -1,10 +1,8 @@ from collections import OrderedDict -from collections import OrderedDict from dataclasses import dataclass import numpy as np - -from common.evaluator import Evaluator, EvaluationResult +from common.evaluator import EvaluationResult, Evaluator from common.metrics import smape_2 from common.timeseries import TimeseriesBundle from common.utils import round_half_up @@ -19,11 +17,19 @@ def evaluate(self, forecast: TimeseriesBundle) -> 
EvaluationResult: insamples, _ = FredDataset(FredMeta.dataset_path).standard_split() if self.validation: horizons_map = FredMeta().horizons_map() - insamples, _ = insamples.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])) + insamples, _ = insamples.split( + lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]) + ) - grouped_smapes = {sp: np.mean(smape_2(forecast=np.array(self.filter_by_sp(forecast, sp).values()), - target=np.array(self.filter_by_sp(self.test_set, sp).values()))) - for sp in FredMeta.seasonal_patterns} + grouped_smapes = { + sp: np.mean( + smape_2( + forecast=np.array(self.filter_by_sp(forecast, sp).values()), + target=np.array(self.filter_by_sp(self.test_set, sp).values()), + ) + ) + for sp in FredMeta.seasonal_patterns + } grouped_smapes = self.summarize_groups(grouped_smapes) @@ -34,7 +40,9 @@ def summarize_groups(self, scores): weighted_score = {} for sp in ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily']: - weighted_score[sp] = scores[sp] * len(self.filter_by_sp(self.test_set, sp).timeseries) + weighted_score[sp] = scores[sp] * len( + self.filter_by_sp(self.test_set, sp).timeseries + ) scores_summary[sp] = scores[sp] average = np.sum(list(weighted_score.values())) / len(self.test_set.timeseries) @@ -43,7 +51,9 @@ def summarize_groups(self, scores): return scores_summary @staticmethod - def filter_by_sp(bundle: TimeseriesBundle, seasonal_pattern: str) -> TimeseriesBundle: + def filter_by_sp( + bundle: TimeseriesBundle, seasonal_pattern: str + ) -> TimeseriesBundle: return bundle.filter(lambda ts: ts.meta['seasonal_pattern'] == seasonal_pattern) @staticmethod diff --git a/benchmark/metalearned/resources/m3/dataset.py b/benchmark/metalearned/resources/m3/dataset.py index 0904d3c..24e104c 100644 --- a/benchmark/metalearned/resources/m3/dataset.py +++ b/benchmark/metalearned/resources/m3/dataset.py @@ -5,11 +5,17 @@ import numpy as np import pandas as pd -from tqdm import tqdm - from common.settings import RESOURCES_DIR -from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Unknown, Year, Month +from common.timeseries import ( + Month, + Timeseries, + TimeseriesBundle, + TimeseriesLoader, + Unknown, + Year, +) from common.utils import download_url +from tqdm import tqdm @dataclass(frozen=True) @@ -19,9 +25,30 @@ class M3Meta: seasonal_patterns = ['M3Year', 'M3Quart', 'M3Month', 'M3Other'] horizons = [6, 8, 18, 8] frequency = [1, 4, 12, 1] - models = ['NAIVE2', 'SINGLE', 'HOLT', 'DAMPEN', 'WINTER', 'COMB S-H-D', 'B-J auto', 'AutoBox1', 'AutoBox2', - 'AutoBox3', 'ROBUST-Trend', 'ARARMA', 'Auto-ANN', 'Flors-Pearc1', 'Flors-Pearc2', 'PP-Autocast', - 'ForecastPro', 'SMARTFCS', 'THETAsm', 'THETA', 'RBF', 'ForcX'] + models = [ + 'NAIVE2', + 'SINGLE', + 'HOLT', + 'DAMPEN', + 'WINTER', + 'COMB S-H-D', + 'B-J auto', + 'AutoBox1', + 'AutoBox2', + 'AutoBox3', + 'ROBUST-Trend', + 'ARARMA', + 'Auto-ANN', + 'Flors-Pearc1', + 'Flors-Pearc2', + 'PP-Autocast', + 'ForecastPro', + 'SMARTFCS', + 'THETAsm', + 'THETA', + 'RBF', + 'ForcX', + ] def horizons_map(self): return dict(zip(self.seasonal_patterns, self.horizons)) @@ -67,37 +94,48 @@ def download(self) -> TimeseriesBundle: time_unit = Unknown() pass - timeseries.append(Timeseries(id=str(row['Series']), - start_date=starting_date, - time_unit=time_unit, - frequency=frequency, - period=1, - values=row.T[6:row.N + 6].values.astype(np.float32), - meta={'seasonal_pattern': sp} - )) + timeseries.append( + Timeseries( + id=str(row['Series']), + start_date=starting_date, + 
time_unit=time_unit, + frequency=frequency, + period=1, + values=row.T[6 : row.N + 6].values.astype(np.float32), + meta={'seasonal_pattern': sp}, + ) + ) return TimeseriesBundle(timeseries) def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]: bundle = self.load_cache() horizons_map = M3Meta().horizons_map() - return bundle.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])) + return bundle.split( + lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]) + ) class M3Forecasts(TimeseriesLoader): def download(self) -> TimeseriesBundle: raw_file_path = os.path.join(M3Meta.forecasts_path, 'M3Forecast.xls') - download_url('https://forecasters.org/data/m3comp/M3Forecast.xls', raw_file_path) + download_url( + 'https://forecasters.org/data/m3comp/M3Forecast.xls', raw_file_path + ) original_timeseries = M3Dataset(M3Meta().dataset_path).load_cache() horizon_mapping = M3Meta().horizons_map() - training_set, _ = original_timeseries.split(lambda t: t.split(-horizon_mapping[t.meta['seasonal_pattern']])) + training_set, _ = original_timeseries.split( + lambda t: t.split(-horizon_mapping[t.meta['seasonal_pattern']]) + ) training_timeseries = training_set.timeseries models_forecasts = [] for model_name in tqdm(M3Meta.models): forecast = pd.read_excel(raw_file_path, sheet_name=model_name, header=None) for i, row in forecast.iterrows(): - ts = training_timeseries[i].future_values(row.T[2:row[1] + 2].values.astype(np.float32)) + ts = training_timeseries[i].future_values( + row.T[2 : row[1] + 2].values.astype(np.float32) + ) ts.meta = {**ts.meta, 'model': model_name} models_forecasts.append(ts) return TimeseriesBundle(models_forecasts) diff --git a/benchmark/metalearned/resources/m3/evaluator.py b/benchmark/metalearned/resources/m3/evaluator.py index 00cf895..f9d8243 100644 --- a/benchmark/metalearned/resources/m3/evaluator.py +++ b/benchmark/metalearned/resources/m3/evaluator.py @@ -2,8 +2,7 @@ from dataclasses import dataclass import numpy as np - -from common.evaluator import Evaluator, EvaluationResult +from common.evaluator import EvaluationResult, Evaluator from common.metrics import smape_1, smape_2 from common.timeseries import TimeseriesBundle from resources.m3.dataset import M3Meta @@ -22,7 +21,9 @@ def evaluate(self, forecasts: TimeseriesBundle) -> EvaluationResult: evaluation_function = smape_1 if self.smape_1 else smape_2 for sp in M3Meta.seasonal_patterns: - target_sp = self.test_set.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) + target_sp = self.test_set.filter( + lambda ts: ts.meta['seasonal_pattern'] == sp + ) forecast_sp = forecasts.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) target, forecast = target_sp.intersection_by_id(forecast_sp) diff --git a/benchmark/metalearned/resources/m3/metrics.py b/benchmark/metalearned/resources/m3/metrics.py index edc7578..0419a62 100644 --- a/benchmark/metalearned/resources/m3/metrics.py +++ b/benchmark/metalearned/resources/m3/metrics.py @@ -32,7 +32,9 @@ def smape_m3_dataset_horizon(target_dataset, forecast_dataset, horizon): i = 0 for prediction, target in zip(forecast_dataset.values, target_dataset.values): if target_dataset.horizons[i] >= horizon: - smape_cum += smape_m3(prediction[horizon-1], target[-len(prediction)+horizon-1]).sum() + smape_cum += smape_m3( + prediction[horizon - 1], target[-len(prediction) + horizon - 1] + ).sum() smape_n_points += 1 i += 1 @@ -57,9 +59,12 @@ def smape_m3_dataset_horizon_avg(target_dataset, forecast_dataset, horizon): for prediction, target in 
zip(forecast_dataset.values, target_dataset.values): horizon_clamped = min(target_dataset.horizons[i], horizon) if horizon_clamped == target_dataset.horizons[i]: - target_clamped = target[-target_dataset.horizons[i]:] + target_clamped = target[-target_dataset.horizons[i] :] else: - target_clamped = target[-target_dataset.horizons[i]:-target_dataset.horizons[i]+horizon_clamped] + target_clamped = target[ + -target_dataset.horizons[i] : -target_dataset.horizons[i] + + horizon_clamped + ] smape_cum += smape_m3(prediction[:horizon_clamped], target_clamped).sum() smape_n_points += len(target_clamped) i += 1 @@ -107,7 +112,9 @@ def smape_m3_dataset_horizon(target_dataset, forecast_dataset, horizon): i = 0 for prediction, target in zip(forecast_dataset.values, target_dataset.values): if target_dataset.horizons[i] >= horizon: - smape_cum += smape_m3(prediction[horizon-1], target[-len(prediction)+horizon-1]).sum() + smape_cum += smape_m3( + prediction[horizon - 1], target[-len(prediction) + horizon - 1] + ).sum() smape_n_points += 1 i += 1 @@ -132,12 +139,14 @@ def smape_m3_dataset_horizon_avg(target_dataset, forecast_dataset, horizon): for prediction, target in zip(forecast_dataset.values, target_dataset.values): horizon_clamped = min(target_dataset.horizons[i], horizon) if horizon_clamped == target_dataset.horizons[i]: - target_clamped = target[-target_dataset.horizons[i]:] + target_clamped = target[-target_dataset.horizons[i] :] else: - target_clamped = target[-target_dataset.horizons[i]:-target_dataset.horizons[i]+horizon_clamped] + target_clamped = target[ + -target_dataset.horizons[i] : -target_dataset.horizons[i] + + horizon_clamped + ] smape_cum += smape_m3(prediction[:horizon_clamped], target_clamped).sum() smape_n_points += len(target_clamped) i += 1 return smape_cum / smape_n_points - diff --git a/benchmark/metalearned/resources/m4/dataset.py b/benchmark/metalearned/resources/m4/dataset.py index a919b1d..199e82e 100644 --- a/benchmark/metalearned/resources/m4/dataset.py +++ b/benchmark/metalearned/resources/m4/dataset.py @@ -7,11 +7,18 @@ import numpy as np import pandas as pd import patoolib -from tqdm import tqdm - from common.settings import RESOURCES_DIR -from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Year, Month, Day, Hour +from common.timeseries import ( + Day, + Hour, + Month, + Timeseries, + TimeseriesBundle, + TimeseriesLoader, + Year, +) from common.utils import download_url +from tqdm import tqdm @dataclass(frozen=True) @@ -31,29 +38,37 @@ def period_map(self): class M4Dataset(TimeseriesLoader): def download(self) -> TimeseriesBundle: url_template = 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/{}/{}-{}.csv' - m4_info_url = 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/M4-info.csv' + m4_info_url = ( + 'https://github.com/Mcompetitions/M4-methods/raw/master/Dataset/M4-info.csv' + ) m4_info_path = os.path.join(self.path, 'M4info.csv') ssl._create_default_https_context = ssl._create_unverified_context download_url(m4_info_url, m4_info_path) for sp in M4Meta.seasonal_patterns: - training_url = url_template.format("Train", sp, "train") - download_url(training_url, os.path.join(M4Meta.dataset_path, f'{sp}-train.csv')) - test_url = url_template.format("Test", sp, "test") + training_url = url_template.format('Train', sp, 'train') + download_url( + training_url, os.path.join(M4Meta.dataset_path, f'{sp}-train.csv') + ) + test_url = url_template.format('Test', sp, 'test') download_url(test_url, 
os.path.join(M4Meta.dataset_path, f'{sp}-test.csv')) # Download naive2 forecasts, needed for OWA metric m4_naive2_archive = os.path.join(self.path, 'naive2.rar') - download_url('https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-Naive2.rar', - m4_naive2_archive) + download_url( + 'https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-Naive2.rar', + m4_naive2_archive, + ) patoolib.extract_archive(m4_naive2_archive, outdir=self.path) os.remove(m4_naive2_archive) # Download m4 competition winner predictions, for summary testing purposes only m4_winner_archive = os.path.join(self.path, 'submission-118.rar') - download_url('https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-118.rar', - m4_winner_archive) + download_url( + 'https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-118.rar', + m4_winner_archive, + ) patoolib.extract_archive(m4_winner_archive, outdir=self.path) os.remove(m4_winner_archive) @@ -66,12 +81,14 @@ def download(self) -> TimeseriesBundle: 'Monthly': (Month(), 1), 'Weekly': (Day(), 7), 'Daily': (Day(), 1), - 'Hourly': (Hour(), 1) + 'Hourly': (Hour(), 1), } all_timeseries = [] for sp in M4Meta.seasonal_patterns: - training_set = pd.read_csv(os.path.join(M4Meta.dataset_path, f'{sp}-train.csv')) + training_set = pd.read_csv( + os.path.join(M4Meta.dataset_path, f'{sp}-train.csv') + ) test_set = pd.read_csv(os.path.join(M4Meta.dataset_path, f'{sp}-test.csv')) time_unit, frequency = time_units_mapping[sp] @@ -89,23 +106,28 @@ def download(self) -> TimeseriesBundle: parsed_date = None for parsing_format in parsing_formats: try: - parsed_date = datetime.strptime(timeseries_info.StartingDate, parsing_format) + parsed_date = datetime.strptime( + timeseries_info.StartingDate, parsing_format + ) except Exception: continue if parsed_date is None: - raise ValueError(f'Could not parse {timeseries_info.StartingDate} for {timeseries_id}') + raise ValueError( + f'Could not parse {timeseries_info.StartingDate} for {timeseries_id}' + ) # all M4 years are in the 1900s or 1800s if parsed_date.year > 2000: parsed_date = parsed_date.replace(year=parsed_date.year - 100) - timeseries = Timeseries(id=timeseries_id, - start_date=parsed_date, - time_unit=time_unit, - frequency=frequency, - period=int(timeseries_info.Frequency), - values=np.concatenate([training_values, test_values]), - meta={'seasonal_pattern': sp} - ) + timeseries = Timeseries( + id=timeseries_id, + start_date=parsed_date, + time_unit=time_unit, + frequency=frequency, + period=int(timeseries_info.Frequency), + values=np.concatenate([training_values, test_values]), + meta={'seasonal_pattern': sp}, + ) all_timeseries.append(timeseries) return TimeseriesBundle(all_timeseries) @@ -113,7 +135,9 @@ def download(self) -> TimeseriesBundle: def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]: bundle = self.load_cache() horizons_map = M4Meta().horizons_map() - return bundle.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])) + return bundle.split( + lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]) + ) @staticmethod def filter(bundle: TimeseriesBundle, seasonal_pattern: str) -> TimeseriesBundle: diff --git a/benchmark/metalearned/resources/m4/evaluator.py b/benchmark/metalearned/resources/m4/evaluator.py index 194f65f..7225056 100644 --- a/benchmark/metalearned/resources/m4/evaluator.py +++ b/benchmark/metalearned/resources/m4/evaluator.py @@ -4,8 +4,7 @@ import numpy as 
np import pandas as pd - -from common.evaluator import Evaluator, EvaluationResult +from common.evaluator import EvaluationResult, Evaluator from common.metrics import mase, smape_2 from common.timeseries import TimeseriesBundle from common.utils import clean_nans, round_half_up @@ -21,11 +20,19 @@ def evaluate(self, forecast: TimeseriesBundle) -> EvaluationResult: insamples, _ = M4Dataset(M4Meta.dataset_path).standard_split() if self.validation: horizons_map = M4Meta().horizons_map() - insamples, _ = insamples.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])) - - grouped_smapes = {sp: np.mean(smape_2(forecast=np.array(M4Dataset.filter(forecast, sp).values()), - target=np.array(M4Dataset.filter(self.test_set, sp).values()))) - for sp in M4Meta.seasonal_patterns} + insamples, _ = insamples.split( + lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]) + ) + + grouped_smapes = { + sp: np.mean( + smape_2( + forecast=np.array(M4Dataset.filter(forecast, sp).values()), + target=np.array(M4Dataset.filter(self.test_set, sp).values()), + ) + ) + for sp in M4Meta.seasonal_patterns + } grouped_smapes = self.summarize_groups(grouped_smapes) @@ -33,7 +40,8 @@ def evaluate(self, forecast: TimeseriesBundle) -> EvaluationResult: grouped_owa = OrderedDict() if not self.validation: naive2_forecasts = pd.read_csv( - os.path.join(M4Meta.dataset_path, 'submission-Naive2.csv')) + os.path.join(M4Meta.dataset_path, 'submission-Naive2.csv') + ) naive2_forecasts.set_index(keys='id', inplace=True) model_mases = {} @@ -41,32 +49,56 @@ def evaluate(self, forecast: TimeseriesBundle) -> EvaluationResult: naive2_mases = {} for sp in M4Meta.seasonal_patterns: model_forecasts = M4Dataset.filter(forecast, sp) - naive2_forecast = clean_nans(naive2_forecasts.loc[model_forecasts.ids()].values) + naive2_forecast = clean_nans( + naive2_forecasts.loc[model_forecasts.ids()].values + ) model_forecast_values = model_forecasts.values() - target = self.test_set.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) + target = self.test_set.filter( + lambda ts: ts.meta['seasonal_pattern'] == sp + ) target_values = np.array(target.values()) # all timeseries within group have same frequency period = target.period()[0] insample = M4Dataset.filter(insamples, sp).values() - model_mases[sp] = np.mean([mase(forecast=model_forecast_values[i], - insample=insample[i], - outsample=target_values[i], - frequency=period) for i in range(len(model_forecast_values))]) - naive2_mases[sp] = np.mean([mase(forecast=naive2_forecast[i], - insample=insample[i], - outsample=target_values[i], - frequency=period) for i in range(len(model_forecast_values))]) + model_mases[sp] = np.mean( + [ + mase( + forecast=model_forecast_values[i], + insample=insample[i], + outsample=target_values[i], + frequency=period, + ) + for i in range(len(model_forecast_values)) + ] + ) + naive2_mases[sp] = np.mean( + [ + mase( + forecast=naive2_forecast[i], + insample=insample[i], + outsample=target_values[i], + frequency=period, + ) + for i in range(len(model_forecast_values)) + ] + ) naive2_smapes[sp] = np.mean(smape_2(naive2_forecast, target_values)) grouped_model_mases = self.summarize_groups(model_mases) grouped_naive2_smapes = self.summarize_groups(naive2_smapes) grouped_naive2_mases = self.summarize_groups(naive2_mases) for k in grouped_model_mases.keys(): - grouped_owa[k] = round_half_up((grouped_model_mases[k] / grouped_naive2_mases[k] + - grouped_smapes[k] / grouped_naive2_smapes[k]) / 2, 3) + grouped_owa[k] = round_half_up( + ( + 
grouped_model_mases[k] / grouped_naive2_mases[k] + + grouped_smapes[k] / grouped_naive2_smapes[k] + ) + / 2, + 3, + ) return self.round_values(grouped_owa) else: return self.round_values(grouped_smapes) @@ -76,7 +108,9 @@ def summarize_groups(self, scores): weighted_score = {} for sp in ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly']: - weighted_score[sp] = scores[sp] * len(M4Dataset.filter(self.test_set, sp).timeseries) + weighted_score[sp] = scores[sp] * len( + M4Dataset.filter(self.test_set, sp).timeseries + ) scores_summary[sp] = scores[sp] others_score = 0 diff --git a/benchmark/metalearned/resources/tourism/dataset.py b/benchmark/metalearned/resources/tourism/dataset.py index 14864a9..f96fc00 100644 --- a/benchmark/metalearned/resources/tourism/dataset.py +++ b/benchmark/metalearned/resources/tourism/dataset.py @@ -6,9 +6,14 @@ import numpy as np import pandas as pd import patoolib - from common.settings import RESOURCES_DIR -from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Year, Month +from common.timeseries import ( + Month, + Timeseries, + TimeseriesBundle, + TimeseriesLoader, + Year, +) from common.utils import download_url @@ -29,74 +34,106 @@ def period_map(self): class TourismDataset(TimeseriesLoader): def download(self) -> TimeseriesBundle: archive_file = os.path.join(self.path, 'm3.zip') - download_url('https://robjhyndman.com/data/27-3-Athanasopoulos1.zip', archive_file) + download_url( + 'https://robjhyndman.com/data/27-3-Athanasopoulos1.zip', archive_file + ) patoolib.extract_archive(archive_file, outdir=self.path) timeseries = [] # Yearly - insample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'yearly_in.csv'), header=0) - outsample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'yearly_oos.csv'), header=0) + insample = pd.read_csv( + os.path.join(TourismMeta.dataset_path, 'yearly_in.csv'), header=0 + ) + outsample = pd.read_csv( + os.path.join(TourismMeta.dataset_path, 'yearly_oos.csv'), header=0 + ) outsampleT = outsample.T for timeseries_id, ts_row in insample.T.iterrows(): outsample_row = outsampleT.loc[timeseries_id].values start_date = datetime.strptime(str(int(ts_row[[1]])), '%Y') - insample_values = ts_row.values[2:2 + int(ts_row[[0]])] - outsample_values = outsample_row[2:2 + int(outsample_row[0])] + insample_values = ts_row.values[2 : 2 + int(ts_row[[0]])] + outsample_values = outsample_row[2 : 2 + int(outsample_row[0])] values = np.concatenate([insample_values, outsample_values]) - timeseries.append(Timeseries(id=timeseries_id, - start_date=start_date, - time_unit=Year(), - frequency=1, - period=1, - values=values, - meta={'seasonal_pattern': 'Yearly'})) + timeseries.append( + Timeseries( + id=timeseries_id, + start_date=start_date, + time_unit=Year(), + frequency=1, + period=1, + values=values, + meta={'seasonal_pattern': 'Yearly'}, + ) + ) # Quarterly - insample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'quarterly_in.csv'), header=0) - outsample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'quarterly_oos.csv'), header=0) + insample = pd.read_csv( + os.path.join(TourismMeta.dataset_path, 'quarterly_in.csv'), header=0 + ) + outsample = pd.read_csv( + os.path.join(TourismMeta.dataset_path, 'quarterly_oos.csv'), header=0 + ) outsampleT = outsample.T for timeseries_id, ts_row in insample.T.iterrows(): outsample_row = outsampleT.loc[timeseries_id].values - start_date = datetime.strptime(f'{str(int(ts_row[[1]]))}-{str((int(ts_row[[2]]) - 1) * 3)}', '%Y-%M') - insample_values = 
ts_row.values[3:3 + int(ts_row[[0]])] - outsample_values = outsample_row[3:3 + int(outsample_row[0])] + start_date = datetime.strptime( + f'{str(int(ts_row[[1]]))}-{str((int(ts_row[[2]]) - 1) * 3)}', '%Y-%M' + ) + insample_values = ts_row.values[3 : 3 + int(ts_row[[0]])] + outsample_values = outsample_row[3 : 3 + int(outsample_row[0])] values = np.concatenate([insample_values, outsample_values]) - timeseries.append(Timeseries(id=timeseries_id, - start_date=start_date, - time_unit=Month(), - frequency=3, - period=1, - values=values, - meta={'seasonal_pattern': 'Quarterly'})) + timeseries.append( + Timeseries( + id=timeseries_id, + start_date=start_date, + time_unit=Month(), + frequency=3, + period=1, + values=values, + meta={'seasonal_pattern': 'Quarterly'}, + ) + ) # Monthly - insample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'monthly_in.csv'), header=0) - outsample = pd.read_csv(os.path.join(TourismMeta.dataset_path, f'monthly_oos.csv'), header=0) + insample = pd.read_csv( + os.path.join(TourismMeta.dataset_path, 'monthly_in.csv'), header=0 + ) + outsample = pd.read_csv( + os.path.join(TourismMeta.dataset_path, 'monthly_oos.csv'), header=0 + ) outsampleT = outsample.T for timeseries_id, ts_row in insample.T.iterrows(): outsample_row = outsampleT.loc[timeseries_id].values - start_date = datetime.strptime(f'{str(int(ts_row[[1]]))}-{str(int(ts_row[[2]]))}', '%Y-%M') - insample_values = ts_row.values[3:3 + int(ts_row[[0]])] - outsample_values = outsample_row[3:3 + int(outsample_row[0])] + start_date = datetime.strptime( + f'{str(int(ts_row[[1]]))}-{str(int(ts_row[[2]]))}', '%Y-%M' + ) + insample_values = ts_row.values[3 : 3 + int(ts_row[[0]])] + outsample_values = outsample_row[3 : 3 + int(outsample_row[0])] values = np.concatenate([insample_values, outsample_values]) - timeseries.append(Timeseries(id=timeseries_id, - start_date=start_date, - time_unit=Month(), - frequency=1, - period=1, - values=values, - meta={'seasonal_pattern': 'Monthly'})) + timeseries.append( + Timeseries( + id=timeseries_id, + start_date=start_date, + time_unit=Month(), + frequency=1, + period=1, + values=values, + meta={'seasonal_pattern': 'Monthly'}, + ) + ) return TimeseriesBundle(timeseries) def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]: bundle = self.load_cache() horizons_map = TourismMeta().horizons_map() - return bundle.split(lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']])) + return bundle.split( + lambda ts: ts.split(-horizons_map[ts.meta['seasonal_pattern']]) + ) if __name__ == '__main__': diff --git a/benchmark/metalearned/resources/tourism/evaluator.py b/benchmark/metalearned/resources/tourism/evaluator.py index 1f0df89..0763cbb 100644 --- a/benchmark/metalearned/resources/tourism/evaluator.py +++ b/benchmark/metalearned/resources/tourism/evaluator.py @@ -3,8 +3,7 @@ from typing import Callable import numpy as np - -from common.evaluator import Evaluator, EvaluationResult +from common.evaluator import EvaluationResult, Evaluator from common.metrics import mape from common.timeseries import TimeseriesBundle from resources.tourism.dataset import TourismMeta @@ -22,8 +21,12 @@ def evaluate(self, forecasts: TimeseriesBundle) -> EvaluationResult: offset = 0 for sp in TourismMeta.seasonal_patterns: - target_for_sp = self.test_set.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) - forecast_for_sp = forecasts.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) + target_for_sp = self.test_set.filter( + lambda ts: ts.meta['seasonal_pattern'] == sp + ) + 
forecast_for_sp = forecasts.filter( + lambda ts: ts.meta['seasonal_pattern'] == sp + ) target = np.array(target_for_sp.values()) forecast = np.array(forecast_for_sp.values()) @@ -39,5 +42,7 @@ def evaluate(self, forecasts: TimeseriesBundle) -> EvaluationResult: results[sp] = round(float(np.mean(score)), self.precision) offset += len(target) - results['Average'] = round(cumulative_metrics / cumulative_points, self.precision) + results['Average'] = round( + cumulative_metrics / cumulative_points, self.precision + ) return results diff --git a/benchmark/metalearned/resources/traffic/dataset.py b/benchmark/metalearned/resources/traffic/dataset.py index 66709ab..1c4ad5b 100644 --- a/benchmark/metalearned/resources/traffic/dataset.py +++ b/benchmark/metalearned/resources/traffic/dataset.py @@ -5,12 +5,10 @@ import numpy as np import patoolib -from tqdm import tqdm - from common.settings import RESOURCES_DIR -from common.timeseries import Timeseries, TimeseriesBundle, TimeseriesLoader, Hour +from common.timeseries import Hour, Timeseries, TimeseriesBundle, TimeseriesLoader from common.utils import download_url - +from tqdm import tqdm """ Hourly aggregated dataset from https://archive.ics.uci.edu/ml/datasets/PEMS-SF @@ -19,6 +17,8 @@ Dataset was also compared with the one built by the TRMF paper's author: https://github.com/rofuyu/exp-trmf-nips16/blob/master/python/exp-scripts/datasets/download-data.sh """ + + @dataclass(frozen=True) class TrafficMeta: dataset_path = os.path.join(RESOURCES_DIR, 'traffic') @@ -38,8 +38,10 @@ def download(self) -> TimeseriesBundle: train_raw_file = os.path.join(self.path, 'PEMS_train') test_raw_file = os.path.join(self.path, 'PEMS_test') perm_raw_file = os.path.join(self.path, 'randperm') - download_url('https://archive.ics.uci.edu/ml/machine-learning-databases/00204/PEMS-SF.zip', - archive_file) + download_url( + 'https://archive.ics.uci.edu/ml/machine-learning-databases/00204/PEMS-SF.zip', + archive_file, + ) patoolib.extract_archive(archive_file, outdir=self.path) with open(train_raw_file, 'r') as f: train_raw_data = f.readlines() @@ -47,7 +49,9 @@ def download(self) -> TimeseriesBundle: test_raw_data = f.readlines() with open(perm_raw_file, 'r') as f: permutations = f.readlines() - permutations = np.array(permutations[0].rstrip()[1:-1].split(' ')).astype(np.int) + permutations = np.array(permutations[0].rstrip()[1:-1].split(' ')).astype( + np.int + ) raw_data = train_raw_data + test_raw_data @@ -77,7 +81,9 @@ def download(self) -> TimeseriesBundle: # - Mar. 8, 2009 - Anomaly # ------------------------------------------ # Thus 455 - 15 = 440 days from 2008-01-01 to 2008-03-30 (incl.) - start_date = datetime.strptime('2008-01-02', '%Y-%m-%d') # 2008-01-01 is a holiday + start_date = datetime.strptime( + '2008-01-02', '%Y-%m-%d' + ) # 2008-01-01 is a holiday current_date = start_date excluded_dates = [ datetime.strptime('2008-01-21', '%Y-%m-%d'), @@ -110,18 +116,27 @@ def download(self) -> TimeseriesBundle: values = np.concatenate([values, daily], axis=1) else: # should never be in the first 24*7 records. # fill gaps with same day of previous week. 
- values = np.concatenate([values, values[:, -24 * 7 * 6:-24 * 6 * 6]], axis=1) + values = np.concatenate( + [values, values[:, -24 * 7 * 6 : -24 * 6 * 6]], axis=1 + ) current_date += timedelta(days=1) # aggregate 10 minutes events to hourly - hourly = np.array([list(map(np.mean, zip(*(iter(lane),) * 6))) for lane in tqdm(values)]) - timeseries = [Timeseries(id=str(i), - start_date=start_date, - time_unit=Hour(), - frequency=1, - period=24 * 7, - values=values, - meta={}) for i, values in enumerate(hourly)] + hourly = np.array( + [list(map(np.mean, zip(*(iter(lane),) * 6))) for lane in tqdm(values)] + ) + timeseries = [ + Timeseries( + id=str(i), + start_date=start_date, + time_unit=Hour(), + frequency=1, + period=24 * 7, + values=values, + meta={}, + ) + for i, values in enumerate(hourly) + ] return TimeseriesBundle(timeseries=timeseries) def standard_split(self) -> Tuple[TimeseriesBundle, TimeseriesBundle]: diff --git a/benchmark/metalearned/resources/traffic/evaluator.py b/benchmark/metalearned/resources/traffic/evaluator.py index 08e5ea8..f936f84 100644 --- a/benchmark/metalearned/resources/traffic/evaluator.py +++ b/benchmark/metalearned/resources/traffic/evaluator.py @@ -2,8 +2,7 @@ from typing import Callable import numpy as np - -from common.evaluator import Evaluator, EvaluationResult +from common.evaluator import EvaluationResult, Evaluator from common.metrics import nd from common.timeseries import TimeseriesBundle from common.utils import round_half_up @@ -15,6 +14,11 @@ class TrafficEvaluator(Evaluator): precision: int = 2 def evaluate(self, forecasts: TimeseriesBundle) -> EvaluationResult: - return {'metric': round_half_up(self.metric_fn(np.array(forecasts.values()), - np.array(self.test_set.values())), - self.precision)} + return { + 'metric': round_half_up( + self.metric_fn( + np.array(forecasts.values()), np.array(self.test_set.values()) + ), + self.precision, + ) + } diff --git a/benchmark/run.py b/benchmark/run.py index 9212084..9db5158 100644 --- a/benchmark/run.py +++ b/benchmark/run.py @@ -1,60 +1,89 @@ -import numpy as np +import argparse import random -from exp.exp_resolver import resolve_experiment +import sys + +import numpy as np import torch -import argparse -from utils.arg_resolver import resolve_transformer_args, _model_is_transformer, setting_string, resolve_args -import sys -sys.path.append("metalearned") +from exp.exp_resolver import resolve_experiment +from utils.arg_resolver import ( + _model_is_transformer, + resolve_args, + resolve_transformer_args, + setting_string, +) -def parse(): +sys.path.append('metalearned') + +def parse(): parser = argparse.ArgumentParser( - description='Comparing performance of ForecastPFN to other Time Series Benchmarks') + description='Comparing performance of ForecastPFN to other Time Series Benchmarks' + ) parser.add_argument('--is_training', type=int, default=1, help='status') parser.add_argument('--use_gpu', type=bool, default=True, help='status') parser.add_argument('--itr', type=int, default=1, help='status') # model settings - parser.add_argument('--model', type=str, default='ForecastPFN', - help='model name, options: [ForecastPFN, FEDformer, Autoformer, Informer, Transformer, Arima, Prophet]') + parser.add_argument( + '--model', + type=str, + default='ForecastPFN', + help='model name, options: [ForecastPFN, FEDformer, Autoformer, Informer, Transformer, Arima, Prophet]', + ) # forecasting task - parser.add_argument('--seq_len', type=int, default=96, - help='input sequence length') - parser.add_argument('--label_len', 
type=int, - default=48, help='start token length') - parser.add_argument('--pred_len', type=int, default=96, - help='prediction sequence length') - - parser.add_argument('--time_budget', type=int, - help='amount of time budget to train the model') - parser.add_argument('--train_budget', type=int, - help='length of training sequence') + parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') + parser.add_argument('--label_len', type=int, default=48, help='start token length') + parser.add_argument( + '--pred_len', type=int, default=96, help='prediction sequence length' + ) + + parser.add_argument( + '--time_budget', type=int, help='amount of time budget to train the model' + ) + parser.add_argument('--train_budget', type=int, help='length of training sequence') # data loader - parser.add_argument('--data', type=str, - default='ETTh1', help='dataset type') - parser.add_argument('--root_path', type=str, - default='./dataset/ETT/', help='root path of the data file') - parser.add_argument('--data_path', type=str, - default='ETTh1.csv', help='data file') - parser.add_argument('--target', type=str, - default='OT', help='name of target column') - parser.add_argument('--scale', type=bool, default=True, - help='scale the time series with sklearn.StandardScale()') + parser.add_argument('--data', type=str, default='ETTh1', help='dataset type') + parser.add_argument( + '--root_path', + type=str, + default='./dataset/ETT/', + help='root path of the data file', + ) + parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') + parser.add_argument( + '--target', type=str, default='OT', help='name of target column' + ) + parser.add_argument( + '--scale', + type=bool, + default=True, + help='scale the time series with sklearn.StandardScale()', + ) # ForecastPFN - parser.add_argument('--model_path', type=str, default='s3://realityengines.datasets/forecasting/pretrained/gurnoor/models/20230202-025828/ckpts', - help='encoder input size') - parser.add_argument('--scaler', type=str, default='standard', - help='scale the test series with sklearn.StandardScale()') + parser.add_argument( + '--model_path', + type=str, + default='s3://realityengines.datasets/forecasting/pretrained/gurnoor/models/20230202-025828/ckpts', + help='encoder input size', + ) + parser.add_argument( + '--scaler', + type=str, + default='standard', + help='scale the test series with sklearn.StandardScale()', + ) # Metalearn - parser.add_argument('--metalearn_freq', type=str, - help='which type of model should be used for the Metalearn model. Typically M, W, or D.') + parser.add_argument( + '--metalearn_freq', + type=str, + help='which type of model should be used for the Metalearn model. 
Typically M, W, or D.', + ) return parser @@ -71,7 +100,7 @@ def main(): args = resolve_args(args) if _model_is_transformer(args.model): args = resolve_transformer_args(args) - + if args.model != 'ForecastPFN': args.model_name = None else: @@ -85,7 +114,6 @@ def main(): args.device_ids = [int(id_) for id_ in device_ids] args.gpu = args.device_ids[0] - print('Args in experiment:') print(args) @@ -96,10 +124,14 @@ def main(): # setting record of experiments setting = setting_string(args, ii) - print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)) + print( + '>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting) + ) exp.train(setting) - print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) + print( + '>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting) + ) exp.test(setting) torch.cuda.empty_cache() @@ -107,12 +139,11 @@ def main(): else: ii = 0 setting = setting_string(args, ii) - print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) exp.test(setting, test=1) torch.cuda.empty_cache() -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/benchmark/transformer_models/model_resolver.py b/benchmark/transformer_models/model_resolver.py index bf5e9a2..4a6f4d3 100644 --- a/benchmark/transformer_models/model_resolver.py +++ b/benchmark/transformer_models/model_resolver.py @@ -1,17 +1,15 @@ -import pandas as pd -import numpy as np -import prophet import pmdarima +import prophet -from transformer_models.models import FEDformer, Autoformer, Informer, Transformer +from transformer_models.models import Autoformer, FEDformer, Informer, Transformer -class Arima(): +class Arima: def __init__(self) -> None: self.model = pmdarima.auto_arima -class Prophet(): +class Prophet: def __init__(self) -> None: self.model = prophet.Prophet() diff --git a/benchmark/transformer_models/models/Autoformer.py b/benchmark/transformer_models/models/Autoformer.py index 352ef14..2cc4f94 100644 --- a/benchmark/transformer_models/models/Autoformer.py +++ b/benchmark/transformer_models/models/Autoformer.py @@ -5,10 +5,17 @@ import torch import torch.nn as nn -import torch.nn.functional as F -from layers.Embed import DataEmbedding, DataEmbedding_wo_pos + from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer -from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp +from layers.Autoformer_EncDec import ( + Decoder, + DecoderLayer, + Encoder, + EncoderLayer, + my_Layernorm, + series_decomp, +) +from layers.Embed import DataEmbedding_wo_pos class Model(nn.Module): @@ -16,6 +23,7 @@ class Model(nn.Module): Autoformer is the first method to achieve the series-wise connection, with inherent O(LlogL) complexity """ + def __init__(self, configs): super(Model, self).__init__() self.seq_len = configs.seq_len @@ -30,40 +38,69 @@ def __init__(self, configs): # Embedding # The series-wise connection inherently contains the sequential information. # Thus, we can discard the position embedding of transformers. 
- self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, - configs.dropout) - self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq, - configs.dropout) + self.enc_embedding = DataEmbedding_wo_pos( + configs.enc_in, + configs.d_model, + configs.embed, + configs.freq, + configs.dropout, + ) + self.dec_embedding = DataEmbedding_wo_pos( + configs.dec_in, + configs.d_model, + configs.embed, + configs.freq, + configs.dropout, + ) # Encoder self.encoder = Encoder( [ EncoderLayer( AutoCorrelationLayer( - AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, - output_attention=configs.output_attention), - configs.d_model, configs.n_heads), + AutoCorrelation( + False, + configs.factor, + attention_dropout=configs.dropout, + output_attention=configs.output_attention, + ), + configs.d_model, + configs.n_heads, + ), configs.d_model, configs.d_ff, moving_avg=configs.moving_avg, dropout=configs.dropout, - activation=configs.activation - ) for l in range(configs.e_layers) + activation=configs.activation, + ) + for l in range(configs.e_layers) ], - norm_layer=my_Layernorm(configs.d_model) + norm_layer=my_Layernorm(configs.d_model), ) # Decoder self.decoder = Decoder( [ DecoderLayer( AutoCorrelationLayer( - AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout, - output_attention=False), - configs.d_model, configs.n_heads), + AutoCorrelation( + True, + configs.factor, + attention_dropout=configs.dropout, + output_attention=False, + ), + configs.d_model, + configs.n_heads, + ), AutoCorrelationLayer( - AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, - output_attention=False), - configs.d_model, configs.n_heads), + AutoCorrelation( + False, + configs.factor, + attention_dropout=configs.dropout, + output_attention=False, + ), + configs.d_model, + configs.n_heads, + ), configs.d_model, configs.c_out, configs.d_ff, @@ -74,29 +111,46 @@ def __init__(self, configs): for l in range(configs.d_layers) ], norm_layer=my_Layernorm(configs.d_model), - projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + projection=nn.Linear(configs.d_model, configs.c_out, bias=True), ) - def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, - enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): + def forward( + self, + x_enc, + x_mark_enc, + x_dec, + x_mark_dec, + enc_self_mask=None, + dec_self_mask=None, + dec_enc_mask=None, + ): # decomp init mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1) - zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device) + zeros = torch.zeros( + [x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device + ) seasonal_init, trend_init = self.decomp(x_enc) # decoder input - trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1) - seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1) + trend_init = torch.cat([trend_init[:, -self.label_len :, :], mean], dim=1) + seasonal_init = torch.cat( + [seasonal_init[:, -self.label_len :, :], zeros], dim=1 + ) # enc enc_out = self.enc_embedding(x_enc, x_mark_enc) enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) # dec dec_out = self.dec_embedding(seasonal_init, x_mark_dec) - seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask, - trend=trend_init) + seasonal_part, trend_part = self.decoder( + dec_out, + enc_out, + 
x_mask=dec_self_mask, + cross_mask=dec_enc_mask, + trend=trend_init, + ) # final dec_out = trend_part + seasonal_part if self.output_attention: - return dec_out[:, -self.pred_len:, :], attns + return dec_out[:, -self.pred_len :, :], attns else: - return dec_out[:, -self.pred_len:, :] \ No newline at end of file + return dec_out[:, -self.pred_len :, :] diff --git a/benchmark/transformer_models/models/FEDformer.py b/benchmark/transformer_models/models/FEDformer.py index 21c11ce..baf281d 100644 --- a/benchmark/transformer_models/models/FEDformer.py +++ b/benchmark/transformer_models/models/FEDformer.py @@ -1,23 +1,29 @@ import torch import torch.nn as nn import torch.nn.functional as F -from layers.Embed import DataEmbedding, DataEmbedding_wo_pos -from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer + +from layers.AutoCorrelation import AutoCorrelationLayer +from layers.Autoformer_EncDec import ( + Decoder, + DecoderLayer, + Encoder, + EncoderLayer, + my_Layernorm, + series_decomp, + series_decomp_multi, +) +from layers.Embed import DataEmbedding_wo_pos from layers.FourierCorrelation import FourierBlock, FourierCrossAttention from layers.MultiWaveletCorrelation import MultiWaveletCross, MultiWaveletTransform -from layers.SelfAttention_Family import FullAttention, ProbAttention -from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp, series_decomp_multi -import math -import numpy as np - -device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') class Model(nn.Module): """ FEDformer performs the attention mechanism on frequency domain and achieved O(N) complexity """ + def __init__(self, configs): super(Model, self).__init__() self.version = configs.version @@ -38,70 +44,94 @@ def __init__(self, configs): # Embedding # The series-wise connection inherently contains the sequential information. # Thus, we can discard the position embedding of transformers. 
- self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, - configs.dropout) - self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq, - configs.dropout) + self.enc_embedding = DataEmbedding_wo_pos( + configs.enc_in, + configs.d_model, + configs.embed, + configs.freq, + configs.dropout, + ) + self.dec_embedding = DataEmbedding_wo_pos( + configs.dec_in, + configs.d_model, + configs.embed, + configs.freq, + configs.dropout, + ) if configs.version == 'Wavelets': - encoder_self_att = MultiWaveletTransform(ich=configs.d_model, L=configs.L, base=configs.base) - decoder_self_att = MultiWaveletTransform(ich=configs.d_model, L=configs.L, base=configs.base) - decoder_cross_att = MultiWaveletCross(in_channels=configs.d_model, - out_channels=configs.d_model, - seq_len_q=self.seq_len // 2 + self.pred_len, - seq_len_kv=self.seq_len, - modes=configs.modes, - ich=configs.d_model, - base=configs.base, - activation=configs.cross_activation) + encoder_self_att = MultiWaveletTransform( + ich=configs.d_model, L=configs.L, base=configs.base + ) + decoder_self_att = MultiWaveletTransform( + ich=configs.d_model, L=configs.L, base=configs.base + ) + decoder_cross_att = MultiWaveletCross( + in_channels=configs.d_model, + out_channels=configs.d_model, + seq_len_q=self.seq_len // 2 + self.pred_len, + seq_len_kv=self.seq_len, + modes=configs.modes, + ich=configs.d_model, + base=configs.base, + activation=configs.cross_activation, + ) else: - encoder_self_att = FourierBlock(in_channels=configs.d_model, - out_channels=configs.d_model, - seq_len=self.seq_len, - modes=configs.modes, - mode_select_method=configs.mode_select) - decoder_self_att = FourierBlock(in_channels=configs.d_model, - out_channels=configs.d_model, - seq_len=self.seq_len//2+self.pred_len, - modes=configs.modes, - mode_select_method=configs.mode_select) - decoder_cross_att = FourierCrossAttention(in_channels=configs.d_model, - out_channels=configs.d_model, - seq_len_q=self.seq_len//2+self.pred_len, - seq_len_kv=self.seq_len, - modes=configs.modes, - mode_select_method=configs.mode_select) + encoder_self_att = FourierBlock( + in_channels=configs.d_model, + out_channels=configs.d_model, + seq_len=self.seq_len, + modes=configs.modes, + mode_select_method=configs.mode_select, + ) + decoder_self_att = FourierBlock( + in_channels=configs.d_model, + out_channels=configs.d_model, + seq_len=self.seq_len // 2 + self.pred_len, + modes=configs.modes, + mode_select_method=configs.mode_select, + ) + decoder_cross_att = FourierCrossAttention( + in_channels=configs.d_model, + out_channels=configs.d_model, + seq_len_q=self.seq_len // 2 + self.pred_len, + seq_len_kv=self.seq_len, + modes=configs.modes, + mode_select_method=configs.mode_select, + ) # Encoder - enc_modes = int(min(configs.modes, configs.seq_len//2)) - dec_modes = int(min(configs.modes, (configs.seq_len//2+configs.pred_len)//2)) + enc_modes = int(min(configs.modes, configs.seq_len // 2)) + dec_modes = int( + min(configs.modes, (configs.seq_len // 2 + configs.pred_len) // 2) + ) print('enc_modes: {}, dec_modes: {}'.format(enc_modes, dec_modes)) self.encoder = Encoder( [ EncoderLayer( AutoCorrelationLayer( - encoder_self_att, - configs.d_model, configs.n_heads), - + encoder_self_att, configs.d_model, configs.n_heads + ), configs.d_model, configs.d_ff, moving_avg=configs.moving_avg, dropout=configs.dropout, - activation=configs.activation - ) for l in range(configs.e_layers) + activation=configs.activation, + ) + for 
l in range(configs.e_layers) ], - norm_layer=my_Layernorm(configs.d_model) + norm_layer=my_Layernorm(configs.d_model), ) # Decoder self.decoder = Decoder( [ DecoderLayer( AutoCorrelationLayer( - decoder_self_att, - configs.d_model, configs.n_heads), + decoder_self_att, configs.d_model, configs.n_heads + ), AutoCorrelationLayer( - decoder_cross_att, - configs.d_model, configs.n_heads), + decoder_cross_att, configs.d_model, configs.n_heads + ), configs.d_model, configs.c_out, configs.d_ff, @@ -112,35 +142,53 @@ def __init__(self, configs): for l in range(configs.d_layers) ], norm_layer=my_Layernorm(configs.d_model), - projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + projection=nn.Linear(configs.d_model, configs.c_out, bias=True), ) - def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, - enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): + def forward( + self, + x_enc, + x_mark_enc, + x_dec, + x_mark_dec, + enc_self_mask=None, + dec_self_mask=None, + dec_enc_mask=None, + ): # decomp init mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1) - zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]]).to(device) # cuda() + torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]]).to( + device + ) # cuda() seasonal_init, trend_init = self.decomp(x_enc) # decoder input - trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1) - seasonal_init = F.pad(seasonal_init[:, -self.label_len:, :], (0, 0, 0, self.pred_len)) + trend_init = torch.cat([trend_init[:, -self.label_len :, :], mean], dim=1) + seasonal_init = F.pad( + seasonal_init[:, -self.label_len :, :], (0, 0, 0, self.pred_len) + ) # enc enc_out = self.enc_embedding(x_enc, x_mark_enc) enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) # dec dec_out = self.dec_embedding(seasonal_init, x_mark_dec) - seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask, - trend=trend_init) + seasonal_part, trend_part = self.decoder( + dec_out, + enc_out, + x_mask=dec_self_mask, + cross_mask=dec_enc_mask, + trend=trend_init, + ) # final dec_out = trend_part + seasonal_part if self.output_attention: - return dec_out[:, -self.pred_len:, :], attns + return dec_out[:, -self.pred_len :, :], attns else: - return dec_out[:, -self.pred_len:, :] # [B, L, D] + return dec_out[:, -self.pred_len :, :] # [B, L, D] if __name__ == '__main__': + class Configs(object): ab = 0 modes = 32 @@ -177,7 +225,7 @@ class Configs(object): enc = torch.randn([3, configs.seq_len, 7]) enc_mark = torch.randn([3, configs.seq_len, 4]) - dec = torch.randn([3, configs.seq_len//2+configs.pred_len, 7]) - dec_mark = torch.randn([3, configs.seq_len//2+configs.pred_len, 4]) + dec = torch.randn([3, configs.seq_len // 2 + configs.pred_len, 7]) + dec_mark = torch.randn([3, configs.seq_len // 2 + configs.pred_len, 4]) out = model.forward(enc, enc_mark, dec, dec_mark) print(out) diff --git a/benchmark/transformer_models/models/Informer.py b/benchmark/transformer_models/models/Informer.py index 9dfecfe..5d19385 100644 --- a/benchmark/transformer_models/models/Informer.py +++ b/benchmark/transformer_models/models/Informer.py @@ -1,59 +1,93 @@ import torch import torch.nn as nn -import torch.nn.functional as F -from utils.masking import TriangularCausalMask, ProbMask -from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer -from layers.SelfAttention_Family import FullAttention, ProbAttention, AttentionLayer + from layers.Embed import 
DataEmbedding -import numpy as np +from layers.SelfAttention_Family import AttentionLayer, ProbAttention +from layers.Transformer_EncDec import ( + ConvLayer, + Decoder, + DecoderLayer, + Encoder, + EncoderLayer, +) class Model(nn.Module): """ Informer with Propspare attention in O(LlogL) complexity """ + def __init__(self, configs): super(Model, self).__init__() self.pred_len = configs.pred_len self.output_attention = configs.output_attention # Embedding - self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, - configs.dropout) - self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, - configs.dropout) + self.enc_embedding = DataEmbedding( + configs.enc_in, + configs.d_model, + configs.embed, + configs.freq, + configs.dropout, + ) + self.dec_embedding = DataEmbedding( + configs.dec_in, + configs.d_model, + configs.embed, + configs.freq, + configs.dropout, + ) # Encoder self.encoder = Encoder( [ EncoderLayer( AttentionLayer( - ProbAttention(False, configs.factor, attention_dropout=configs.dropout, - output_attention=configs.output_attention), - configs.d_model, configs.n_heads), + ProbAttention( + False, + configs.factor, + attention_dropout=configs.dropout, + output_attention=configs.output_attention, + ), + configs.d_model, + configs.n_heads, + ), configs.d_model, configs.d_ff, dropout=configs.dropout, - activation=configs.activation - ) for l in range(configs.e_layers) + activation=configs.activation, + ) + for l in range(configs.e_layers) ], - [ - ConvLayer( - configs.d_model - ) for l in range(configs.e_layers - 1) - ] if configs.distil else None, - norm_layer=torch.nn.LayerNorm(configs.d_model) + [ConvLayer(configs.d_model) for l in range(configs.e_layers - 1)] + if configs.distil + else None, + norm_layer=torch.nn.LayerNorm(configs.d_model), ) # Decoder self.decoder = Decoder( [ DecoderLayer( AttentionLayer( - ProbAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False), - configs.d_model, configs.n_heads), + ProbAttention( + True, + configs.factor, + attention_dropout=configs.dropout, + output_attention=False, + ), + configs.d_model, + configs.n_heads, + ), AttentionLayer( - ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), - configs.d_model, configs.n_heads), + ProbAttention( + False, + configs.factor, + attention_dropout=configs.dropout, + output_attention=False, + ), + configs.d_model, + configs.n_heads, + ), configs.d_model, configs.d_ff, dropout=configs.dropout, @@ -62,19 +96,28 @@ def __init__(self, configs): for l in range(configs.d_layers) ], norm_layer=torch.nn.LayerNorm(configs.d_model), - projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + projection=nn.Linear(configs.d_model, configs.c_out, bias=True), ) - def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, - enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): - + def forward( + self, + x_enc, + x_mark_enc, + x_dec, + x_mark_dec, + enc_self_mask=None, + dec_self_mask=None, + dec_enc_mask=None, + ): enc_out = self.enc_embedding(x_enc, x_mark_enc) enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) dec_out = self.dec_embedding(x_dec, x_mark_dec) - dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) + dec_out = self.decoder( + dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask + ) if self.output_attention: - return dec_out[:, -self.pred_len:, :], attns + return dec_out[:, 
-self.pred_len :, :], attns else: - return dec_out[:, -self.pred_len:, :] + return dec_out[:, -self.pred_len :, :] diff --git a/benchmark/transformer_models/models/Transformer.py b/benchmark/transformer_models/models/Transformer.py index 88c4a88..0d1a48c 100644 --- a/benchmark/transformer_models/models/Transformer.py +++ b/benchmark/transformer_models/models/Transformer.py @@ -1,50 +1,83 @@ import torch import torch.nn as nn -import torch.nn.functional as F -from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer -from layers.SelfAttention_Family import FullAttention, AttentionLayer + from layers.Embed import DataEmbedding +from layers.SelfAttention_Family import AttentionLayer, FullAttention +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer class Model(nn.Module): """ Vanilla Transformer with O(L^2) complexity """ + def __init__(self, configs): super(Model, self).__init__() self.pred_len = configs.pred_len self.output_attention = configs.output_attention # Embedding - self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, - configs.dropout) - self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, - configs.dropout) + self.enc_embedding = DataEmbedding( + configs.enc_in, + configs.d_model, + configs.embed, + configs.freq, + configs.dropout, + ) + self.dec_embedding = DataEmbedding( + configs.dec_in, + configs.d_model, + configs.embed, + configs.freq, + configs.dropout, + ) # Encoder self.encoder = Encoder( [ EncoderLayer( AttentionLayer( - FullAttention(False, configs.factor, attention_dropout=configs.dropout, - output_attention=configs.output_attention), configs.d_model, configs.n_heads), + FullAttention( + False, + configs.factor, + attention_dropout=configs.dropout, + output_attention=configs.output_attention, + ), + configs.d_model, + configs.n_heads, + ), configs.d_model, configs.d_ff, dropout=configs.dropout, - activation=configs.activation - ) for l in range(configs.e_layers) + activation=configs.activation, + ) + for l in range(configs.e_layers) ], - norm_layer=torch.nn.LayerNorm(configs.d_model) + norm_layer=torch.nn.LayerNorm(configs.d_model), ) # Decoder self.decoder = Decoder( [ DecoderLayer( AttentionLayer( - FullAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False), - configs.d_model, configs.n_heads), + FullAttention( + True, + configs.factor, + attention_dropout=configs.dropout, + output_attention=False, + ), + configs.d_model, + configs.n_heads, + ), AttentionLayer( - FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), - configs.d_model, configs.n_heads), + FullAttention( + False, + configs.factor, + attention_dropout=configs.dropout, + output_attention=False, + ), + configs.d_model, + configs.n_heads, + ), configs.d_model, configs.d_ff, dropout=configs.dropout, @@ -53,19 +86,28 @@ def __init__(self, configs): for l in range(configs.d_layers) ], norm_layer=torch.nn.LayerNorm(configs.d_model), - projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + projection=nn.Linear(configs.d_model, configs.c_out, bias=True), ) - def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, - enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): - + def forward( + self, + x_enc, + x_mark_enc, + x_dec, + x_mark_dec, + enc_self_mask=None, + dec_self_mask=None, + dec_enc_mask=None, + ): enc_out = self.enc_embedding(x_enc, x_mark_enc) enc_out, 
attns = self.encoder(enc_out, attn_mask=enc_self_mask) dec_out = self.dec_embedding(x_dec, x_mark_dec) - dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) + dec_out = self.decoder( + dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask + ) if self.output_attention: - return dec_out[:, -self.pred_len:, :], attns + return dec_out[:, -self.pred_len :, :], attns else: - return dec_out[:, -self.pred_len:, :] + return dec_out[:, -self.pred_len :, :] diff --git a/benchmark/utils/arg_resolver.py b/benchmark/utils/arg_resolver.py index 3b73ca1..32e1ad2 100644 --- a/benchmark/utils/arg_resolver.py +++ b/benchmark/utils/arg_resolver.py @@ -1,21 +1,34 @@ -from sklearn.preprocessing import StandardScaler, MinMaxScaler +from sklearn.preprocessing import MinMaxScaler, StandardScaler + def _model_is_transformer(model): - if model in ['FEDformer', 'FEDformer-f', 'FEDformer-w', 'FEDformer_Meta', 'Autoformer', 'Informer', 'Transformer']: + if model in [ + 'FEDformer', + 'FEDformer-f', + 'FEDformer-w', + 'FEDformer_Meta', + 'Autoformer', + 'Informer', + 'Transformer', + ]: return True return False + def setting_string(args, ii): - setting = '{}_{}_sl{}_ll{}_pl{}_timebudget_{}_trainbudget_{}_model-path_{}_itr_{}'.format( - args.model, - args.data, - args.seq_len, - args.label_len, - args.pred_len, - args.time_budget, - args.train_budget, - args.model_name, - ii) + setting = ( + '{}_{}_sl{}_ll{}_pl{}_timebudget_{}_trainbudget_{}_model-path_{}_itr_{}'.format( + args.model, + args.data, + args.seq_len, + args.label_len, + args.pred_len, + args.time_budget, + args.train_budget, + args.model_name, + ii, + ) + ) return setting @@ -36,7 +49,6 @@ def resolve_args(args): return args - def resolve_transformer_args(args): args.mode_select = 'random' args.modes = 64 diff --git a/benchmark/utils/masking.py b/benchmark/utils/masking.py index 4f768bd..6d68561 100644 --- a/benchmark/utils/masking.py +++ b/benchmark/utils/masking.py @@ -1,39 +1,49 @@ -import torch -import numpy as np import math -class TriangularCausalMask(): - def __init__(self, B, L, device="cpu"): +import numpy as np +import torch + + +class TriangularCausalMask: + def __init__(self, B, L, device='cpu'): mask_shape = [B, 1, L, L] with torch.no_grad(): - self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) + self._mask = torch.triu( + torch.ones(mask_shape, dtype=torch.bool), diagonal=1 + ).to(device) @property def mask(self): return self._mask -class ProbMask(): - def __init__(self, B, H, L, index, scores, device="cpu"): +class ProbMask: + def __init__(self, B, H, L, index, scores, device='cpu'): _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) - indicator = _mask_ex[torch.arange(B)[:, None, None], - torch.arange(H)[None, :, None], - index, :].to(device) + indicator = _mask_ex[ + torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, : + ].to(device) self._mask = indicator.view(scores.shape).to(device) @property def mask(self): return self._mask -class LocalMask(): - def __init__(self, B, L,S,device="cpu"): + +class LocalMask: + def __init__(self, B, L, S, device='cpu'): mask_shape = [B, 1, L, S] with torch.no_grad(): self.len = math.ceil(np.log2(L)) - self._mask1 = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) - self._mask2 = ~torch.triu(torch.ones(mask_shape,dtype=torch.bool),diagonal=-self.len).to(device) - self._mask = 
self._mask1+self._mask2 + self._mask1 = torch.triu( + torch.ones(mask_shape, dtype=torch.bool), diagonal=1 + ).to(device) + self._mask2 = ~torch.triu( + torch.ones(mask_shape, dtype=torch.bool), diagonal=-self.len + ).to(device) + self._mask = self._mask1 + self._mask2 + @property def mask(self): - return self._mask \ No newline at end of file + return self._mask diff --git a/benchmark/utils/metrics.py b/benchmark/utils/metrics.py index 3ab2e13..2befaa7 100644 --- a/benchmark/utils/metrics.py +++ b/benchmark/utils/metrics.py @@ -4,7 +4,9 @@ def RSE(pred, true): - return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) + return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt( + np.sum((true - true.mean()) ** 2) + ) def CORR(pred, true): @@ -42,20 +44,20 @@ def metric(pred, true): return mae, mse, rmse, mape, mspe + def smape(y_true, y_pred): - """ Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`. - `loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred), axis=-1)` - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - Returns: - Symmetric mean absolute percentage error values. shape = `[batch_size, d0, .. - dN-1]`. - """ + """Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`. + `loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred), axis=-1)` + Args: + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + Returns: + Symmetric mean absolute percentage error values. shape = `[batch_size, d0, .. + dN-1]`. + """ y_pred = tf.convert_to_tensor(y_pred) y_true = tf.cast(y_true, y_pred.dtype) diff = tf.abs( - (y_true - y_pred) / - backend.maximum(y_true + y_pred, backend.epsilon()) + (y_true - y_pred) / backend.maximum(y_true + y_pred, backend.epsilon()) ) return 200.0 * backend.mean(diff, axis=-1) diff --git a/benchmark/utils/timefeatures.py b/benchmark/utils/timefeatures.py index 0e93870..165df2d 100644 --- a/benchmark/utils/timefeatures.py +++ b/benchmark/utils/timefeatures.py @@ -14,63 +14,63 @@ def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: pass def __repr__(self): - return self.__class__.__name__ + "()" + return self.__class__.__name__ + '()' class SecondOfMinute(TimeFeature): """Minute of hour encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - return [x.second / 59.0 - 0.5 if x!=0 else 0 for x in index] + return [x.second / 59.0 - 0.5 if x != 0 else 0 for x in index] class MinuteOfHour(TimeFeature): """Minute of hour encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - return [x.minute / 59.0 - 0.5 if x!=0 else 0 for x in index] + return [x.minute / 59.0 - 0.5 if x != 0 else 0 for x in index] class HourOfDay(TimeFeature): """Hour of day encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - return [x.hour / 23.0 - 0.5 if x!=0 else 0 for x in index] + return [x.hour / 23.0 - 0.5 if x != 0 else 0 for x in index] class DayOfWeek(TimeFeature): """Hour of day encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - return [x.dayofweek / 6.0 - 0.5 if x!=0 else 0 for x in index] + return [x.dayofweek / 6.0 - 0.5 if x != 0 else 0 for x in index] class DayOfMonth(TimeFeature): """Day of month encoded as value between [-0.5, 0.5]""" def 
__call__(self, index: pd.DatetimeIndex) -> np.ndarray: - return [(x.day - 1) / 30.0 - 0.5 if x!=0 else 0 for x in index] + return [(x.day - 1) / 30.0 - 0.5 if x != 0 else 0 for x in index] class DayOfYear(TimeFeature): """Day of year encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - return [(x.dayofyear - 1) / 365.0 - 0.5 if x!=0 else 0 for x in index] + return [(x.dayofyear - 1) / 365.0 - 0.5 if x != 0 else 0 for x in index] class MonthOfYear(TimeFeature): """Month of year encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - return [(x.month - 1) / 11.0 - 0.5 if x!=0 else 0 for x in index] + return [(x.month - 1) / 11.0 - 0.5 if x != 0 else 0 for x in index] class WeekOfYear(TimeFeature): """Week of year encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - return [(x.isocalendar().week - 1) / 52.0 - 0.5 if x!=0 else 0 for x in index] + return [(x.isocalendar().week - 1) / 52.0 - 0.5 if x != 0 else 0 for x in index] def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: diff --git a/benchmark/utils/tools.py b/benchmark/utils/tools.py index e74b399..d3e018b 100644 --- a/benchmark/utils/tools.py +++ b/benchmark/utils/tools.py @@ -1,7 +1,8 @@ +import time + +import matplotlib.pyplot as plt import numpy as np import torch -import matplotlib.pyplot as plt -import time plt.switch_backend('agg') @@ -11,11 +12,8 @@ def adjust_learning_rate(optimizer, epoch, args): if args.lradj == 'type1': lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))} elif args.lradj == 'type2': - lr_adjust = { - 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, - 10: 5e-7, 15: 1e-7, 20: 5e-8 - } - elif args.lradj =='type3': + lr_adjust = {2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 10: 5e-7, 15: 1e-7, 20: 5e-8} + elif args.lradj == 'type3': lr_adjust = {epoch: args.learning_rate} elif args.lradj == 'type4': lr_adjust = {epoch: args.learning_rate * (0.9 ** ((epoch - 1) // 1))} @@ -53,13 +51,16 @@ def __call__(self, val_loss, model, path): def save_checkpoint(self, val_loss, model, path, epoch=None): if self.verbose: - print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') + print( + f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...' 
+ ) if epoch: torch.save(model.state_dict(), path + '/' + f'checkpoint_{epoch}.pth') else: torch.save(model.state_dict(), path + '/' + 'checkpoint.pth') self.val_loss_min = val_loss + class TimeBudget: def __init__(self, budget): self.budget = budget @@ -83,14 +84,16 @@ def step(self): self.end_timer() return + class dotdict(dict): """dot.notation access to dictionary attributes""" + __getattr__ = dict.get __setattr__ = dict.__setitem__ __delattr__ = dict.__delitem__ -class StandardScaler(): +class StandardScaler: def __init__(self, mean, std): self.mean = mean self.std = std diff --git a/src/build_validation_dataset.py b/src/build_validation_dataset.py index c365865..8e648ca 100644 --- a/src/build_validation_dataset.py +++ b/src/build_validation_dataset.py @@ -2,22 +2,27 @@ Module to transform different real world datasets into format used for our synthetic dataset """ -import pandas as pd -import numpy as np -import tensorflow as tf import csv from datetime import datetime +from functools import reduce + +import numpy as np +import pandas as pd +import tensorflow as tf from dateutil.relativedelta import relativedelta from tqdm import trange -from functools import reduce HISTORY = 100 HORIZON = 10 NUM_TASKS = 3 + def compute_time_features(ts: np.ndarray): ts = pd.to_datetime(ts) - return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1) + return np.stack( + [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1 + ) + def build_input(ts, target_full, task=1): # horizon should be fixed as defined in model @@ -28,11 +33,12 @@ def build_input(ts, target_full, task=1): target = target_full[:-HORIZON] target_to_predict = target_full[-HORIZON:] - if task == 2: target_to_predict = np.cumsum(target_to_predict) / (1 + np.arange(HORIZON)) elif task == 3: - target_to_predict = [np.std(target_to_predict[:i+1]) for i in range(len(target_to_predict))] + target_to_predict = [ + np.std(target_to_predict[: i + 1]) for i in range(len(target_to_predict)) + ] # this is the target value of the data before the horizon target = tf.convert_to_tensor(target, dtype=tf.float32) @@ -49,15 +55,20 @@ def build_input(ts, target_full, task=1): return { 'ts': tf.repeat(tf.expand_dims(date_tensor, axis=0), [HORIZON], axis=0), - # repeat the before horizon values horizon number of times, # so that for each of the predictions for each target_ts, you # have an available set of features 'history': tf.repeat(tf.expand_dims(target, axis=0), [HORIZON], axis=0), 'target_ts': tf.expand_dims(target_dates, axis=1), - 'task': tf.fill([HORIZON,], task) + 'task': tf.fill( + [ + HORIZON, + ], + task, + ), }, tf.expand_dims(tf.convert_to_tensor(target_to_predict, dtype=tf.float32), axis=1) + def read_timeseries_file(filename): """ Function to read the standard datasets for time series. 
@@ -72,6 +83,7 @@ def read_timeseries_file(filename): return lines + def get_dates(num_days, freq): dates = [] @@ -84,14 +96,15 @@ def get_dates(num_days, freq): for _ in range(num_days): dates.append(pd.to_datetime(current_date)) if freq == 'daily': - current_date += relativedelta(days = 1) + current_date += relativedelta(days=1) elif freq == 'weekly': - current_date += relativedelta(weeks = 1) + current_date += relativedelta(weeks=1) elif freq == 'monthly': - current_date += relativedelta(months = 1) + current_date += relativedelta(months=1) return dates + def split_dataset(dataset): """ If the size of dataset is n * (HISTORY + HORIZON), we split it @@ -105,8 +118,8 @@ def split_dataset(dataset): # otherwise, slide a window starting from the first point # with a stride of HISTORY // 3 until the elements in # window are less than HISTORY + HORIZON - while i == 0 or i+HISTORY+HORIZON < len(dataset): - mini_datasets.append(dataset[i:i+HISTORY+HORIZON]) + while i == 0 or i + HISTORY + HORIZON < len(dataset): + mini_datasets.append(dataset[i : i + HISTORY + HORIZON]) i += HISTORY // 3 return mini_datasets @@ -122,11 +135,11 @@ def build_dataset(dataset, freq): # TODO: change it from 2 # keeping it 2 for testing, as dataset creation takes time for i in trange(100): - # for i in trange(len(dataset)): + # for i in trange(len(dataset)): for X in split_dataset(dataset[i]): dates = get_dates(len(X), freq) - for task in range(1, NUM_TASKS+1): + for task in range(1, NUM_TASKS + 1): built_input, output = build_input(dates, X, task=task) ts_list += [ts for ts in built_input['ts']] @@ -136,15 +149,21 @@ def build_dataset(dataset, freq): outputs += [y for y in output] - dataset_frame = tf.data.Dataset.from_tensor_slices(({ - 'ts': ts_list, - 'history': history_list, - 'target_ts': target_ts_list, - 'task': task_list - }, outputs)) + dataset_frame = tf.data.Dataset.from_tensor_slices( + ( + { + 'ts': ts_list, + 'history': history_list, + 'target_ts': target_ts_list, + 'task': task_list, + }, + outputs, + ) + ) return dataset_frame + def construct_dataframe(train_dataset_and_freq): """ Function to construct the dataframe in accordance with the training format @@ -155,22 +174,29 @@ def construct_dataframe(train_dataset_and_freq): return reduce(lambda df1, df2: df1.concatenate(df2), dfs) + def get_validation_dataset(): """ Function to read data from various sources and feed them as input to build a dataframe for getting the validation dataset """ - wikiweb_train = read_timeseries_file("/home/ubuntu/notebooks/forecasting/pretraining/wikiweb_train.csv") - tourism_train = read_timeseries_file("/home/ubuntu/notebooks/forecasting/pretraining/tourism_train.csv") - exchange_rate_train = read_timeseries_file("/home/ubuntu/notebooks/forecasting/pretraining/exchange_rate_train.csv") - m3_train = read_timeseries_file("/home/ubuntu/notebooks/forecasting/pretraining/m3_train.csv") + wikiweb_train = read_timeseries_file( + '/home/ubuntu/notebooks/forecasting/pretraining/wikiweb_train.csv' + ) + tourism_train = read_timeseries_file( + '/home/ubuntu/notebooks/forecasting/pretraining/tourism_train.csv' + ) + read_timeseries_file( + '/home/ubuntu/notebooks/forecasting/pretraining/exchange_rate_train.csv' + ) + read_timeseries_file('/home/ubuntu/notebooks/forecasting/pretraining/m3_train.csv') # add different datasets and their frequency here # TODO: addition of monthly dataset shoots up # validation loss to ~40k. 
Need to see how to fix that train_dataset_and_freq = [ - (wikiweb_train, "daily"), - (tourism_train, "monthly"), + (wikiweb_train, 'daily'), + (tourism_train, 'monthly'), # (exchange_rate_train, "daily"), # (m3_train, "monthly") ] @@ -179,8 +205,10 @@ def get_validation_dataset(): # print(len(list(constructed_dataframe))) return constructed_dataframe + def main(): get_validation_dataset() + if __name__ == '__main__': main() diff --git a/src/evaluate_public_datasets/evaluate.py b/src/evaluate_public_datasets/evaluate.py index cf3055d..e7252d6 100644 --- a/src/evaluate_public_datasets/evaluate.py +++ b/src/evaluate_public_datasets/evaluate.py @@ -1,26 +1,29 @@ """ Module to evaluate the model on real world datasets """ -import yaml import argparse -import tensorflow as tf -import tensorflow_io -import pandas as pd + import numpy as np +import pandas as pd +import tensorflow as tf +import yaml from process_data import read_timeseries_file -from tqdm import trange from scipy.stats.mstats import winsorize -from sklearn.metrics import mean_squared_error, mean_absolute_error -from sklearn.preprocessing import StandardScaler, MinMaxScaler - +from sklearn.metrics import mean_absolute_error, mean_squared_error +from sklearn.preprocessing import MinMaxScaler +from tqdm import trange HISTORY = 100 + def compute_time_features(ts: np.ndarray): ts = pd.to_datetime(ts) - return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1) + return np.stack( + [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1 + ) # return np.stack([ts.minute, ts.hour, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1) + def build_input(ts, target, task=1): horizon = len(ts) - len(target) all_dates = tf.numpy_function(compute_time_features, inp=[ts], Tout=tf.int64) @@ -41,31 +44,39 @@ def build_input(ts, target, task=1): target = target[-HISTORY:] return { 'ts': tf.repeat(tf.expand_dims(date_tensor, axis=0), [horizon], axis=0), - # repeat the before horizon values horizon number of times, # so that for each of the predictions for each target_ts, you # have an available set of features 'history': tf.repeat(tf.expand_dims(target, axis=0), [horizon], axis=0), 'target_ts': tf.expand_dims(target_dates, axis=1), - 'task': tf.fill([horizon,], task), + 'task': tf.fill( + [ + horizon, + ], + task, + ), } + def evaluate_model(config, train_data, test_data, freq, name): pretrained = tf.keras.models.load_model(config['model_path']) BATCH_SIZE = 100 - item_id, pred_start, actual, pred = [], [], [], [] + _item_id, _pred_start, actual, pred = [], [], [], [] stds = [] wapes = [] for i in trange(0, len(train_data), BATCH_SIZE): - test_points = train_data[i:(i+BATCH_SIZE)] + test_points = train_data[i : (i + BATCH_SIZE)] for idx, current_point in enumerate(test_points): - # timestamps of history - history_ts = pd.date_range(start='2010-01-01', periods=len(train_data[i+idx] + test_data[i+idx]), freq=freq) + history_ts = pd.date_range( + start='2010-01-01', + periods=len(train_data[i + idx] + test_data[i + idx]), + freq=freq, + ) # values of history - history = train_data[i+idx] + history = train_data[i + idx] # mean of history's last 6 values history_mean = np.nanmean(history[-6:]) @@ -74,7 +85,7 @@ def evaluate_model(config, train_data, test_data, freq, name): history_std = np.nanstd(history[-6:]) # local scale, don't know why defined so - local_scale = (history_mean + history_std + 1e-4) + local_scale = history_mean + history_std + 1e-4 # change history based on local scale, to normalize it 
between 0 and 1 history = np.clip(history / local_scale, a_min=0, a_max=1) @@ -87,19 +98,26 @@ def evaluate_model(config, train_data, test_data, freq, name): pred_vals = pretrained(build_input(history_ts, history, task=1)) # get scaled mean based on the given history - scaled_vals = (pred_vals['result'].numpy().reshape(-1) * pred_vals['scale'].numpy().reshape(-1)) * local_scale - - if np.mean(np.array(test_data[i+idx])): - wape = np.mean(np.abs(scaled_vals - np.array(test_data[i+idx]))) / np.mean(np.array(test_data[i+idx])) + scaled_vals = ( + pred_vals['result'].numpy().reshape(-1) + * pred_vals['scale'].numpy().reshape(-1) + ) * local_scale + + if np.mean(np.array(test_data[i + idx])): + wape = np.mean( + np.abs(scaled_vals - np.array(test_data[i + idx])) + ) / np.mean(np.array(test_data[i + idx])) wapes.append(wape) - assert len(scaled_vals) == len(test_data[i+idx]) + assert len(scaled_vals) == len(test_data[i + idx]) scaler = MinMaxScaler() - scaler.fit(np.array(train_data[i+idx]).reshape(-1, 1)) + scaler.fit(np.array(train_data[i + idx]).reshape(-1, 1)) predicted_scaled = scaler.transform(np.array(scaled_vals).reshape(-1, 1)) - actual_scaled = scaler.transform(np.array(test_data[i+idx]).reshape(-1, 1)) + actual_scaled = scaler.transform( + np.array(test_data[i + idx]).reshape(-1, 1) + ) stds.append(np.std(actual_scaled)) for pred_val, actual_val in zip(predicted_scaled, actual_scaled): @@ -108,31 +126,29 @@ def evaluate_model(config, train_data, test_data, freq, name): pred.append(pred_val) actual.append(actual_val) - - - eval_clipped_df = pd.DataFrame(dict( - actual=actual, - pred=pred - )) + eval_clipped_df = pd.DataFrame(dict(actual=actual, pred=pred)) eval_clipped_df = eval_clipped_df.assign( cmape=lambda df: np.abs(df.actual - df.pred) / df.actual ).assign( winsorized_cmape=lambda df: winsorize(df.cmape, (0.01, 0.01)), - squashed_cmape=lambda df: np.where(df.cmape > 1, 1 + np.log(df.cmape), df.cmape) + squashed_cmape=lambda df: np.where( + df.cmape > 1, 1 + np.log(df.cmape), df.cmape + ), ) print(eval_clipped_df[(eval_clipped_df.actual > 0)].describe()) # print(wapes) # print(np.nanmean(wapes)) - print("MAE:", mean_absolute_error(actual, pred)) - print("MSE:", mean_squared_error(actual, pred)) + print('MAE:', mean_absolute_error(actual, pred)) + print('MSE:', mean_squared_error(actual, pred)) print(np.mean(stds)) + def main(): parser = argparse.ArgumentParser() - parser.add_argument("-c", "--config", required=True, help="Path to config file") + parser.add_argument('-c', '--config', required=True, help='Path to config file') args = parser.parse_args() with open(args.config) as config_file: @@ -141,10 +157,8 @@ def main(): train_data = read_timeseries_file(config['train_file']) test_data = read_timeseries_file(config['test_file']) - evaluate_model(config, train_data, test_data, config['freq'], config['name']) - if __name__ == '__main__': main() diff --git a/src/evaluate_public_datasets/process_data.py b/src/evaluate_public_datasets/process_data.py index 043c75d..fbf58b7 100644 --- a/src/evaluate_public_datasets/process_data.py +++ b/src/evaluate_public_datasets/process_data.py @@ -3,10 +3,10 @@ them as a tfrecords file """ import csv -import tensorflow as tf -import numpy as np + import pandas as pd + def read_timeseries_file(filename): """ Function to read the standard datasets for time series. 
@@ -27,4 +27,3 @@ def generate_tf_test_examples(name, train_data, test_data, freq): dates = pd.date_range(start='2010-01-01', periods=len_data, freq=freq) return name, dates, train_data, test_data - diff --git a/src/evaluation/evaluate.py b/src/evaluation/evaluate.py index b3158cd..d337d60 100644 --- a/src/evaluation/evaluate.py +++ b/src/evaluation/evaluate.py @@ -2,17 +2,17 @@ Module to evaluate on the customer dataset """ -import yaml import argparse -import pandas as pd + import numpy as np +import pandas as pd +import reainternal.mllibs.pipelinelib as PL import tensorflow as tf -import tensorflow_io +import yaml +from prepare_dataset import build_input from reainternal import environment -import reainternal.mllibs.pipelinelib as PL from scipy.stats.mstats import winsorize from tqdm import trange -from prepare_dataset import build_input def evaluate(config): @@ -29,12 +29,19 @@ def evaluate(config): BATCH_SIZE = 100 item_id, pred_start, actual, pred = [], [], [], [] for i in trange(0, len(record_index), BATCH_SIZE): - test_points = list(model_info.prepared_dataset_instance.get_prediction_records(record_index[i:(i + BATCH_SIZE)])) + test_points = list( + model_info.prepared_dataset_instance.get_prediction_records( + record_index[i : (i + BATCH_SIZE)] + ) + ) for current_point in test_points: # contains the history of available values and the targets - prediction_record, _ = model_info.serving_dataset_instance.dataset_class.prepare_data_for_prediction( - model_info.serving_dataset_instance, - current_point.model_input) + ( + prediction_record, + _, + ) = model_info.serving_dataset_instance.dataset_class.prepare_data_for_prediction( + model_info.serving_dataset_instance, current_point.model_input + ) # timestamps of history history_ts = prediction_record[ts_col] @@ -49,7 +56,7 @@ def evaluate(config): history_std = np.nanstd(history[-6:]) # local scale, don't know why defined so - local_scale = (history_mean + history_std + 1e-4) + local_scale = history_mean + history_std + 1e-4 # change history based on local scale, to normalize it between 0 and 1 history = np.clip(history / local_scale, a_min=0, a_max=1) @@ -58,34 +65,38 @@ def evaluate(config): pred_mean = pretrained(build_input(history_ts, history, task=2)) # get scaled mean based on the given history - scaled_mean = (pred_mean['result'].numpy().reshape(-1) * pred_mean['scale'].numpy().reshape(-1)) * local_scale + scaled_mean = ( + pred_mean['result'].numpy().reshape(-1) + * pred_mean['scale'].numpy().reshape(-1) + ) * local_scale item_id.append(current_point.test_info[0]) pred_start.append(current_point.test_info[1]) actual.append(np.mean(current_point.actual[target_col])) pred.append(scaled_mean[-1]) - eval_clipped_df = pd.DataFrame(dict( - item_id=item_id, - pred_start=pred_start, - actual=actual, - pred=pred - )) + eval_clipped_df = pd.DataFrame( + dict(item_id=item_id, pred_start=pred_start, actual=actual, pred=pred) + ) eval_clipped_df = eval_clipped_df.assign( cmape=lambda df: np.abs(df.actual - df.pred) / df.actual ).assign( winsorized_cmape=lambda df: winsorize(df.cmape, (0.01, 0.01)), - squashed_cmape=lambda df: np.where(df.cmape > 1, 1 + np.log(df.cmape), df.cmape) + squashed_cmape=lambda df: np.where( + df.cmape > 1, 1 + np.log(df.cmape), df.cmape + ), ) - return eval_clipped_df[(eval_clipped_df.actual > 0) & (eval_clipped_df.pred_start == '2021-06-30T00:00:00')].describe() - + return eval_clipped_df[ + (eval_clipped_df.actual > 0) + & (eval_clipped_df.pred_start == '2021-06-30T00:00:00') + ].describe() def main(): parser 
= argparse.ArgumentParser() - parser.add_argument("-c", "--config", required=True, help="Path to config file") + parser.add_argument('-c', '--config', required=True, help='Path to config file') args = parser.parse_args() with open(args.config) as config_file: @@ -96,5 +107,6 @@ def main(): results_df = evaluate(config) print(results_df) + if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/src/evaluation/prepare_dataset.py b/src/evaluation/prepare_dataset.py index e69f5e9..80de27f 100644 --- a/src/evaluation/prepare_dataset.py +++ b/src/evaluation/prepare_dataset.py @@ -1,16 +1,19 @@ """ Module to prepare customer dataset for evaluation """ -import pandas as pd import numpy as np +import pandas as pd import tensorflow as tf -import tensorflow_io HISTORY = 100 + def compute_time_features(ts: np.ndarray): ts = pd.to_datetime(ts) - return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1) + return np.stack( + [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1 + ) + def build_input(ts, target, task=1): horizon = len(ts) - len(target) @@ -31,11 +34,15 @@ def build_input(ts, target, task=1): date_tensor = date_tensor[-HISTORY:] return { 'ts': tf.repeat(tf.expand_dims(date_tensor, axis=0), [horizon], axis=0), - # repeat the before horizon values horizon number of times, # so that for each of the predictions for each target_ts, you # have an available set of features 'history': tf.repeat(tf.expand_dims(target, axis=0), [horizon], axis=0), 'target_ts': tf.expand_dims(target_dates, axis=1), - 'task': tf.fill([horizon,], task), + 'task': tf.fill( + [ + horizon, + ], + task, + ), } diff --git a/src/prepare_public_datasets/constants.py b/src/prepare_public_datasets/constants.py index c14c76e..d81da53 100644 --- a/src/prepare_public_datasets/constants.py +++ b/src/prepare_public_datasets/constants.py @@ -1,2 +1,2 @@ CONTEXT_LENGTH = 200 -WINDOW_STRIDE = 30 \ No newline at end of file +WINDOW_STRIDE = 30 diff --git a/src/prepare_public_datasets/prepare.py b/src/prepare_public_datasets/prepare.py index bca22d7..eb30b10 100644 --- a/src/prepare_public_datasets/prepare.py +++ b/src/prepare_public_datasets/prepare.py @@ -2,16 +2,18 @@ Module to prepare public datasets for training """ -import csv -import yaml import argparse +import csv +from tempfile import NamedTemporaryFile + import numpy as np import pandas as pd import tensorflow as tf -from tempfile import NamedTemporaryFile +import yaml +from constants import CONTEXT_LENGTH, WINDOW_STRIDE from reainternal.cloud import CloudLocation from tqdm import tqdm -from constants import * + def read_timeseries_file(filename): """ @@ -27,6 +29,7 @@ def read_timeseries_file(filename): return lines + def generate_tf_train_examples(name, train_data_list, freq): """ Method to generate the examples from train data @@ -36,7 +39,7 @@ def generate_tf_train_examples(name, train_data_list, freq): i = len(train_data) while i > 0: - train_data = train_data[max(i-CONTEXT_LENGTH, 0):i] + train_data = train_data[max(i - CONTEXT_LENGTH, 0) : i] if len(train_data) < CONTEXT_LENGTH: train_data = [0] * (CONTEXT_LENGTH - len(train_data)) + train_data @@ -53,23 +56,24 @@ def generate_tf_train_examples(name, train_data_list, freq): print(train_data) yield tf.train.Example( - features=tf.train.Features( - feature={ - "id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[name.encode()])), - "ts": tf.train.Feature( - int64_list=tf.train.Int64List( - value=dates.astype(np.int64) - ) - ), - "y": 
tf.train.Feature( - float_list=tf.train.FloatList(value=train_data) - ), - "noise": tf.train.Feature( - float_list=tf.train.FloatList(value=noise) - ), - } - ) + features=tf.train.Features( + feature={ + 'id': tf.train.Feature( + bytes_list=tf.train.BytesList(value=[name.encode()]) + ), + 'ts': tf.train.Feature( + int64_list=tf.train.Int64List(value=dates.astype(np.int64)) + ), + 'y': tf.train.Feature( + float_list=tf.train.FloatList(value=train_data) + ), + 'noise': tf.train.Feature( + float_list=tf.train.FloatList(value=noise) + ), + } ) + ) + def save_tf_records(prefix: str, dest: str, it): """ @@ -77,35 +81,40 @@ def save_tf_records(prefix: str, dest: str, it): """ with NamedTemporaryFile() as tfile: with tf.io.TFRecordWriter( - tfile.name, options=tf.io.TFRecordOptions(compression_type="GZIP") + tfile.name, options=tf.io.TFRecordOptions(compression_type='GZIP') ) as writer: for record in tqdm(it): writer.write(record.SerializeToString()) tfile.seek(0) CloudLocation(prefix + dest).copy_from_file(tfile) + def save_tf_dataset(prefix: str, dataset_name: str, data: list, freq: str): """ Generate dataset and save as tf records """ save_tf_records( prefix, - f"{dataset_name}.tfrecords", - generate_tf_train_examples(dataset_name, data, freq) + f'{dataset_name}.tfrecords', + generate_tf_train_examples(dataset_name, data, freq), ) - print(f"Written to file {dataset_name}.tfrecords") + print(f'Written to file {dataset_name}.tfrecords') + def main(): parser = argparse.ArgumentParser() - parser.add_argument("-c", "--config", required=True, help="Path to config file") + parser.add_argument('-c', '--config', required=True, help='Path to config file') args = parser.parse_args() with open(args.config) as config_file: config = yaml.load(config_file, yaml.loader.SafeLoader) train_data = read_timeseries_file(config['train_path']) - save_tf_dataset(config['prefix'], config['dataset_name'], train_data, config['freq']) + save_tf_dataset( + config['prefix'], config['dataset_name'], train_data, config['freq'] + ) + if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/src/synthetic_generation/config_variables.py b/src/synthetic_generation/config_variables.py index 9e8b280..55382d8 100644 --- a/src/synthetic_generation/config_variables.py +++ b/src/synthetic_generation/config_variables.py @@ -2,6 +2,7 @@ Module containing configuration setting for the script """ + class Config: frequencies = None frequency_names = None @@ -11,14 +12,35 @@ class Config: @classmethod def set_freq_variables(cls, is_sub_day): if is_sub_day: - cls.frequencies = [("min", 1/1440), ("H", 1/24), ("D", 1), ("W", 7), ("MS", 30), ("Y", 12)] - cls.frequency_names = ["minute", "hourly", "daily", "weekly", "monthly", "yearly"] - cls.freq_and_index = (("minute", 0), ("hourly", 1), ("daily", 2), ("weekly", 3), ("monthly", 4), ("yearly", 5)) + cls.frequencies = [ + ('min', 1 / 1440), + ('H', 1 / 24), + ('D', 1), + ('W', 7), + ('MS', 30), + ('Y', 12), + ] + cls.frequency_names = [ + 'minute', + 'hourly', + 'daily', + 'weekly', + 'monthly', + 'yearly', + ] + cls.freq_and_index = ( + ('minute', 0), + ('hourly', 1), + ('daily', 2), + ('weekly', 3), + ('monthly', 4), + ('yearly', 5), + ) else: - cls.frequencies = [("D", 1), ("W", 7), ("MS", 30)] - cls.frequency_names = ["daily", "weekly", "monthly"] - cls.freq_and_index = (("daily", 0), ("weekly", 1), ("monthly", 2)) + cls.frequencies = [('D', 1), ('W', 7), ('MS', 30)] + cls.frequency_names = ['daily', 'weekly', 'monthly'] + cls.freq_and_index = (('daily', 0), 
('weekly', 1), ('monthly', 2)) @classmethod def set_transition(cls, transition): - cls.transition = transition \ No newline at end of file + cls.transition = transition diff --git a/src/synthetic_generation/constants.py b/src/synthetic_generation/constants.py index 505c9d7..1c9712e 100644 --- a/src/synthetic_generation/constants.py +++ b/src/synthetic_generation/constants.py @@ -3,28 +3,29 @@ """ from datetime import date + import tensorflow as tf -BASE_START = date.fromisoformat("1885-01-01").toordinal() -BASE_END = date.fromisoformat("2023-12-31").toordinal() + 1 +BASE_START = date.fromisoformat('1885-01-01').toordinal() +BASE_END = date.fromisoformat('2023-12-31').toordinal() + 1 PRODUCT_SCHEMA = { - "doc": "Timeseries sample", - "name": "TimeseriesSample", - "type": "record", - "fields": [ - {"name": "id", "type": "string"}, - {"name": "ts", "type": {"type": "int", "logicalType": "date"}}, - {"name": "y", "type": ["null", "float"]}, - {"name": "noise", "type": ["float"]} + 'doc': 'Timeseries sample', + 'name': 'TimeseriesSample', + 'type': 'record', + 'fields': [ + {'name': 'id', 'type': 'string'}, + {'name': 'ts', 'type': {'type': 'int', 'logicalType': 'date'}}, + {'name': 'y', 'type': ['null', 'float']}, + {'name': 'noise', 'type': ['float']}, ], } CONTEXT_LENGTH = 1_000 TF_SCHEMA = { - "id": tf.io.FixedLenFeature([], dtype=tf.string), - "ts": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.int64), - "y": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32), - "noise": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32) + 'id': tf.io.FixedLenFeature([], dtype=tf.string), + 'ts': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.int64), + 'y': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32), + 'noise': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32), } diff --git a/src/synthetic_generation/generate_series.py b/src/synthetic_generation/generate_series.py index 7f6b6f8..39cab33 100644 --- a/src/synthetic_generation/generate_series.py +++ b/src/synthetic_generation/generate_series.py @@ -2,19 +2,21 @@ Module to generate synthetic series """ +from datetime import date + import numpy as np import pandas as pd -from datetime import date -from pandas.tseries.frequencies import to_offset -from constants import * from config_variables import Config +from constants import BASE_END, BASE_START, CONTEXT_LENGTH from generate_series_components import make_series -from utils import sample_scale, get_transition_coefficients -from series_config import ComponentScale, SeriesConfig, ComponentNoise +from pandas.tseries.frequencies import to_offset from scipy.stats import beta +from series_config import ComponentNoise, ComponentScale, SeriesConfig +from utils import get_transition_coefficients, sample_scale + def __generate( - n = 100, + n=100, freq_index: int = None, start: pd.Timestamp = None, options: dict = {}, @@ -31,22 +33,22 @@ def __generate( # annual, monthly, weekly, hourly and minutely components a, m, w, h, minute = 0.0, 0.0, 0.0, 0.0, 0.0 - if freq == "min": + if freq == 'min': minute = np.random.uniform(0.0, 1.0) h = np.random.uniform(0.0, 0.2) - elif freq == "H": + elif freq == 'H': minute = np.random.uniform(0.0, 0.2) h = np.random.uniform(0.0, 1) - elif freq == "D": + elif freq == 'D': w = np.random.uniform(0.0, 1.0) m = np.random.uniform(0.0, 0.2) - elif freq == "W": + elif freq == 'W': m = np.random.uniform(0.0, 0.3) a = np.random.uniform(0.0, 0.3) - elif freq == "MS": + elif freq == 'MS': w = np.random.uniform(0.0, 0.1) a = np.random.uniform(0.0, 0.5) - 
elif freq == "Y": + elif freq == 'Y': w = np.random.uniform(0.0, 0.2) a = np.random.uniform(0.0, 1) else: @@ -54,7 +56,9 @@ def __generate( if start is None: # start = pd.Timestamp(date.fromordinal(np.random.randint(BASE_START, BASE_END))) - start = pd.Timestamp(date.fromordinal(int((BASE_START - BASE_END)*beta.rvs(5,1)+BASE_START))) + start = pd.Timestamp( + date.fromordinal(int((BASE_START - BASE_END) * beta.rvs(5, 1) + BASE_START)) + ) scale_config = ComponentScale( 1.0, @@ -64,7 +68,7 @@ def __generate( m=m, w=w, minute=minute, - h=h + h=h, ) offset_config = ComponentScale( @@ -77,17 +81,16 @@ def __generate( ) noise_config = ComponentNoise( - k=np.random.uniform(1, 5), - median=1, - scale=sample_scale() + k=np.random.uniform(1, 5), median=1, scale=sample_scale() ) cfg = SeriesConfig(scale_config, offset_config, noise_config) return cfg, make_series(cfg, to_offset(freq), n, start, options, random_walk) + def generate( - n = 100, + n=100, freq_index: int = None, start: pd.Timestamp = None, options: dict = {}, @@ -106,10 +109,8 @@ def generate( else: values = series1['values'] - dataframe_data = { - 'series_values': values, - 'noise': series1['noise'] - } - - return cfg1, pd.DataFrame(data=dataframe_data, index=series1['dates'])#.clip(lower=0.0) + dataframe_data = {'series_values': values, 'noise': series1['noise']} + return cfg1, pd.DataFrame( + data=dataframe_data, index=series1['dates'] + ) # .clip(lower=0.0) diff --git a/src/synthetic_generation/generate_series_components.py b/src/synthetic_generation/generate_series_components.py index 7bc88b2..900d5e2 100644 --- a/src/synthetic_generation/generate_series_components.py +++ b/src/synthetic_generation/generate_series_components.py @@ -1,12 +1,13 @@ """ Module to generate trend and seasonal components of series """ +from collections import defaultdict + import numpy as np import pandas as pd -from constants import * from series_config import SeriesConfig -from utils import shift_axis, weibull_noise, get_random_walk_series -from collections import defaultdict +from utils import get_random_walk_series, shift_axis, weibull_noise + def make_series_trend(series: SeriesConfig, dates: pd.DatetimeIndex): """ @@ -25,6 +26,7 @@ def make_series_trend(series: SeriesConfig, dates: pd.DatetimeIndex): return values + def get_freq_component(dates_feature: pd.Index, n_harmonics: int, n_total: int): """ Method to get systematic movement of values across time @@ -38,7 +40,7 @@ def get_freq_component(dates_feature: pd.Index, n_harmonics: int, n_total: int): :return: numpy array of shape dates_feature.shape containing sinusoidal value for a given point in time """ - harmonics = list(range(1, n_harmonics+1)) + harmonics = list(range(1, n_harmonics + 1)) # initialize sin and cosine coefficients with 0 sin_coef = np.zeros(n_harmonics) @@ -46,8 +48,8 @@ def get_freq_component(dates_feature: pd.Index, n_harmonics: int, n_total: int): # choose coefficients inversely proportional to the harmonic for idx, harmonic in enumerate(harmonics): - sin_coef[idx] = np.random.normal(scale = 1 / harmonic) - cos_coef[idx] = np.random.normal(scale = 1 / harmonic) + sin_coef[idx] = np.random.normal(scale=1 / harmonic) + cos_coef[idx] = np.random.normal(scale=1 / harmonic) # normalize the coefficients such that their sum of squares is 1 coef_sq_sum = np.sqrt(np.sum(np.square(sin_coef)) + np.sum(np.square(cos_coef))) @@ -58,8 +60,12 @@ def get_freq_component(dates_feature: pd.Index, n_harmonics: int, n_total: int): # comprises of patterns of varying frequency return_val = 0 for 
idx, harmonic in enumerate(harmonics): - return_val += sin_coef[idx] * np.sin(2 * np.pi * harmonic * dates_feature / n_total) - return_val += cos_coef[idx] * np.cos(2 * np.pi * harmonic * dates_feature / n_total) + return_val += sin_coef[idx] * np.sin( + 2 * np.pi * harmonic * dates_feature / n_total + ) + return_val += cos_coef[idx] * np.cos( + 2 * np.pi * harmonic * dates_feature / n_total + ) return return_val @@ -75,24 +81,35 @@ def make_series_seasonal(series: SeriesConfig, dates: pd.DatetimeIndex): seasonal_components = defaultdict(lambda: 1) if series.scale.minute is not None: - seasonal_components['minute'] = 1 + series.scale.minute * get_freq_component(dates.minute, 10, 60) + seasonal_components['minute'] = 1 + series.scale.minute * get_freq_component( + dates.minute, 10, 60 + ) seasonal *= seasonal_components['minute'] if series.scale.h is not None: - seasonal_components['h'] = 1 + series.scale.h * get_freq_component(dates.hour, 10, 24) + seasonal_components['h'] = 1 + series.scale.h * get_freq_component( + dates.hour, 10, 24 + ) seasonal *= seasonal_components['h'] if series.scale.a is not None: - seasonal_components['a'] = 1 + series.scale.a * get_freq_component(dates.month, 6, 12) + seasonal_components['a'] = 1 + series.scale.a * get_freq_component( + dates.month, 6, 12 + ) seasonal *= seasonal_components['a'] if series.scale.m is not None: - seasonal_components['m'] = 1 + series.scale.m * get_freq_component(dates.day, 10, 30.5) + seasonal_components['m'] = 1 + series.scale.m * get_freq_component( + dates.day, 10, 30.5 + ) seasonal *= seasonal_components['m'] if series.scale.w is not None: - seasonal_components['w'] = 1 + series.scale.w * get_freq_component(dates.dayofweek, 4, 7) + seasonal_components['w'] = 1 + series.scale.w * get_freq_component( + dates.dayofweek, 4, 7 + ) seasonal *= seasonal_components['w'] seasonal_components['seasonal'] = seasonal return seasonal_components + def make_series( series: SeriesConfig, freq: pd.DateOffset, @@ -120,7 +137,7 @@ def make_series( weibull_noise_term = weibull_noise( k=series.noise_config.k, median=series.noise_config.median, - length=len(values) + length=len(values), ) # approximating estimated value from median @@ -128,13 +145,15 @@ def make_series( # expected value of this term is 0 # for no noise, scale is set to 0 - scaled_noise_term = series.noise_config.scale * (weibull_noise_term - noise_expected_val) + scaled_noise_term = series.noise_config.scale * ( + weibull_noise_term - noise_expected_val + ) dataframe_data = { **values_seasonal, 'values': values, 'noise': 1 + scaled_noise_term, - 'dates': dates + 'dates': dates, } return dataframe_data diff --git a/src/synthetic_generation/main.py b/src/synthetic_generation/main.py index df34c78..ee35e21 100644 --- a/src/synthetic_generation/main.py +++ b/src/synthetic_generation/main.py @@ -2,17 +2,17 @@ Module to generate synthetic dataset for pre training a time series forecasting model """ -import yaml import argparse -import pandas as pd + +import yaml +from config_variables import Config from tf_generate_series import ( - save_tf_records, - tf_generate_n, convert_tf_to_rows, - load_tf_dataset, generate_product_input, + load_tf_dataset, + save_tf_records, + tf_generate_n, ) -from config_variables import Config def save_tf_dataset(prefix: str, version: str, options: dict, num_series: int = 10_000): @@ -20,10 +20,10 @@ def save_tf_dataset(prefix: str, version: str, options: dict, num_series: int = Generate dataset and save as tf records """ for freq, freq_index in 
Config.freq_and_index: - print("Frequency: " + freq) + print('Frequency: ' + freq) save_tf_records( prefix, - f"{version}/{freq}.tfrecords", + f'{version}/{freq}.tfrecords', tf_generate_n( N=num_series, freq_index=freq_index, @@ -38,32 +38,35 @@ def generate_product_input_dataset(prefix, version): Load dataset from tf records and save as avro files """ for freq in Config.frequency_names: - print("Frequency: " + freq) + print('Frequency: ' + freq) generate_product_input( prefix, - f"{version}/{freq}.avro", + f'{version}/{freq}.avro', convert_tf_to_rows( - load_tf_dataset(prefix, f"{version}/{freq}.tfrecords").as_numpy_iterator() + load_tf_dataset( + prefix, f'{version}/{freq}.tfrecords' + ).as_numpy_iterator() ), ) def main(): parser = argparse.ArgumentParser() - parser.add_argument("-c", "--config", required=True, help="Path to config file") + parser.add_argument('-c', '--config', required=True, help='Path to config file') args = parser.parse_args() with open(args.config) as config_file: config = yaml.load(config_file, yaml.loader.SafeLoader) - Config.set_freq_variables(config["sub_day"]) - if "transition" in config: - Config.set_transition(config["transition"]) - + Config.set_freq_variables(config['sub_day']) + if 'transition' in config: + Config.set_transition(config['transition']) - save_tf_dataset(config["prefix"], config["version"], config["options"], config["num_series"]) - generate_product_input_dataset(config["prefix"], config["version"]) + save_tf_dataset( + config['prefix'], config['version'], config['options'], config['num_series'] + ) + generate_product_input_dataset(config['prefix'], config['version']) -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/src/synthetic_generation/series_config.py b/src/synthetic_generation/series_config.py index 09c0d06..33b2691 100644 --- a/src/synthetic_generation/series_config.py +++ b/src/synthetic_generation/series_config.py @@ -1,9 +1,10 @@ """ Module containing dataclasses for synthetic data generator """ -import numpy as np from dataclasses import dataclass +import numpy as np + @dataclass class ComponentScale: @@ -17,6 +18,7 @@ class ComponentScale: h: np.ndarray = None minute: np.ndarray = None + @dataclass class ComponentNoise: # shape parameter for the weibull distribution @@ -28,6 +30,7 @@ class ComponentNoise: # no noise can be represented by scale = 0 scale: float + @dataclass class SeriesConfig: scale: ComponentScale @@ -35,4 +38,4 @@ class SeriesConfig: noise_config: ComponentNoise def __str__(self): - return f"L{1000*self.scale.linear:+02.0f}E{10000*(self.scale.exp - 1):+02.0f}A{100*self.scale.a:02.0f}M{100*self.scale.m:02.0f}W{100*self.scale.w:02.0f}" + return f'L{1000*self.scale.linear:+02.0f}E{10000*(self.scale.exp - 1):+02.0f}A{100*self.scale.a:02.0f}M{100*self.scale.m:02.0f}W{100*self.scale.w:02.0f}' diff --git a/src/synthetic_generation/tf_generate_series.py b/src/synthetic_generation/tf_generate_series.py index b66e1aa..511847d 100644 --- a/src/synthetic_generation/tf_generate_series.py +++ b/src/synthetic_generation/tf_generate_series.py @@ -2,17 +2,17 @@ Module to convert process synthetic series using tensorflow """ +from datetime import date +from tempfile import NamedTemporaryFile + import fastavro -import tensorflow_io import numpy as np import pandas as pd import tensorflow as tf -from datetime import date -from tempfile import NamedTemporaryFile -from reainternal.cloud import CloudLocation +from constants import CONTEXT_LENGTH from generate_series import generate -from constants import * 
-from series_config import * +from reainternal.cloud import CloudLocation +from series_config import PRODUCT_SCHEMA, TF_SCHEMA def tf_generate_n( @@ -28,9 +28,9 @@ def tf_generate_n( for i in range(N): if i % 1000 == 0: - print(f"Completed: {i}") + print(f'Completed: {i}') - if i < N * options.get("linear_random_walk_frac", 0): + if i < N * options.get('linear_random_walk_frac', 0): cfg, sample = generate( size, freq_index=freq_index, @@ -43,7 +43,7 @@ def tf_generate_n( size, freq_index=freq_index, start=start, options=options ) # cfg is the name of the time series - # sample is a pandas dataframe where + # sample is a pandas dataframe where # the index is the datetime object # columns `series_value` and `noise` @@ -51,16 +51,16 @@ def tf_generate_n( yield tf.train.Example( features=tf.train.Features( feature={ - "id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[id_])), - "ts": tf.train.Feature( + 'id': tf.train.Feature(bytes_list=tf.train.BytesList(value=[id_])), + 'ts': tf.train.Feature( int64_list=tf.train.Int64List( value=sample.index.astype(np.int64) ) ), - "y": tf.train.Feature( + 'y': tf.train.Feature( float_list=tf.train.FloatList(value=sample.series_values.values) ), - "noise": tf.train.Feature( + 'noise': tf.train.Feature( float_list=tf.train.FloatList(value=sample.noise.values) ), } @@ -74,7 +74,7 @@ def save_tf_records(prefix: str, dest: str, it): """ with NamedTemporaryFile() as tfile: with tf.io.TFRecordWriter( - tfile.name, options=tf.io.TFRecordOptions(compression_type="GZIP") + tfile.name, options=tf.io.TFRecordOptions(compression_type='GZIP') ) as writer: for record in it: writer.write(record.SerializeToString()) @@ -87,27 +87,25 @@ def decode_fn(record_bytes): def load_tf_dataset(prefix: str, src: str): - return tf.data.TFRecordDataset(prefix + src, compression_type="GZIP").map( - decode_fn - ) + return tf.data.TFRecordDataset(prefix + src, compression_type='GZIP').map(decode_fn) def convert_tf_to_rows(records): for i, r in enumerate(records): if i % 1000 == 0: - print(f"Completed: {i}") - id_ = r["id"].decode() + print(f'Completed: {i}') + id_ = r['id'].decode() for ts, y, noise in zip( - (date.fromtimestamp(v / 1_000_000_000) for v in r["ts"]), - (float(v) for v in r["y"]), - (float(_noise) for _noise in r["noise"]) + (date.fromtimestamp(v / 1_000_000_000) for v in r['ts']), + (float(v) for v in r['y']), + (float(_noise) for _noise in r['noise']), ): - yield {"id": id_, "ts": ts, "y": y, "noise": noise} + yield {'id': id_, 'ts': ts, 'y': y, 'noise': noise} def generate_product_input(prefix: str, dest: str, it): """ Write generated dataset into avro files """ - with CloudLocation(prefix + dest).open(mode="wb") as file: - fastavro.writer(file, PRODUCT_SCHEMA, it, codec="deflate") + with CloudLocation(prefix + dest).open(mode='wb') as file: + fastavro.writer(file, PRODUCT_SCHEMA, it, codec='deflate') diff --git a/src/synthetic_generation/utils.py b/src/synthetic_generation/utils.py index 1cf00b2..d14fc27 100644 --- a/src/synthetic_generation/utils.py +++ b/src/synthetic_generation/utils.py @@ -66,5 +66,5 @@ def get_transition_coefficients(context_length): m = (a + b) / 2 k = 1 / (a - m) * np.log(f_a / (1 - f_a)) - coeff = 1 / (1 + np.exp(-k * (np.arange(1, context_length+1) - m))) + coeff = 1 / (1 + np.exp(-k * (np.arange(1, context_length + 1) - m))) return coeff diff --git a/src/training/config_variables.py b/src/training/config_variables.py index 0140cd2..36841e0 100644 --- a/src/training/config_variables.py +++ b/src/training/config_variables.py @@ -2,6 
+2,7 @@ Module containing configuration setting for the script """ + class Config: is_sub_day = False diff --git a/src/training/constants.py b/src/training/constants.py index e535604..6f7f730 100644 --- a/src/training/constants.py +++ b/src/training/constants.py @@ -21,14 +21,14 @@ CONTEXT_LENGTH = 500 TF_SCHEMA = { - "id": tf.io.FixedLenFeature([], dtype=tf.string), - "ts": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.int64), - "y": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32), - "noise": tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32) + 'id': tf.io.FixedLenFeature([], dtype=tf.string), + 'ts': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.int64), + 'y': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32), + 'noise': tf.io.FixedLenFeature([CONTEXT_LENGTH], dtype=tf.float32), } # constant to reference where the academic_comparison and metalearning folders are # will not be needed for training without validating on these datasets ACADEMIC_HOME = '/home/ubuntu/ForecastPFN/academic_comparison/' -METALEARNED_HOME = ACADEMIC_HOME + 'metalearned/' \ No newline at end of file +METALEARNED_HOME = ACADEMIC_HOME + 'metalearned/' diff --git a/src/training/create_train_test_df.py b/src/training/create_train_test_df.py index 52a53c0..79dac75 100644 --- a/src/training/create_train_test_df.py +++ b/src/training/create_train_test_df.py @@ -2,10 +2,16 @@ Module to create train and test dfs """ import tensorflow as tf -import tensorflow_io -from prepare_dataset import gen_random_single_point, gen_mean_to_random_date, \ - gen_std_to_random_date, filter_unusable_points, build_frames, gen_random_single_point_no_noise, \ - gen_mean_to_random_date_no_noise, gen_std_to_random_date_no_noise +from prepare_dataset import ( + build_frames, + filter_unusable_points, + gen_mean_to_random_date, + gen_mean_to_random_date_no_noise, + gen_random_single_point, + gen_random_single_point_no_noise, + gen_std_to_random_date, + gen_std_to_random_date_no_noise, +) def remove_noise(x, y): @@ -20,10 +26,12 @@ def remove_noise(x, y): 'ts': x['ts'], 'history': x['history'], 'target_ts': x['target_ts'], - 'task': x['task'] - }, y + 'task': x['task'], + }, + y, ) + def create_train_test_df(combined_ds, test_noise=False): """ Method to create a train/test split from the combined_ds @@ -36,20 +44,24 @@ def create_train_test_df(combined_ds, test_noise=False): task_map = { 'point': gen_random_single_point, 'mean': gen_mean_to_random_date, - 'stdev': gen_std_to_random_date + 'stdev': gen_std_to_random_date, } train_tasks_dfs = [ base_train_df.map(func, num_parallel_calls=tf.data.AUTOTUNE) for func in task_map.values() ] - train_df = tf.data.Dataset.choose_from_datasets( - train_tasks_dfs, tf.data.Dataset.range(len(train_tasks_dfs)).repeat() - ).unbatch().filter(filter_unusable_points) + train_df = ( + tf.data.Dataset.choose_from_datasets( + train_tasks_dfs, tf.data.Dataset.range(len(train_tasks_dfs)).repeat() + ) + .unbatch() + .filter(filter_unusable_points) + ) task_map_test = { 'point': gen_random_single_point_no_noise, 'mean': gen_mean_to_random_date_no_noise, - 'stdev': gen_std_to_random_date_no_noise + 'stdev': gen_std_to_random_date_no_noise, } if test_noise: @@ -63,9 +75,13 @@ def create_train_test_df(combined_ds, test_noise=False): for func in task_map_test.values() ] - test_df = tf.data.Dataset.choose_from_datasets( - test_tasks_dfs, tf.data.Dataset.range(len(test_tasks_dfs)).repeat() - ).unbatch().filter(filter_unusable_points) + test_df = ( + tf.data.Dataset.choose_from_datasets( + 
test_tasks_dfs, tf.data.Dataset.range(len(test_tasks_dfs)).repeat() + ) + .unbatch() + .filter(filter_unusable_points) + ) # remove noise and target_noise from train and test df as they are now useless # train_df = train_df.map(remove_noise) diff --git a/src/training/metalearned_validation.py b/src/training/metalearned_validation.py index 3c97e24..164027f 100644 --- a/src/training/metalearned_validation.py +++ b/src/training/metalearned_validation.py @@ -1,62 +1,69 @@ +import datetime import sys -ACADEMIC_HOME = '/home/ubuntu/notebooks/ForecastPFN/academic_comparison/' -METALEARNED_HOME = ACADEMIC_HOME + 'metalearned/' -sys.path.append(ACADEMIC_HOME) -sys.path.append(METALEARNED_HOME) -import datetime import numpy as np import pandas as pd import tensorflow as tf -from data_provider.UnivariateTimeseriesSampler_WithStamps import UnivariateTimeseriesSampler_WithStamps -from resources.tourism.dataset import TourismDataset, TourismMeta +from data_provider.UnivariateTimeseriesSampler_WithStamps import ( + UnivariateTimeseriesSampler_WithStamps, +) from resources.m3.dataset import M3Dataset, M3Meta +from resources.tourism.dataset import TourismDataset, TourismMeta + +ACADEMIC_HOME = '/home/ubuntu/notebooks/ForecastPFN/academic_comparison/' +METALEARNED_HOME = ACADEMIC_HOME + 'metalearned/' +sys.path.append(ACADEMIC_HOME) +sys.path.append(METALEARNED_HOME) + def _ForecastPFN_time_features(ts: np.ndarray): if type(ts[0]) == datetime.datetime: year = [x.year for x in ts] month = [x.month for x in ts] day = [x.day for x in ts] - day_of_week = [x.weekday()+1 for x in ts] + day_of_week = [x.weekday() + 1 for x in ts] day_of_year = [x.timetuple().tm_yday for x in ts] return np.stack([year, month, day, day_of_week, day_of_year], axis=-1) ts = pd.to_datetime(ts) - return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1) - + return np.stack( + [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1 + ) -def prepare_metalearned_test(metaleanredDataset, metalearnedMeta, sp, p_input_size, p_horizon) -> tf.data.Dataset: +def prepare_metalearned_test( + metaleanredDataset, metalearnedMeta, sp, p_input_size, p_horizon +) -> tf.data.Dataset: target_input, target_output = metaleanredDataset( - METALEARNED_HOME+metalearnedMeta.dataset_path).standard_split() + METALEARNED_HOME + metalearnedMeta.dataset_path + ).standard_split() in_bundle, out_bundle, sp = target_input, target_output, sp - in_bundle = in_bundle.filter( - lambda ts: ts.meta['seasonal_pattern'] == sp) - out_bundle = out_bundle.filter( - lambda ts: ts.meta['seasonal_pattern'] == sp) - + in_bundle = in_bundle.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) + out_bundle = out_bundle.filter(lambda ts: ts.meta['seasonal_pattern'] == sp) input_set = in_bundle.values() input_timestamps = in_bundle.time_stamps() - input_set = UnivariateTimeseriesSampler_WithStamps(timeseries=input_set, - time_stamps=input_timestamps, - insample_size=p_input_size, - outsample_size=0, - window_sampling_limit=1, - batch_size=1, - time_features=_ForecastPFN_time_features, - ) + input_set = UnivariateTimeseriesSampler_WithStamps( + timeseries=input_set, + time_stamps=input_timestamps, + insample_size=p_input_size, + outsample_size=0, + window_sampling_limit=1, + batch_size=1, + time_features=_ForecastPFN_time_features, + ) p_x, p_x_mask, p_x_timestamps = input_set.sequential_latest_insamples() output_set = out_bundle.values() output_timestamps = out_bundle.time_stamps() - output_set = 
UnivariateTimeseriesSampler_WithStamps(timeseries=output_set, - time_stamps=output_timestamps, - insample_size=p_horizon, - outsample_size=0, - window_sampling_limit=1, - batch_size=1, - time_features=_ForecastPFN_time_features, - ) + output_set = UnivariateTimeseriesSampler_WithStamps( + timeseries=output_set, + time_stamps=output_timestamps, + insample_size=p_horizon, + outsample_size=0, + window_sampling_limit=1, + batch_size=1, + time_features=_ForecastPFN_time_features, + ) p_y, p_y_mask, p_y_timestamps = output_set.sequential_latest_insamples() x, x_mark, y, y_mark = p_x, p_x_timestamps, p_y, p_y_timestamps @@ -67,9 +74,11 @@ def prepare_metalearned_test(metaleanredDataset, metalearnedMeta, sp, p_input_si task = [] y_out = [] for x, y, x_mark, y_mark in zip(p_x, p_y, p_x_timestamps, p_y_timestamps): - for yi, yi_mark in zip(y,y_mark): + for yi, yi_mark in zip(y, y_mark): if sum(yi_mark): - ts.append(np.append(np.zeros((100 - x_mark.shape[0],5)), x_mark, axis=0)) + ts.append( + np.append(np.zeros((100 - x_mark.shape[0], 5)), x_mark, axis=0) + ) history.append(np.append(np.zeros(100 - x_mark.shape[0]), x)) target_ts.append(np.array([yi_mark])) task.append(1) @@ -77,34 +86,30 @@ def prepare_metalearned_test(metaleanredDataset, metalearnedMeta, sp, p_input_si ts = tf.convert_to_tensor(np.array(ts), dtype=np.int64, name='ts') history = tf.convert_to_tensor(np.array(history), dtype=np.float32, name='history') - target_ts = tf.convert_to_tensor(np.array(target_ts), dtype=np.int64, name='target_ts') + target_ts = tf.convert_to_tensor( + np.array(target_ts), dtype=np.int64, name='target_ts' + ) task = tf.convert_to_tensor(np.array(task), dtype=np.int64, name='task') y = tf.convert_to_tensor(np.array(y_out), dtype=np.float32) - ds = { - 'ts': ts, - 'history': history, - 'target_ts': target_ts, - 'task': task - }, y + ds = {'ts': ts, 'history': history, 'target_ts': target_ts, 'task': task}, y return tf.data.Dataset.from_tensor_slices(ds) # Tourism tourism_yearly_test_df = prepare_metalearned_test( - TourismDataset, TourismMeta, 'Yearly', 8, 4) + TourismDataset, TourismMeta, 'Yearly', 8, 4 +) tourism_quarterly_test_df = prepare_metalearned_test( - TourismDataset, TourismMeta, 'Quarterly', 16, 8) + TourismDataset, TourismMeta, 'Quarterly', 16, 8 +) tourism_monthly_test_df = prepare_metalearned_test( - TourismDataset, TourismMeta, 'Monthly', 48, 24) + TourismDataset, TourismMeta, 'Monthly', 48, 24 +) # M3 -m3_yearly_test_df = prepare_metalearned_test( - M3Dataset, M3Meta, 'M3Year', 12, 6) -m3_quarterly_test_df = prepare_metalearned_test( - M3Dataset, M3Meta, 'M3Quart', 16, 8) -m3_monthly_test_df = prepare_metalearned_test( - M3Dataset, M3Meta, 'M3Month', 36, 18) -m3_others_test_df = prepare_metalearned_test( - M3Dataset, M3Meta, 'M3Other', 16, 8) +m3_yearly_test_df = prepare_metalearned_test(M3Dataset, M3Meta, 'M3Year', 12, 6) +m3_quarterly_test_df = prepare_metalearned_test(M3Dataset, M3Meta, 'M3Quart', 16, 8) +m3_monthly_test_df = prepare_metalearned_test(M3Dataset, M3Meta, 'M3Month', 36, 18) +m3_others_test_df = prepare_metalearned_test(M3Dataset, M3Meta, 'M3Other', 16, 8) diff --git a/src/training/models.py b/src/training/models.py index d45bc3e..4727df1 100644 --- a/src/training/models.py +++ b/src/training/models.py @@ -1,10 +1,11 @@ from typing import Dict + import tensorflow as tf -import tensorflow_io -from tensorflow.keras import layers, Model, Input -from constants import * +from constants import DAY, DOW, MONTH, NUM_TASKS, YEAR from prepare_dataset import position_encoding -from 
scalers import robust_scaler, max_scaling +from scalers import max_scaling, robust_scaler +from tensorflow.keras import layers + class CustomScaling(layers.Layer): def __init__(self, name): @@ -14,10 +15,10 @@ def __init__(self, name): elif name == 'robust': self.scaler = robust_scaler - def call(self, history_channels, epsilon): return self.scaler(history_channels, epsilon) + class PositionExpansion(layers.Layer): def __init__(self, periods: int, freqs: int, **kwargs): super().__init__(**kwargs) @@ -32,14 +33,23 @@ def call(self, tc): out_shape = tf.shape(tc) return tf.reshape(embedded, [out_shape[0], out_shape[1], self.channels]) + class TransformerBlock(layers.Layer): def __init__(self, key_dim, heads=4, value_dim=None, residual=False, **kwargs): super().__init__(**kwargs) self.attention = layers.MultiHeadAttention( - num_heads=heads, key_dim=key_dim, value_dim=value_dim, name=f'{self.name}_attention') + num_heads=heads, + key_dim=key_dim, + value_dim=value_dim, + name=f'{self.name}_attention', + ) value_dim = value_dim or key_dim - self.ff1 = layers.Dense(4 * heads * value_dim, activation='gelu', name=f'{self.name}_ff1') - self.ff2 = layers.Dense(heads * value_dim, activation='gelu', name=f'{self.name}_ff2') + self.ff1 = layers.Dense( + 4 * heads * value_dim, activation='gelu', name=f'{self.name}_ff1' + ) + self.ff2 = layers.Dense( + heads * value_dim, activation='gelu', name=f'{self.name}_ff2' + ) self.residual = residual if self.residual: self.attn_norm = layers.LayerNormalization(name=f'{self.name}_attn_norm') @@ -52,8 +62,9 @@ def call(self, x, mask): a = self.attention(x, x, attention_mask=mask) a = self.ff1(a) return self.ff2(a) - #na = self.attn_norm(a + x) - #return self.ff_norm(self.ff(na) + na) + # na = self.attn_norm(a + x) + # return self.ff_norm(self.ff(na) + na) + class BaseModel(tf.keras.Model): def __init__(self, epsilon=1e-4, scaler='robust', **kwargs): @@ -64,9 +75,16 @@ def __init__(self, epsilon=1e-4, scaler='robust', **kwargs): self.pos_day = PositionExpansion(31, 6) self.pos_dow = PositionExpansion(7, 4) self.robust_scaler = CustomScaling(scaler) - self.embed_size = sum(emb.channels for emb in (self.pos_year, self.pos_month, self.pos_day, self.pos_dow)) - self.expand_target_nopos = layers.Dense(self.embed_size, name='NoPosEnc', activation='relu') - self.expand_target_forpos = layers.Dense(self.embed_size, name='ForPosEnc', activation='relu') + self.embed_size = sum( + emb.channels + for emb in (self.pos_year, self.pos_month, self.pos_day, self.pos_dow) + ) + self.expand_target_nopos = layers.Dense( + self.embed_size, name='NoPosEnc', activation='relu' + ) + self.expand_target_forpos = layers.Dense( + self.embed_size, name='ForPosEnc', activation='relu' + ) self.concat_pos = layers.Concatenate(axis=-1, name='ConcatPos') self.concat_embed = layers.Concatenate(axis=-1, name='ConcatEmbed') # Will be an embedding when we have different tasks. 
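For context on the `PositionExpansion` layers configured in `BaseModel.__init__` above: `models.py` imports `position_encoding` from `prepare_dataset.py` (its definition appears later in this diff), which builds a fixed sine/cosine lookup table with `2 * freqs` channels per position. How exactly `PositionExpansion.__init__` consumes that table is not visible in these hunks, so the snippet below is only an illustrative sketch that reproduces the helper in plain NumPy and shows the table shape for the day-of-month embedding `PositionExpansion(31, 6)`.

```python
import numpy as np


def position_encoding(periods: int, freqs: int) -> np.ndarray:
    # Same formula as the helper in src/training/prepare_dataset.py:
    # row i holds sin/cos features at frequencies 2**j for j < freqs.
    return np.hstack(
        [
            np.fromfunction(
                lambda i, j: np.sin(np.pi / periods * (2**j) * (i - 1)),
                (periods + 1, freqs),
            ),
            np.fromfunction(
                lambda i, j: np.cos(np.pi / periods * (2**j) * (i - 1)),
                (periods + 1, freqs),
            ),
        ]
    )


# Day-of-month embedding as configured in BaseModel: 31 periods, 6 frequencies.
day_table = position_encoding(31, 6)
print(day_table.shape)  # (32, 12) -> 12 channels per day value
```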
@@ -83,32 +101,40 @@ def call(self, x: Dict[str, tf.Tensor]): # Build position encodings year = self.tc(ts, YEAR) delta_year = tf.clip_by_value(year[:, -1:] - year, 0, self.pos_year.periods) - pos_embedding = self.concat_pos([ - self.pos_year(delta_year), - self.pos_month(self.tc(ts, MONTH)), - self.pos_day(self.tc(ts, DAY)), - self.pos_dow(self.tc(ts, DOW)), - ]) + pos_embedding = self.concat_pos( + [ + self.pos_year(delta_year), + self.pos_month(self.tc(ts, MONTH)), + self.pos_day(self.tc(ts, DAY)), + self.pos_dow(self.tc(ts, DOW)), + ] + ) mask = year > 0 # Embed history history_channels = tf.expand_dims(history, axis=-1) -# scale = self.max_scaling(history_channels) + self.epsilon -# scaled = history_channels / scale + # scale = self.max_scaling(history_channels) + self.epsilon + # scaled = history_channels / scale scale, scaled = self.robust_scaler(history_channels, self.epsilon) embed_nopos = self.expand_target_nopos(scaled) embed_pos = self.expand_target_forpos(scaled) + pos_embedding embedded = self.concat_embed([embed_nopos, embed_pos]) - # Embed target - target_year = tf.clip_by_value(year[:, -1:] - self.tc(target_ts, YEAR), 0, self.pos_year.periods) - target_pos_embed = tf.squeeze(self.concat_pos([ - self.pos_year(target_year), - self.pos_month(self.tc(target_ts, MONTH)), - self.pos_day(self.tc(target_ts, DAY)), - self.pos_dow(self.tc(target_ts, DOW)), - ]), axis=1) + target_year = tf.clip_by_value( + year[:, -1:] - self.tc(target_ts, YEAR), 0, self.pos_year.periods + ) + target_pos_embed = tf.squeeze( + self.concat_pos( + [ + self.pos_year(target_year), + self.pos_month(self.tc(target_ts, MONTH)), + self.pos_day(self.tc(target_ts, DAY)), + self.pos_dow(self.tc(target_ts, DOW)), + ] + ), + axis=1, + ) task_embed = self.target_marker(task) target = self.concat_embed([task_embed, task_embed + target_pos_embed]) @@ -120,11 +146,21 @@ def call(self, x: Dict[str, tf.Tensor]): def compute_loss(self, x=None, y=None, y_pred=None, sample_weight=None): # return super().compute_loss(x, y, y_pred['result'], sample_weight) scale = y_pred['scale'] - return super().compute_loss(x, y / scale, y_pred['result'] / scale, sample_weight) - - def forecast(self, ts: tf.Tensor, mask: tf.Tensor, scale: tf.Tensor, embedded: tf.Tensor, target: tf.Tensor): + return super().compute_loss( + x, y / scale, y_pred['result'] / scale, sample_weight + ) + + def forecast( + self, + ts: tf.Tensor, + mask: tf.Tensor, + scale: tf.Tensor, + embedded: tf.Tensor, + target: tf.Tensor, + ): return NotImplemented + class LSTMModel(BaseModel): def __init__(self, unit=30, **kwargs): super().__init__(**kwargs) @@ -133,7 +169,14 @@ def __init__(self, unit=30, **kwargs): self.combine_target = layers.Concatenate(name='AppendTarget', axis=-1) self.cont_output = layers.Dense(1, name='Output', activation='relu') - def forecast(self, ts: tf.Tensor, mask: tf.Tensor, scale: tf.Tensor, embedded: tf.Tensor, target: tf.Tensor): + def forecast( + self, + ts: tf.Tensor, + mask: tf.Tensor, + scale: tf.Tensor, + embedded: tf.Tensor, + target: tf.Tensor, + ): lstm_out = self.lstm(embedded, mask=mask) with_target = self.combine_target([lstm_out, target]) return self.cont_output(with_target) @@ -160,13 +203,17 @@ def __init__(self, tx_layers=2, **kwargs): # self.encoder2 = TransformerBlock(key_dim=(self.embed_size * 2)) # self.final_output = layers.Dense(1, name='FinalOutput', activation='relu') - def forecast(self, ts: tf.Tensor, mask: tf.Tensor, scale: tf.Tensor, embedded: tf.Tensor, target: tf.Tensor): + def forecast( + self, + ts: tf.Tensor, 
+ mask: tf.Tensor, + scale: tf.Tensor, + embedded: tf.Tensor, + target: tf.Tensor, + ): mask = tf.pad(mask, [[0, 0], [0, 1]], constant_values=True) mask = tf.math.logical_and(tf.expand_dims(mask, 1), tf.expand_dims(mask, -1)) - x = self.concat_target([ - embedded, - tf.expand_dims(target, axis=1) - ]) + x = self.concat_target([embedded, tf.expand_dims(target, axis=1)]) x = self.encoder1(x, mask) x = self.encoder2(x, mask) # x = self.encoder3(x, mask) diff --git a/src/training/noise_ablation.py b/src/training/noise_ablation.py index b8692e3..eeb82fd 100644 --- a/src/training/noise_ablation.py +++ b/src/training/noise_ablation.py @@ -2,34 +2,43 @@ Module to train the model """ -from keras import backend -import yaml -import datetime import argparse -import tensorflow as tf -from tensorflow import keras +import datetime + import numpy as np -import tensorflow_io -from utils import load_tf_dataset -from models import TransformerModel -from create_train_test_df import create_train_test_df +import tensorflow as tf +import yaml from config_variables import Config +from create_train_test_df import create_train_test_df +from keras import backend +from metalearned_validation import ( + m3_monthly_test_df, + m3_others_test_df, + m3_quarterly_test_df, + m3_yearly_test_df, + tourism_monthly_test_df, + tourism_quarterly_test_df, + tourism_yearly_test_df, +) +from models import TransformerModel +from train import AdditionalValidationSets +from utils import load_tf_dataset def get_combined_ds(config): - version = config["version"] + version = config['version'] # all the datasets we have. Ideally we use only 3 of these for trainig # adjust the values in this list accordingly datasets = [ # load_tf_dataset(config["prefix"] + f"{version}/minute.tfrecords"), # load_tf_dataset(config["prefix"] + f"{version}/hourly.tfrecords"), - load_tf_dataset(config["prefix"] + f"{version}/daily.tfrecords"), - load_tf_dataset(config["prefix"] + f"{version}/weekly.tfrecords"), - load_tf_dataset(config["prefix"] + f"{version}/monthly.tfrecords"), + load_tf_dataset(config['prefix'] + f'{version}/daily.tfrecords'), + load_tf_dataset(config['prefix'] + f'{version}/weekly.tfrecords'), + load_tf_dataset(config['prefix'] + f'{version}/monthly.tfrecords'), ] - # # ucomment these lines to use the real world datasets in training + # # uncomment these lines to use the real world datasets in training # tourism_ds = load_tf_dataset(config['prefix'] + 'tourism.tfrecords') # wikiweb_ds = load_tf_dataset(config['prefix'] + 'wikiweb.tfrecords') @@ -45,24 +54,21 @@ def main(): np.random.seed(42) parser = argparse.ArgumentParser() - parser.add_argument("-c", "--config", required=True, help="Path to config file") + parser.add_argument('-c', '--config', required=True, help='Path to config file') args = parser.parse_args() with open(args.config) as config_file: config = yaml.load(config_file, yaml.loader.SafeLoader) - Config.set_sub_day(config["sub_day"]) + Config.set_sub_day(config['sub_day']) combined_ds = get_combined_ds(config) - train_df, test_df = create_train_test_df(combined_ds, config["test_noise"]) - - + train_df, test_df = create_train_test_df(combined_ds, config['test_noise']) model = TransformerModel(scaler=config['scaler']) - def smape(y_true, y_pred): - """ Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`. + """Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`. `loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred), axis=-1)` Args: y_true: Ground truth values. 
shape = `[batch_size, d0, .. dN]`. @@ -84,55 +90,58 @@ def smape(y_true, y_pred): # need these two lines, else fit gives error batch_X, batch_y = next(iter(train_df.batch(2).take(1))) - pred_y = model(batch_X) - + model(batch_X) model.compile( optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss=tf.keras.losses.MeanSquaredError(), - metrics=[tf.keras.metrics.MeanAbsolutePercentageError(name='mape'), - tf.keras.metrics.MeanSquaredError(name='mse'), - smape, - ] + metrics=[ + tf.keras.metrics.MeanAbsolutePercentageError(name='mape'), + tf.keras.metrics.MeanSquaredError(name='mse'), + smape, + ], ) + fit_id = '.'.join( + [config['model_save_name'], datetime.datetime.now().strftime('%Y%m%d-%H%M%S')] + ) - fit_id = '.'.join([config["model_save_name"], - datetime.datetime.now().strftime("%Y%m%d-%H%M%S")]) - - logdir = f"/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}" + logdir = f'/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}' tbCallback = tf.keras.callbacks.TensorBoard(logdir) - tbCallback._val_dir = logdir+'/validation' + tbCallback._val_dir = logdir + '/validation' callbacks = tf.keras.callbacks.CallbackList( callbacks=[ tf.keras.callbacks.ModelCheckpoint( - config["prefix"] + f"models/{fit_id}/ckpts", monitor="loss", verbose=1 + config['prefix'] + f'models/{fit_id}/ckpts', monitor='loss', verbose=1 ), tf.keras.callbacks.TensorBoard( - f"/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}" + f'/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}' ), # tf.keras.callbacks.LearningRateScheduler( # lambda epoch, lr: min(0.001, lr * (epoch + 1)) # ) - AdditionalValidationSets([(tourism_yearly_test_df, 'tourism_yearly'), - (tourism_quarterly_test_df,'tourism_quarterly'), - (tourism_monthly_test_df,'tourism_monthly'), - (m3_yearly_test_df, 'm3_yearly'), - (m3_quarterly_test_df, 'm3_quarterly'), - (m3_monthly_test_df, 'm3_monthly'), - (m3_others_test_df, 'm3_others'), - ], - tbCallback) + AdditionalValidationSets( + [ + (tourism_yearly_test_df, 'tourism_yearly'), + (tourism_quarterly_test_df, 'tourism_quarterly'), + (tourism_monthly_test_df, 'tourism_monthly'), + (m3_yearly_test_df, 'm3_yearly'), + (m3_quarterly_test_df, 'm3_quarterly'), + (m3_monthly_test_df, 'm3_monthly'), + (m3_others_test_df, 'm3_others'), + ], + tbCallback, + ), ], add_history=True, add_progbar=True, model=model, ) - model.fit( - train_df.shuffle(5_000_000, reshuffle_each_iteration=True).batch( - 1024).prefetch(tf.data.AUTOTUNE), + train_df.shuffle(5_000_000, reshuffle_each_iteration=True) + .batch(1024) + .prefetch(tf.data.AUTOTUNE), # train_df.take(1000_000).cache().shuffle(100_000).batch(1024).prefetch(tf.data.AUTOTUNE), validation_data=test_df.batch(1024, drop_remainder=False).cache(), epochs=700, @@ -140,8 +149,8 @@ def smape(y_true, y_pred): callbacks=callbacks, ) - model.save(config["prefix"] + 'models/'+ config["model_save_name"]) + model.save(config['prefix'] + 'models/' + config['model_save_name']) -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/src/training/prepare_dataset.py b/src/training/prepare_dataset.py index 4d5aeba..d0fc867 100644 --- a/src/training/prepare_dataset.py +++ b/src/training/prepare_dataset.py @@ -4,13 +4,21 @@ """ from typing import Dict -import pandas as pd + import numpy as np +import pandas as pd import tensorflow as tf -import tensorflow_io from config_variables import Config -from constants import PADDING, HISTORY_LEN, TARGET_LEN, TRIM_LEN, TARGET_INDEX, \ - SINGLE_POINT, MEAN_TO_DATE, STDEV_TO_DATE +from constants import ( + HISTORY_LEN, 
+ MEAN_TO_DATE, + PADDING, + SINGLE_POINT, + STDEV_TO_DATE, + TARGET_INDEX, + TARGET_LEN, + TRIM_LEN, +) def compute_time_features(ts: np.ndarray): @@ -21,21 +29,22 @@ def compute_time_features(ts: np.ndarray): """ ts = pd.to_datetime(ts) if Config.is_sub_day: - return np.stack([ts.minute, ts.hour, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1) - return np.stack([ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1) - + return np.stack( + [ts.minute, ts.hour, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1 + ) + return np.stack( + [ts.year, ts.month, ts.day, ts.day_of_week + 1, ts.day_of_year], axis=-1 + ) @tf.function def build_frames(r: Dict[str, tf.Tensor]): - raw_date_info = tf.numpy_function(compute_time_features, inp=[r['ts']], Tout=tf.int64) + raw_date_info = tf.numpy_function( + compute_time_features, inp=[r['ts']], Tout=tf.int64 + ) date_info = tf.signal.frame( - tf.pad(raw_date_info, [[PADDING, 0], [0, 0]]), - HISTORY_LEN, - 1, - axis=0 - ) - + tf.pad(raw_date_info, [[PADDING, 0], [0, 0]]), HISTORY_LEN, 1, axis=0 + ) history = tf.signal.frame(tf.pad(r['y'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1) noise = tf.signal.frame(tf.pad(r['noise'], [[PADDING, 0]]), HISTORY_LEN, 1, axis=-1) @@ -45,180 +54,220 @@ def build_frames(r: Dict[str, tf.Tensor]): target_noise = tf.signal.frame(r['noise'], TARGET_LEN, 1, axis=-1) start_index = target_values.shape[0] - TRIM_LEN - batch_size = start_index - TARGET_LEN + start_index - TARGET_LEN return ( - date_info[-start_index:-TARGET_LEN], - history[-start_index:-TARGET_LEN], - noise[-start_index:-TARGET_LEN], - target_dates[TARGET_INDEX:], - target_values[TARGET_INDEX:], - target_noise[TARGET_INDEX:] - ) + date_info[-start_index:-TARGET_LEN], + history[-start_index:-TARGET_LEN], + noise[-start_index:-TARGET_LEN], + target_dates[TARGET_INDEX:], + target_values[TARGET_INDEX:], + target_noise[TARGET_INDEX:], + ) @tf.function def gen_random_single_point( - date_info: tf.Tensor, - history: tf.Tensor, - noise: tf.Tensor, - target_dates: tf.Tensor, - target_values: tf.Tensor, - target_noise: tf.Tensor - ): - - + date_info: tf.Tensor, + history: tf.Tensor, + noise: tf.Tensor, + target_dates: tf.Tensor, + target_values: tf.Tensor, + target_noise: tf.Tensor, +): # To limit to a single date batch_size = tf.shape(target_dates)[0] - targets = tf.random.uniform(shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32) + targets = tf.random.uniform( + shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32 + ) target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1) target_value = tf.gather(target_values, targets, axis=1, batch_dims=1) return dict( ts=date_info, - history=history*noise, + history=history * noise, noise=noise, target_ts=target_date, - task=tf.fill([batch_size,], SINGLE_POINT), - target_noise=target_noise + task=tf.fill( + [ + batch_size, + ], + SINGLE_POINT, + ), + target_noise=target_noise, ), target_value @tf.function def gen_mean_to_random_date( - date_info: tf.Tensor, - history: tf.Tensor, - noise: tf.Tensor, - target_dates: tf.Tensor, - target_values: tf.Tensor, - target_noise: tf.Tensor - ): + date_info: tf.Tensor, + history: tf.Tensor, + noise: tf.Tensor, + target_dates: tf.Tensor, + target_values: tf.Tensor, + target_noise: tf.Tensor, +): # To limit to a single date batch_size = tf.shape(target_dates)[0] - targets = tf.random.uniform(shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32) + targets = tf.random.uniform( + shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32 + ) target_date 
= tf.gather(target_dates, targets, axis=1, batch_dims=1) target_value = tf.math.reduce_mean( - tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)), - keepdims=True, - axis=-1 - ) + tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)), + keepdims=True, + axis=-1, + ) return dict( ts=date_info, - history=history*noise*.75, + history=history * noise * 0.75, noise=noise, target_ts=target_date, - task=tf.fill([batch_size,], MEAN_TO_DATE), - target_noise=target_noise + task=tf.fill( + [ + batch_size, + ], + MEAN_TO_DATE, + ), + target_noise=target_noise, ), target_value @tf.function def gen_std_to_random_date( - date_info: tf.Tensor, - history: tf.Tensor, - noise: tf.Tensor, - target_dates: tf.Tensor, - target_values: tf.Tensor, - target_noise: tf.Tensor - ): + date_info: tf.Tensor, + history: tf.Tensor, + noise: tf.Tensor, + target_dates: tf.Tensor, + target_values: tf.Tensor, + target_noise: tf.Tensor, +): # To limit to a single date batch_size = tf.shape(target_dates)[0] - targets = tf.random.uniform(shape=[batch_size, 1], minval=(TARGET_LEN // 2), maxval=TARGET_LEN, dtype=tf.int32) + targets = tf.random.uniform( + shape=[batch_size, 1], + minval=(TARGET_LEN // 2), + maxval=TARGET_LEN, + dtype=tf.int32, + ) target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1) target_value = tf.math.reduce_std( - tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)), - keepdims=True, - axis=-1 - ) + tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)), + keepdims=True, + axis=-1, + ) target_noise_std = tf.math.reduce_std( - tf.RaggedTensor.from_tensor(target_noise, lengths=(targets[:, 0] + 1)), - keepdims=True, - axis=-1 - ) + tf.RaggedTensor.from_tensor(target_noise, lengths=(targets[:, 0] + 1)), + keepdims=True, + axis=-1, + ) target_value = tf.math.sqrt(target_value**2 + target_noise_std**2) return dict( ts=date_info, - history=history*noise, + history=history * noise, noise=noise, target_ts=target_date, - task=tf.fill([batch_size,], STDEV_TO_DATE), - target_noise=target_noise + task=tf.fill( + [ + batch_size, + ], + STDEV_TO_DATE, + ), + target_noise=target_noise, ), target_value + @tf.function def gen_random_single_point_no_noise( - date_info: tf.Tensor, - history: tf.Tensor, - noise: tf.Tensor, - target_dates: tf.Tensor, - target_values: tf.Tensor, - target_noise: tf.Tensor - ): - - + date_info: tf.Tensor, + history: tf.Tensor, + noise: tf.Tensor, + target_dates: tf.Tensor, + target_values: tf.Tensor, + target_noise: tf.Tensor, +): # To limit to a single date batch_size = tf.shape(target_dates)[0] - targets = tf.random.uniform(shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32) + targets = tf.random.uniform( + shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32 + ) target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1) target_value = tf.gather(target_values, targets, axis=1, batch_dims=1) return dict( ts=date_info, history=history, target_ts=target_date, - task=tf.fill([batch_size,], SINGLE_POINT), + task=tf.fill( + [ + batch_size, + ], + SINGLE_POINT, + ), ), target_value @tf.function def gen_mean_to_random_date_no_noise( - date_info: tf.Tensor, - history: tf.Tensor, - noise: tf.Tensor, - target_dates: tf.Tensor, - target_values: tf.Tensor, - target_noise: tf.Tensor - ): + date_info: tf.Tensor, + history: tf.Tensor, + noise: tf.Tensor, + target_dates: tf.Tensor, + target_values: tf.Tensor, + target_noise: tf.Tensor, +): # To limit to a single date batch_size = tf.shape(target_dates)[0] - 
targets = tf.random.uniform(shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32) + targets = tf.random.uniform( + shape=[batch_size, 1], maxval=TARGET_LEN, dtype=tf.int32 + ) target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1) target_value = tf.math.reduce_mean( - tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)), - keepdims=True, - axis=-1 - ) + tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)), + keepdims=True, + axis=-1, + ) return dict( ts=date_info, history=history, target_ts=target_date, - task=tf.fill([batch_size,], MEAN_TO_DATE), + task=tf.fill( + [ + batch_size, + ], + MEAN_TO_DATE, + ), ), target_value @tf.function def gen_std_to_random_date_no_noise( - date_info: tf.Tensor, - history: tf.Tensor, - noise: tf.Tensor, - target_dates: tf.Tensor, - target_values: tf.Tensor, - target_noise: tf.Tensor - ): + date_info: tf.Tensor, + history: tf.Tensor, + noise: tf.Tensor, + target_dates: tf.Tensor, + target_values: tf.Tensor, + target_noise: tf.Tensor, +): # To limit to a single date batch_size = tf.shape(target_dates)[0] - targets = tf.random.uniform(shape=[batch_size, 1], minval=(TARGET_LEN // 2), maxval=TARGET_LEN, dtype=tf.int32) + targets = tf.random.uniform( + shape=[batch_size, 1], + minval=(TARGET_LEN // 2), + maxval=TARGET_LEN, + dtype=tf.int32, + ) target_date = tf.gather(target_dates, targets, axis=1, batch_dims=1) target_value = tf.math.reduce_std( - tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)), - keepdims=True, - axis=-1 - ) + tf.RaggedTensor.from_tensor(target_values, lengths=(targets[:, 0] + 1)), + keepdims=True, + axis=-1, + ) target_noise_std = tf.math.reduce_std( - tf.RaggedTensor.from_tensor(target_noise, lengths=(targets[:, 0] + 1)), - keepdims=True, - axis=-1 - ) + tf.RaggedTensor.from_tensor(target_noise, lengths=(targets[:, 0] + 1)), + keepdims=True, + axis=-1, + ) target_value = tf.math.sqrt(target_value**2 + target_noise_std**2) @@ -226,9 +275,15 @@ def gen_std_to_random_date_no_noise( ts=date_info, history=history, target_ts=target_date, - task=tf.fill([batch_size,], STDEV_TO_DATE), + task=tf.fill( + [ + batch_size, + ], + STDEV_TO_DATE, + ), ), target_value + @tf.function def filter_unusable_points(X: Dict[str, tf.Tensor], y: tf.Tensor): """ @@ -236,8 +291,17 @@ def filter_unusable_points(X: Dict[str, tf.Tensor], y: tf.Tensor): """ return tf.logical_and(tf.reduce_max(X['history']) > 0.1, tf.math.is_finite(y))[0] + def position_encoding(periods: int, freqs: int): - return np.hstack([ - np.fromfunction(lambda i, j: np.sin(np.pi / periods * (2**j) * (i-1)), (periods + 1, freqs)), - np.fromfunction(lambda i, j: np.cos(np.pi / periods * (2**j) * (i-1)), (periods + 1, freqs)) - ]) \ No newline at end of file + return np.hstack( + [ + np.fromfunction( + lambda i, j: np.sin(np.pi / periods * (2**j) * (i - 1)), + (periods + 1, freqs), + ), + np.fromfunction( + lambda i, j: np.cos(np.pi / periods * (2**j) * (i - 1)), + (periods + 1, freqs), + ), + ] + ) diff --git a/src/training/scalers.py b/src/training/scalers.py index 5eed7f0..ba5c510 100644 --- a/src/training/scalers.py +++ b/src/training/scalers.py @@ -2,9 +2,8 @@ Module containing different scaler functions """ import tensorflow as tf -import tensorflow_io from tensorflow.keras import layers -import numpy as np + def robust_scaler(inputs, epsilon): # inputs.shape = (batch_size, history_len, 1) @@ -29,14 +28,10 @@ def robust_scaler(inputs, epsilon): # calculate mean and std of clipped data clipped_mean = 
tf.math.reduce_mean( - clipped_and_masked, - axis=1, - keepdims=True + clipped_and_masked, axis=1, keepdims=True ).to_tensor() clipped_std = tf.math.reduce_std( - clipped_and_masked, - axis=1, - keepdims=True + clipped_and_masked, axis=1, keepdims=True ).to_tensor() # scale is of shape (batch_size,1,1) @@ -51,10 +46,8 @@ def robust_scaler(inputs, epsilon): def max_scaling(inputs, epsilon): - scaler = layers.GlobalMaxPooling1D(name='MaxScaling', keepdims=1) scale = scaler(inputs) + epsilon output = inputs / scale return scale, output - diff --git a/src/training/train.py b/src/training/train.py index 3271f6e..eb46e7b 100644 --- a/src/training/train.py +++ b/src/training/train.py @@ -2,30 +2,28 @@ Module to train the model """ -from keras import backend -import yaml -import datetime import argparse -import tensorflow as tf -from tensorflow import keras +import datetime + import numpy as np -import tensorflow_io -from utils import load_tf_dataset -from models import TransformerModel -from create_train_test_df import create_train_test_df +import tensorflow as tf +import yaml from config_variables import Config -from metalearned_validation import prepare_metalearned_test +from create_train_test_df import create_train_test_df +from keras import backend +from models import TransformerModel +from utils import load_tf_dataset def get_combined_ds(config): - version = config["version"] + version = config['version'] # all the datasets we have. Ideally we use only 3 of these for trainig # adjust the values in this list accordingly datasets = [ - load_tf_dataset(config["prefix"] + f"{version}/daily.tfrecords"), - load_tf_dataset(config["prefix"] + f"{version}/weekly.tfrecords"), - load_tf_dataset(config["prefix"] + f"{version}/monthly.tfrecords"), + load_tf_dataset(config['prefix'] + f'{version}/daily.tfrecords'), + load_tf_dataset(config['prefix'] + f'{version}/weekly.tfrecords'), + load_tf_dataset(config['prefix'] + f'{version}/monthly.tfrecords'), ] combined_ds = tf.data.Dataset.choose_from_datasets( @@ -40,21 +38,21 @@ def main(): np.random.seed(42) parser = argparse.ArgumentParser() - parser.add_argument("-c", "--config", required=True, help="Path to config file") + parser.add_argument('-c', '--config', required=True, help='Path to config file') args = parser.parse_args() with open(args.config) as config_file: config = yaml.load(config_file, yaml.loader.SafeLoader) - Config.set_sub_day(config["sub_day"]) + Config.set_sub_day(config['sub_day']) combined_ds = get_combined_ds(config) - train_df, test_df = create_train_test_df(combined_ds, config["test_noise"]) + train_df, test_df = create_train_test_df(combined_ds, config['test_noise']) model = TransformerModel(scaler=config['scaler']) def smape(y_true, y_pred): - """ Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`. + """Calculate Armstrong's original definition of sMAPE between `y_true` & `y_pred`. `loss = 200 * mean(abs((y_true - y_pred) / (y_true + y_pred), axis=-1)` Args: y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. 
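The `smape` metric compiled into the model in `train.py` (and `noise_ablation.py`) appears in these hunks only through its docstring, which quotes Armstrong's original definition, `200 * mean(abs(y_true - y_pred) / (y_true + y_pred), axis=-1)`. The repository's actual implementation is not visible here, so the following is only a minimal TensorFlow sketch of that quoted formula for reference.

```python
import tensorflow as tf


def smape(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    # Armstrong's sMAPE as quoted in the docstring:
    # 200 * mean(|y_true - y_pred| / (y_true + y_pred), axis=-1)
    ratio = tf.abs(y_true - y_pred) / (y_true + y_pred)
    return 200.0 * tf.reduce_mean(ratio, axis=-1)


# Example: a constant 10% over-forecast gives roughly 9.52.
y_true = tf.constant([[100.0, 200.0, 300.0]])
y_pred = y_true * 1.1
print(smape(y_true, y_pred).numpy())  # ~[9.52]
```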
@@ -72,20 +70,19 @@ def smape(y_true, y_pred): # need these two lines, else fit gives error batch_X, batch_y = next(iter(train_df.batch(2).take(1))) - pred_y = model(batch_X) - + model(batch_X) model.compile( optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss=tf.keras.losses.MeanSquaredError(), - metrics=[tf.keras.metrics.MeanAbsolutePercentageError(name='mape'), - tf.keras.metrics.MeanSquaredError(name='mse'), - smape, - ] + metrics=[ + tf.keras.metrics.MeanAbsolutePercentageError(name='mape'), + tf.keras.metrics.MeanSquaredError(name='mse'), + smape, + ], ) class AdditionalValidationSets(tf.keras.callbacks.Callback): - def __init__(self, validation_sets, tbCallback, verbose=1, batch_size=1): """ :param validation_sets: @@ -126,35 +123,35 @@ def on_epoch_end(self, epoch, logs=None): sample_weights = None else: raise ValueError() - + print(validation_set_name) - results = self.model.evaluate(x=validation_data, - verbose=self.verbose, - sample_weight=sample_weights, - batch_size=self.batch_size) + results = self.model.evaluate( + x=validation_data, + verbose=self.verbose, + sample_weight=sample_weights, + batch_size=self.batch_size, + ) for metric, result in zip(self.model.metrics_names, results): valuename = validation_set_name + '_' + metric self.history.setdefault(valuename, []).append(result) with self.tbCallback._val_writer.as_default(step=epoch): - tf.summary.scalar(valuename, result) - - + tf.summary.scalar(valuename, result) + fit_id = '.'.join( + [config['model_save_name'], datetime.datetime.now().strftime('%Y%m%d-%H%M%S')] + ) - fit_id = '.'.join([config["model_save_name"], - datetime.datetime.now().strftime("%Y%m%d-%H%M%S")]) - - logdir = f"/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}" + logdir = f'/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}' tbCallback = tf.keras.callbacks.TensorBoard(logdir) - tbCallback._val_dir = logdir+'/validation' + tbCallback._val_dir = logdir + '/validation' callbacks = tf.keras.callbacks.CallbackList( callbacks=[ tf.keras.callbacks.ModelCheckpoint( - config["prefix"] + f"models/{fit_id}/ckpts", monitor="loss", verbose=1 + config['prefix'] + f'models/{fit_id}/ckpts', monitor='loss', verbose=1 ), tf.keras.callbacks.TensorBoard( - f"/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}" + f'/home/ubuntu/tensorboard/notebook/pretrained/{fit_id}' ), ], add_history=True, @@ -162,10 +159,10 @@ def on_epoch_end(self, epoch, logs=None): model=model, ) - model.fit( - train_df.shuffle(5_000, reshuffle_each_iteration=True).batch( - 1024).prefetch(tf.data.AUTOTUNE), + train_df.shuffle(5_000, reshuffle_each_iteration=True) + .batch(1024) + .prefetch(tf.data.AUTOTUNE), # train_df.take(1000_000).cache().shuffle(100_000).batch(1024).prefetch(tf.data.AUTOTUNE), validation_data=test_df.batch(1024, drop_remainder=False).cache(), epochs=700, @@ -173,8 +170,8 @@ def on_epoch_end(self, epoch, logs=None): callbacks=callbacks, ) - model.save(config["prefix"] + 'models/'+ config["model_save_name"]) + model.save(config['prefix'] + 'models/' + config['model_save_name']) -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/src/training/utils.py b/src/training/utils.py index e34a4d7..3445d5b 100644 --- a/src/training/utils.py +++ b/src/training/utils.py @@ -2,9 +2,9 @@ Utility functions for training script """ import tensorflow as tf -import tensorflow_io from constants import TF_SCHEMA + def decode_fn(record_bytes): """ Method to process bytes from tfrecord files