We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hi, I want to use a multivariate series.
So, I have for example t2m and sm100 data. I want to use both in order to train the model but predict on t2m.
t2m
sm100
I tried to use the data generator from here but when when I call the fit method, it throws
ValueError: applied function returned data with unexpected number of dimensions. Received 1 dimension(s) but expected 0 dimensions with names: ()
at the line y = self.data.isel(forecast_time=idxs + self.lead_time).values in __getitem__ method.
y = self.data.isel(forecast_time=idxs + self.lead_time).values
__getitem__
Also, not that in the data generator , I have commented out the lines
self.n_samples = self.data.isel(forecast_time=slice(0, -lead_time)).shape[0] self.init_time = self.data.isel(forecast_time=slice(None, -lead_time)).forecast_time self.valid_time = self.data.isel(forecast_time=slice(lead_time, None)).forecast_time
( I am using self.n_samples = self.data.forecast_time.size instead)
self.n_samples = self.data.forecast_time.size
If I use it , it throws me:
TypeError: 'DataArray' object cannot be interpreted as an integer
Any ideas about that? Thanks!
the code:
import xarray as xr import tensorflow as tf import numpy as np from collections import OrderedDict #from tensorflow.keras.layers import Input, Conv2D, Dense from tensorflow.keras import Model, Sequential from tensorflow.keras.layers import Dense, Dropout, LSTM, Input, Conv2D, TimeDistributed,\ MaxPooling2D, Flatten, RepeatVector, Reshape, Lambda, GlobalAveragePooling2D,\ Bidirectional, ConvLSTM2D, BatchNormalization t2m = xr.open_dataset("/home/ggousios/s2s-ai-challenge-agroapps/t2m.nc") sm100 = xr.open_dataset("/home/ggousios/s2s-ai-challenge-agroapps/sm100.nc") lead_time = t2m.isel(lead_time=0).lead_time class PeriodicPadding2D(tf.keras.layers.Layer): def __init__(self, pad_width, **kwargs): super().__init__(**kwargs) self.pad_width = pad_width def call(self, inputs, **kwargs): if self.pad_width == 0: return inputs inputs_padded = tf.concat( [inputs[:, :, -self.pad_width:, :], inputs, inputs[:, :, :self.pad_width, :]], axis=2) # Zero padding in the lat direction inputs_padded = tf.pad(inputs_padded, [[0, 0], [self.pad_width, self.pad_width], [0, 0], [0, 0]]) return inputs_padded def get_config(self): config = super().get_config() config.update({'pad_width': self.pad_width}) return config class PeriodicConv2D(tf.keras.layers.Layer): def __init__(self, filters, kernel_size, conv_kwargs={}, **kwargs, ): super().__init__(**kwargs) self.filters = filters self.kernel_size = kernel_size self.conv_kwargs = conv_kwargs if type(kernel_size) is not int: assert kernel_size[0] == kernel_size[1], \ 'PeriodicConv2D only works for square kernels' kernel_size = kernel_size[0] pad_width = (kernel_size - 1) // 2 self.padding = PeriodicPadding2D(pad_width) self.conv = Conv2D( filters, kernel_size, padding='valid', **conv_kwargs ) def call(self, inputs): return self.conv(self.padding(inputs)) def get_config(self): config = super().get_config() config.update({'filters': self.filters, 'kernel_size': self.kernel_size, 'conv_kwargs': self.conv_kwargs}) return config class DataGenerator(tf.keras.utils.Sequence): def __init__(self, ds, var_dict, lead_time, batch_size, shuffle=True, load=True, mean=None, std=None): """ Data generator for WeatherBench data. Template from https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly Args: ds: Dataset containing all variables var_dict: Dictionary of the form {'var': level}. Use None for level if data is of single level lead_time: Lead time in hours batch_size: Batch size shuffle: bool. If True, data is shuffled. load: bool. If True, datadet is loaded into RAM. mean: If None, compute mean from data. std: If None, compute standard deviation from data. """ self.ds = ds self.var_dict = var_dict self.batch_size = batch_size self.shuffle = shuffle self.lead_time = lead_time data = [] generic_level = xr.DataArray([1])#, coords={'realization': [1]}, dims=['realization']) for var, levels in var_dict.items(): #try: data.append(ds[var])#.sel(realization=levels)) #except ValueError: # data.append(ds[var].expand_dims({'realization': generic_level}, 1)) self.data = xr.concat(data, 'realization').transpose('forecast_time', ...) self.mean = self.data.mean(('forecast_time')).compute() if mean is None else mean self.std = self.data.std('forecast_time').compute() if std is None else std # Normalize self.data = (self.data - self.mean) / self.std #self.n_samples = self.data.isel(forecast_time=slice(0, -lead_time)).shape[0] self.n_samples = self.data.forecast_time.size # self.init_time = self.data.isel(forecast_time=slice(None, -lead_time)).forecast_time # self.valid_time = self.data.isel(forecast_time=slice(lead_time, None)).forecast_time self.on_epoch_end() # For some weird reason calling .load() earlier messes up the mean and std computations if load: print('Loading data into RAM'); self.data.load() def __len__(self): 'Denotes the number of batches per epoch' return int(np.ceil(self.n_samples / self.batch_size)) def __getitem__(self, i): 'Generate one batch of data' idxs = self.idxs[i * self.batch_size:(i + 1) * self.batch_size] X = self.data.isel(forecast_time=idxs).values y = self.data.isel(forecast_time=idxs + self.lead_time).values return X, y def on_epoch_end(self): 'Updates indexes after each epoch' self.idxs = np.arange(self.n_samples) if self.shuffle == True: np.random.shuffle(self.idxs) datasets = [t2m, sm100] ds = xr.merge(datasets) # I am choosing only t2m (not tp) and sm100 here dic = OrderedDict({'t2m': None, 'sm100': None}) dg_train = DataGenerator( ds.sel(forecast_time=slice('2000', '2001')), dic, lead_time=lead_time, batch_size=8, load=True) dg_valid = DataGenerator( ds.sel(forecast_time=slice('2018', '2019')), dic, lead_time=lead_time, batch_size=8, mean=dg_train.mean, std=dg_train.std, shuffle=False) def custom_categ_crossentropy(y_true, y_pred, sample_weight=None): y_true = tf.one_hot(tf.cast(y_true,'int32'), depth=3) loss = tf.keras.losses.CategoricalCrossentropy()(y_true, y_pred) return loss def build_cnn(filters, kernels, input_shape): inputs = Input(batch_shape=(8, 121, 240, 1)) print(inputs.shape) x = (PeriodicConv2D(filters, kernels, conv_kwargs={'activation':'relu'}))((inputs)) x = PeriodicConv2D(32, 5, conv_kwargs={'activation':'relu'})(x) output = Dense(3, activation='softmax')(x) model = Model(inputs, output) model.compile(optimizer='sgd', loss=custom_categ_crossentropy, metrics=['accuracy']) print(model.summary()) return model def fit(): model = build_cnn(64, 9, (121, 240, 1)) history = model.fit(dg_train, epochs=10, validation_data=dg_valid) return history history = fit()
The data
The text was updated successfully, but these errors were encountered:
No branches or pull requests
Hi, I want to use a multivariate series.
So, I have for example
t2m
andsm100
data. I want to use both in order to train the model but predict ont2m
.I tried to use the data generator from here but when when I call the fit method, it throws
at the line
y = self.data.isel(forecast_time=idxs + self.lead_time).values
in__getitem__
method.Also, not that in the data generator , I have commented out the lines
( I am using
self.n_samples = self.data.forecast_time.size
instead)If I use it , it throws me:
TypeError: 'DataArray' object cannot be interpreted as an integer
Any ideas about that?
Thanks!
the code:
The data
The text was updated successfully, but these errors were encountered: