From e9a3193a4f433c4458b4301fc0b1a2ba5155e11c Mon Sep 17 00:00:00 2001 From: Rajveer Rathod <64583161+rajveer43@users.noreply.github.com> Date: Fri, 4 Aug 2023 12:44:08 +0000 Subject: [PATCH 01/31] docstring_update --- lightwood/helpers/torch.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/lightwood/helpers/torch.py b/lightwood/helpers/torch.py index 807519ddd..58c0f551e 100644 --- a/lightwood/helpers/torch.py +++ b/lightwood/helpers/torch.py @@ -13,7 +13,7 @@ def concat_vectors_and_pad(vec_list, max_): pad_size = max_ - len(vec_list) padding = (0, pad_size * vec_list[0].size(0)) - padded = pad(cat_vec[None], padding, 'constant', 0)[0] + padded = pad(cat_vec[None], xpadding, 'constant', 0)[0] return padded @@ -27,6 +27,25 @@ class LightwoodAutocast: """ Equivalent to torch.cuda.amp.autocast, but checks device compute capability to activate the feature only when the GPU has tensor cores to leverage AMP. + + **Attributes:** + + * `active` (bool): Whether AMP is currently active. + + **Methods:** + + * `__init__(self, enabled=True)`: Initializes the class and sets the initial value of `active`. + * `__enter__()`: Enters the context manager and enables AMP if it is not already enabled. + * `__exit__()`: Exits the context manager and disables AMP. + * `__call__(self, func)`: Returns a decorated function that enables AMP when it is called. + + **Usage:** + + ```python + >>> import lightwood.helpers.torch as lt + >>> with lt.LightwoodAutocast(): + ... # This code will be executed in AMP mode. + ... pass """ active = False From 370f0603dfa7caeae540ff698259c67fbdf03218 Mon Sep 17 00:00:00 2001 From: Rajveer Rathod <64583161+rajveer43@users.noreply.github.com> Date: Wed, 9 Aug 2023 09:16:42 +0000 Subject: [PATCH 02/31] doc: Update docstrings for better readability --- lightwood/helpers/device.py | 56 ++++++++++++++++++++++++++++++++++--- lightwood/helpers/torch.py | 14 +++++++++- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/lightwood/helpers/device.py b/lightwood/helpers/device.py index 8b1864231..a27f7d544 100644 --- a/lightwood/helpers/device.py +++ b/lightwood/helpers/device.py @@ -5,6 +5,22 @@ def is_cuda_compatible(): + """ + Check if the system has CUDA-compatible devices with the required architecture and compiled CUDA version. + + This function checks the compatibility of CUDA devices available on the system by comparing their architectures + and the compiled CUDA version. It iterates through the available devices and verifies if their architectures meet + the minimum requirement specified by the function, and also checks if the compiled CUDA version is greater than + a specific version. + + Returns: + bool: True if there are compatible CUDA devices, otherwise False. + + Example: + >>> is_compatible = is_cuda_compatible() + >>> print(is_compatible) + True + """ compatible_device_count = 0 if torch.version.cuda is not None: for d in range(device_count()): @@ -23,6 +39,25 @@ def is_cuda_compatible(): def get_devices(): + """ + Get the appropriate Torch device(s) based on CUDA availability and compatibility. + + This function determines the appropriate Torch device(s) to be used for computations based on the availability of + CUDA and compatible devices. It checks if CUDA is available and if the available CUDA devices are compatible + according to the 'is_cuda_compatible()' function. If compatible devices are found, the function selects either + the first available CUDA device or a randomly selected one based on the 'RANDOM_GPU' environment variable. + If CUDA is not available or no compatible devices are found, the function returns the CPU device. + + Returns: + Tuple: A tuple containing the selected Torch device and the number of available devices. + + Example: + >>> device, num_devices = get_devices() + >>> print(device) + cuda:0 + >>> print(num_devices) + 1 + """ if torch.cuda.is_available() and is_cuda_compatible(): device_str = "cuda" available_devices = torch.cuda.device_count() @@ -40,10 +75,23 @@ def get_devices(): def get_device_from_name(device_name=''): """ - Returns the device specified as an argument. - If the argument is left empty it will returns the output of get_devices(). - - :param device_name: name of the device to use (default is an empty string), if is an empty string will use the output of get_devices() instead") + Get a Torch device based on the specified device name or default behavior. + + This function returns a Torch device based on the specified device name or the default behavior, which is to return + the output of the 'get_devices()' function. If a device name is provided, the function creates a Torch device + based on that name. If the device name is left empty, the function returns the primary device determined by + 'get_devices()' function. + + Args: + device_name (str, optional): Name of the device to use. Default is an empty string. + + Returns: + torch.device: The selected Torch device. + + Example: + >>> device = get_device_from_name('cuda:1') + >>> print(device) + cuda:1 """ # noqa E501 if(device_name != ''): device = torch.device(device_name) diff --git a/lightwood/helpers/torch.py b/lightwood/helpers/torch.py index 58c0f551e..d6d599e8a 100644 --- a/lightwood/helpers/torch.py +++ b/lightwood/helpers/torch.py @@ -13,7 +13,7 @@ def concat_vectors_and_pad(vec_list, max_): pad_size = max_ - len(vec_list) padding = (0, pad_size * vec_list[0].size(0)) - padded = pad(cat_vec[None], xpadding, 'constant', 0)[0] + padded = pad(cat_vec[None], padding, 'constant', 0)[0] return padded @@ -50,6 +50,9 @@ class LightwoodAutocast: active = False def __init__(self, enabled=True): + """ + Enters the context manager and enables AMP if it is not already enabled. + """ self.major = 0 # GPU major version torch_version = [int(i) for i in torch.__version__.split('.')[:-1]] @@ -69,12 +72,18 @@ def __init__(self, enabled=True): LightwoodAutocast.active = self._enabled def __enter__(self): + """ + * `__enter__()`: Enters the context manager and enables AMP if it is not already enabled. + """ if self._enabled: self.prev = torch.is_autocast_enabled() torch.set_autocast_enabled(self._enabled) torch.autocast_increment_nesting() def __exit__(self, *args): + """ + * `__exit__()`: Exits the context manager and disables AMP. + """ if self._enabled: # Drop the cache when we exit to a nesting level that's outside any instance of autocast if torch.autocast_decrement_nesting() == 0: @@ -83,6 +92,9 @@ def __exit__(self, *args): return False def __call__(self, func): + """ + * `__call__(self, func)`: Returns a decorated function that enables AMP when it is called. + """ @functools.wraps(func) def decorate_autocast(*args, **kwargs): with self: From f016882ba02821208e4dcc538519cd9481e76571 Mon Sep 17 00:00:00 2001 From: Rajveer Rathod <64583161+rajveer43@users.noreply.github.com> Date: Fri, 11 Aug 2023 06:15:46 +0000 Subject: [PATCH 03/31] doc update 3 --- lightwood/helpers/device.py | 36 ++++++++++++++++++++---------------- lightwood/helpers/torch.py | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/lightwood/helpers/device.py b/lightwood/helpers/device.py index a27f7d544..64bb1dd26 100644 --- a/lightwood/helpers/device.py +++ b/lightwood/helpers/device.py @@ -6,12 +6,14 @@ def is_cuda_compatible(): """ - Check if the system has CUDA-compatible devices with the required architecture and compiled CUDA version. + Check if the system has CUDA-compatible devices with the required architecture and + compiled CUDA version. - This function checks the compatibility of CUDA devices available on the system by comparing their architectures - and the compiled CUDA version. It iterates through the available devices and verifies if their architectures meet - the minimum requirement specified by the function, and also checks if the compiled CUDA version is greater than - a specific version. + This function checks the compatibility of CUDA devices available on the system by + comparing their architectures and the compiled CUDA version. It iterates through + the available devices and verifies if their architectures meet the minimum + requirement specified by the function, and also checks if the compiled CUDA version + is greater than a specific version. Returns: bool: True if there are compatible CUDA devices, otherwise False. @@ -42,14 +44,17 @@ def get_devices(): """ Get the appropriate Torch device(s) based on CUDA availability and compatibility. - This function determines the appropriate Torch device(s) to be used for computations based on the availability of - CUDA and compatible devices. It checks if CUDA is available and if the available CUDA devices are compatible - according to the 'is_cuda_compatible()' function. If compatible devices are found, the function selects either - the first available CUDA device or a randomly selected one based on the 'RANDOM_GPU' environment variable. - If CUDA is not available or no compatible devices are found, the function returns the CPU device. + This function determines the appropriate Torch device(s) to be used for computations + based on the availability of CUDA and compatible devices. It checks if CUDA is + available and if the available CUDA devices are compatible according to the + 'is_cuda_compatible()' function. If compatible devices are found, the function + selects either the first available CUDA device or a randomly selected one based on + the 'RANDOM_GPU' environment variable. If CUDA is not available or no compatible + devices are found, the function returns the CPU device. Returns: - Tuple: A tuple containing the selected Torch device and the number of available devices. + Tuple: A tuple containing the selected Torch device and the number of available + devices. Example: >>> device, num_devices = get_devices() @@ -77,13 +82,12 @@ def get_device_from_name(device_name=''): """ Get a Torch device based on the specified device name or default behavior. - This function returns a Torch device based on the specified device name or the default behavior, which is to return - the output of the 'get_devices()' function. If a device name is provided, the function creates a Torch device - based on that name. If the device name is left empty, the function returns the primary device determined by - 'get_devices()' function. + This function returns a Torch device based on the specified device name or the + default behavior, which is to return the output of the 'get_devices()' function. Args: - device_name (str, optional): Name of the device to use. Default is an empty string. + device_name (str, optional): Name of the device to use. Default is an empty + string. Returns: torch.device: The selected Torch device. diff --git a/lightwood/helpers/torch.py b/lightwood/helpers/torch.py index d6d599e8a..12ffd6503 100644 --- a/lightwood/helpers/torch.py +++ b/lightwood/helpers/torch.py @@ -5,6 +5,32 @@ def concat_vectors_and_pad(vec_list, max_): + """ + Concatenates a list of input vectors and pads them to match a specified maximum + length. + + This function takes a list of input vectors, concatenates them along a specified + dimension (dim=0), and then pads the concatenated vector to achieve a specified + maximum length. The padding is done with zeros. + + Args: + vec_list (list of torch.Tensor): List of input vectors to concatenate and pad. + max_len (int): The maximum length of the concatenated and padded vector. + + Returns: + torch.Tensor: The concatenated and padded vector. + + Raises: + AssertionError: If the length of 'vec_list' is not greater than 0, or if it + exceeds 'max_len', or if 'max_len' is not greater than 0. + + Example: + >>> input_tensors = [torch.tensor([1, 2]), torch.tensor([3, 4, 5])] + >>> max_length = 5 + >>> concatenated_padded = concat_vectors_and_pad(input_tensors, max_length) + >>> print(concatenated_padded) + tensor([1, 2, 3, 4, 5]) + """ assert len(vec_list) > 0 assert len(vec_list) <= max_ assert max_ > 0 @@ -30,14 +56,8 @@ class LightwoodAutocast: **Attributes:** - * `active` (bool): Whether AMP is currently active. - - **Methods:** - - * `__init__(self, enabled=True)`: Initializes the class and sets the initial value of `active`. - * `__enter__()`: Enters the context manager and enables AMP if it is not already enabled. - * `__exit__()`: Exits the context manager and disables AMP. - * `__call__(self, func)`: Returns a decorated function that enables AMP when it is called. + * `active` (bool): Whether AMP is currently active. This attribute is at the class + level **Usage:** From 788fbc1f9d1328951207b142e63c9d71117230d5 Mon Sep 17 00:00:00 2001 From: Rajveer Rathod <64583161+rajveer43@users.noreply.github.com> Date: Fri, 11 Aug 2023 06:26:52 +0000 Subject: [PATCH 04/31] blank space update --- lightwood/helpers/device.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lightwood/helpers/device.py b/lightwood/helpers/device.py index 64bb1dd26..71aa14d02 100644 --- a/lightwood/helpers/device.py +++ b/lightwood/helpers/device.py @@ -6,12 +6,12 @@ def is_cuda_compatible(): """ - Check if the system has CUDA-compatible devices with the required architecture and + Check if the system has CUDA-compatible devices with the required architecture and compiled CUDA version. - This function checks the compatibility of CUDA devices available on the system by - comparing their architectures and the compiled CUDA version. It iterates through - the available devices and verifies if their architectures meet the minimum + This function checks the compatibility of CUDA devices available on the system by + comparing their architectures and the compiled CUDA version. It iterates through + the available devices and verifies if their architectures meet the minimum requirement specified by the function, and also checks if the compiled CUDA version is greater than a specific version. @@ -44,12 +44,12 @@ def get_devices(): """ Get the appropriate Torch device(s) based on CUDA availability and compatibility. - This function determines the appropriate Torch device(s) to be used for computations - based on the availability of CUDA and compatible devices. It checks if CUDA is - available and if the available CUDA devices are compatible according to the - 'is_cuda_compatible()' function. If compatible devices are found, the function + This function determines the appropriate Torch device(s) to be used for + computations based on the availability of CUDA and compatible devices. It checks + if CUDA is available and if the available CUDA devices are compatible according to + the 'is_cuda_compatible()' function. If compatible devices are found, the function selects either the first available CUDA device or a randomly selected one based on - the 'RANDOM_GPU' environment variable. If CUDA is not available or no compatible + the 'RANDOM_GPU' environment variable. If CUDA is not available or no compatible devices are found, the function returns the CPU device. Returns: @@ -82,11 +82,11 @@ def get_device_from_name(device_name=''): """ Get a Torch device based on the specified device name or default behavior. - This function returns a Torch device based on the specified device name or the + This function returns a Torch device based on the specified device name or the default behavior, which is to return the output of the 'get_devices()' function. Args: - device_name (str, optional): Name of the device to use. Default is an empty + device_name (str, optional): Name of the device to use. Default is an empty string. Returns: From 85785d696e281db3094a7e5eacab0654d61213ba Mon Sep 17 00:00:00 2001 From: Rajveer Rathod <64583161+rajveer43@users.noreply.github.com> Date: Fri, 11 Aug 2023 06:31:04 +0000 Subject: [PATCH 05/31] flake8 erors resolved --- lightwood/helpers/device.py | 9 ++++----- lightwood/helpers/torch.py | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/lightwood/helpers/device.py b/lightwood/helpers/device.py index 71aa14d02..8198ab07c 100644 --- a/lightwood/helpers/device.py +++ b/lightwood/helpers/device.py @@ -12,8 +12,8 @@ def is_cuda_compatible(): This function checks the compatibility of CUDA devices available on the system by comparing their architectures and the compiled CUDA version. It iterates through the available devices and verifies if their architectures meet the minimum - requirement specified by the function, and also checks if the compiled CUDA version - is greater than a specific version. + requirement specified by the function, and also checks if the compiled CUDA + version is greater than a specific version. Returns: bool: True if there are compatible CUDA devices, otherwise False. @@ -48,14 +48,13 @@ def get_devices(): computations based on the availability of CUDA and compatible devices. It checks if CUDA is available and if the available CUDA devices are compatible according to the 'is_cuda_compatible()' function. If compatible devices are found, the function - selects either the first available CUDA device or a randomly selected one based on + selects either the first available CUDA device or a randomly selected one based on the 'RANDOM_GPU' environment variable. If CUDA is not available or no compatible devices are found, the function returns the CPU device. Returns: - Tuple: A tuple containing the selected Torch device and the number of available + Tuple: A tuple containing the selected Torch device and the number of available devices. - Example: >>> device, num_devices = get_devices() >>> print(device) diff --git a/lightwood/helpers/torch.py b/lightwood/helpers/torch.py index 12ffd6503..2a10356a7 100644 --- a/lightwood/helpers/torch.py +++ b/lightwood/helpers/torch.py @@ -6,11 +6,11 @@ def concat_vectors_and_pad(vec_list, max_): """ - Concatenates a list of input vectors and pads them to match a specified maximum + Concatenates a list of input vectors and pads them to match a specified maximum length. - This function takes a list of input vectors, concatenates them along a specified - dimension (dim=0), and then pads the concatenated vector to achieve a specified + This function takes a list of input vectors, concatenates them along a specified + dimension (dim=0), and then pads the concatenated vector to achieve a specified maximum length. The padding is done with zeros. Args: @@ -21,7 +21,7 @@ def concat_vectors_and_pad(vec_list, max_): torch.Tensor: The concatenated and padded vector. Raises: - AssertionError: If the length of 'vec_list' is not greater than 0, or if it + AssertionError: If the length of 'vec_list' is not greater than 0, or if it exceeds 'max_len', or if 'max_len' is not greater than 0. Example: @@ -56,7 +56,7 @@ class LightwoodAutocast: **Attributes:** - * `active` (bool): Whether AMP is currently active. This attribute is at the class + * `active` (bool): Whether AMP is currently active. This attribute is at the class level **Usage:** From 4c138e5f944991820941d47ee2bbd5974e789388 Mon Sep 17 00:00:00 2001 From: Rajveer Rathod <64583161+rajveer43@users.noreply.github.com> Date: Tue, 15 Aug 2023 12:13:56 +0000 Subject: [PATCH 06/31] PUSH --- lightwood/helpers/torch.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lightwood/helpers/torch.py b/lightwood/helpers/torch.py index 2a10356a7..9d930f93a 100644 --- a/lightwood/helpers/torch.py +++ b/lightwood/helpers/torch.py @@ -15,7 +15,7 @@ def concat_vectors_and_pad(vec_list, max_): Args: vec_list (list of torch.Tensor): List of input vectors to concatenate and pad. - max_len (int): The maximum length of the concatenated and padded vector. + max_ (int): The maximum length of the concatenated and padded vector. Returns: torch.Tensor: The concatenated and padded vector. @@ -71,7 +71,10 @@ class LightwoodAutocast: def __init__(self, enabled=True): """ - Enters the context manager and enables AMP if it is not already enabled. + Initializes the context manager for Automatic Mixed Precision (AMP) functionality. + + Args: + enabled (bool, optional): Whether to enable AMP. Defaults to True. """ self.major = 0 # GPU major version torch_version = [int(i) for i in torch.__version__.split('.')[:-1]] From fa1d564b421b4c1c723471e80299d1fae49e1105 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Wed, 6 Sep 2023 18:13:45 -0700 Subject: [PATCH 07/31] remove sktime --- lightwood/api/json_ai.py | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/lightwood/api/json_ai.py b/lightwood/api/json_ai.py index fbc775d16..94cedd482 100644 --- a/lightwood/api/json_ai.py +++ b/lightwood/api/json_ai.py @@ -257,32 +257,7 @@ def generate_json_ai( ) elif tss.is_timeseries and tss.horizon > 1 and tss.use_previous_target and \ dtype_dict[target] in (dtype.integer, dtype.float, dtype.quantity): - - submodels.extend( - [ - { - "module": "SkTime", - "args": { - "stop_after": "$problem_definition.seconds_per_mixer", - "horizon": "$problem_definition.timeseries_settings.horizon", - }, - }, - { - "module": "ETSMixer", - "args": { - "stop_after": "$problem_definition.seconds_per_mixer", - "horizon": "$problem_definition.timeseries_settings.horizon", - }, - }, - { - "module": "ARIMAMixer", - "args": { - "stop_after": "$problem_definition.seconds_per_mixer", - "horizon": "$problem_definition.timeseries_settings.horizon", - }, - } - ] - ) + pass # TODO: XGBoostArrayMixer model = { "module": "BestOf", From 55126dcf5fe1ba028031b79b20ac3c628e23d231 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Fri, 8 Sep 2023 13:09:39 -0700 Subject: [PATCH 08/31] progress --- lightwood/api/json_ai.py | 4 ++ lightwood/ensemble/best_of.py | 2 +- lightwood/mixer/nhits.py | 45 +++++++++++++------ tests/integration/advanced/test_timeseries.py | 2 +- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/lightwood/api/json_ai.py b/lightwood/api/json_ai.py index 94cedd482..81f20948b 100644 --- a/lightwood/api/json_ai.py +++ b/lightwood/api/json_ai.py @@ -226,6 +226,10 @@ def generate_json_ai( "stop_after": "$problem_definition.seconds_per_mixer", "search_hyperparameters": True, }, + }, + { + "module": "NHitsMixer", + "args": {}, } ] ) diff --git a/lightwood/ensemble/best_of.py b/lightwood/ensemble/best_of.py index e7fd3411b..50753d941 100644 --- a/lightwood/ensemble/best_of.py +++ b/lightwood/ensemble/best_of.py @@ -57,7 +57,7 @@ def __init__(self, target, mixers: List[BaseMixer], data: EncodedDs, accuracy_fu score_list.append(avg_score) - self.indexes_by_accuracy = list(reversed(np.array(score_list).argsort())) + self.indexes_by_accuracy = list(np.array(score_list).argsort()) self.supports_proba = self.mixers[self.indexes_by_accuracy[0]].supports_proba log.info(f'Picked best mixer: {type(self.mixers[self.indexes_by_accuracy[0]]).__name__}') self.prepared = True diff --git a/lightwood/mixer/nhits.py b/lightwood/mixer/nhits.py index b88528c16..7dee62a84 100644 --- a/lightwood/mixer/nhits.py +++ b/lightwood/mixer/nhits.py @@ -5,6 +5,7 @@ import pandas as pd from neuralforecast import NeuralForecast from neuralforecast.models.nhits import NHITS +from neuralforecast.models.nbeats import NBEATS from neuralforecast.losses.pytorch import MQLoss from lightwood.helpers.log import log @@ -20,6 +21,7 @@ class NHitsMixer(BaseMixer): model_path: str hyperparam_search: bool default_config: dict + SUPPORTED_MODELS = ('nhits', 'nbeats') def __init__( self, @@ -56,7 +58,14 @@ def __init__( self.grouped_by = ['__default'] if not ts_analysis['tss'].group_by else ts_analysis['tss'].group_by self.group_boundaries = {} # stores last observed timestamp per series self.train_args = train_args.get('trainer_args', {}) if train_args else {} - self.train_args['early_stop_patience_steps'] = self.train_args.get('early_stop_patience_steps', 10) + + # we set a fairly aggressive training schedule by default + self.train_args['early_stop_patience_steps'] = self.train_args.get('early_stop_patience_steps', 1) + self.train_args['val_check_steps'] = self.train_args.get('val_check_steps', 10) + self.train_args['learning_rate'] = self.train_args.get('learning_rate', 3e-3) + self.train_args['mlp_units'] = self.train_args.get('mlp_units', [[128, 128], [128, 128]]) + self.train_args['random_seed'] = self.train_args.get('random_seed', 1) + self.conf_level = self.train_args.pop('conf_level', [90]) for level in self.conf_level: assert 0 <= level <= 100, f'A provided level is not in the [0, 100] range (found: {level})' @@ -74,18 +83,24 @@ def __init__( 'T': 'hourly', # NOTE: use another pre-trained model once available 'S': 'hourly' # NOTE: use another pre-trained model once available } + self.model = None + self.model_class_str = self.train_args.get('model_class', 'nhits').lower() + assert self.model_class_str in NHitsMixer.SUPPORTED_MODELS, f'Provided model class ({self.model_class_str}) is not supported. Supported models are: {NHitsMixer.SUPPORTED_MODELS}' # noqa + self.model_class = NBEATS if self.model_class_str == 'nbeats' else NHITS + self.model_name = None self.model_names = { - 'hourly': 'nhits_m4_hourly.ckpt', # hourly (non-tiny) - 'daily': 'nhits_m4_daily.ckpt', # daily - 'monthly': 'nhits_m4_monthly.ckpt', # monthly - 'yearly': 'nhits_m4_yearly.ckpt', # yearly + 'nhits': { + 'hourly': 'nhits_m4_hourly.ckpt', # hourly (non-tiny) + 'daily': 'nhits_m4_daily.ckpt', # daily + 'monthly': 'nhits_m4_monthly.ckpt', # monthly + 'yearly': 'nhits_m4_yearly.ckpt', # yearly + }, + 'nbeats': {} # TODO: complete } - self.model_name = None - self.model = None def fit(self, train_data: EncodedDs, dev_data: EncodedDs) -> None: """ - Fits the N-HITS model. + Fits the NeuralForecast model. """ # noqa log.info('Started fitting N-HITS forecasting model') @@ -110,7 +125,7 @@ def fit(self, train_data: EncodedDs, dev_data: EncodedDs) -> None: None) self.model_name = self.model_names['hourly'] if self.model_name is None else self.model_name ckpt_url = self.base_url + self.model_name - self.model = NHITS.load_from_checkpoint(ckpt_url) + self.model = self.model_class.load_from_checkpoint(ckpt_url) if not self.window < self.model.hparams.n_time_in: log.info(f'NOTE: Provided window ({self.window}) is smaller than specified model input length ({self.model.hparams.n_time_in}). Will train a new model from scratch.') # noqa @@ -126,8 +141,8 @@ def fit(self, train_data: EncodedDs, dev_data: EncodedDs) -> None: new_window = max(1, n_time - self.horizon - 1) self.window = new_window log.info(f'Window {self.window} is too long for data provided (group: {df[gby].value_counts()[::-1].index[0]}), reducing window to {new_window}.') # noqa - model = NHITS(h=n_time_out, input_size=self.window, **self.train_args, loss=MQLoss(level=self.conf_level)) - self.model = NeuralForecast(models=[model], freq=self.ts_analysis['sample_freqs']['__default']) + model = self.model_class(h=n_time_out, input_size=self.window, **self.train_args, loss=MQLoss(level=self.conf_level)) # noqa + self.model = NeuralForecast(models=[model], freq=self.ts_analysis['sample_freqs']['__default'],) self.model.fit(df=Y_df, val_size=n_ts_val) log.info('Successfully trained N-HITS forecasting model.') @@ -156,7 +171,11 @@ def __call__(self, ds: Union[EncodedDs, ConcatedEncodedDs], level = max(self.conf_level) target_cols = ['prediction', 'lower', 'upper'] - pred_cols = ['NHITS-median', f'NHITS-lo-{level}', f'NHITS-hi-{level}'] + pred_cols = [ + f'{self.model_class_str.upper()}-median', + f'{self.model_class_str.upper()}-lo-{level}', + f'{self.model_class_str.upper()}-hi-{level}' + ] input_df, idxs = self._make_initial_df(deepcopy(ds.data_frame)) length = sum(ds.encoded_ds_lengths) if isinstance(ds, ConcatedEncodedDs) else len(ds) @@ -189,7 +208,7 @@ def __call__(self, ds: Union[EncodedDs, ConcatedEncodedDs], def _make_initial_df(self, df, mode='inference'): """ - Prepares a dataframe for the NHITS model according to what neuralforecast expects. + Prepares a dataframe for the model according to what neuralforecast expects. If a per-group boundary exists, this method additionally drops out all observations prior to the cutoff. """ # noqa diff --git a/tests/integration/advanced/test_timeseries.py b/tests/integration/advanced/test_timeseries.py index 412f36732..346dde6ec 100644 --- a/tests/integration/advanced/test_timeseries.py +++ b/tests/integration/advanced/test_timeseries.py @@ -475,7 +475,7 @@ def test_8_time_series_double_grouped_regression(self): target = 'MA' order_by = 'saledate' window = 8 - for horizon in [1, 4]: + for horizon in [4]: train, _, test = stratify(data, pct_train=0.8, pct_dev=0, pct_test=0.2, stratify_on=gby, seed=1, reshuffle=False) jai = json_ai_from_problem(train, From d650f22d0d2bc009edd76858957c591dae1ac39f Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Fri, 8 Sep 2023 17:38:39 -0700 Subject: [PATCH 09/31] fix reqs --- requirements.txt | 1 + requirements_extra_ts.txt | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6b81917b2..b65c685f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,3 +32,4 @@ tab-transformer-pytorch >= 0.2.1 typing-inspect six regex +neuralforecast ==1.5.0 diff --git a/requirements_extra_ts.txt b/requirements_extra_ts.txt index e37416201..07f37ada2 100644 --- a/requirements_extra_ts.txt +++ b/requirements_extra_ts.txt @@ -1,5 +1,4 @@ pystan==2.19.1.1 prophet==1.1 -neuralforecast ==1.5.0 mxnet >=1.6.0, <2.0.0 gluonts >= 0.13.2, <0.14.0 From ba8ad29dff5cfd5560192d617191c87a3fa29a9d Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Fri, 8 Sep 2023 18:20:34 -0700 Subject: [PATCH 10/31] fix tests --- lightwood/ensemble/best_of.py | 2 +- tests/integration/basic/test_model_selection.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lightwood/ensemble/best_of.py b/lightwood/ensemble/best_of.py index 50753d941..e7fd3411b 100644 --- a/lightwood/ensemble/best_of.py +++ b/lightwood/ensemble/best_of.py @@ -57,7 +57,7 @@ def __init__(self, target, mixers: List[BaseMixer], data: EncodedDs, accuracy_fu score_list.append(avg_score) - self.indexes_by_accuracy = list(np.array(score_list).argsort()) + self.indexes_by_accuracy = list(reversed(np.array(score_list).argsort())) self.supports_proba = self.mixers[self.indexes_by_accuracy[0]].supports_proba log.info(f'Picked best mixer: {type(self.mixers[self.indexes_by_accuracy[0]]).__name__}') self.prepared = True diff --git a/tests/integration/basic/test_model_selection.py b/tests/integration/basic/test_model_selection.py index fb7a02363..8a1579868 100644 --- a/tests/integration/basic/test_model_selection.py +++ b/tests/integration/basic/test_model_selection.py @@ -53,7 +53,7 @@ def test_4_timeseries_t_plus_1(self): 'window': 5 } } - expected_mixers = ['NeuralTs', 'Regression', 'RandomForest', 'XGBoostMixer'] + expected_mixers = ['NeuralTs', 'Regression', 'RandomForest', 'XGBoostMixer', 'NHitsMixer'] mixers = self.get_mixers(df, target, prob_kwargs=prob_kwargs) self.assertEqual(set(mixers), set(expected_mixers)) @@ -69,6 +69,6 @@ def test_5_timeseries_t_plus_n(self): 'window': 5 } } - expected_mixers = ['NeuralTs', 'SkTime', 'ARIMAMixer', 'ETSMixer'] + expected_mixers = ['NeuralTs', 'NHitsMixer'] mixers = self.get_mixers(df, target, prob_kwargs=prob_kwargs) self.assertEqual(set(mixers), set(expected_mixers)) From c5a5c9f8a5d42740dd494304fab792cea0c571a7 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Thu, 14 Sep 2023 17:06:31 -0700 Subject: [PATCH 11/31] add xgboostarraymixer, remove nhits as default --- lightwood/api/json_ai.py | 21 ++++--- lightwood/mixer/__init__.py | 3 +- lightwood/mixer/xgboost.py | 4 +- lightwood/mixer/xgboost_array.py | 98 ++++++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 11 deletions(-) create mode 100644 lightwood/mixer/xgboost_array.py diff --git a/lightwood/api/json_ai.py b/lightwood/api/json_ai.py index 81f20948b..0e697cb56 100644 --- a/lightwood/api/json_ai.py +++ b/lightwood/api/json_ai.py @@ -203,6 +203,8 @@ def generate_json_ai( ] ) else: + + # add neural model if not tss.is_timeseries: submodels.extend( [ @@ -227,13 +229,10 @@ def generate_json_ai( "search_hyperparameters": True, }, }, - { - "module": "NHitsMixer", - "args": {}, - } ] ) + # add other models if (not tss.is_timeseries or tss.horizon == 1) and dtype_dict[target] not in (dtype.num_array, dtype.cat_array): submodels.extend( [ @@ -259,9 +258,15 @@ def generate_json_ai( }, ] ) - elif tss.is_timeseries and tss.horizon > 1 and tss.use_previous_target and \ - dtype_dict[target] in (dtype.integer, dtype.float, dtype.quantity): - pass # TODO: XGBoostArrayMixer + + # special forecasting dispatch + elif tss.is_timeseries: + submodels.extend([ + { + "module": "XGBoostArrayMixer", + "args": {}, + }, + ]) model = { "module": "BestOf", @@ -550,7 +555,7 @@ def add_implicit_values(json_ai: JsonAI) -> JsonAI: "target_encoder", "$encoders[self.target]" ) - elif mixers[i]["module"] == "LightGBMArray": + elif mixers[i]["module"] in ("LightGBMArray", "XGBoostArrayMixer"): mixers[i]["args"]["input_cols"] = mixers[i]["args"].get( "input_cols", "$input_cols" ) diff --git a/lightwood/mixer/__init__.py b/lightwood/mixer/__init__.py index 3d7c1c2fa..d98806aa0 100644 --- a/lightwood/mixer/__init__.py +++ b/lightwood/mixer/__init__.py @@ -3,6 +3,7 @@ from lightwood.mixer.neural import Neural from lightwood.mixer.neural_ts import NeuralTs from lightwood.mixer.xgboost import XGBoostMixer +from lightwood.mixer.xgboost_array import XGBoostArrayMixer from lightwood.mixer.random_forest import RandomForest from lightwood.mixer.sktime import SkTime from lightwood.mixer.arima import ARIMAMixer @@ -43,4 +44,4 @@ __all__ = ['BaseMixer', 'Neural', 'NeuralTs', 'LightGBM', 'RandomForest', 'LightGBMArray', 'Unit', 'Regression', 'SkTime', 'QClassic', 'ProphetMixer', 'ETSMixer', 'ARIMAMixer', 'NHitsMixer', 'GluonTSMixer', 'XGBoostMixer', - 'TabTransformerMixer'] + 'TabTransformerMixer', 'XGBoostArrayMixer'] diff --git a/lightwood/mixer/xgboost.py b/lightwood/mixer/xgboost.py index d64470fad..c6b8c9cf9 100644 --- a/lightwood/mixer/xgboost.py +++ b/lightwood/mixer/xgboost.py @@ -83,8 +83,8 @@ def __init__( self.use_optuna = use_optuna self.params = {} self.fit_on_dev = fit_on_dev - self.cls_dtypes = [dtype.categorical, dtype.binary] # , dtype.cat_tsarray] # TODO - self.float_dtypes = [dtype.float, dtype.quantity] # , dtype.num_tsarray] # TODO + self.cls_dtypes = [dtype.categorical, dtype.binary, dtype.cat_tsarray] + self.float_dtypes = [dtype.float, dtype.quantity, dtype.num_tsarray] self.num_dtypes = [dtype.integer] + self.float_dtypes self.supports_proba = dtype_dict[target] in self.cls_dtypes self.stable = True diff --git a/lightwood/mixer/xgboost_array.py b/lightwood/mixer/xgboost_array.py new file mode 100644 index 000000000..dc7771554 --- /dev/null +++ b/lightwood/mixer/xgboost_array.py @@ -0,0 +1,98 @@ +from typing import Dict, List, Union, Optional +from copy import deepcopy + +import numpy as np +import pandas as pd + +from type_infer.dtype import dtype +from lightwood.helpers.log import log +from lightwood.encoder.base import BaseEncoder +from lightwood.mixer.base import BaseMixer +from lightwood.mixer.xgboost import XGBoostMixer, check_gpu_support +from lightwood.api.types import PredictionArguments, TimeseriesSettings +from lightwood.data.encoded_ds import EncodedDs, ConcatedEncodedDs + + +class XGBoostArrayMixer(BaseMixer): + """XGBoost-based model, intended for usage in forecasting tasks.""" + models: List[XGBoostMixer] + submodel_stop_after: float + target: str + supports_proba: bool + ts_analysis: Dict + tss: TimeseriesSettings + + def __init__( + self, + stop_after: float, + target: str, + dtype_dict: Dict[str, str], + input_cols: List[str], + fit_on_dev: bool, + target_encoder: BaseEncoder, + ts_analysis: Dict[str, object], + use_stl: bool, + tss: TimeseriesSettings + ): + super().__init__(stop_after) + self.tss = tss + self.horizon = tss.horizon + self.submodel_stop_after = stop_after / self.horizon + self.target = target + self.offset_pred_cols = [f'{self.target}_timestep_{i}' for i in range(1, self.horizon)] + if set(input_cols) != {self.tss.order_by}: + input_cols.remove(self.tss.order_by) + for col in self.offset_pred_cols: + dtype_dict[col] = dtype_dict[self.target] + self.models = [XGBoostMixer(self.submodel_stop_after, + target_col, + dtype_dict, + input_cols, + False, # fit_on_dev, + False, # use_optuna + target_encoder) + for _, target_col in zip(range(self.horizon), [target] + self.offset_pred_cols)] + self.ts_analysis = ts_analysis + self.supports_proba = False + self.use_stl = False + self.stable = True + + def _fit(self, train_data: EncodedDs, dev_data: EncodedDs, submodel_method='fit') -> None: + original_train = deepcopy(train_data.data_frame) + original_dev = deepcopy(dev_data.data_frame) + + for timestep in range(self.horizon): + getattr(self.models[timestep], submodel_method)(train_data, dev_data) + + # restore dfs + train_data.data_frame = original_train + dev_data.data_frame = original_dev + + def fit(self, train_data: EncodedDs, dev_data: EncodedDs) -> None: + log.info('Started fitting LGBM models for array prediction') + self._fit(train_data, dev_data, submodel_method='fit') + + def partial_fit(self, train_data: EncodedDs, dev_data: EncodedDs, args: Optional[dict] = None) -> None: + log.info('Updating array of LGBM models...') + self._fit(train_data, dev_data, submodel_method='partial_fit') + + def __call__(self, ds: Union[EncodedDs, ConcatedEncodedDs], + args: PredictionArguments = PredictionArguments()) -> pd.DataFrame: + if args.predict_proba: + log.warning('This model does not output probability estimates') + + original_df = deepcopy(ds.data_frame) + length = sum(ds.encoded_ds_lengths) if isinstance(ds, ConcatedEncodedDs) else len(ds) + ydf = pd.DataFrame(0, # zero-filled + index=np.arange(length), + columns=[f'prediction_{i}' for i in range(self.horizon)]) + + for timestep in range(self.horizon): + ydf[f'prediction_{timestep}'] = self.models[timestep](ds, args)['prediction'].values + + if self.models[0].positive_domain: + ydf = ydf.clip(0) + + ydf['prediction'] = ydf.values.tolist() + ds.data_frame = original_df + return ydf[['prediction']] From de79553e6dcac61a8dd76f481ca7aab4e0689139 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Thu, 14 Sep 2023 17:07:13 -0700 Subject: [PATCH 12/31] update reqs --- requirements.txt | 1 - requirements_extra_ts.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b65c685f4..6b81917b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,4 +32,3 @@ tab-transformer-pytorch >= 0.2.1 typing-inspect six regex -neuralforecast ==1.5.0 diff --git a/requirements_extra_ts.txt b/requirements_extra_ts.txt index 07f37ada2..e37416201 100644 --- a/requirements_extra_ts.txt +++ b/requirements_extra_ts.txt @@ -1,4 +1,5 @@ pystan==2.19.1.1 prophet==1.1 +neuralforecast ==1.5.0 mxnet >=1.6.0, <2.0.0 gluonts >= 0.13.2, <0.14.0 From 9a61d7c139e80020beaf58ed60b74958b28dab4b Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Thu, 14 Sep 2023 17:11:17 -0700 Subject: [PATCH 13/31] fix tests --- lightwood/mixer/xgboost_array.py | 2 +- tests/integration/advanced/test_timeseries.py | 2 +- tests/integration/basic/test_model_selection.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lightwood/mixer/xgboost_array.py b/lightwood/mixer/xgboost_array.py index dc7771554..1a50aa001 100644 --- a/lightwood/mixer/xgboost_array.py +++ b/lightwood/mixer/xgboost_array.py @@ -55,7 +55,7 @@ def __init__( self.ts_analysis = ts_analysis self.supports_proba = False self.use_stl = False - self.stable = True + self.stable = False def _fit(self, train_data: EncodedDs, dev_data: EncodedDs, submodel_method='fit') -> None: original_train = deepcopy(train_data.data_frame) diff --git a/tests/integration/advanced/test_timeseries.py b/tests/integration/advanced/test_timeseries.py index 346dde6ec..412f36732 100644 --- a/tests/integration/advanced/test_timeseries.py +++ b/tests/integration/advanced/test_timeseries.py @@ -475,7 +475,7 @@ def test_8_time_series_double_grouped_regression(self): target = 'MA' order_by = 'saledate' window = 8 - for horizon in [4]: + for horizon in [1, 4]: train, _, test = stratify(data, pct_train=0.8, pct_dev=0, pct_test=0.2, stratify_on=gby, seed=1, reshuffle=False) jai = json_ai_from_problem(train, diff --git a/tests/integration/basic/test_model_selection.py b/tests/integration/basic/test_model_selection.py index 8a1579868..6207aad5f 100644 --- a/tests/integration/basic/test_model_selection.py +++ b/tests/integration/basic/test_model_selection.py @@ -53,7 +53,7 @@ def test_4_timeseries_t_plus_1(self): 'window': 5 } } - expected_mixers = ['NeuralTs', 'Regression', 'RandomForest', 'XGBoostMixer', 'NHitsMixer'] + expected_mixers = ['NeuralTs', 'Regression', 'RandomForest', 'XGBoostMixer'] mixers = self.get_mixers(df, target, prob_kwargs=prob_kwargs) self.assertEqual(set(mixers), set(expected_mixers)) @@ -69,6 +69,6 @@ def test_5_timeseries_t_plus_n(self): 'window': 5 } } - expected_mixers = ['NeuralTs', 'NHitsMixer'] + expected_mixers = ['NeuralTs', 'XGBoostArrayMixer'] mixers = self.get_mixers(df, target, prob_kwargs=prob_kwargs) self.assertEqual(set(mixers), set(expected_mixers)) From e6263272c40585158861438dd39e6eb148e2f8cb Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Thu, 14 Sep 2023 17:11:40 -0700 Subject: [PATCH 14/31] lint: flake8 --- lightwood/mixer/xgboost_array.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lightwood/mixer/xgboost_array.py b/lightwood/mixer/xgboost_array.py index 1a50aa001..5b785abf7 100644 --- a/lightwood/mixer/xgboost_array.py +++ b/lightwood/mixer/xgboost_array.py @@ -4,11 +4,10 @@ import numpy as np import pandas as pd -from type_infer.dtype import dtype from lightwood.helpers.log import log from lightwood.encoder.base import BaseEncoder from lightwood.mixer.base import BaseMixer -from lightwood.mixer.xgboost import XGBoostMixer, check_gpu_support +from lightwood.mixer.xgboost import XGBoostMixer from lightwood.api.types import PredictionArguments, TimeseriesSettings from lightwood.data.encoded_ds import EncodedDs, ConcatedEncodedDs From c7d612d76c00c61e900d4b5853a25648b94e3c35 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Wed, 20 Sep 2023 20:47:43 -0300 Subject: [PATCH 15/31] fix type hint --- lightwood/api/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightwood/api/types.py b/lightwood/api/types.py index 5cdff7fe5..0ef7bc6dc 100644 --- a/lightwood/api/types.py +++ b/lightwood/api/types.py @@ -313,7 +313,7 @@ class JsonAI: analysis_blocks: Optional[List[Module]] = None timeseries_transformer: Optional[Module] = None timeseries_analyzer: Optional[Module] = None - accuracy_functions: Optional[List[str]] = None + accuracy_functions: Optional[List[Union[str, Module]]] = None @staticmethod def from_dict(obj: Dict): From 68dda71f1e43c08f46c3017994dab67087bb82f6 Mon Sep 17 00:00:00 2001 From: Tom Hudson <34073127+tomhuds@users.noreply.github.com> Date: Tue, 26 Sep 2023 12:12:44 -0700 Subject: [PATCH 16/31] Add files via upload --- .github/workflows/add_to_pr_review.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .github/workflows/add_to_pr_review.yml diff --git a/.github/workflows/add_to_pr_review.yml b/.github/workflows/add_to_pr_review.yml new file mode 100644 index 000000000..384f2becb --- /dev/null +++ b/.github/workflows/add_to_pr_review.yml @@ -0,0 +1,16 @@ +name: Add Pull Requests to PR review project + +on: + pull_request: + types: + - opened + +jobs: + add-to-project: + name: Add issue to project + runs-on: ubuntu-latest + steps: + - uses: actions/add-to-project@v0.5.0 + with: + project-url: https://github.com/orgs/mindsdb/projects/65 + github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} From 55d7ac992adc63415d4dc51a9f733b3ac28b2d68 Mon Sep 17 00:00:00 2001 From: Tom Hudson <34073127+tomhuds@users.noreply.github.com> Date: Tue, 26 Sep 2023 14:00:38 -0700 Subject: [PATCH 17/31] Delete .github/workflows/add_to_roadmap_project.yml --- .github/workflows/add_to_roadmap_project.yml | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 .github/workflows/add_to_roadmap_project.yml diff --git a/.github/workflows/add_to_roadmap_project.yml b/.github/workflows/add_to_roadmap_project.yml deleted file mode 100644 index 4aec94733..000000000 --- a/.github/workflows/add_to_roadmap_project.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Add issue to roadmap project - -on: - issues: - types: - - opened - -jobs: - add-to-project: - name: Add issue to roadmap project - runs-on: ubuntu-latest - steps: - - uses: actions/add-to-project@v0.4.0 - with: - # You can target a repository in a different organization - # to the issue - project-url: https://github.com/orgs/mindsdb/projects/54 - github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} - labeled: enhancement From 5971267384f400d827c74481e4577640a72cbf6f Mon Sep 17 00:00:00 2001 From: Tom Hudson <34073127+tomhuds@users.noreply.github.com> Date: Tue, 26 Sep 2023 14:00:56 -0700 Subject: [PATCH 18/31] Delete .github/workflows/add_to_bugs_project.yml --- .github/workflows/add_to_bugs_project.yml | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 .github/workflows/add_to_bugs_project.yml diff --git a/.github/workflows/add_to_bugs_project.yml b/.github/workflows/add_to_bugs_project.yml deleted file mode 100644 index 1326053a9..000000000 --- a/.github/workflows/add_to_bugs_project.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Add issue to bugs project - -on: - issues: - types: - - opened - -jobs: - add-to-project: - name: Add issue to bugs project - runs-on: ubuntu-latest - steps: - - uses: actions/add-to-project@v0.4.0 - with: - # You can target a repository in a different organization - # to the issue - project-url: https://github.com/orgs/mindsdb/projects/53 - github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} - labeled: bug From 47ee2d059a1fa3fa9731885f17295fbfdf651e0d Mon Sep 17 00:00:00 2001 From: Tom Hudson <34073127+tomhuds@users.noreply.github.com> Date: Tue, 26 Sep 2023 14:01:15 -0700 Subject: [PATCH 19/31] Add files via upload --- .github/workflows/add_to_roadmap_project.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .github/workflows/add_to_roadmap_project.yml diff --git a/.github/workflows/add_to_roadmap_project.yml b/.github/workflows/add_to_roadmap_project.yml new file mode 100644 index 000000000..48ce2eba7 --- /dev/null +++ b/.github/workflows/add_to_roadmap_project.yml @@ -0,0 +1,16 @@ +name: Add issue to roadmap project +on: + issues: + types: + - opened +jobs: + add-to-project: + name: Add issue to roadmap project + runs-on: ubuntu-latest + steps: + - uses: actions/add-to-project@v0.4.0 + with: + project-url: https://github.com/orgs/mindsdb/projects/53 + github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} + labeled: bug, enhancement + label-operator: OR \ No newline at end of file From 404f19d926f8ab3341b1740b1d2794b7bd4a563b Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Mon, 2 Oct 2023 19:50:54 -0300 Subject: [PATCH 20/31] Update docs URL as temporal fix --- docssrc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docssrc/Makefile b/docssrc/Makefile index 0d0f7fc04..045a0c860 100644 --- a/docssrc/Makefile +++ b/docssrc/Makefile @@ -26,4 +26,4 @@ github: @cp -a build/html/. ../docs @rm -r build @touch ../docs/.nojekyll - @echo lightwood.io > ../docs/CNAME + @echo mindsdb.github.com/lightwood > ../docs/CNAME From 86499c6f7d16b9636ade2f7212bc5e1addd56a05 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Mon, 2 Oct 2023 20:00:51 -0300 Subject: [PATCH 21/31] Update Makefile --- docssrc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docssrc/Makefile b/docssrc/Makefile index 045a0c860..14a569ddb 100644 --- a/docssrc/Makefile +++ b/docssrc/Makefile @@ -26,4 +26,4 @@ github: @cp -a build/html/. ../docs @rm -r build @touch ../docs/.nojekyll - @echo mindsdb.github.com/lightwood > ../docs/CNAME +# @echo lightwood.io > ../docs/CNAME # deactivated until domain is renewed From fd4a1677cf2ddfbaf1d7337ba38d233d05ca4c25 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Mon, 2 Oct 2023 20:15:57 -0300 Subject: [PATCH 22/31] Update Makefile --- docssrc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docssrc/Makefile b/docssrc/Makefile index 14a569ddb..0d0f7fc04 100644 --- a/docssrc/Makefile +++ b/docssrc/Makefile @@ -26,4 +26,4 @@ github: @cp -a build/html/. ../docs @rm -r build @touch ../docs/.nojekyll -# @echo lightwood.io > ../docs/CNAME # deactivated until domain is renewed + @echo lightwood.io > ../docs/CNAME From 9996304c5701f019862d0bb33c2665639e7e55b1 Mon Sep 17 00:00:00 2001 From: Max Stepanov Date: Fri, 13 Oct 2023 19:49:53 +0300 Subject: [PATCH 23/31] bump neuralforecast requirement --- requirements_extra_ts.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_extra_ts.txt b/requirements_extra_ts.txt index e37416201..ea70c95c1 100644 --- a/requirements_extra_ts.txt +++ b/requirements_extra_ts.txt @@ -1,5 +1,5 @@ pystan==2.19.1.1 prophet==1.1 -neuralforecast ==1.5.0 +neuralforecast >=1.6.4, <1.7.0 mxnet >=1.6.0, <2.0.0 gluonts >= 0.13.2, <0.14.0 From b8d57a3a11d552f5de02620affd292a36b5767eb Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Mon, 23 Oct 2023 11:58:50 -0300 Subject: [PATCH 24/31] Update doc_build.yml --- .github/workflows/doc_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/doc_build.yml b/.github/workflows/doc_build.yml index 0d146ccd2..a5e6b4112 100644 --- a/.github/workflows/doc_build.yml +++ b/.github/workflows/doc_build.yml @@ -24,7 +24,7 @@ jobs: run: | sudo apt install pandoc python -m pip install --upgrade pip - pip install install 'Sphinx==4.1.2' 'sphinx-autoapi==1.8.4' 'sphinx-autodoc-typehints==1.12.0' 'sphinx-code-include==1.1.1' 'sphinx-rtd-theme==0.5.2' 'sphinxcontrib-applehelp==1.0.2' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.0' 'sphinxcontrib-jsmath==1.0.1' 'sphinxcontrib-napoleon==0.7' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' autoapi nbsphinx myst_parser pandoc jupyter matplotlib imblearn fsspec + pip install install 'Sphinx==6.2.1' 'sphinx-autoapi==3.0.0' 'sphinx-autodoc-typehints==1.12.0' 'sphinx-code-include==1.1.1' 'sphinx-rtd-theme==0.5.2' 'sphinxcontrib-applehelp==1.0.2' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.0' 'sphinxcontrib-jsmath==1.0.1' 'sphinxcontrib-napoleon==0.7' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' autoapi nbsphinx myst_parser pandoc jupyter matplotlib imblearn fsspec pip install --no-cache-dir -e . - name: Re-run notebooks run: | From c012329d81dec500d3c1019f4c977dae9bf3b817 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Mon, 23 Oct 2023 11:59:14 -0300 Subject: [PATCH 25/31] Update Makefile --- docssrc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docssrc/Makefile b/docssrc/Makefile index 0d0f7fc04..ac451ccfe 100644 --- a/docssrc/Makefile +++ b/docssrc/Makefile @@ -26,4 +26,4 @@ github: @cp -a build/html/. ../docs @rm -r build @touch ../docs/.nojekyll - @echo lightwood.io > ../docs/CNAME + @echo https://mindsdb.github.io/lightwood > ../docs/CNAME From e80fa3f8499d4b5836beed48dc0cc7e99915a3f5 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Mon, 23 Oct 2023 12:02:19 -0300 Subject: [PATCH 26/31] Update doc_build.yml --- .github/workflows/doc_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/doc_build.yml b/.github/workflows/doc_build.yml index a5e6b4112..b70957a6a 100644 --- a/.github/workflows/doc_build.yml +++ b/.github/workflows/doc_build.yml @@ -24,7 +24,7 @@ jobs: run: | sudo apt install pandoc python -m pip install --upgrade pip - pip install install 'Sphinx==6.2.1' 'sphinx-autoapi==3.0.0' 'sphinx-autodoc-typehints==1.12.0' 'sphinx-code-include==1.1.1' 'sphinx-rtd-theme==0.5.2' 'sphinxcontrib-applehelp==1.0.2' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.0' 'sphinxcontrib-jsmath==1.0.1' 'sphinxcontrib-napoleon==0.7' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' autoapi nbsphinx myst_parser pandoc jupyter matplotlib imblearn fsspec + pip install install 'Sphinx==6.2.1' 'sphinx-autoapi==3.0.0' 'sphinx-autodoc-typehints' 'sphinx-code-include' 'sphinx-rtd-theme' 'sphinxcontrib-applehelp' 'sphinxcontrib-devhelp' 'sphinxcontrib-htmlhelp' 'sphinxcontrib-jsmath' 'sphinxcontrib-napoleon' 'sphinxcontrib-qthelp' 'sphinxcontrib-serializinghtml' autoapi nbsphinx myst_parser pandoc jupyter matplotlib imblearn fsspec pip install --no-cache-dir -e . - name: Re-run notebooks run: | From 619268f151412eeb40886e09b206f2dde1a658f3 Mon Sep 17 00:00:00 2001 From: MindsDB DevOps / Infrastructure <144632297+mindsdb-devops@users.noreply.github.com> Date: Mon, 23 Oct 2023 14:41:19 -0700 Subject: [PATCH 27/31] requirements.txt statsforecast to match mindsdb core library so no conflict --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6b81917b2..31846d454 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,7 +18,7 @@ scikit-learn >=1.0.0 dataclasses_json >=0.5.4 dill ==0.3.6 sktime >=0.21.0,<0.22.0 -statsforecast ==1.4.0 +statsforecast>=1.4.0, <2.0 torch_optimizer ==0.1.0 black ==23.3.0 typing_extensions From f3470b95d81a4415543cee395f8f5b7cafedf21e Mon Sep 17 00:00:00 2001 From: MindsDB DevOps / Infrastructure <144632297+mindsdb-devops@users.noreply.github.com> Date: Mon, 23 Oct 2023 16:20:11 -0700 Subject: [PATCH 28/31] Reverting 5f5353011 Reverting last commit --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 31846d454..6b81917b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,7 +18,7 @@ scikit-learn >=1.0.0 dataclasses_json >=0.5.4 dill ==0.3.6 sktime >=0.21.0,<0.22.0 -statsforecast>=1.4.0, <2.0 +statsforecast ==1.4.0 torch_optimizer ==0.1.0 black ==23.3.0 typing_extensions From bb8822e56c5c513068dd55e2d61e11b07456be1d Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Tue, 24 Oct 2023 12:50:24 -0300 Subject: [PATCH 29/31] bump statsforecast~=1.6.0 and sktime~=0.24.0 --- lightwood/mixer/sktime.py | 2 +- requirements.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lightwood/mixer/sktime.py b/lightwood/mixer/sktime.py index 7ebabcfd9..0fbcfd992 100644 --- a/lightwood/mixer/sktime.py +++ b/lightwood/mixer/sktime.py @@ -69,7 +69,7 @@ def __init__( :param use_stl: Whether to use de-trenders and de-seasonalizers fitted in the timeseries analysis phase. """ # noqa super().__init__(stop_after) - self.stable = False + self.stable = True # TODO remove, debug self.prepared = False self.supports_proba = False self.target = target diff --git a/requirements.txt b/requirements.txt index 6b81917b2..56bde0bb3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,8 +17,8 @@ wheel >=0.32.2 scikit-learn >=1.0.0 dataclasses_json >=0.5.4 dill ==0.3.6 -sktime >=0.21.0,<0.22.0 -statsforecast ==1.4.0 +sktime >=0.24.0,<0.25 .0 +statsforecast ~=1.6.0 torch_optimizer ==0.1.0 black ==23.3.0 typing_extensions From 2ea507141b4092814471a8a32e923badd69ec462 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Tue, 24 Oct 2023 12:54:39 -0300 Subject: [PATCH 30/31] fix reqs --- lightwood/mixer/sktime.py | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lightwood/mixer/sktime.py b/lightwood/mixer/sktime.py index 0fbcfd992..7ebabcfd9 100644 --- a/lightwood/mixer/sktime.py +++ b/lightwood/mixer/sktime.py @@ -69,7 +69,7 @@ def __init__( :param use_stl: Whether to use de-trenders and de-seasonalizers fitted in the timeseries analysis phase. """ # noqa super().__init__(stop_after) - self.stable = True # TODO remove, debug + self.stable = False self.prepared = False self.supports_proba = False self.target = target diff --git a/requirements.txt b/requirements.txt index 56bde0bb3..9352c20b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,7 @@ wheel >=0.32.2 scikit-learn >=1.0.0 dataclasses_json >=0.5.4 dill ==0.3.6 -sktime >=0.24.0,<0.25 .0 +sktime >=0.24.0,<0.25.0 statsforecast ~=1.6.0 torch_optimizer ==0.1.0 black ==23.3.0 From be242273a25d673fff590b1a7ce40c5ec9a78502 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Thu, 26 Oct 2023 12:27:21 -0700 Subject: [PATCH 31/31] Version bump: 23.11.1.0 --- lightwood/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightwood/__about__.py b/lightwood/__about__.py index 91a35f04a..a55b713f2 100644 --- a/lightwood/__about__.py +++ b/lightwood/__about__.py @@ -1,6 +1,6 @@ __title__ = 'lightwood' __package_name__ = 'lightwood' -__version__ = '23.8.1.0' +__version__ = '23.11.1.0' __description__ = "Lightwood is a toolkit for automatic machine learning model building" __email__ = "community@mindsdb.com" __author__ = 'MindsDB Inc'