diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/README.md b/notebooks/hfdemo/tinytimemixer/full_benchmarking/README.md new file mode 100644 index 00000000..f0d26dc5 --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/README.md @@ -0,0 +1,23 @@ +# Steps to run the full benchmarking + +1. In terminal, run any one of the three bash scripts `granite-r2.sh`, `granite-r1.sh`, or `research-use-r2.sh`. +2. Run `summarize_results.py`. For example, +``` +sh granite-r2.sh +python summarize_results.py -rd=results-granite-r2/ +``` + +It will run all benchmarking and dump the results. The dumped results are available in the CSV files. +1. TTM-Research-Use model results: + - `combined_results-research-use-r2.csv`: Across all datasets, all TTM models, and all forecast horizons. + - `combined_avg_results-research-use-r2.csv`: Across all datasets and all TTM models, averaged over forecast horizons. +2. TTM-Granite-R2 model results: + - `combined_results-granite-r2.csv`: Across all datasets, all TTM models, and all forecast horizons. + - `combined_avg_results-granite-r2.csv`: Across all datasets and all TTM models, averaged over forecast horizons. +3. TTM-Granite-R1 model results: + - `combined_results-granite-r1.csv`: Across all datasets, all TTM models, and all forecast horizons. + - `combined_avg_results-granite-r1.csv`: Across all datasets and all TTM models, averaged over forecast horizons. + Note that TTM-Granite-R1 models support 512/1024 as context length, and 96 as the forecast horizon. + +# Sample benchmarking notebooks +We also provide a bunch of sample benchmarking notebooks in the `sample_notebooks` folder. These notebooks can be directly run or modified according to the need. 
\ No newline at end of file diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_avg_results-granite-r1.csv b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_avg_results-granite-r1.csv new file mode 100644 index 00000000..48371f07 --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_avg_results-granite-r1.csv @@ -0,0 +1,15 @@ +dataset,CL,zs_mse,fs5_mse +etth1,512,0.363,0.363 +etth2,512,0.286,0.285 +ettm1,512,0.415,0.36 +ettm2,512,0.186,0.175 +weather,512,0.152,0.15 +electricity,512,0.17,0.144 +traffic,512,0.509,0.401 +etth1,1024,0.362,0.36 +etth2,1024,0.281,0.28 +ettm1,1024,0.387,0.373 +ettm2,1024,0.175,0.172 +weather,1024,0.152,0.151 +electricity,1024,0.156,0.143 +traffic,1024,0.458,0.409 diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_avg_results-granite-r2.csv b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_avg_results-granite-r2.csv new file mode 100644 index 00000000..ee864848 --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_avg_results-granite-r2.csv @@ -0,0 +1,22 @@ +dataset,CL,zs_mse,fs5_mse +etth1,512,0.407,0.383 +etth2,512,0.356,0.334 +ettm1,512,0.392,0.386 +ettm2,512,0.288,0.283 +weather,512,0.23,0.228 +electricity,512,0.212,0.18 +traffic,512,0.561,0.424 +etth1,1024,0.405,0.385 +etth2,1024,0.34,0.32 +ettm1,1024,0.39,0.389 +ettm2,1024,0.274,0.271 +weather,1024,0.227,0.227 +electricity,1024,0.198,0.201 +traffic,1024,0.518,0.438 +etth1,1536,0.396,0.384 +etth2,1536,0.338,0.322 +ettm1,1536,0.366,0.367 +ettm2,1536,0.256,0.256 +weather,1536,0.226,0.224 +electricity,1536,0.196,0.179 +traffic,1536,0.505,0.488 diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_avg_results-research-use-r2.csv b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_avg_results-research-use-r2.csv new file mode 100644 index 00000000..aa1d998e --- /dev/null +++ 
b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_avg_results-research-use-r2.csv @@ -0,0 +1,22 @@ +dataset,CL,zs_mse,fs5_mse +etth1,512,0.394,0.383 +etth2,512,0.345,0.324 +ettm1,512,0.386,0.376 +ettm2,512,0.281,0.272 +weather,512,0.237,0.234 +electricity,512,0.205,0.183 +traffic,512,0.564,0.434 +etth1,1024,0.404,0.385 +etth2,1024,0.335,0.318 +ettm1,1024,0.38,0.379 +ettm2,1024,0.271,0.269 +weather,1024,0.238,0.24 +electricity,1024,0.194,0.201 +traffic,1024,0.514,0.436 +etth1,1536,0.4,0.386 +etth2,1536,0.333,0.315 +ettm1,1536,0.362,0.361 +ettm2,1536,0.252,0.252 +weather,1536,0.231,0.228 +electricity,1536,0.192,0.18 +traffic,1536,0.502,0.49 diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_results-granite-r1.csv b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_results-granite-r1.csv new file mode 100644 index 00000000..c8535ccf --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_results-granite-r1.csv @@ -0,0 +1,15 @@ +dataset,CL,FL,zs_mse,fs5_mse +etth1,512,96,0.363,0.363 +etth2,512,96,0.286,0.285 +ettm1,512,96,0.415,0.36 +ettm2,512,96,0.186,0.175 +weather,512,96,0.152,0.15 +electricity,512,96,0.17,0.144 +traffic,512,96,0.509,0.401 +etth1,1024,96,0.362,0.36 +etth2,1024,96,0.281,0.28 +ettm1,1024,96,0.387,0.373 +ettm2,1024,96,0.175,0.172 +weather,1024,96,0.152,0.151 +electricity,1024,96,0.156,0.143 +traffic,1024,96,0.458,0.409 diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_results-granite-r2.csv b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_results-granite-r2.csv new file mode 100644 index 00000000..c65dbd8e --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_results-granite-r2.csv @@ -0,0 +1,85 @@ +dataset,CL,FL,zs_mse,fs5_mse +etth1,512,96,0.363,0.362 +etth1,512,192,0.387,0.386 +etth1,512,336,0.402,0.4 +etth1,512,720,0.475, +etth2,512,96,0.276,0.273 +etth2,512,192,0.346,0.345 +etth2,512,336,0.385,0.384 +etth2,512,720,0.419, 
+ettm1,512,96,0.338,0.341 +ettm1,512,192,0.38,0.373 +ettm1,512,336,0.402,0.389 +ettm1,512,720,0.446,0.442 +ettm2,512,96,0.176,0.176 +ettm2,512,192,0.246,0.242 +ettm2,512,336,0.324,0.315 +ettm2,512,720,0.406,0.398 +weather,512,96,0.15,0.15 +weather,512,192,0.195,0.195 +weather,512,336,0.256,0.247 +weather,512,720,0.319,0.319 +electricity,512,96,0.18,0.145 +electricity,512,192,0.194,0.162 +electricity,512,336,0.213,0.183 +electricity,512,720,0.26,0.231 +traffic,512,96,0.518,0.41 +traffic,512,192,0.538,0.421 +traffic,512,336,0.571,0.44 +traffic,512,720,0.617, +etth1,1024,96,0.359,0.359 +etth1,1024,192,0.389,0.389 +etth1,1024,336,0.409,0.406 +etth1,1024,720,0.462, +etth2,1024,96,0.269,0.269 +etth2,1024,192,0.331,0.331 +etth2,1024,336,0.359,0.359 +etth2,1024,720,0.402, +ettm1,1024,96,0.337,0.336 +ettm1,1024,192,0.387,0.387 +ettm1,1024,336,0.411,0.407 +ettm1,1024,720,0.427,0.427 +ettm2,1024,96,0.176,0.176 +ettm2,1024,192,0.239,0.238 +ettm2,1024,336,0.29,0.289 +ettm2,1024,720,0.391,0.383 +weather,1024,96,0.15,0.15 +weather,1024,192,0.195,0.197 +weather,1024,336,0.245,0.244 +weather,1024,720,0.318,0.316 +electricity,1024,96,0.158,0.147 +electricity,1024,192,0.181,0.173 +electricity,1024,336,0.196,0.197 +electricity,1024,720,0.255,0.286 +traffic,1024,96,0.474,0.418 +traffic,1024,192,0.496,0.435 +traffic,1024,336,0.521,0.461 +traffic,1024,720,0.581, +etth1,1536,96,0.357,0.357 +etth1,1536,192,0.386,0.387 +etth1,1536,336,0.405,0.407 +etth1,1536,720,0.435, +etth2,1536,96,0.274,0.277 +etth2,1536,192,0.331,0.331 +etth2,1536,336,0.356,0.357 +etth2,1536,720,0.392, +ettm1,1536,96,0.327,0.331 +ettm1,1536,192,0.353,0.352 +ettm1,1536,336,0.387,0.386 +ettm1,1536,720,0.399,0.398 +ettm2,1536,96,0.168,0.168 +ettm2,1536,192,0.231,0.23 +ettm2,1536,336,0.276,0.276 +ettm2,1536,720,0.35,0.348 +weather,1536,96,0.15,0.149 +weather,1536,192,0.192,0.192 +weather,1536,336,0.248,0.245 +weather,1536,720,0.316,0.308 +electricity,1536,96,0.155,0.138 +electricity,1536,192,0.178,0.158 
+electricity,1536,336,0.197,0.176 +electricity,1536,720,0.253,0.244 +traffic,1536,96,0.463,0.466 +traffic,1536,192,0.488,0.488 +traffic,1536,336,0.504,0.509 +traffic,1536,720,0.566, diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_results-research-use-r2.csv b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_results-research-use-r2.csv new file mode 100644 index 00000000..fea3fcba --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/combined_results-research-use-r2.csv @@ -0,0 +1,85 @@ +dataset,CL,FL,zs_mse,fs5_mse +etth1,512,96,0.364,0.364 +etth1,512,192,0.386,0.387 +etth1,512,336,0.404,0.398 +etth1,512,720,0.424, +etth2,512,96,0.277,0.277 +etth2,512,192,0.334,0.334 +etth2,512,336,0.362,0.361 +etth2,512,720,0.408, +ettm1,512,96,0.322,0.311 +ettm1,512,192,0.376,0.359 +ettm1,512,336,0.407,0.396 +ettm1,512,720,0.439,0.437 +ettm2,512,96,0.171,0.171 +ettm2,512,192,0.238,0.231 +ettm2,512,336,0.304,0.293 +ettm2,512,720,0.41,0.394 +weather,512,96,0.158,0.153 +weather,512,192,0.206,0.204 +weather,512,336,0.256,0.252 +weather,512,720,0.328,0.327 +electricity,512,96,0.166,0.146 +electricity,512,192,0.191,0.164 +electricity,512,336,0.207,0.185 +electricity,512,720,0.255,0.238 +traffic,512,96,0.514,0.418 +traffic,512,192,0.544,0.425 +traffic,512,336,0.575,0.458 +traffic,512,720,0.622, +etth1,1024,96,0.363,0.363 +etth1,1024,192,0.393,0.393 +etth1,1024,336,0.406,0.4 +etth1,1024,720,0.452, +etth2,1024,96,0.271,0.271 +etth2,1024,192,0.324,0.325 +etth2,1024,336,0.357,0.357 +etth2,1024,720,0.388, +ettm1,1024,96,0.327,0.328 +ettm1,1024,192,0.377,0.372 +ettm1,1024,336,0.395,0.4 +ettm1,1024,720,0.419,0.417 +ettm2,1024,96,0.178,0.178 +ettm2,1024,192,0.238,0.237 +ettm2,1024,336,0.29,0.286 +ettm2,1024,720,0.379,0.375 +weather,1024,96,0.166,0.165 +weather,1024,192,0.214,0.213 +weather,1024,336,0.254,0.262 +weather,1024,720,0.319,0.32 +electricity,1024,96,0.157,0.148 +electricity,1024,192,0.174,0.168 +electricity,1024,336,0.195,0.2 
+electricity,1024,720,0.25,0.289 +traffic,1024,96,0.476,0.414 +traffic,1024,192,0.5,0.438 +traffic,1024,336,0.51,0.456 +traffic,1024,720,0.571, +etth1,1536,96,0.359,0.359 +etth1,1536,192,0.389,0.393 +etth1,1536,336,0.405,0.406 +etth1,1536,720,0.448, +etth2,1536,96,0.264,0.269 +etth2,1536,192,0.321,0.321 +etth2,1536,336,0.351,0.355 +etth2,1536,720,0.395, +ettm1,1536,96,0.318,0.317 +ettm1,1536,192,0.354,0.355 +ettm1,1536,336,0.376,0.375 +ettm1,1536,720,0.398,0.398 +ettm2,1536,96,0.169,0.169 +ettm2,1536,192,0.223,0.222 +ettm2,1536,336,0.276,0.274 +ettm2,1536,720,0.342,0.341 +weather,1536,96,0.159,0.155 +weather,1536,192,0.203,0.201 +weather,1536,336,0.247,0.243 +weather,1536,720,0.314,0.315 +electricity,1536,96,0.152,0.14 +electricity,1536,192,0.179,0.159 +electricity,1536,336,0.193,0.179 +electricity,1536,720,0.243,0.241 +traffic,1536,96,0.462,0.469 +traffic,1536,192,0.491,0.491 +traffic,1536,336,0.509,0.509 +traffic,1536,720,0.547, diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/granite-r1.sh b/notebooks/hfdemo/tinytimemixer/full_benchmarking/granite-r1.sh new file mode 100644 index 00000000..f4542123 --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/granite-r1.sh @@ -0,0 +1,9 @@ +data_root_path=$1 +for cl in 512 1024; do + for fl in 96; do + python ttm_full_benchmarking.py --context_length $cl --forecast_length $fl --num_epochs 50 --num_workers 16 \ + --hf_model_path ibm-granite/granite-timeseries-ttm-r1 \ + --data_root_path $data_root_path \ + --save_dir results-granite-r1/ + done; +done; diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/granite-r2.sh b/notebooks/hfdemo/tinytimemixer/full_benchmarking/granite-r2.sh new file mode 100644 index 00000000..cdfc2d75 --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/granite-r2.sh @@ -0,0 +1,9 @@ +data_root_path=$1 +for cl in 512 1024 1536; do + for fl in 96 192 336 720; do + python ttm_full_benchmarking.py --context_length $cl --forecast_length $fl 
--num_epochs 50 --num_workers 16 \ + --hf_model_path ibm-granite/granite-timeseries-ttm-r2 \ + --data_root_path $data_root_path \ + --save_dir results-granite-r2/ + done; +done; diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/research-use-r2.sh b/notebooks/hfdemo/tinytimemixer/full_benchmarking/research-use-r2.sh new file mode 100644 index 00000000..3897f736 --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/research-use-r2.sh @@ -0,0 +1,10 @@ +data_root_path=$1 +for cl in 512 1024 1536; do + for fl in 96 192 336 720; do + python ttm_full_benchmarking.py --context_length $cl --forecast_length $fl \ + --num_epochs 50 --num_workers 16 --enable_prefix_tuning 1 \ + --hf_model_path ibm/ttm-research-r2 \ + --data_root_path $data_root_path \ + --save_dir results-research-use-r2/ + done; +done; diff --git a/notebooks/hfdemo/tinytimemixer/research_use/ttm-r2_freq_benchmarking_1024_96.ipynb b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/research_use/ttm-r2_freq_benchmarking_1024_96.ipynb similarity index 100% rename from notebooks/hfdemo/tinytimemixer/research_use/ttm-r2_freq_benchmarking_1024_96.ipynb rename to notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/research_use/ttm-r2_freq_benchmarking_1024_96.ipynb diff --git a/notebooks/hfdemo/tinytimemixer/research_use/ttm-r2_freq_benchmarking_1536_96.ipynb b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/research_use/ttm-r2_freq_benchmarking_1536_96.ipynb similarity index 100% rename from notebooks/hfdemo/tinytimemixer/research_use/ttm-r2_freq_benchmarking_1536_96.ipynb rename to notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/research_use/ttm-r2_freq_benchmarking_1536_96.ipynb diff --git a/notebooks/hfdemo/tinytimemixer/research_use/ttm-r2_freq_benchmarking_512_96.ipynb b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/research_use/ttm-r2_freq_benchmarking_512_96.ipynb similarity index 100% rename from 
notebooks/hfdemo/tinytimemixer/research_use/ttm-r2_freq_benchmarking_512_96.ipynb rename to notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/research_use/ttm-r2_freq_benchmarking_512_96.ipynb diff --git a/notebooks/hfdemo/tinytimemixer/ttm-r1_benchmarking_1024_96.ipynb b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r1_benchmarking_1024_96.ipynb similarity index 90% rename from notebooks/hfdemo/tinytimemixer/ttm-r1_benchmarking_1024_96.ipynb rename to notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r1_benchmarking_1024_96.ipynb index 808e4352..9c1889f3 100644 --- a/notebooks/hfdemo/tinytimemixer/ttm-r1_benchmarking_1024_96.ipynb +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r1_benchmarking_1024_96.ipynb @@ -9,17 +9,13 @@ "\n", "**Using TTM-1024-96 model.**\n", "\n", - "Pre-trained TTM models will be fetched from the [Hugging Face TTM Model Repository](ibm-granite/granite-timeseries-ttm-r1).\n", + "Pre-trained TTM models will be fetched from the [Granite-TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1).\n", "\n", - "1. TTM-R1 pre-trained models can be found here: [TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024_96_v1\"`\n", - "2. TTM-R2 pre-trained models can be found here: [TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024-96-r2\"`\n", - " 3. 
For 1536-96 model set `TTM_MODEL_REVISION=\"1536-96-r2\"`\n", + "For details, visit the [Hugging Face TTM Model Repository](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2).\n", "\n", - "Details about the revisions (R1 and R2) can be found [here](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)." + "1. IBM Granite TTM-R1 pre-trained models can be found here: [Granite-TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", + "2. IBM Granite TTM-R2 pre-trained models can be found here: [Granite-TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", + "3. Research-use (non-commercial use only) TTM-R2 pre-trained models can be found here: [Research-Use-TTM-R2](https://huggingface.co/ibm/ttm-research-r2)" ] }, { @@ -39,10 +35,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-10 07:33:33.458902: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", - "2024-10-10 07:33:33.499290: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-11-05 01:50:13.619224: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-11-05 01:50:13.658544: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-10 07:33:34.206046: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "2024-11-05 01:50:14.553147: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 
"/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n", " warn(f\"Failed to load image Python extension: {e}\")\n" ] @@ -58,7 +54,8 @@ "from transformers import EarlyStoppingCallback, Trainer, TrainingArguments, set_seed\n", "from transformers.integrations import INTEGRATION_TO_CALLBACK\n", "\n", - "from tsfm_public import TinyTimeMixerForPrediction, TrackingCallback, count_parameters, load_dataset\n", + "from tsfm_public import TrackingCallback, count_parameters, load_dataset\n", + "from tsfm_public.toolkit.get_model import get_model\n", "from tsfm_public.toolkit.visualization import plot_predictions" ] }, @@ -128,7 +125,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Get model path" + "## Set model path" ] }, { @@ -137,13 +134,7 @@ "metadata": {}, "outputs": [], "source": [ - "hf_model_path = \"ibm-granite/granite-timeseries-ttm-r1\"\n", - "if context_length == 512:\n", - " hf_model_branch = \"main\"\n", - "elif context_length == 1024:\n", - " hf_model_branch = \"1024_96_v1\"\n", - "else:\n", - " raise ValueError(\"Current supported context lengths are 512 and 1024. Stay tuned for more TTMs!\")" + "hf_model_path = \"ibm-granite/granite-timeseries-ttm-r1\"" ] }, { @@ -163,8 +154,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: etth1, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 7521, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -174,43 +164,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = etth1, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/1024_96_v1\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "078dee5f93884e67b7b6e11fda57e305", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "config.json: 0%| | 0.00/1.19k [00:00\n", " \n", " \n", - " [ 85/250 00:36 < 01:12, 2.27 it/s, Epoch 17/50]\n", + " [ 85/250 00:36 < 01:13, 2.25 it/s, Epoch 17/50]\n", " \n", " \n", " \n", @@ -477,7 +424,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 0.9880044320050407 seconds, Total Train Time = 38.51875972747803\n", + "[TrackingCallback] Mean Epoch Time = 0.9482389057383818 seconds, Total Train Time = 39.05620765686035\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -519,7 +466,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3614313006401062, 'eval_runtime': 1.2179, 'eval_samples_per_second': 2286.638, 'eval_steps_per_second': 36.126, 'epoch': 17.0}\n", + "{'eval_loss': 0.3614313006401062, 'eval_runtime': 1.1398, 'eval_samples_per_second': 2443.306, 'eval_steps_per_second': 38.602, 'epoch': 17.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -527,8 +474,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: etth1, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 666, val = 
2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -547,8 +493,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n" ] @@ -560,7 +505,7 @@ "
\n", " \n", " \n", - " [143/550 00:28 < 01:22, 4.91 it/s, Epoch 13/50]\n", + " [143/550 00:28 < 01:23, 4.89 it/s, Epoch 13/50]\n", "
\n", "
\n", " \n", @@ -706,7 +651,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.0025265216827393 seconds, Total Train Time = 29.51686191558838\n", + "[TrackingCallback] Mean Epoch Time = 1.0440415602463942 seconds, Total Train Time = 29.59992265701294\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -748,7 +693,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.36271077394485474, 'eval_runtime': 1.2248, 'eval_samples_per_second': 2273.762, 'eval_steps_per_second': 35.923, 'epoch': 13.0}\n", + "{'eval_loss': 0.36271077394485474, 'eval_runtime': 1.1134, 'eval_samples_per_second': 2501.438, 'eval_steps_per_second': 39.52, 'epoch': 13.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -756,8 +701,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: etth2, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 7521, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -769,15 +713,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = etth2, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/1024_96_v1\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -809,7 +752,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.280693918466568, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 0.7537, 'eval_samples_per_second': 3695.306, 'eval_steps_per_second': 58.382}\n", + "{'eval_loss': 0.280693918466568, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 0.9381, 'eval_samples_per_second': 2968.845, 'eval_steps_per_second': 46.905}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -817,31 +760,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: etth2, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 285, val = 2785, test = 2785\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-------------------- Running few-shot 5% --------------------\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", - " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 946336\n", "Number of params after freezing the backbone 389984\n", "Using learning rate = 0.001\n" @@ -851,6 +777,9 @@ "name": "stderr", "output_type": "stream", "text": [ + "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", + " warnings.warn(\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n" ] @@ -862,7 +791,7 @@ "
\n", " \n", " \n", - " [ 55/250 00:23 < 01:27, 2.23 it/s, Epoch 11/50]\n", + " [ 55/250 00:23 < 01:24, 2.30 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -990,7 +919,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 0.941808743910356 seconds, Total Train Time = 24.5705406665802\n", + "[TrackingCallback] Mean Epoch Time = 0.8974325223402544 seconds, Total Train Time = 23.770314931869507\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1032,7 +961,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.2801705598831177, 'eval_runtime': 1.1876, 'eval_samples_per_second': 2345.043, 'eval_steps_per_second': 37.049, 'epoch': 11.0}\n", + "{'eval_loss': 0.2801705598831177, 'eval_runtime': 1.1153, 'eval_samples_per_second': 2497.161, 'eval_steps_per_second': 39.452, 'epoch': 11.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1040,8 +969,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: etth2, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 666, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1060,8 +988,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n" ] @@ -1073,7 +1000,7 @@ "
\n", " \n", " \n", - " [132/550 00:27 < 01:28, 4.72 it/s, Epoch 12/50]\n", + " [132/550 00:25 < 01:23, 5.00 it/s, Epoch 12/50]\n", "
\n", "
\n", " \n", @@ -1210,7 +1137,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.0485320885976155 seconds, Total Train Time = 28.39580202102661\n", + "[TrackingCallback] Mean Epoch Time = 0.9633900125821432 seconds, Total Train Time = 26.702669858932495\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1252,7 +1179,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.28046759963035583, 'eval_runtime': 1.1773, 'eval_samples_per_second': 2365.604, 'eval_steps_per_second': 37.374, 'epoch': 12.0}\n", + "{'eval_loss': 0.28046759963035583, 'eval_runtime': 1.1216, 'eval_samples_per_second': 2483.164, 'eval_steps_per_second': 39.231, 'epoch': 12.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1260,8 +1187,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: ettm1, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 33441, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1274,15 +1200,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = ettm1, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/1024_96_v1\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1314,7 +1239,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.38726314902305603, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 3.0352, 'eval_samples_per_second': 3764.195, 'eval_steps_per_second': 58.975}\n", + "{'eval_loss': 0.38726314902305603, 'eval_model_preparation_time': 0.002, 'eval_runtime': 3.0815, 'eval_samples_per_second': 3707.636, 'eval_steps_per_second': 58.089}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1322,8 +1247,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: ettm1, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 1581, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1339,8 +1263,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1367,7 +1290,7 @@ "
\n", " \n", " \n", - " [ 375/1250 00:52 < 02:02, 7.16 it/s, Epoch 15/50]\n", + " [ 375/1250 00:50 < 01:58, 7.40 it/s, Epoch 15/50]\n", "
\n", "
\n", " \n", @@ -1531,7 +1454,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.2938549836476645 seconds, Total Train Time = 53.02472805976868\n", + "[TrackingCallback] Mean Epoch Time = 1.2302387396494547 seconds, Total Train Time = 51.18072175979614\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1573,7 +1496,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3715095520019531, 'eval_runtime': 2.1521, 'eval_samples_per_second': 5308.838, 'eval_steps_per_second': 83.176, 'epoch': 15.0}\n", + "{'eval_loss': 0.3715095520019531, 'eval_runtime': 2.2124, 'eval_samples_per_second': 5163.992, 'eval_steps_per_second': 80.906, 'epoch': 15.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1581,8 +1504,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: ettm1, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 3258, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1598,8 +1520,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1626,7 +1547,7 @@ "
\n", " \n", " \n", - " [ 714/2550 00:52 < 02:16, 13.45 it/s, Epoch 14/50]\n", + " [ 714/2550 00:51 < 02:13, 13.78 it/s, Epoch 14/50]\n", "
\n", "
\n", " \n", @@ -1781,7 +1702,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.6105220488139562 seconds, Total Train Time = 53.82016968727112\n", + "[TrackingCallback] Mean Epoch Time = 1.6246584824153356 seconds, Total Train Time = 52.44022512435913\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1823,7 +1744,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.37059730291366577, 'eval_runtime': 2.1347, 'eval_samples_per_second': 5351.977, 'eval_steps_per_second': 83.852, 'epoch': 14.0}\n", + "{'eval_loss': 0.37059730291366577, 'eval_runtime': 2.1998, 'eval_samples_per_second': 5193.564, 'eval_steps_per_second': 81.37, 'epoch': 14.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1831,8 +1752,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: ettm2, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 33441, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1846,15 +1766,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = ettm2, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/1024_96_v1\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1886,7 +1805,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.17503736913204193, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 2.9526, 'eval_samples_per_second': 3869.42, 'eval_steps_per_second': 60.624}\n", + "{'eval_loss': 0.17503736913204193, 'eval_model_preparation_time': 0.002, 'eval_runtime': 3.0285, 'eval_samples_per_second': 3772.489, 'eval_steps_per_second': 59.105}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1894,8 +1813,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: ettm2, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 1581, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1911,8 +1829,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1939,7 +1856,7 @@ "
\n", " \n", " \n", - " [ 275/1250 00:37 < 02:13, 7.32 it/s, Epoch 11/50]\n", + " [ 275/1250 00:36 < 02:09, 7.52 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -2067,7 +1984,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.262766101143577 seconds, Total Train Time = 38.181320667266846\n", + "[TrackingCallback] Mean Epoch Time = 1.2246154004877263 seconds, Total Train Time = 37.09647488594055\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -2109,7 +2026,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.17288224399089813, 'eval_runtime': 2.1253, 'eval_samples_per_second': 5375.828, 'eval_steps_per_second': 84.225, 'epoch': 11.0}\n", + "{'eval_loss': 0.17288224399089813, 'eval_runtime': 2.0563, 'eval_samples_per_second': 5555.966, 'eval_steps_per_second': 87.048, 'epoch': 11.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -2117,8 +2034,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: ettm2, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 3258, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -2134,8 +2050,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2162,7 +2077,7 @@ "
\n", " \n", " \n", - " [ 561/2550 00:41 < 02:28, 13.40 it/s, Epoch 11/50]\n", + " [ 561/2550 00:40 < 02:25, 13.70 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -2290,7 +2205,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.63915508443659 seconds, Total Train Time = 42.55753254890442\n", + "[TrackingCallback] Mean Epoch Time = 1.6438330086794766 seconds, Total Train Time = 41.61775827407837\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -2332,7 +2247,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.1721040904521942, 'eval_runtime': 2.1601, 'eval_samples_per_second': 5289.139, 'eval_steps_per_second': 82.867, 'epoch': 11.0}\n", + "{'eval_loss': 0.1721040904521942, 'eval_runtime': 2.1992, 'eval_samples_per_second': 5195.151, 'eval_steps_per_second': 81.394, 'epoch': 11.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -2340,8 +2255,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: weather, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 35768, val = 5175, test = 10444\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -2356,15 +2270,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = weather, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/1024_96_v1\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2381,7 +2294,7 @@ "
\n", " \n", " \n", - " [164/164 00:05]\n", + " [164/164 00:04]\n", "
\n", " " ], @@ -2396,7 +2309,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.15184031426906586, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 5.133, 'eval_samples_per_second': 2034.658, 'eval_steps_per_second': 31.95}\n", + "{'eval_loss': 0.15184031426906586, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 4.9387, 'eval_samples_per_second': 2114.708, 'eval_steps_per_second': 33.207}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -2404,8 +2317,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: weather, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 1698, val = 5175, test = 10444\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -2421,8 +2333,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2449,7 +2360,7 @@ "
\n", " \n", " \n", - " [ 297/1350 00:43 < 02:34, 6.83 it/s, Epoch 11/50]\n", + " [ 297/1350 00:41 < 02:28, 7.07 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -2577,7 +2488,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.6853359612551602 seconds, Total Train Time = 44.3294575214386\n", + "[TrackingCallback] Mean Epoch Time = 1.5865312923084607 seconds, Total Train Time = 42.57614731788635\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -2619,7 +2530,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.1506919413805008, 'eval_runtime': 3.4903, 'eval_samples_per_second': 2992.315, 'eval_steps_per_second': 46.988, 'epoch': 11.0}\n", + "{'eval_loss': 0.1506919413805008, 'eval_runtime': 3.4423, 'eval_samples_per_second': 3034.011, 'eval_steps_per_second': 47.642, 'epoch': 11.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -2627,8 +2538,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: weather, context length: 1024, prediction length 96\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 3491, val = 5175, test = 10444\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -2644,8 +2554,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2672,7 +2581,7 @@ "
\n", " \n", " \n", - " [ 605/2750 00:51 < 03:04, 11.64 it/s, Epoch 11/50]\n", + " [ 605/2750 00:50 < 02:58, 11.99 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -2800,7 +2709,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 2.416881713000211 seconds, Total Train Time = 52.82250738143921\n", + "[TrackingCallback] Mean Epoch Time = 2.335432356054133 seconds, Total Train Time = 51.203959941864014\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -2842,21 +2751,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.15016287565231323, 'eval_runtime': 3.5776, 'eval_samples_per_second': 2919.276, 'eval_steps_per_second': 45.841, 'epoch': 11.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: electricity, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.15016287565231323, 'eval_runtime': 3.5628, 'eval_samples_per_second': 2931.377, 'eval_steps_per_second': 46.031, 'epoch': 11.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse fs10_mse\n", "0 etth1 0.362 0.361 0.363\n", "1 etth2 0.281 0.280 0.280\n", @@ -2866,22 +2762,21 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = electricity, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/1024_96_v1\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 17293, val = 2537, test = 5165\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2898,7 +2793,7 @@ "
\n", " \n", " \n", - " [162/162 00:24]\n", + " [162/162 00:23]\n", "
\n", " " ], @@ -2913,21 +2808,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.1555725336074829, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 24.5596, 'eval_samples_per_second': 210.305, 'eval_steps_per_second': 6.596}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: electricity, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.1555725336074829, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 24.0761, 'eval_samples_per_second': 214.529, 'eval_steps_per_second': 6.729}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 5% --------------------\n" ] }, @@ -2935,17 +2817,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. 
This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 774, val = 2537, test = 5165\n", + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2972,7 +2853,7 @@ "
\n", " \n", " \n", - " [1250/1250 12:32, Epoch 50/50]\n", + " [1250/1250 12:48, Epoch 50/50]\n", "
\n", "
\n", " \n", @@ -3455,7 +3336,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 5.032508630752563 seconds, Total Train Time = 755.1325304508209\n", + "[TrackingCallback] Mean Epoch Time = 5.149650845527649 seconds, Total Train Time = 771.8066599369049\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -3497,21 +3378,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.14543357491493225, 'eval_runtime': 18.4848, 'eval_samples_per_second': 279.418, 'eval_steps_per_second': 8.764, 'epoch': 50.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: electricity, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.14543357491493225, 'eval_runtime': 18.8494, 'eval_samples_per_second': 274.014, 'eval_steps_per_second': 8.594, 'epoch': 50.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 10% --------------------\n" ] }, @@ -3519,17 +3387,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 1643, val = 2537, test = 5165\n", + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -3556,7 +3423,7 @@ "
\n", " \n", " \n", - " [2288/2600 13:55 < 01:54, 2.73 it/s, Epoch 44/50]\n", + " [2288/2600 13:59 < 01:54, 2.72 it/s, Epoch 44/50]\n", "
\n", "
\n", " \n", @@ -3981,7 +3848,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 8.928198153322393 seconds, Total Train Time = 837.7255702018738\n", + "[TrackingCallback] Mean Epoch Time = 8.964020311832428 seconds, Total Train Time = 841.2196242809296\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -4023,21 +3890,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.13808377087116241, 'eval_runtime': 18.4352, 'eval_samples_per_second': 280.17, 'eval_steps_per_second': 8.788, 'epoch': 44.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: traffic, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.13808377087116241, 'eval_runtime': 18.7227, 'eval_samples_per_second': 275.868, 'eval_steps_per_second': 8.653, 'epoch': 44.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse fs10_mse\n", "0 etth1 0.362 0.361 0.363\n", "1 etth2 0.281 0.280 0.280\n", @@ -4048,22 +3902,21 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = traffic, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/1024_96_v1\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 11161, val = 1661, test = 3413\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -4080,7 +3933,7 @@ "
\n", " \n", " \n", - " [427/427 00:43]\n", + " [427/427 00:41]\n", "
\n", " " ], @@ -4095,21 +3948,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4576044976711273, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 43.9397, 'eval_samples_per_second': 77.675, 'eval_steps_per_second': 9.718}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: traffic, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.4576044976711273, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 41.4647, 'eval_samples_per_second': 82.311, 'eval_steps_per_second': 10.298}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 5% --------------------\n" ] }, @@ -4117,17 +3957,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. 
This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 467, val = 1661, test = 3413\n", + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", - " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + " warnings.warn(\n" ] }, { @@ -4143,6 +3981,7 @@ "name": "stderr", "output_type": "stream", "text": [ + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n" ] @@ -4154,7 +3993,7 @@ "
\n", " \n", " \n", - " [1652/2950 11:12 < 08:48, 2.45 it/s, Epoch 28/50]\n", + " [1652/2950 11:17 < 08:53, 2.43 it/s, Epoch 28/50]\n", "
\n", "
\n", " \n", @@ -4435,7 +4274,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 7.41969324861254 seconds, Total Train Time = 674.0370872020721\n", + "[TrackingCallback] Mean Epoch Time = 7.461020086492811 seconds, Total Train Time = 679.5133337974548\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -4477,21 +4316,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4156947731971741, 'eval_runtime': 32.3569, 'eval_samples_per_second': 105.48, 'eval_steps_per_second': 13.197, 'epoch': 28.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Dataset name: traffic, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.4156947731971741, 'eval_runtime': 32.2478, 'eval_samples_per_second': 105.837, 'eval_steps_per_second': 13.241, 'epoch': 28.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 10% --------------------\n" ] }, @@ -4499,17 +4325,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3144901:t-23206823609088:data_handling.py:load_dataset:Data lengths: train = 1030, val = 1661, test = 3413\n", + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3144901:t-23206823609088:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3144901:t-23206823609088:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -4536,7 +4361,7 @@ "
\n", " \n", " \n", - " [2064/6450 08:16 < 17:35, 4.16 it/s, Epoch 16/50]\n", + " [2064/6450 08:15 < 17:32, 4.17 it/s, Epoch 16/50]\n", "
\n", "
\n", " \n", @@ -4709,7 +4534,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 14.462455168366432 seconds, Total Train Time = 497.8177742958069\n", + "[TrackingCallback] Mean Epoch Time = 14.446707516908646 seconds, Total Train Time = 496.55380868911743\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -4751,7 +4576,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.41844481229782104, 'eval_runtime': 32.5373, 'eval_samples_per_second': 104.895, 'eval_steps_per_second': 13.123, 'epoch': 16.0}\n", + "{'eval_loss': 0.41844481229782104, 'eval_runtime': 32.4527, 'eval_samples_per_second': 105.168, 'eval_steps_per_second': 13.158, 'epoch': 16.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse fs10_mse\n", "0 etth1 0.362 0.361 0.363\n", @@ -4785,7 +4610,7 @@ " print(\n", " f\"Running zero-shot/few-shot for TTM-{context_length} on dataset = {DATASET}, forecast_len = {forecast_length}\"\n", " )\n", - " print(f\"Model will be loaded from {hf_model_path}/{hf_model_branch}\")\n", + " print(f\"Model will be loaded from {hf_model_path}\")\n", " SUBDIR = f\"{OUT_DIR}/{DATASET}\"\n", "\n", " # Set batch size\n", @@ -4803,7 +4628,7 @@ " ##### Use the pretrained model in zero-shot forecasting #####\n", " #############################################################\n", " # Load model\n", - " zeroshot_model = TinyTimeMixerForPrediction.from_pretrained(hf_model_path, revision=hf_model_branch)\n", + " zeroshot_model = get_model(hf_model_path, context_length=context_length, prediction_length=forecast_length)\n", "\n", " # zeroshot_trainer\n", " zeroshot_trainer = Trainer(\n", @@ -4853,12 +4678,12 @@ "\n", " # change head dropout to 0.7 for ett datasets\n", " if \"ett\" in DATASET:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " hf_model_path, revision=hf_model_branch, 
head_dropout=0.7\n", + " finetune_forecast_model = get_model(\n", + " hf_model_path, context_length=context_length, prediction_length=forecast_length, head_dropout=0.7\n", " )\n", " else:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " hf_model_path, revision=hf_model_branch\n", + " finetune_forecast_model = get_model(\n", + " hf_model_path, context_length=context_length, prediction_length=forecast_length\n", " )\n", "\n", " if freeze_backbone:\n", @@ -5016,11 +4841,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5030,11 +4855,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5044,11 +4869,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5058,11 +4883,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5072,11 +4897,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5086,11 +4911,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5100,11 +4925,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5114,22 +4939,22 @@ ], "text/plain": [ " dataset zs_mse fs5_mse fs10_mse zs_eval_time fs5_mean_epoch_time \\\n", - "0 etth1 0.362 0.361 0.363 1.778 0.988 \n", - "1 etth2 0.281 0.280 0.280 0.754 0.942 \n", - "2 ettm1 0.387 0.372 0.371 3.035 1.294 \n", - "3 ettm2 0.175 0.173 0.172 2.953 1.263 \n", - "4 weather 0.152 0.151 0.150 5.133 1.685 \n", - "5 electricity 0.156 0.145 0.138 24.560 5.033 \n", - "6 traffic 
0.458 0.416 0.418 43.940 7.420 \n", + "0 etth1 0.362 0.361 0.363 1.020 0.948 \n", + "1 etth2 0.281 0.280 0.280 0.938 0.897 \n", + "2 ettm1 0.387 0.372 0.371 3.082 1.230 \n", + "3 ettm2 0.175 0.173 0.172 3.028 1.225 \n", + "4 weather 0.152 0.151 0.150 4.939 1.587 \n", + "5 electricity 0.156 0.145 0.138 24.076 5.150 \n", + "6 traffic 0.458 0.416 0.418 41.465 7.461 \n", "\n", " fs5_total_train_time fs10_mean_epoch_time fs10_total_train_time \\\n", - "0 38.519 1.003 29.517 \n", - "1 24.571 1.049 28.396 \n", - "2 53.025 1.611 53.820 \n", - "3 38.181 1.639 42.558 \n", - "4 44.329 2.417 52.823 \n", - "5 755.133 8.928 837.726 \n", - "6 674.037 14.462 497.818 \n", + "0 39.056 1.044 29.600 \n", + "1 23.770 0.963 26.703 \n", + "2 51.181 1.625 52.440 \n", + "3 37.096 1.644 41.618 \n", + "4 42.576 2.335 51.204 \n", + "5 771.807 8.964 841.220 \n", + "6 679.513 14.447 496.554 \n", "\n", " fs5_best_val_metric fs10_best_val_metric \n", "0 0.658 0.663 \n", diff --git a/notebooks/hfdemo/tinytimemixer/ttm-r1_benchmarking_512_96.ipynb b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r1_benchmarking_512_96.ipynb similarity index 91% rename from notebooks/hfdemo/tinytimemixer/ttm-r1_benchmarking_512_96.ipynb rename to notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r1_benchmarking_512_96.ipynb index 661972d5..b1ea36c8 100644 --- a/notebooks/hfdemo/tinytimemixer/ttm-r1_benchmarking_512_96.ipynb +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r1_benchmarking_512_96.ipynb @@ -9,17 +9,13 @@ "\n", "**Using TTM-512-96 model.**\n", "\n", - "Pre-trained TTM models will be fetched from the [Hugging Face TTM Model Repository](ibm-granite/granite-timeseries-ttm-r1).\n", + "Pre-trained TTM models will be fetched from the [Granite-TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1).\n", "\n", - "1. 
TTM-R1 pre-trained models can be found here: [TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024_96_v1\"`\n", - "2. TTM-R2 pre-trained models can be found here: [TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024-96-r2\"`\n", - " 3. For 1536-96 model set `TTM_MODEL_REVISION=\"1536-96-r2\"`\n", + "For details, visit the [Hugging Face TTM Model Repository](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2).\n", "\n", - "Details about the revisions (R1 and R2) can be found [here](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)." + "1. IBM Granite TTM-R1 pre-trained models can be found here: [Granite-TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", + "2. IBM Granite TTM-R2 pre-trained models can be found here: [Granite-TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", + "3. 
Research-use (non-commercial use only) TTM-R2 pre-trained models can be found here: [Research-Use-TTM-R2](https://huggingface.co/ibm/ttm-research-r2)" ] }, { @@ -39,10 +35,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-10 07:30:29.873090: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", - "2024-10-10 07:30:29.910301: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-11-04 11:03:01.066287: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-11-04 11:03:01.102168: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-10 07:30:30.926289: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "2024-11-04 11:03:01.950652: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n", " warn(f\"Failed to load image Python extension: {e}\")\n" ] @@ -58,7 +54,8 @@ "from transformers import EarlyStoppingCallback, Trainer, TrainingArguments, set_seed\n", "from transformers.integrations import INTEGRATION_TO_CALLBACK\n", "\n", - "from tsfm_public import TinyTimeMixerForPrediction, TrackingCallback, count_parameters, load_dataset\n", + "from tsfm_public import TrackingCallback, count_parameters, load_dataset\n", + "from 
tsfm_public.toolkit.get_model import get_model\n", "from tsfm_public.toolkit.visualization import plot_predictions" ] }, @@ -128,7 +125,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Get model path" + "## Set model path" ] }, { @@ -137,13 +134,7 @@ "metadata": {}, "outputs": [], "source": [ - "hf_model_path = \"ibm-granite/granite-timeseries-ttm-r1\"\n", - "if context_length == 512:\n", - " hf_model_branch = \"main\"\n", - "elif context_length == 1024:\n", - " hf_model_branch = \"1024_96_v1\"\n", - "else:\n", - " raise ValueError(\"Current supported context lengths are 512 and 1024. Stay tuned for more TTMs!\")" + "hf_model_path = \"ibm-granite/granite-timeseries-ttm-r1\"" ] }, { @@ -156,15 +147,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 96\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 8033, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -174,15 +164,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = etth1, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -214,7 +203,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.36317431926727295, 'eval_model_preparation_time': 0.0026, 'eval_runtime': 1.7392, 'eval_samples_per_second': 1601.334, 'eval_steps_per_second': 25.299}\n", + "{'eval_loss': 0.36317431926727295, 'eval_model_preparation_time': 0.0029, 'eval_runtime': 0.9736, 'eval_samples_per_second': 2860.486, 'eval_steps_per_second': 45.193}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -222,31 +211,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-------------------- Running few-shot 5% --------------------\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 311, val = 2785, test = 2785\n", - "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", - " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 805280\n", "Number of params after freezing the backbone 289696\n", "Using learning rate = 0.001\n" @@ -256,6 +228,9 @@ "name": "stderr", "output_type": "stream", "text": [ + "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", + " warnings.warn(\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n" ] @@ -267,7 +242,7 @@ "
\n", " \n", " \n", - " [ 55/250 00:18 < 01:09, 2.80 it/s, Epoch 11/50]\n", + " [ 55/250 00:18 < 01:07, 2.89 it/s, Epoch 11/50]\n", "
\n", "
0.3620.3610.3631.7780.98838.5191.00329.5171.0200.94839.0561.04429.6000.6580.663
0.2810.2800.2800.7540.94224.5711.04928.3960.9380.89723.7700.96326.7030.2240.223
0.3870.3720.3713.0351.29453.0251.61153.8203.0821.23051.1811.62552.4400.4090.408
0.1750.1730.1722.9531.26338.1811.63942.5583.0281.22537.0961.64441.6180.1210.121
0.1520.1510.1505.1331.68544.3292.41752.8234.9391.58742.5762.33551.2040.4190.424
0.1560.1450.13824.5605.033755.1338.928837.72624.0765.150771.8078.964841.2200.1120.109
0.4580.4160.41843.9407.420674.03714.462497.81841.4657.461679.51314.447496.5540.3450.343
\n", " \n", @@ -395,7 +370,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 0.876734278418801 seconds, Total Train Time = 20.82619071006775\n", + "[TrackingCallback] Mean Epoch Time = 0.8337935317646373 seconds, Total Train Time = 20.22931718826294\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -437,7 +412,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.363126665353775, 'eval_runtime': 0.9014, 'eval_samples_per_second': 3089.797, 'eval_steps_per_second': 48.815, 'epoch': 11.0}\n", + "{'eval_loss': 0.363126665353775, 'eval_runtime': 0.9143, 'eval_samples_per_second': 3045.94, 'eval_steps_per_second': 48.123, 'epoch': 11.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -445,15 +420,29 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 96\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 717, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------- Running few-shot 10% --------------------\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. 
Use `eval_strategy` instead\n", + " warnings.warn(\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "-------------------- Running few-shot 10% --------------------\n", "Number of params before freezing backbone 805280\n", "Number of params after freezing the backbone 289696\n", "Using learning rate = 0.001\n" @@ -463,10 +452,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", - " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n" ] @@ -478,7 +463,7 @@ "
\n", " \n", " \n", - " [204/600 00:31 < 01:01, 6.43 it/s, Epoch 17/50]\n", + " [204/600 00:31 < 01:01, 6.42 it/s, Epoch 17/50]\n", "
\n", "
\n", " \n", @@ -660,7 +645,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 0.8558026341830983 seconds, Total Train Time = 32.004756927490234\n", + "[TrackingCallback] Mean Epoch Time = 0.8514599239124971 seconds, Total Train Time = 32.145331621170044\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -702,7 +687,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.36420342326164246, 'eval_runtime': 0.9572, 'eval_samples_per_second': 2909.495, 'eval_steps_per_second': 45.967, 'epoch': 17.0}\n", + "{'eval_loss': 0.36420342326164246, 'eval_runtime': 1.0758, 'eval_samples_per_second': 2588.887, 'eval_steps_per_second': 40.902, 'epoch': 17.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -710,7 +695,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: etth2, context length: 512, prediction length 96\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -722,16 +707,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = etth2, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 8033, val = 2785, test = 2785\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -763,7 +746,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.28556713461875916, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 0.8802, 'eval_samples_per_second': 3163.949, 'eval_steps_per_second': 49.987}\n", + "{'eval_loss': 0.28556713461875916, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 0.6097, 'eval_samples_per_second': 4567.715, 'eval_steps_per_second': 72.165}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -771,15 +754,29 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: etth2, context length: 512, prediction length 96\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 311, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------- Running few-shot 5% --------------------\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", + " warnings.warn(\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 805280\n", "Number of params after freezing the backbone 289696\n", "Using learning rate = 0.001\n" @@ -789,10 +786,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", - " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n" ] @@ -804,7 +797,7 @@ "
\n", " \n", " \n", - " [ 60/250 00:21 < 01:10, 2.70 it/s, Epoch 12/50]\n", + " [ 60/250 00:20 < 01:07, 2.81 it/s, Epoch 12/50]\n", "
\n", "
\n", " \n", @@ -941,7 +934,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 0.8081231911977133 seconds, Total Train Time = 22.086035013198853\n", + "[TrackingCallback] Mean Epoch Time = 0.7450649539629618 seconds, Total Train Time = 21.270729780197144\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -983,7 +976,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.2842233180999756, 'eval_runtime': 0.9961, 'eval_samples_per_second': 2795.765, 'eval_steps_per_second': 44.17, 'epoch': 12.0}\n", + "{'eval_loss': 0.2842233180999756, 'eval_runtime': 0.985, 'eval_samples_per_second': 2827.373, 'eval_steps_per_second': 44.669, 'epoch': 12.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -991,15 +984,29 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: etth2, context length: 512, prediction length 96\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 717, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------- Running few-shot 10% --------------------\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. 
Use `eval_strategy` instead\n", + " warnings.warn(\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "-------------------- Running few-shot 10% --------------------\n", "Number of params before freezing backbone 805280\n", "Number of params after freezing the backbone 289696\n", "Using learning rate = 0.001\n" @@ -1009,10 +1016,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", - " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n" ] @@ -1024,7 +1027,7 @@ "
\n", " \n", " \n", - " [132/600 00:20 < 01:13, 6.39 it/s, Epoch 11/50]\n", + " [132/600 00:20 < 01:12, 6.46 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -1152,7 +1155,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 0.8696485215967352 seconds, Total Train Time = 20.990825176239014\n", + "[TrackingCallback] Mean Epoch Time = 0.8654254133051092 seconds, Total Train Time = 20.800609350204468\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1194,37 +1197,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.2839512526988983, 'eval_runtime': 1.0239, 'eval_samples_per_second': 2720.009, 'eval_steps_per_second': 42.973, 'epoch': 11.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: ettm1, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.2839512526988983, 'eval_runtime': 0.9747, 'eval_samples_per_second': 2857.265, 'eval_steps_per_second': 45.142, 'epoch': 11.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse fs10_mse\n", "0 etth1 0.363 0.363 0.364\n", "1 etth2 0.286 0.284 0.284\n", "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = ettm1, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 33953, val = 11425, test = 11425\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1256,7 +1245,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.41525664925575256, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 2.4875, 'eval_samples_per_second': 4592.904, 'eval_steps_per_second': 71.959}\n", + "{'eval_loss': 0.41525664925575256, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 2.4021, 'eval_samples_per_second': 4756.348, 'eval_steps_per_second': 74.52}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1264,8 +1253,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: ettm1, context length: 512, prediction length 96\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 1607, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1281,8 +1269,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1518,7 +1505,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.0739147901535033 seconds, Total Train Time = 58.48771643638611\n", + "[TrackingCallback] Mean Epoch Time = 1.0886834502220153 seconds, Total Train Time = 58.44044303894043\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1560,7 +1547,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3644302189350128, 'eval_runtime': 1.8346, 'eval_samples_per_second': 6227.482, 'eval_steps_per_second': 97.568, 'epoch': 20.0}\n", + "{'eval_loss': 0.3644302189350128, 'eval_runtime': 1.8305, 'eval_samples_per_second': 6241.505, 'eval_steps_per_second': 97.788, 'epoch': 20.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1568,8 +1555,7 @@ "name": "stderr", "output_type": "stream", "text": [ - 
"INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: ettm1, context length: 512, prediction length 96\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 3309, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1585,8 +1571,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1613,7 +1598,7 @@ "
\n", " \n", " \n", - " [ 936/2600 01:05 < 01:56, 14.31 it/s, Epoch 18/50]\n", + " [ 936/2600 00:58 < 01:44, 15.97 it/s, Epoch 18/50]\n", "
\n", "
\n", " \n", @@ -1804,7 +1789,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.6178780794143677 seconds, Total Train Time = 65.98268413543701\n", + "[TrackingCallback] Mean Epoch Time = 1.4319277869330511 seconds, Total Train Time = 59.22767448425293\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1846,21 +1831,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.37092921137809753, 'eval_runtime': 2.0838, 'eval_samples_per_second': 5482.726, 'eval_steps_per_second': 85.9, 'epoch': 18.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: ettm2, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.37092921137809753, 'eval_runtime': 1.8111, 'eval_samples_per_second': 6308.151, 'eval_steps_per_second': 98.832, 'epoch': 18.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse fs10_mse\n", "0 etth1 0.363 0.363 0.364\n", "1 etth2 0.286 0.284 0.284\n", @@ -1868,16 +1840,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = ettm2, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 33953, val = 11425, test = 11425\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process 
to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1909,7 +1880,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.1860235333442688, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 2.6069, 'eval_samples_per_second': 4382.517, 'eval_steps_per_second': 68.663}\n", + "{'eval_loss': 0.1860235333442688, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 2.6842, 'eval_samples_per_second': 4256.323, 'eval_steps_per_second': 66.685}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1917,8 +1888,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: ettm2, context length: 512, prediction length 96\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 1607, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1934,8 +1904,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1962,7 +1931,7 @@ "
\n", " \n", " \n", - " [ 338/1300 00:42 < 02:02, 7.87 it/s, Epoch 13/50]\n", + " [ 338/1300 00:38 < 01:49, 8.82 it/s, Epoch 13/50]\n", "
\n", "
\n", " \n", @@ -2108,7 +2077,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.3028539510873647 seconds, Total Train Time = 43.605464220047\n", + "[TrackingCallback] Mean Epoch Time = 1.1133571588076079 seconds, Total Train Time = 38.805434465408325\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -2150,7 +2119,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.17499123513698578, 'eval_runtime': 2.0779, 'eval_samples_per_second': 5498.384, 'eval_steps_per_second': 86.145, 'epoch': 13.0}\n", + "{'eval_loss': 0.17499123513698578, 'eval_runtime': 1.8276, 'eval_samples_per_second': 6251.376, 'eval_steps_per_second': 97.943, 'epoch': 13.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -2158,8 +2127,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: ettm2, context length: 512, prediction length 96\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 3309, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -2175,8 +2143,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2203,7 +2170,7 @@ "
\n", " \n", " \n", - " [ 624/2600 00:42 < 02:16, 14.51 it/s, Epoch 12/50]\n", + " [ 624/2600 00:38 < 02:03, 15.97 it/s, Epoch 12/50]\n", "
\n", "
\n", " \n", @@ -2340,7 +2307,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.621331552664439 seconds, Total Train Time = 43.78216910362244\n", + "[TrackingCallback] Mean Epoch Time = 1.4364634950955708 seconds, Total Train Time = 39.667726039886475\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -2382,21 +2349,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.17638568580150604, 'eval_runtime': 2.1272, 'eval_samples_per_second': 5370.977, 'eval_steps_per_second': 84.149, 'epoch': 12.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: weather, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.17638568580150604, 'eval_runtime': 1.7716, 'eval_samples_per_second': 6449.151, 'eval_steps_per_second': 101.041, 'epoch': 12.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse fs10_mse\n", "0 etth1 0.363 0.363 0.364\n", "1 etth2 0.286 0.284 0.284\n", @@ -2405,16 +2359,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = weather, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 36280, val = 5175, test = 10444\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the 
process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2446,7 +2399,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.1524711698293686, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 3.3764, 'eval_samples_per_second': 3093.197, 'eval_steps_per_second': 48.572}\n", + "{'eval_loss': 0.1524711698293686, 'eval_model_preparation_time': 0.002, 'eval_runtime': 3.2365, 'eval_samples_per_second': 3226.894, 'eval_steps_per_second': 50.671}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -2454,8 +2407,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: weather, context length: 512, prediction length 96\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 1723, val = 5175, test = 10444\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -2471,8 +2423,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2499,7 +2450,7 @@ "
\n", " \n", " \n", - " [ 351/1350 00:40 < 01:56, 8.55 it/s, Epoch 13/50]\n", + " [ 351/1350 00:36 < 01:44, 9.52 it/s, Epoch 13/50]\n", "
\n", "
\n", " \n", @@ -2645,7 +2596,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.393344255594107 seconds, Total Train Time = 41.77857685089111\n", + "[TrackingCallback] Mean Epoch Time = 1.279330877157358 seconds, Total Train Time = 37.41215801239014\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -2687,7 +2638,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.15006662905216217, 'eval_runtime': 2.5414, 'eval_samples_per_second': 4109.619, 'eval_steps_per_second': 64.533, 'epoch': 13.0}\n", + "{'eval_loss': 0.15006662905216217, 'eval_runtime': 2.2979, 'eval_samples_per_second': 4545.047, 'eval_steps_per_second': 71.37, 'epoch': 13.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -2695,8 +2646,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: weather, context length: 512, prediction length 96\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 3542, val = 5175, test = 10444\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -2712,8 +2662,7 @@ "text": [ "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2740,7 +2689,7 @@ "
\n", " \n", " \n", - " [ 672/2800 00:43 < 02:18, 15.38 it/s, Epoch 12/50]\n", + " [ 672/2800 00:39 < 02:06, 16.85 it/s, Epoch 12/50]\n", "
\n", "
\n", " \n", @@ -2877,7 +2826,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.9348897337913513 seconds, Total Train Time = 44.50661301612854\n", + "[TrackingCallback] Mean Epoch Time = 1.7539679209391277 seconds, Total Train Time = 40.553303480148315\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -2919,21 +2868,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.14866013824939728, 'eval_runtime': 2.442, 'eval_samples_per_second': 4276.86, 'eval_steps_per_second': 67.159, 'epoch': 12.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: electricity, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.14866013824939728, 'eval_runtime': 2.3015, 'eval_samples_per_second': 4538.008, 'eval_steps_per_second': 71.259, 'epoch': 12.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse fs10_mse\n", "0 etth1 0.363 0.363 0.364\n", "1 etth2 0.286 0.284 0.284\n", @@ -2943,22 +2879,21 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = electricity, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 17805, val = 2537, test = 5165\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -2975,7 +2910,7 @@ "
\n", " \n", " \n", - " [162/162 00:13]\n", + " [162/162 00:12]\n", "
\n", " " ], @@ -2990,21 +2925,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.17006558179855347, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 14.0713, 'eval_samples_per_second': 367.059, 'eval_steps_per_second': 11.513}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: electricity, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.17006558179855347, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 13.1621, 'eval_samples_per_second': 392.415, 'eval_steps_per_second': 12.308}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 5% --------------------\n" ] }, @@ -3012,17 +2934,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. 
This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 800, val = 2537, test = 5165\n", + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -3049,7 +2970,7 @@ "
\n", " \n", " \n", - " [1250/1250 07:29, Epoch 50/50]\n", + " [1250/1250 07:25, Epoch 50/50]\n", "
\n", "
\n", " \n", @@ -3532,7 +3453,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 3.216276993751526 seconds, Total Train Time = 450.9349133968353\n", + "[TrackingCallback] Mean Epoch Time = 3.164230923652649 seconds, Total Train Time = 447.498943567276\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -3574,21 +3495,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.1425967961549759, 'eval_runtime': 10.3735, 'eval_samples_per_second': 497.905, 'eval_steps_per_second': 15.617, 'epoch': 50.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: electricity, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.1425967961549759, 'eval_runtime': 10.4587, 'eval_samples_per_second': 493.849, 'eval_steps_per_second': 15.49, 'epoch': 50.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 10% --------------------\n" ] }, @@ -3596,17 +3504,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 1695, val = 2537, test = 5165\n", + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -3633,7 +3540,7 @@ "
\n", " \n", " \n", - " [1325/2650 04:31 < 04:32, 4.86 it/s, Epoch 25/50]\n", + " [1325/2650 04:34 < 04:34, 4.82 it/s, Epoch 25/50]\n", "
\n", "
\n", " \n", @@ -3887,7 +3794,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 5.291070413589478 seconds, Total Train Time = 272.9690537452698\n", + "[TrackingCallback] Mean Epoch Time = 5.3488623237609865 seconds, Total Train Time = 275.28469133377075\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -3929,21 +3836,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.13970844447612762, 'eval_runtime': 10.0501, 'eval_samples_per_second': 513.925, 'eval_steps_per_second': 16.119, 'epoch': 25.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: traffic, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.13970844447612762, 'eval_runtime': 10.2592, 'eval_samples_per_second': 503.453, 'eval_steps_per_second': 15.791, 'epoch': 25.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse fs10_mse\n", "0 etth1 0.363 0.363 0.364\n", "1 etth2 0.286 0.284 0.284\n", @@ -3954,22 +3848,21 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = traffic, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 11673, val = 1661, test = 3413\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -3986,7 +3879,7 @@ "
\n", " \n", " \n", - " [427/427 00:23]\n", + " [427/427 00:22]\n", "
\n", " " ], @@ -4001,21 +3894,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5094045996665955, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 23.857, 'eval_samples_per_second': 143.061, 'eval_steps_per_second': 17.898}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: traffic, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.5094045996665955, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 22.6418, 'eval_samples_per_second': 150.739, 'eval_steps_per_second': 18.859}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 5% --------------------\n" ] }, @@ -4023,17 +3903,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. 
This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 493, val = 1661, test = 3413\n", + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -4060,7 +3939,7 @@ "
\n", " \n", " \n", - " [3100/3100 11:42, Epoch 50/50]\n", + " [3100/3100 11:41, Epoch 50/50]\n", "
\n", "
\n", " \n", @@ -4543,7 +4422,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 4.735059623718262 seconds, Total Train Time = 703.8364264965057\n", + "[TrackingCallback] Mean Epoch Time = 4.7049186372756955 seconds, Total Train Time = 702.5082604885101\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -4585,21 +4464,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3968665301799774, 'eval_runtime': 17.7251, 'eval_samples_per_second': 192.552, 'eval_steps_per_second': 24.09, 'epoch': 50.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Dataset name: traffic, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.3968665301799774, 'eval_runtime': 17.5608, 'eval_samples_per_second': 194.354, 'eval_steps_per_second': 24.316, 'epoch': 50.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 10% --------------------\n" ] }, @@ -4607,17 +4473,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:186: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", + "/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/dataset.py:199: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()`\n", " data_df[\"group\"] = 0 # create a artificial group\n", - "INFO:p-3134008:t-23177103872768:data_handling.py:load_dataset:Data lengths: train = 1081, val = 1661, test = 3413\n", + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", - "WARNING:p-3134008:t-23177103872768:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3134008:t-23177103872768:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -4644,7 +4509,7 @@ "
\n", " \n", " \n", - " [4080/6800 09:01 < 06:01, 7.53 it/s, Epoch 30/50]\n", + " [4080/6800 08:59 < 05:59, 7.56 it/s, Epoch 30/50]\n", "
\n", "
\n", " \n", @@ -4943,7 +4808,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 8.79585785071055 seconds, Total Train Time = 542.8885765075684\n", + "[TrackingCallback] Mean Epoch Time = 8.70900468826294 seconds, Total Train Time = 540.5900394916534\n", "++++++++++++++++++++ Test MSE after few-shot 10% fine-tuning ++++++++++++++++++++\n" ] }, @@ -4985,7 +4850,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4039205312728882, 'eval_runtime': 17.9277, 'eval_samples_per_second': 190.376, 'eval_steps_per_second': 23.818, 'epoch': 30.0}\n", + "{'eval_loss': 0.4039205312728882, 'eval_runtime': 17.7518, 'eval_samples_per_second': 192.262, 'eval_steps_per_second': 24.054, 'epoch': 30.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse fs10_mse\n", "0 etth1 0.363 0.363 0.364\n", @@ -5019,7 +4884,7 @@ " print(\n", " f\"Running zero-shot/few-shot for TTM-{context_length} on dataset = {DATASET}, forecast_len = {forecast_length}\"\n", " )\n", - " print(f\"Model will be loaded from {hf_model_path}/{hf_model_branch}\")\n", + " print(f\"Model will be loaded from {hf_model_path}\")\n", " SUBDIR = f\"{OUT_DIR}/{DATASET}\"\n", "\n", " # Set batch size\n", @@ -5037,7 +4902,7 @@ " ##### Use the pretrained model in zero-shot forecasting #####\n", " #############################################################\n", " # Load model\n", - " zeroshot_model = TinyTimeMixerForPrediction.from_pretrained(hf_model_path, revision=hf_model_branch)\n", + " zeroshot_model = get_model(hf_model_path, context_length=context_length, prediction_length=forecast_length)\n", "\n", " # zeroshot_trainer\n", " zeroshot_trainer = Trainer(\n", @@ -5087,12 +4952,12 @@ "\n", " # change head dropout to 0.7 for ett datasets\n", " if \"ett\" in DATASET:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " hf_model_path, revision=hf_model_branch, head_dropout=0.7\n", + 
" finetune_forecast_model = get_model(\n", + " hf_model_path, context_length=context_length, prediction_length=forecast_length, head_dropout=0.7\n", " )\n", " else:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " hf_model_path, revision=hf_model_branch\n", + " finetune_forecast_model = get_model(\n", + " hf_model_path, context_length=context_length, prediction_length=forecast_length\n", " )\n", "\n", " if freeze_backbone:\n", @@ -5206,7 +5071,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -5250,11 +5115,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5264,11 +5129,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5278,11 +5143,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5292,11 +5157,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5306,11 +5171,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5320,11 +5185,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5334,11 +5199,11 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5348,22 +5213,22 @@ ], "text/plain": [ " dataset zs_mse fs5_mse fs10_mse zs_eval_time fs5_mean_epoch_time \\\n", - "0 etth1 0.363 0.363 0.364 1.739 0.877 \n", - "1 etth2 0.286 0.284 0.284 0.880 0.808 \n", - "2 ettm1 0.415 0.364 0.371 2.488 1.074 \n", - "3 ettm2 0.186 0.175 0.176 2.607 1.303 \n", - "4 
weather 0.152 0.150 0.149 3.376 1.393 \n", - "5 electricity 0.170 0.143 0.140 14.071 3.216 \n", - "6 traffic 0.509 0.397 0.404 23.857 4.735 \n", + "0 etth1 0.363 0.363 0.364 0.974 0.834 \n", + "1 etth2 0.286 0.284 0.284 0.610 0.745 \n", + "2 ettm1 0.415 0.364 0.371 2.402 1.089 \n", + "3 ettm2 0.186 0.175 0.176 2.684 1.113 \n", + "4 weather 0.152 0.150 0.149 3.236 1.279 \n", + "5 electricity 0.170 0.143 0.140 13.162 3.164 \n", + "6 traffic 0.509 0.397 0.404 22.642 4.705 \n", "\n", " fs5_total_train_time fs10_mean_epoch_time fs10_total_train_time \\\n", - "0 20.826 0.856 32.005 \n", - "1 22.086 0.870 20.991 \n", - "2 58.488 1.618 65.983 \n", - "3 43.605 1.621 43.782 \n", - "4 41.779 1.935 44.507 \n", - "5 450.935 5.291 272.969 \n", - "6 703.836 8.796 542.889 \n", + "0 20.229 0.851 32.145 \n", + "1 21.271 0.865 20.801 \n", + "2 58.440 1.432 59.228 \n", + "3 38.805 1.436 39.668 \n", + "4 37.412 1.754 40.553 \n", + "5 447.499 5.349 275.285 \n", + "6 702.508 8.709 540.590 \n", "\n", " fs5_best_val_metric fs10_best_val_metric \n", "0 0.656 0.655 \n", @@ -5375,7 +5240,7 @@ "6 0.328 0.332 " ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } diff --git a/notebooks/hfdemo/tinytimemixer/ttm-r2_benchmarking_1024_96.ipynb b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r2_benchmarking_1024_96.ipynb similarity index 75% rename from notebooks/hfdemo/tinytimemixer/ttm-r2_benchmarking_1024_96.ipynb rename to notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r2_benchmarking_1024_96.ipynb index 99b4f495..44c31b59 100644 --- a/notebooks/hfdemo/tinytimemixer/ttm-r2_benchmarking_1024_96.ipynb +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r2_benchmarking_1024_96.ipynb @@ -9,17 +9,13 @@ "\n", "**Using TTM-1024-96 model.**\n", "\n", - "Pre-trained TTM models will be fetched from the [Hugging Face TTM Model Repository](ibm-granite/granite-timeseries-ttm-r2).\n", + 
"Pre-trained TTM models will be fetched from the [Granite-TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2).\n", "\n", - "1. TTM-R1 pre-trained models can be found here: [TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024_96_v1\"`\n", - "2. TTM-R2 pre-trained models can be found here: [TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024-96-r2\"`\n", - " 3. For 1536-96 model set `TTM_MODEL_REVISION=\"1536-96-r2\"`\n", + "For details, visit the [Hugging Face TTM Model Repository](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2).\n", "\n", - "Details about the revisions (R1 and R2) can be found [here](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)." + "1. IBM Granite TTM-R1 pre-trained models can be found here: [Granite-TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", + "2. IBM Granite TTM-R2 pre-trained models can be found here: [Granite-TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", + "3. 
Research-use (non-commercial use only) TTM-R2 pre-trained models can be found here: [Research-Use-TTM-R2](https://huggingface.co/ibm/ttm-research-r2)" ] }, { @@ -39,10 +35,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-10 07:15:39.622201: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", - "2024-10-10 07:15:39.658868: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-11-05 09:36:47.873779: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-11-05 09:36:58.939912: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-10 07:15:40.389511: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "2024-11-05 09:37:10.025472: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n", " warn(f\"Failed to load image Python extension: {e}\")\n" ] @@ -59,7 +55,8 @@ "from transformers import EarlyStoppingCallback, Trainer, TrainingArguments, set_seed\n", "from transformers.integrations import INTEGRATION_TO_CALLBACK\n", "\n", - "from tsfm_public import TinyTimeMixerForPrediction, TrackingCallback, count_parameters, load_dataset\n", + "from tsfm_public import TrackingCallback, count_parameters, load_dataset\n", + "from 
tsfm_public.toolkit.get_model import get_model\n", "from tsfm_public.toolkit.lr_finder import optimal_lr_finder\n", "from tsfm_public.toolkit.visualization import plot_predictions\n", "\n", @@ -135,7 +132,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Get model path" + "## Set model path" ] }, { @@ -144,9 +141,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Please provide the branch name properly based on context_len and forecast_len\n", - "hf_model_path = \"ibm-granite/granite-timeseries-ttm-r2\"\n", - "hf_model_branch = f\"{context_length}-{forecast_length}-r2\"" + "hf_model_path = \"ibm-granite/granite-timeseries-ttm-r2\"" ] }, { @@ -166,8 +161,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: etth1, context length: 1024, prediction length 96\n", - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 7521, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -177,15 +171,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = etth1, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1024-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -217,7 +210,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.35859495401382446, 'eval_model_preparation_time': 0.0024, 'eval_runtime': 1.7201, 'eval_samples_per_second': 1619.12, 'eval_steps_per_second': 25.58}\n", + "{'eval_loss': 0.35859495401382446, 'eval_model_preparation_time': 0.0028, 'eval_runtime': 8.9505, 'eval_samples_per_second': 311.157, 'eval_steps_per_second': 4.916}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -225,46 +218,29 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: etth1, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-------------------- Running few-shot 5% --------------------\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 285, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 2964960\n", - "Number of params after freezing the backbone 955424\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 955424\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.000298364724028334\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.000298364724028334\n", "Using learning rate = 0.000298364724028334\n" ] @@ -276,7 +252,7 @@ "
\n", " \n", " \n", - " [ 55/250 00:25 < 01:33, 2.08 it/s, Epoch 11/50]\n", + " [ 55/250 00:17 < 01:03, 3.05 it/s, Epoch 11/50]\n", "
\n", "
0.3630.3630.3641.7390.87720.8260.85632.0050.9740.83420.2290.85132.1450.6560.655
0.2860.2840.2840.8800.80822.0860.87020.9910.6100.74521.2710.86520.8010.2080.208
0.4150.3640.3712.4881.07458.4881.61865.9832.4021.08958.4401.43259.2280.4530.428
0.1860.1750.1762.6071.30343.6051.62143.7822.6841.11338.8051.43639.6680.1290.129
0.1520.1500.1493.3761.39341.7791.93544.5073.2361.27937.4121.75440.5530.4230.422
0.1700.1430.14014.0713.216450.9355.291272.96913.1623.164447.4995.349275.2850.1160.115
0.5090.3970.40423.8574.735703.8368.796542.88922.6424.705702.5088.709540.5900.3280.332
\n", " \n", @@ -356,7 +332,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.0783685120669277 seconds, Total Train Time = 27.66278910636902\n", + "[TrackingCallback] Mean Epoch Time = 0.9498170722614635 seconds, Total Train Time = 21.376437425613403\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -382,7 +358,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.35856103897094727, 'eval_runtime': 1.2591, 'eval_samples_per_second': 2211.927, 'eval_steps_per_second': 34.946, 'epoch': 11.0}\n", + "{'eval_loss': 0.35856103897094727, 'eval_runtime': 0.8932, 'eval_samples_per_second': 3117.893, 'eval_steps_per_second': 49.259, 'epoch': 11.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -390,8 +366,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: etth2, context length: 1024, prediction length 96\n", - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 7521, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -403,15 +378,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = etth2, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1024-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -443,7 +417,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.269417405128479, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 0.7466, 'eval_samples_per_second': 3730.016, 'eval_steps_per_second': 58.93}\n", + "{'eval_loss': 0.269417405128479, 'eval_model_preparation_time': 0.0024, 'eval_runtime': 0.8529, 'eval_samples_per_second': 3265.37, 'eval_steps_per_second': 51.589}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -451,8 +425,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: etth2, context length: 1024, prediction length 96\n", - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 285, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -461,24 +434,20 @@ "text": [ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 2964960\n", - "Number of params after freezing the backbone 955424\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 955424\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.000298364724028334\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.000298364724028334\n", "Using learning rate = 0.000298364724028334\n" ] @@ -490,7 +459,7 @@ "
\n", " \n", " \n", - " [ 55/250 00:25 < 01:35, 2.04 it/s, Epoch 11/50]\n", + " [ 55/250 00:17 < 01:05, 2.98 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -570,7 +539,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.018961863084273 seconds, Total Train Time = 26.78283667564392\n", + "[TrackingCallback] Mean Epoch Time = 0.6733335581692782 seconds, Total Train Time = 18.43419575691223\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -596,37 +565,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.26942315697669983, 'eval_runtime': 1.3916, 'eval_samples_per_second': 2001.34, 'eval_steps_per_second': 31.619, 'epoch': 11.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: ettm1, context length: 1024, prediction length 96\n", - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 33441, val = 11425, test = 11425\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.26942315697669983, 'eval_runtime': 0.9311, 'eval_samples_per_second': 2991.213, 'eval_steps_per_second': 47.258, 'epoch': 11.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.359 0.359\n", "1 etth2 0.269 0.269\n", "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = ettm1, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1024-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -643,7 +598,7 @@ "
\n", " \n", " \n", - " [179/179 00:02]\n", + " [179/179 00:03]\n", "
\n", " " ], @@ -658,7 +613,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3369019627571106, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 3.0592, 'eval_samples_per_second': 3734.593, 'eval_steps_per_second': 58.511}\n", + "{'eval_loss': 0.3369019627571106, 'eval_model_preparation_time': 0.0024, 'eval_runtime': 3.5784, 'eval_samples_per_second': 3192.741, 'eval_steps_per_second': 50.022}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -666,8 +621,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: ettm1, context length: 1024, prediction length 96\n", - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 1581, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -676,24 +630,20 @@ "text": [ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 2964960\n", - "Number of params after freezing the backbone 955424\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 955424\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.0005214008287999684\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.0005214008287999684\n", "Using learning rate = 0.0005214008287999684\n" ] @@ -705,7 +655,7 @@ "
\n", " \n", " \n", - " [ 275/1250 00:38 < 02:18, 7.02 it/s, Epoch 11/50]\n", + " [ 275/1250 00:31 < 01:52, 8.64 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -785,7 +735,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.3170426542108709 seconds, Total Train Time = 39.83929896354675\n", + "[TrackingCallback] Mean Epoch Time = 0.9686962907964533 seconds, Total Train Time = 32.147533893585205\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -811,22 +761,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.33640581369400024, 'eval_runtime': 2.3821, 'eval_samples_per_second': 4796.209, 'eval_steps_per_second': 75.144, 'epoch': 11.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: ettm2, context length: 1024, prediction length 96\n", - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 33441, val = 11425, test = 11425\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.33640581369400024, 'eval_runtime': 1.9157, 'eval_samples_per_second': 5963.974, 'eval_steps_per_second': 93.44, 'epoch': 11.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.359 0.359\n", "1 etth2 0.269 0.269\n", @@ -834,15 +770,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = ettm2, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1024-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -859,7 +795,7 @@ "
\n", " \n", " \n", - " [179/179 00:02]\n", + " [179/179 00:03]\n", "
\n", " " ], @@ -874,7 +810,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.1764754354953766, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 3.0247, 'eval_samples_per_second': 3777.253, 'eval_steps_per_second': 59.18}\n", + "{'eval_loss': 0.1764754354953766, 'eval_model_preparation_time': 0.0025, 'eval_runtime': 3.4544, 'eval_samples_per_second': 3307.416, 'eval_steps_per_second': 51.819}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -882,8 +818,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: ettm2, context length: 1024, prediction length 96\n", - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 1581, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -892,24 +827,20 @@ "text": [ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 2964960\n", - "Number of params after freezing the backbone 955424\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 955424\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.000298364724028334\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.000298364724028334\n", "Using learning rate = 0.000298364724028334\n" ] @@ -921,7 +852,7 @@ "
\n", " \n", " \n", - " [ 275/1250 00:39 < 02:19, 6.99 it/s, Epoch 11/50]\n", + " [ 275/1250 00:32 < 01:55, 8.47 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -1001,7 +932,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.3009986010464756 seconds, Total Train Time = 40.168370962142944\n", + "[TrackingCallback] Mean Epoch Time = 1.0002062320709229 seconds, Total Train Time = 32.82567358016968\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1027,21 +958,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.17645052075386047, 'eval_runtime': 2.2382, 'eval_samples_per_second': 5104.458, 'eval_steps_per_second': 79.974, 'epoch': 11.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: weather, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.17645052075386047, 'eval_runtime': 1.9039, 'eval_samples_per_second': 6000.805, 'eval_steps_per_second': 94.017, 'epoch': 11.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.359 0.359\n", "1 etth2 0.269 0.269\n", @@ -1050,16 +968,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = weather, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1024-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 35768, val = 5175, test = 10444\n", - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1076,7 +993,7 @@ "
\n", " \n", " \n", - " [164/164 00:05]\n", + " [164/164 00:06]\n", "
\n", " " ], @@ -1091,7 +1008,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.15011762082576752, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 5.1462, 'eval_samples_per_second': 2029.447, 'eval_steps_per_second': 31.868}\n", + "{'eval_loss': 0.15011762082576752, 'eval_model_preparation_time': 0.0024, 'eval_runtime': 6.3602, 'eval_samples_per_second': 1642.084, 'eval_steps_per_second': 25.785}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1099,8 +1016,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: weather, context length: 1024, prediction length 96\n", - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 1698, val = 5175, test = 10444\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1109,24 +1025,20 @@ "text": [ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 2964960\n", - "Number of params after freezing the backbone 955424\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 955424\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.00035938136638046257\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.00035938136638046257\n", "Using learning rate = 0.00035938136638046257\n" ] @@ -1138,7 +1050,7 @@ "
\n", " \n", " \n", - " [ 297/1350 00:44 < 02:38, 6.64 it/s, Epoch 11/50]\n", + " [ 297/1350 00:37 < 02:15, 7.80 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -1218,7 +1130,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.7708212462338535 seconds, Total Train Time = 45.57381844520569\n", + "[TrackingCallback] Mean Epoch Time = 1.4071654189716687 seconds, Total Train Time = 38.4444363117218\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1244,21 +1156,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.1500033736228943, 'eval_runtime': 3.8799, 'eval_samples_per_second': 2691.848, 'eval_steps_per_second': 42.27, 'epoch': 11.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: electricity, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.1500033736228943, 'eval_runtime': 3.4099, 'eval_samples_per_second': 3062.849, 'eval_steps_per_second': 48.095, 'epoch': 11.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.359 0.359\n", "1 etth2 0.269 0.269\n", @@ -1268,16 +1167,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = electricity, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1024-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 17293, val = 2537, test = 5165\n", - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to 
hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1294,7 +1192,7 @@ "
\n", " \n", " \n", - " [162/162 00:24]\n", + " [162/162 00:26]\n", "
\n", " " ], @@ -1309,21 +1207,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.15828542411327362, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 25.1925, 'eval_samples_per_second': 205.021, 'eval_steps_per_second': 6.43}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: electricity, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.15828542411327362, 'eval_model_preparation_time': 0.0024, 'eval_runtime': 26.7977, 'eval_samples_per_second': 192.74, 'eval_steps_per_second': 6.045}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 5% --------------------\n" ] }, @@ -1331,7 +1216,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 774, val = 2537, test = 5165\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1339,24 +1224,20 @@ "output_type": "stream", "text": [ "Number of params before freezing backbone 2964960\n", - "Number of params after freezing the backbone 955424\n", - "LR Finder: Running learning rate (LR) finder algorithm. 
If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 955424\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 8.111308307896872e-05\n", "OPTIMAL SUGGESTED LEARNING RATE = 8.111308307896872e-05\n", "Using learning rate = 8.111308307896872e-05\n" ] @@ -1368,7 +1249,7 @@ "
\n", " \n", " \n", - " [1250/1250 12:33, Epoch 50/50]\n", + " [1250/1250 12:26, Epoch 50/50]\n", "
\n", "
\n", " \n", @@ -1643,7 +1524,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 5.0357036304473874 seconds, Total Train Time = 755.3726332187653\n", + "[TrackingCallback] Mean Epoch Time = 4.880673928260803 seconds, Total Train Time = 747.7492995262146\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1654,7 +1535,7 @@ "
\n", " \n", " \n", - " [162/162 00:16]\n", + " [162/162 00:18]\n", "
\n", " " ], @@ -1669,21 +1550,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.14718736708164215, 'eval_runtime': 18.5622, 'eval_samples_per_second': 278.254, 'eval_steps_per_second': 8.727, 'epoch': 50.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: traffic, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.14718736708164215, 'eval_runtime': 19.2227, 'eval_samples_per_second': 268.692, 'eval_steps_per_second': 8.428, 'epoch': 50.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.359 0.359\n", "1 etth2 0.269 0.269\n", @@ -1694,16 +1562,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1024 on dataset = traffic, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1024-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 11161, val = 1661, test = 3413\n", - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1720,7 +1587,7 @@ "
\n", " \n", " \n", - " [427/427 00:43]\n", + " [427/427 00:46]\n", "
\n", " " ], @@ -1735,21 +1602,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4737617075443268, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 43.7457, 'eval_samples_per_second': 78.019, 'eval_steps_per_second': 9.761}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Dataset name: traffic, context length: 1024, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.4737617075443268, 'eval_model_preparation_time': 0.0024, 'eval_runtime': 46.8323, 'eval_samples_per_second': 72.877, 'eval_steps_per_second': 9.118}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 5% --------------------\n" ] }, @@ -1757,7 +1611,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048561:t-22509185540864:data_handling.py:load_dataset:Data lengths: train = 467, val = 1661, test = 3413\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1765,24 +1619,20 @@ "output_type": "stream", "text": [ "Number of params before freezing backbone 2964960\n", - "Number of params after freezing the backbone 955424\n", - "LR Finder: Running learning rate (LR) finder algorithm. 
If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 955424\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048561:t-22509185540864:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048561:t-22509185540864:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.00020565123083486514\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.00020565123083486514\n", "Using learning rate = 0.00020565123083486514\n" ] @@ -1794,7 +1644,7 @@ "
\n", " \n", " \n", - " [1652/2950 11:10 < 08:47, 2.46 it/s, Epoch 28/50]\n", + " [1652/2950 11:14 < 08:50, 2.45 it/s, Epoch 28/50]\n", "
\n", "
\n", " \n", @@ -1959,7 +1809,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 7.516440740653446 seconds, Total Train Time = 672.2011640071869\n", + "[TrackingCallback] Mean Epoch Time = 7.419389545917511 seconds, Total Train Time = 675.6236307621002\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1970,7 +1820,7 @@ "
\n", " \n", " \n", - " [427/427 00:30]\n", + " [427/427 00:31]\n", "
\n", " " ], @@ -1985,7 +1835,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4179241955280304, 'eval_runtime': 31.9674, 'eval_samples_per_second': 106.765, 'eval_steps_per_second': 13.357, 'epoch': 28.0}\n", + "{'eval_loss': 0.4179241955280304, 'eval_runtime': 32.9888, 'eval_samples_per_second': 103.459, 'eval_steps_per_second': 12.944, 'epoch': 28.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.359 0.359\n", @@ -2015,7 +1865,7 @@ " print(\n", " f\"Running zero-shot/few-shot for TTM-{context_length} on dataset = {DATASET}, forecast_len = {forecast_length}\"\n", " )\n", - " print(f\"Model will be loaded from {hf_model_path}/{hf_model_branch}\")\n", + " print(f\"Model will be loaded from {hf_model_path}\")\n", " SUBDIR = f\"{OUT_DIR}/{DATASET}\"\n", "\n", " # Set batch size\n", @@ -2033,7 +1883,7 @@ " ##### Use the pretrained model in zero-shot forecasting #####\n", " #############################################################\n", " # Load model\n", - " zeroshot_model = TinyTimeMixerForPrediction.from_pretrained(hf_model_path, revision=hf_model_branch)\n", + " zeroshot_model = get_model(hf_model_path, context_length=context_length, prediction_length=forecast_length)\n", "\n", " # zeroshot_trainer\n", " zeroshot_trainer = Trainer(\n", @@ -2086,13 +1936,14 @@ " )\n", "\n", " # change head dropout to 0.7 for ett datasets\n", + " # change head dropout to 0.7 for ett datasets\n", " if \"ett\" in DATASET:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " hf_model_path, revision=hf_model_branch, head_dropout=0.7\n", + " finetune_forecast_model = get_model(\n", + " hf_model_path, context_length=context_length, prediction_length=forecast_length, head_dropout=0.7\n", " )\n", " else:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " hf_model_path, revision=hf_model_branch\n", + " finetune_forecast_model = 
get_model(\n", + " hf_model_path, context_length=context_length, prediction_length=forecast_length\n", " )\n", "\n", " if freeze_backbone:\n", @@ -2254,9 +2105,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2264,9 +2115,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2274,9 +2125,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2284,9 +2135,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2294,9 +2145,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2304,9 +2155,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2314,9 +2165,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2325,22 +2176,22 @@ ], "text/plain": [ " dataset zs_mse fs5_mse zs_eval_time fs5_mean_epoch_time \\\n", - "0 etth1 0.359 0.359 1.720 1.078 \n", - "1 etth2 0.269 0.269 0.747 1.019 \n", - "2 ettm1 0.337 0.336 3.059 1.317 \n", - "3 ettm2 0.176 0.176 3.025 1.301 \n", - "4 weather 0.150 0.150 5.146 1.771 \n", - "5 electricity 0.158 0.147 25.192 5.036 \n", - "6 traffic 0.474 0.418 43.746 7.516 \n", + "0 etth1 0.359 0.359 8.950 0.950 \n", + "1 etth2 0.269 0.269 0.853 0.673 \n", + "2 ettm1 0.337 0.336 3.578 0.969 \n", + "3 ettm2 0.176 0.176 3.454 1.000 \n", + "4 weather 0.150 0.150 6.360 1.407 \n", + "5 electricity 0.158 0.147 26.798 4.881 \n", + "6 traffic 0.474 0.418 46.832 7.419 \n", "\n", " fs5_total_train_time fs5_best_val_metric \n", - "0 27.663 0.666 \n", - "1 26.783 0.239 \n", - "2 39.839 0.395 \n", - "3 40.168 0.122 \n", - "4 45.574 0.394 \n", - "5 755.373 0.117 \n", - "6 672.201 0.345 " + "0 21.376 0.666 \n", + "1 18.434 0.239 \n", + "2 32.148 0.395 \n", + "3 32.826 0.122 \n", + "4 38.444 0.394 \n", + "5 
747.749 0.117 \n", + "6 675.624 0.345 " ] }, "execution_count": 6, diff --git a/notebooks/hfdemo/tinytimemixer/ttm-r2_benchmarking_1536_96.ipynb b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r2_benchmarking_1536_96.ipynb similarity index 74% rename from notebooks/hfdemo/tinytimemixer/ttm-r2_benchmarking_1536_96.ipynb rename to notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r2_benchmarking_1536_96.ipynb index 19f40653..54748c60 100644 --- a/notebooks/hfdemo/tinytimemixer/ttm-r2_benchmarking_1536_96.ipynb +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r2_benchmarking_1536_96.ipynb @@ -9,17 +9,13 @@ "\n", "**Using TTM-1536-96 model.**\n", "\n", - "Pre-trained TTM models will be fetched from the [Hugging Face TTM Model Repository](ibm-granite/granite-timeseries-ttm-r2).\n", + "Pre-trained TTM models will be fetched from the [Granite-TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2).\n", "\n", - "1. TTM-R1 pre-trained models can be found here: [TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024_96_v1\"`\n", - "2. TTM-R2 pre-trained models can be found here: [TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024-96-r2\"`\n", - " 3. For 1536-96 model set `TTM_MODEL_REVISION=\"1536-96-r2\"`\n", + "For details, visit the [Hugging Face TTM Model Repository](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2).\n", "\n", - "Details about the revisions (R1 and R2) can be found [here](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)." + "1. 
IBM Granite TTM-R1 pre-trained models can be found here: [Granite-TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", + "2. IBM Granite TTM-R2 pre-trained models can be found here: [Granite-TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", + "3. Research-use (non-commercial use only) TTM-R2 pre-trained models can be found here: [Research-Use-TTM-R2](https://huggingface.co/ibm/ttm-research-r2)" ] }, { @@ -39,10 +35,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-10 07:15:38.441950: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", - "2024-10-10 07:15:38.481580: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-11-05 09:42:15.453567: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-11-05 09:42:16.131037: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-10 07:15:39.205059: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "2024-11-05 09:42:19.644826: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n", " warn(f\"Failed to load image Python extension: {e}\")\n" ] @@ -59,7 +55,8 @@ "from transformers import EarlyStoppingCallback, Trainer, 
TrainingArguments, set_seed\n", "from transformers.integrations import INTEGRATION_TO_CALLBACK\n", "\n", - "from tsfm_public import TinyTimeMixerForPrediction, TrackingCallback, count_parameters, load_dataset\n", + "from tsfm_public import TrackingCallback, count_parameters, load_dataset\n", + "from tsfm_public.toolkit.get_model import get_model\n", "from tsfm_public.toolkit.lr_finder import optimal_lr_finder\n", "from tsfm_public.toolkit.visualization import plot_predictions\n", "\n", @@ -135,7 +132,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Get model path" + "## Set model path" ] }, { @@ -144,9 +141,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Please provide the branch name properly based on context_len and forecast_len\n", - "hf_model_path = \"ibm-granite/granite-timeseries-ttm-r2\"\n", - "hf_model_branch = f\"{context_length}-{forecast_length}-r2\"" + "hf_model_path = \"ibm-granite/granite-timeseries-ttm-r2\"" ] }, { @@ -166,8 +161,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: etth1, context length: 1536, prediction length 96\n", - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 7009, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -177,43 +171,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1536 on dataset = etth1, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1536-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "87646c2e40c54efda572d0951f308a82", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "config.json: 0%| | 0.00/1.57k [00:00\n", " \n", " \n", - " [ 55/250 00:26 < 01:36, 2.02 it/s, Epoch 11/50]\n", + " [ 55/250 00:19 < 01:12, 2.68 it/s, Epoch 11/50]\n", " \n", "
etth10.3590.3591.7201.07827.6638.9500.95021.3760.666
etth20.2690.2690.7471.01926.7830.8530.67318.4340.239
ettm10.3370.3363.0591.31739.8393.5780.96932.1480.395
ettm20.1760.1763.0251.30140.1683.4541.00032.8260.122
weather0.1500.1505.1461.77145.5746.3601.40738.4440.394
electricity0.1580.14725.1925.036755.37326.7984.881747.7490.117
traffic0.4740.41843.7467.516672.20146.8327.419675.6240.345
\n", " \n", @@ -384,7 +332,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.0921787131916394 seconds, Total Train Time = 28.327817678451538\n", + "[TrackingCallback] Mean Epoch Time = 0.7860906557603315 seconds, Total Train Time = 21.260364055633545\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -410,7 +358,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3571341633796692, 'eval_runtime': 1.4299, 'eval_samples_per_second': 1947.631, 'eval_steps_per_second': 30.77, 'epoch': 11.0}\n", + "{'eval_loss': 0.3571341633796692, 'eval_runtime': 1.0557, 'eval_samples_per_second': 2638.079, 'eval_steps_per_second': 41.679, 'epoch': 11.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -418,8 +366,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: etth2, context length: 1536, prediction length 96\n", - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 7009, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -431,15 +378,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1536 on dataset = etth2, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1536-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -471,7 +417,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.2743358612060547, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 0.9901, 'eval_samples_per_second': 2812.989, 'eval_steps_per_second': 44.442}\n", + "{'eval_loss': 0.2743358612060547, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 1.0095, 'eval_samples_per_second': 2758.711, 'eval_steps_per_second': 43.585}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -479,8 +425,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: etth2, context length: 1536, prediction length 96\n", - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 260, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -489,24 +434,20 @@ "text": [ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 3081120\n", - "Number of params after freezing the backbone 1054560\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 1054560\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.00020565123083486514\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.00020565123083486514\n", "Using learning rate = 0.00020565123083486514\n" ] @@ -518,7 +459,7 @@ "
\n", " \n", " \n", - " [ 95/250 00:46 < 01:17, 2.00 it/s, Epoch 19/50]\n", + " [ 95/250 00:35 < 00:58, 2.64 it/s, Epoch 19/50]\n", "
\n", "
\n", " \n", @@ -638,7 +579,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.0002899671855725 seconds, Total Train Time = 47.401732206344604\n", + "[TrackingCallback] Mean Epoch Time = 0.7202145802347284 seconds, Total Train Time = 35.765501976013184\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -664,7 +605,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.27716049551963806, 'eval_runtime': 1.386, 'eval_samples_per_second': 2009.436, 'eval_steps_per_second': 31.747, 'epoch': 19.0}\n", + "{'eval_loss': 0.27716049551963806, 'eval_runtime': 1.0979, 'eval_samples_per_second': 2536.694, 'eval_steps_per_second': 40.077, 'epoch': 19.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -672,8 +613,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: ettm1, context length: 1536, prediction length 96\n", - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 32929, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -686,15 +626,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1536 on dataset = ettm1, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1536-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -726,7 +665,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.32653480768203735, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 3.4627, 'eval_samples_per_second': 3299.436, 'eval_steps_per_second': 51.694}\n", + "{'eval_loss': 0.32653480768203735, 'eval_model_preparation_time': 0.002, 'eval_runtime': 3.4801, 'eval_samples_per_second': 3282.953, 'eval_steps_per_second': 51.435}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -734,8 +673,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: ettm1, context length: 1536, prediction length 96\n", - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 1556, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -744,24 +682,20 @@ "text": [ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 3081120\n", - "Number of params after freezing the backbone 1054560\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 1054560\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.00043287612810830566\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.00043287612810830566\n", "Using learning rate = 0.00043287612810830566\n" ] @@ -773,7 +707,7 @@ "
\n", " \n", " \n", - " [ 275/1250 00:44 < 02:37, 6.20 it/s, Epoch 11/50]\n", + " [ 275/1250 00:37 < 02:13, 7.29 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -853,7 +787,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.4104089736938477 seconds, Total Train Time = 44.97520208358765\n", + "[TrackingCallback] Mean Epoch Time = 1.092024196277965 seconds, Total Train Time = 38.078644037246704\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -879,7 +813,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3312471807003021, 'eval_runtime': 2.5794, 'eval_samples_per_second': 4429.24, 'eval_steps_per_second': 69.395, 'epoch': 11.0}\n", + "{'eval_loss': 0.3312471807003021, 'eval_runtime': 2.3232, 'eval_samples_per_second': 4917.742, 'eval_steps_per_second': 77.048, 'epoch': 11.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -887,8 +821,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: ettm2, context length: 1536, prediction length 96\n", - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 32929, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -902,15 +835,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1536 on dataset = ettm2, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1536-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -942,7 +874,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.16795998811721802, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 3.518, 'eval_samples_per_second': 3247.549, 'eval_steps_per_second': 50.881}\n", + "{'eval_loss': 0.16795998811721802, 'eval_model_preparation_time': 0.002, 'eval_runtime': 3.4881, 'eval_samples_per_second': 3275.398, 'eval_steps_per_second': 51.317}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -950,8 +882,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: ettm2, context length: 1536, prediction length 96\n", - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 1556, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -960,24 +891,20 @@ "text": [ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 3081120\n", - "Number of params after freezing the backbone 1054560\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 1054560\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.00011768119524349978\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.00011768119524349978\n", "Using learning rate = 0.00011768119524349978\n" ] @@ -989,7 +916,7 @@ "
\n", " \n", " \n", - " [ 275/1250 00:44 < 02:37, 6.18 it/s, Epoch 11/50]\n", + " [ 275/1250 00:38 < 02:15, 7.18 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -1069,7 +996,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.3997448791157117 seconds, Total Train Time = 45.14118027687073\n", + "[TrackingCallback] Mean Epoch Time = 1.114632953297008 seconds, Total Train Time = 38.65457510948181\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1095,22 +1022,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.1680709272623062, 'eval_runtime': 2.6241, 'eval_samples_per_second': 4353.841, 'eval_steps_per_second': 68.213, 'epoch': 11.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: weather, context length: 1536, prediction length 96\n", - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 35256, val = 5175, test = 10444\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.1680709272623062, 'eval_runtime': 2.3584, 'eval_samples_per_second': 4844.357, 'eval_steps_per_second': 75.898, 'epoch': 11.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.357 0.357\n", "1 etth2 0.274 0.277\n", @@ -1119,15 +1032,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1536 on dataset = weather, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1536-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1159,7 +1072,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.14976251125335693, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 6.5327, 'eval_samples_per_second': 1598.717, 'eval_steps_per_second': 25.104}\n", + "{'eval_loss': 0.14976251125335693, 'eval_model_preparation_time': 0.002, 'eval_runtime': 6.4797, 'eval_samples_per_second': 1611.808, 'eval_steps_per_second': 25.31}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1167,8 +1080,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: weather, context length: 1536, prediction length 96\n", - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 1672, val = 5175, test = 10444\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1177,24 +1089,20 @@ "text": [ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 3081120\n", - "Number of params after freezing the backbone 1054560\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 1054560\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.00020565123083486514\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.00020565123083486514\n", "Using learning rate = 0.00020565123083486514\n" ] @@ -1206,7 +1114,7 @@ "
\n", " \n", " \n", - " [ 297/1350 00:53 < 03:10, 5.53 it/s, Epoch 11/50]\n", + " [ 297/1350 00:48 < 02:51, 6.13 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -1286,7 +1194,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.9716925404288552 seconds, Total Train Time = 54.368701219558716\n", + "[TrackingCallback] Mean Epoch Time = 1.7186961824243718 seconds, Total Train Time = 48.810704946517944\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1312,21 +1220,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.14924383163452148, 'eval_runtime': 4.6955, 'eval_samples_per_second': 2224.257, 'eval_steps_per_second': 34.927, 'epoch': 11.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: electricity, context length: 1536, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.14924383163452148, 'eval_runtime': 4.4687, 'eval_samples_per_second': 2337.136, 'eval_steps_per_second': 36.7, 'epoch': 11.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.357 0.357\n", "1 etth2 0.274 0.277\n", @@ -1336,16 +1231,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1536 on dataset = electricity, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1536-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 16781, val = 2537, test = 5165\n", - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process 
to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1362,7 +1256,7 @@ "
\n", " \n", " \n", - " [162/162 00:34]\n", + " [162/162 00:33]\n", "
\n", " " ], @@ -1377,21 +1271,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.15529614686965942, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 34.5318, 'eval_samples_per_second': 149.572, 'eval_steps_per_second': 4.691}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: electricity, context length: 1536, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.15529614686965942, 'eval_model_preparation_time': 0.002, 'eval_runtime': 33.5263, 'eval_samples_per_second': 154.058, 'eval_steps_per_second': 4.832}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 5% --------------------\n" ] }, @@ -1399,7 +1280,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 748, val = 2537, test = 5165\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1407,24 +1288,20 @@ "output_type": "stream", "text": [ "Number of params before freezing backbone 3081120\n", - "Number of params after freezing the backbone 1054560\n", - "LR Finder: Running learning rate (LR) finder algorithm. 
If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 1054560\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.00020565123083486514\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.00020565123083486514\n", "Using learning rate = 0.00020565123083486514\n" ] @@ -1436,7 +1313,7 @@ "
\n", " \n", " \n", - " [1104/1200 16:02 < 01:23, 1.15 it/s, Epoch 46/50]\n", + " [1104/1200 15:21 < 01:20, 1.20 it/s, Epoch 46/50]\n", "
\n", "
\n", " \n", @@ -1691,7 +1568,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 6.774247827737228 seconds, Total Train Time = 964.9241693019867\n", + "[TrackingCallback] Mean Epoch Time = 6.346862710040549 seconds, Total Train Time = 923.6657681465149\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1702,7 +1579,7 @@ "
\n", " \n", " \n", - " [162/162 00:25]\n", + " [162/162 00:24]\n", "
\n", " " ], @@ -1717,21 +1594,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.13803862035274506, 'eval_runtime': 26.8199, 'eval_samples_per_second': 192.581, 'eval_steps_per_second': 6.04, 'epoch': 46.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: traffic, context length: 1536, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.13803862035274506, 'eval_runtime': 26.4286, 'eval_samples_per_second': 195.432, 'eval_steps_per_second': 6.13, 'epoch': 46.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.357 0.357\n", "1 etth2 0.274 0.277\n", @@ -1742,16 +1606,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-1536 on dataset = traffic, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/1536-96-r2\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 10649, val = 1661, test = 3413\n", - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1768,7 +1631,7 @@ "
\n", " \n", " \n", - " [427/427 01:02]\n", + " [427/427 00:58]\n", "
\n", " " ], @@ -1783,21 +1646,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.4634234607219696, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 62.6042, 'eval_samples_per_second': 54.517, 'eval_steps_per_second': 6.821}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Dataset name: traffic, context length: 1536, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.4634234607219696, 'eval_model_preparation_time': 0.002, 'eval_runtime': 58.6051, 'eval_samples_per_second': 58.237, 'eval_steps_per_second': 7.286}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 5% --------------------\n" ] }, @@ -1805,7 +1655,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048562:t-22362052518656:data_handling.py:load_dataset:Data lengths: train = 442, val = 1661, test = 3413\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1813,34 +1663,24 @@ "output_type": "stream", "text": [ "Number of params before freezing backbone 3081120\n", - "Number of params after freezing the backbone 1054560\n", - "LR Finder: Running learning rate (LR) finder algorithm. 
If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 1054560\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048562:t-22362052518656:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 5.590810182512223e-05\n", "OPTIMAL SUGGESTED LEARNING RATE = 5.590810182512223e-05\n", "Using learning rate = 5.590810182512223e-05\n" ] }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048562:t-22362052518656:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" - ] - }, { "data": { "text/html": [ @@ -1848,7 +1688,7 @@ "
\n", " \n", " \n", - " [ 616/2800 06:03 < 21:32, 1.69 it/s, Epoch 11/50]\n", + " [ 616/2800 05:51 < 20:48, 1.75 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -1928,7 +1768,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 9.786083113063466 seconds, Total Train Time = 365.34510469436646\n", + "[TrackingCallback] Mean Epoch Time = 9.234334555539219 seconds, Total Train Time = 352.2789981365204\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1954,7 +1794,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.46613699197769165, 'eval_runtime': 46.0615, 'eval_samples_per_second': 74.097, 'eval_steps_per_second': 9.27, 'epoch': 11.0}\n", + "{'eval_loss': 0.46613699197769165, 'eval_runtime': 45.3952, 'eval_samples_per_second': 75.184, 'eval_steps_per_second': 9.406, 'epoch': 11.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.357 0.357\n", @@ -1984,7 +1824,7 @@ " print(\n", " f\"Running zero-shot/few-shot for TTM-{context_length} on dataset = {DATASET}, forecast_len = {forecast_length}\"\n", " )\n", - " print(f\"Model will be loaded from {hf_model_path}/{hf_model_branch}\")\n", + " print(f\"Model will be loaded from {hf_model_path}\")\n", " SUBDIR = f\"{OUT_DIR}/{DATASET}\"\n", "\n", " # Set batch size\n", @@ -2002,7 +1842,7 @@ " ##### Use the pretrained model in zero-shot forecasting #####\n", " #############################################################\n", " # Load model\n", - " zeroshot_model = TinyTimeMixerForPrediction.from_pretrained(hf_model_path, revision=hf_model_branch)\n", + " zeroshot_model = get_model(hf_model_path, context_length=context_length, prediction_length=forecast_length)\n", "\n", " # zeroshot_trainer\n", " zeroshot_trainer = Trainer(\n", @@ -2055,13 +1895,14 @@ " )\n", "\n", " # change head dropout to 0.7 for ett datasets\n", + " # change head dropout to 0.7 for ett datasets\n", " if \"ett\" in DATASET:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " hf_model_path, 
revision=hf_model_branch, head_dropout=0.7\n", + " finetune_forecast_model = get_model(\n", + " hf_model_path, context_length=context_length, prediction_length=forecast_length, head_dropout=0.7\n", " )\n", " else:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " hf_model_path, revision=hf_model_branch\n", + " finetune_forecast_model = get_model(\n", + " hf_model_path, context_length=context_length, prediction_length=forecast_length\n", " )\n", "\n", " if freeze_backbone:\n", @@ -2223,9 +2064,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2233,9 +2074,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2243,9 +2084,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2253,9 +2094,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2263,9 +2104,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2273,9 +2114,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2283,9 +2124,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2294,22 +2135,22 @@ ], "text/plain": [ " dataset zs_mse fs5_mse zs_eval_time fs5_mean_epoch_time \\\n", - "0 etth1 0.357 0.357 1.996 1.092 \n", - "1 etth2 0.274 0.277 0.990 1.000 \n", - "2 ettm1 0.327 0.331 3.463 1.410 \n", - "3 ettm2 0.168 0.168 3.518 1.400 \n", - "4 weather 0.150 0.149 6.533 1.972 \n", - "5 electricity 0.155 0.138 34.532 6.774 \n", - "6 traffic 0.463 0.466 62.604 9.786 \n", + "0 etth1 0.357 0.357 1.802 0.786 \n", + "1 etth2 0.274 0.277 1.010 0.720 \n", + "2 ettm1 0.327 0.331 3.480 1.092 \n", + "3 ettm2 0.168 0.168 3.488 1.115 \n", + "4 weather 0.150 0.149 6.480 1.719 \n", + "5 electricity 0.155 0.138 33.526 6.347 \n", + "6 
traffic 0.463 0.466 58.605 9.234 \n", "\n", " fs5_total_train_time fs5_best_val_metric \n", - "0 28.328 0.655 \n", - "1 47.402 0.228 \n", - "2 44.975 0.401 \n", - "3 45.141 0.123 \n", - "4 54.369 0.394 \n", - "5 964.924 0.113 \n", - "6 365.345 0.391 " + "0 21.260 0.655 \n", + "1 35.766 0.228 \n", + "2 38.079 0.401 \n", + "3 38.655 0.123 \n", + "4 48.811 0.394 \n", + "5 923.666 0.113 \n", + "6 352.279 0.391 " ] }, "execution_count": 6, diff --git a/notebooks/hfdemo/tinytimemixer/ttm-r2_benchmarking_512_96.ipynb b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r2_benchmarking_512_96.ipynb similarity index 77% rename from notebooks/hfdemo/tinytimemixer/ttm-r2_benchmarking_512_96.ipynb rename to notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r2_benchmarking_512_96.ipynb index 6b2217bd..dff3a8dc 100644 --- a/notebooks/hfdemo/tinytimemixer/ttm-r2_benchmarking_512_96.ipynb +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/sample_notebooks/ttm-r2_benchmarking_512_96.ipynb @@ -9,17 +9,13 @@ "\n", "**Using TTM-512-96 model.**\n", "\n", - "Pre-trained TTM models will be fetched from the [Hugging Face TTM Model Repository](ibm-granite/granite-timeseries-ttm-r2).\n", + "Pre-trained TTM models will be fetched from the [Granite-TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2).\n", "\n", - "1. TTM-R1 pre-trained models can be found here: [TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024_96_v1\"`\n", - "2. TTM-R2 pre-trained models can be found here: [TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024-96-r2\"`\n", - " 3. 
For 1536-96 model set `TTM_MODEL_REVISION=\"1536-96-r2\"`\n", + "For details, visit the [Hugging Face TTM Model Repository](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2).\n", "\n", - "Details about the revisions (R1 and R2) can be found [here](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)." + "1. IBM Granite TTM-R1 pre-trained models can be found here: [Granite-TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", + "2. IBM Granite TTM-R2 pre-trained models can be found here: [Granite-TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", + "3. Research-use (non-commercial use only) TTM-R2 pre-trained models can be found here: [Research-Use-TTM-R2](https://huggingface.co/ibm/ttm-research-r2)" ] }, { @@ -39,10 +35,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-10 07:15:37.180528: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", - "2024-10-10 07:15:37.217865: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-11-05 09:39:43.950830: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-11-05 09:39:44.021779: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-10 07:15:37.936129: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "2024-11-05 09:39:47.880980: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 
"/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n", " warn(f\"Failed to load image Python extension: {e}\")\n" ] @@ -59,7 +55,8 @@ "from transformers import EarlyStoppingCallback, Trainer, TrainingArguments, set_seed\n", "from transformers.integrations import INTEGRATION_TO_CALLBACK\n", "\n", - "from tsfm_public import TinyTimeMixerForPrediction, TrackingCallback, count_parameters, load_dataset\n", + "from tsfm_public import TrackingCallback, count_parameters, load_dataset\n", + "from tsfm_public.toolkit.get_model import get_model\n", "from tsfm_public.toolkit.lr_finder import optimal_lr_finder\n", "from tsfm_public.toolkit.visualization import plot_predictions\n", "\n", @@ -135,7 +132,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Get model path" + "## Set model path" ] }, { @@ -144,9 +141,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Please provide the branch name properly based on context_len and forecast_len\n", - "hf_model_path = \"ibm-granite/granite-timeseries-ttm-r2\"\n", - "hf_model_branch = \"main\"" + "hf_model_path = \"ibm-granite/granite-timeseries-ttm-r2\"" ] }, { @@ -166,8 +161,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 96\n", - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 8033, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -177,15 +171,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = etth1, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -217,7 +210,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3628121316432953, 'eval_model_preparation_time': 0.0025, 'eval_runtime': 1.5528, 'eval_samples_per_second': 1793.585, 'eval_steps_per_second': 28.337}\n", + "{'eval_loss': 0.3628121316432953, 'eval_model_preparation_time': 0.0028, 'eval_runtime': 1.5167, 'eval_samples_per_second': 1836.244, 'eval_steps_per_second': 29.011}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -225,48 +218,25 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-------------------- Running few-shot 5% --------------------\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 311, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 805280\n", "Number of params after freezing the backbone 289696\n", - "LR Finder: Running learning rate (LR) finder algorithm. 
If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "OPTIMAL SUGGESTED LEARNING RATE = 0.00017073526474706903\n", + "Using learning rate = 0.00017073526474706903\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LR Finder: Suggested learning rate = 0.00017073526474706903\n", - "OPTIMAL SUGGESTED LEARNING RATE = 0.00017073526474706903\n", - "Using learning rate = 0.00017073526474706903\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -276,7 +246,7 @@ "
\n", " \n", " \n", - " [105/250 00:46 < 01:04, 2.23 it/s, Epoch 21/50]\n", + " [105/250 00:38 < 00:54, 2.64 it/s, Epoch 21/50]\n", "
\n", "
etth10.3570.3571.9961.09228.3281.8020.78621.2600.655
etth20.2740.2770.9901.00047.4021.0100.72035.7660.228
ettm10.3270.3313.4631.41044.9753.4801.09238.0790.401
ettm20.1680.1683.5181.40045.1413.4881.11538.6550.123
weather0.1500.1496.5331.97254.3696.4801.71948.8110.394
electricity0.1550.13834.5326.774964.92433.5266.347923.6660.113
traffic0.4630.46662.6049.786365.34558.6059.234352.2790.391
\n", " \n", @@ -406,7 +376,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.0237201736086892 seconds, Total Train Time = 48.076265811920166\n", + "[TrackingCallback] Mean Epoch Time = 0.8227878184545607 seconds, Total Train Time = 40.316823959350586\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -432,7 +402,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.36197009682655334, 'eval_runtime': 1.2626, 'eval_samples_per_second': 2205.783, 'eval_steps_per_second': 34.849, 'epoch': 21.0}\n", + "{'eval_loss': 0.36197009682655334, 'eval_runtime': 0.9814, 'eval_samples_per_second': 2837.639, 'eval_steps_per_second': 44.832, 'epoch': 21.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -440,8 +410,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: etth2, context length: 512, prediction length 96\n", - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 8033, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -453,15 +422,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = etth2, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -493,7 +461,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.2757423520088196, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 0.5981, 'eval_samples_per_second': 4656.095, 'eval_steps_per_second': 73.561}\n", + "{'eval_loss': 0.2757423520088196, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 0.6069, 'eval_samples_per_second': 4588.66, 'eval_steps_per_second': 72.496}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -501,8 +469,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: etth2, context length: 512, prediction length 96\n", - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 311, val = 2785, test = 2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -512,25 +479,15 @@ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 805280\n", "Number of params after freezing the backbone 289696\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "OPTIMAL SUGGESTED LEARNING RATE = 0.0002477076355991711\n", + "Using learning rate = 0.0002477076355991711\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LR Finder: Suggested learning rate = 0.0002477076355991711\n", - "OPTIMAL SUGGESTED LEARNING RATE = 0.0002477076355991711\n", - "Using learning rate = 0.0002477076355991711\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -540,7 +497,7 @@ "
\n", " \n", " \n", - " [ 90/250 00:39 < 01:12, 2.21 it/s, Epoch 18/50]\n", + " [ 90/250 00:33 < 01:01, 2.61 it/s, Epoch 18/50]\n", "
\n", "
\n", " \n", @@ -655,7 +612,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 0.9844856394661797 seconds, Total Train Time = 40.708521604537964\n", + "[TrackingCallback] Mean Epoch Time = 0.8318122625350952 seconds, Total Train Time = 34.44136691093445\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -681,7 +638,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.2727772295475006, 'eval_runtime': 1.2979, 'eval_samples_per_second': 2145.709, 'eval_steps_per_second': 33.9, 'epoch': 18.0}\n", + "{'eval_loss': 0.2727772295475006, 'eval_runtime': 1.3208, 'eval_samples_per_second': 2108.521, 'eval_steps_per_second': 33.312, 'epoch': 18.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -689,8 +646,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: ettm1, context length: 512, prediction length 96\n", - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 33953, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -703,15 +659,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = ettm1, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -743,7 +698,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3376680314540863, 'eval_model_preparation_time': 0.002, 'eval_runtime': 2.4601, 'eval_samples_per_second': 4644.037, 'eval_steps_per_second': 72.76}\n", + "{'eval_loss': 0.3376680314540863, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 2.4056, 'eval_samples_per_second': 4749.271, 'eval_steps_per_second': 74.409}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -751,8 +706,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: ettm1, context length: 512, prediction length 96\n", - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 1607, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -761,24 +715,20 @@ "text": [ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 805280\n", - "Number of params after freezing the backbone 289696\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 289696\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.00035938136638046257\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.00035938136638046257\n", "Using learning rate = 0.00035938136638046257\n" ] @@ -790,7 +740,7 @@ "
\n", " \n", " \n", - " [ 286/1300 00:35 < 02:06, 8.04 it/s, Epoch 11/50]\n", + " [ 286/1300 00:32 < 01:56, 8.67 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -870,7 +820,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.2804227525537664 seconds, Total Train Time = 36.33181691169739\n", + "[TrackingCallback] Mean Epoch Time = 1.1163660179484973 seconds, Total Train Time = 33.472071409225464\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -896,7 +846,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3408427834510803, 'eval_runtime': 2.0453, 'eval_samples_per_second': 5585.903, 'eval_steps_per_second': 87.517, 'epoch': 11.0}\n", + "{'eval_loss': 0.3408427834510803, 'eval_runtime': 1.7904, 'eval_samples_per_second': 6381.402, 'eval_steps_per_second': 99.98, 'epoch': 11.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -904,8 +854,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: ettm2, context length: 512, prediction length 96\n", - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 33953, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -919,15 +868,14 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = ettm2, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -959,7 +907,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.17649634182453156, 'eval_model_preparation_time': 0.0023, 'eval_runtime': 2.484, 'eval_samples_per_second': 4599.395, 'eval_steps_per_second': 72.061}\n", + "{'eval_loss': 0.17649634182453156, 'eval_model_preparation_time': 0.0018, 'eval_runtime': 2.771, 'eval_samples_per_second': 4123.041, 'eval_steps_per_second': 64.597}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -967,8 +915,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: ettm2, context length: 512, prediction length 96\n", - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 1607, val = 11425, test = 11425\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -978,25 +925,15 @@ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 805280\n", "Number of params after freezing the backbone 289696\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "OPTIMAL SUGGESTED LEARNING RATE = 0.00035938136638046257\n", + "Using learning rate = 0.00035938136638046257\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LR Finder: Suggested learning rate = 0.00035938136638046257\n", - "OPTIMAL SUGGESTED LEARNING RATE = 0.00035938136638046257\n", - "Using learning rate = 0.00035938136638046257\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1006,7 +943,7 @@ "
\n", " \n", " \n", - " [ 286/1300 00:35 < 02:07, 7.96 it/s, Epoch 11/50]\n", + " [ 286/1300 00:33 < 01:57, 8.59 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -1086,7 +1023,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.302830847826871 seconds, Total Train Time = 36.47719216346741\n", + "[TrackingCallback] Mean Epoch Time = 1.1548236716877331 seconds, Total Train Time = 33.74325489997864\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1112,21 +1049,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.17622655630111694, 'eval_runtime': 2.0595, 'eval_samples_per_second': 5547.589, 'eval_steps_per_second': 86.916, 'epoch': 11.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: weather, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.17622655630111694, 'eval_runtime': 1.8641, 'eval_samples_per_second': 6128.997, 'eval_steps_per_second': 96.025, 'epoch': 11.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.363 0.362\n", "1 etth2 0.276 0.273\n", @@ -1135,16 +1059,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = weather, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 36280, val = 5175, test = 10444\n", - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1176,7 +1099,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.15046171844005585, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 3.2796, 'eval_samples_per_second': 3184.582, 'eval_steps_per_second': 50.007}\n", + "{'eval_loss': 0.15046171844005585, 'eval_model_preparation_time': 0.002, 'eval_runtime': 3.6313, 'eval_samples_per_second': 2876.072, 'eval_steps_per_second': 45.162}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1184,8 +1107,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: weather, context length: 512, prediction length 96\n", - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 1723, val = 5175, test = 10444\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1194,24 +1116,20 @@ "text": [ "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 805280\n", - "Number of params after freezing the backbone 289696\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 289696\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.0033516026509388406\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.0033516026509388406\n", "Using learning rate = 0.0033516026509388406\n" ] @@ -1223,7 +1141,7 @@ "
\n", " \n", " \n", - " [ 297/1350 00:33 < 02:01, 8.70 it/s, Epoch 11/50]\n", + " [ 297/1350 00:32 < 01:55, 9.09 it/s, Epoch 11/50]\n", "
\n", "
\n", " \n", @@ -1303,7 +1221,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 1.437580780549483 seconds, Total Train Time = 34.955886125564575\n", + "[TrackingCallback] Mean Epoch Time = 1.2926849235187878 seconds, Total Train Time = 33.269383668899536\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1329,21 +1247,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.15043412148952484, 'eval_runtime': 2.3537, 'eval_samples_per_second': 4437.221, 'eval_steps_per_second': 69.677, 'epoch': 11.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: electricity, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.15043412148952484, 'eval_runtime': 2.4543, 'eval_samples_per_second': 4255.351, 'eval_steps_per_second': 66.821, 'epoch': 11.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.363 0.362\n", "1 etth2 0.276 0.273\n", @@ -1353,16 +1258,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = electricity, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 17805, val = 2537, test = 5165\n", - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1394,21 +1298,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.18014171719551086, 'eval_model_preparation_time': 0.002, 'eval_runtime': 13.8645, 'eval_samples_per_second': 372.533, 'eval_steps_per_second': 11.684}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: electricity, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.18014171719551086, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 14.0186, 'eval_samples_per_second': 368.439, 'eval_steps_per_second': 11.556}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 5% --------------------\n" ] }, @@ -1416,7 +1307,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 800, val = 2537, test = 5165\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested 
`prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1424,24 +1315,20 @@ "output_type": "stream", "text": [ "Number of params before freezing backbone 805280\n", - "Number of params after freezing the backbone 289696\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 289696\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.00017073526474706903\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.00017073526474706903\n", "Using learning rate = 0.00017073526474706903\n" ] @@ -1453,7 +1340,7 @@ "
\n", " \n", " \n", - " [1250/1250 07:32, Epoch 50/50]\n", + " [1250/1250 07:26, Epoch 50/50]\n", "
\n", "
\n", " \n", @@ -1728,7 +1615,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 3.2049708461761472 seconds, Total Train Time = 454.1625530719757\n", + "[TrackingCallback] Mean Epoch Time = 3.198044848442078 seconds, Total Train Time = 448.517240524292\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -1754,21 +1641,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.14508052170276642, 'eval_runtime': 10.5959, 'eval_samples_per_second': 487.454, 'eval_steps_per_second': 15.289, 'epoch': 50.0}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: traffic, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.14508052170276642, 'eval_runtime': 10.1986, 'eval_samples_per_second': 506.44, 'eval_steps_per_second': 15.884, 'epoch': 50.0}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.363 0.362\n", "1 etth2 0.276 0.273\n", @@ -1779,16 +1653,15 @@ "\n", "====================================================================================================\n", "Running zero-shot/few-shot for TTM-512 on dataset = traffic, forecast_len = 96\n", - "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2/main\n" + "Model will be loaded from ibm-granite/granite-timeseries-ttm-r2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 11673, val = 1661, test = 3413\n", - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -1820,21 +1693,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.5177494287490845, 'eval_model_preparation_time': 0.002, 'eval_runtime': 23.5325, 'eval_samples_per_second': 145.033, 'eval_steps_per_second': 18.145}\n", - "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Dataset name: traffic, context length: 512, prediction length 96\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "{'eval_loss': 0.5177494287490845, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 23.5839, 'eval_samples_per_second': 144.717, 'eval_steps_per_second': 18.106}\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "-------------------- Running few-shot 5% --------------------\n" ] }, @@ -1842,7 +1702,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-3048548:t-23085973639936:data_handling.py:load_dataset:Data lengths: train = 493, val = 1661, test = 3413\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is 
not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -1850,24 +1710,20 @@ "output_type": "stream", "text": [ "Number of params before freezing backbone 805280\n", - "Number of params after freezing the backbone 289696\n", - "LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "LR Finder: Using GPU:0.\n" + "Number of params after freezing the backbone 289696\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-3048548:t-23085973639936:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n", - "INFO:p-3048548:t-23085973639936:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "LR Finder: Suggested learning rate = 0.0002477076355991711\n", "OPTIMAL SUGGESTED LEARNING RATE = 0.0002477076355991711\n", "Using learning rate = 0.0002477076355991711\n" ] @@ -1879,7 +1735,7 @@ "
\n", " \n", " \n", - " [3100/3100 11:48, Epoch 50/50]\n", + " [3100/3100 11:35, Epoch 50/50]\n", "
\n", "
\n", " \n", @@ -2154,7 +2010,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 4.759211735725403 seconds, Total Train Time = 709.794725894928\n", + "[TrackingCallback] Mean Epoch Time = 4.670924897193909 seconds, Total Train Time = 696.410046339035\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -2180,7 +2036,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.40992745757102966, 'eval_runtime': 17.7726, 'eval_samples_per_second': 192.037, 'eval_steps_per_second': 24.026, 'epoch': 50.0}\n", + "{'eval_loss': 0.40992745757102966, 'eval_runtime': 17.4543, 'eval_samples_per_second': 195.539, 'eval_steps_per_second': 24.464, 'epoch': 50.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " dataset zs_mse fs5_mse\n", "0 etth1 0.363 0.362\n", @@ -2210,7 +2066,7 @@ " print(\n", " f\"Running zero-shot/few-shot for TTM-{context_length} on dataset = {DATASET}, forecast_len = {forecast_length}\"\n", " )\n", - " print(f\"Model will be loaded from {hf_model_path}/{hf_model_branch}\")\n", + " print(f\"Model will be loaded from {hf_model_path}\")\n", " SUBDIR = f\"{OUT_DIR}/{DATASET}\"\n", "\n", " # Set batch size\n", @@ -2228,7 +2084,7 @@ " ##### Use the pretrained model in zero-shot forecasting #####\n", " #############################################################\n", " # Load model\n", - " zeroshot_model = TinyTimeMixerForPrediction.from_pretrained(hf_model_path, revision=hf_model_branch)\n", + " zeroshot_model = get_model(hf_model_path, context_length=context_length, prediction_length=forecast_length)\n", "\n", " # zeroshot_trainer\n", " zeroshot_trainer = Trainer(\n", @@ -2281,13 +2137,14 @@ " )\n", "\n", " # change head dropout to 0.7 for ett datasets\n", + " # change head dropout to 0.7 for ett datasets\n", " if \"ett\" in DATASET:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " hf_model_path, 
revision=hf_model_branch, head_dropout=0.7\n", + " finetune_forecast_model = get_model(\n", + " hf_model_path, context_length=context_length, prediction_length=forecast_length, head_dropout=0.7\n", " )\n", " else:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " hf_model_path, revision=hf_model_branch\n", + " finetune_forecast_model = get_model(\n", + " hf_model_path, context_length=context_length, prediction_length=forecast_length\n", " )\n", "\n", " if freeze_backbone:\n", @@ -2449,9 +2306,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2459,9 +2316,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2469,9 +2326,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2479,9 +2336,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2489,9 +2346,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2499,9 +2356,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2509,9 +2366,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2520,22 +2377,22 @@ ], "text/plain": [ " dataset zs_mse fs5_mse zs_eval_time fs5_mean_epoch_time \\\n", - "0 etth1 0.363 0.362 1.553 1.024 \n", - "1 etth2 0.276 0.273 0.598 0.984 \n", - "2 ettm1 0.338 0.341 2.460 1.280 \n", - "3 ettm2 0.176 0.176 2.484 1.303 \n", - "4 weather 0.150 0.150 3.280 1.438 \n", - "5 electricity 0.180 0.145 13.864 3.205 \n", - "6 traffic 0.518 0.410 23.532 4.759 \n", + "0 etth1 0.363 0.362 1.517 0.823 \n", + "1 etth2 0.276 0.273 0.607 0.832 \n", + "2 ettm1 0.338 0.341 2.406 1.116 \n", + "3 ettm2 0.176 0.176 2.771 1.155 \n", + "4 weather 0.150 0.150 3.631 1.293 \n", + "5 electricity 0.180 0.145 14.019 3.198 \n", + "6 
traffic 0.518 0.410 23.584 4.671 \n", "\n", " fs5_total_train_time fs5_best_val_metric \n", - "0 48.076 0.662 \n", - "1 40.709 0.217 \n", - "2 36.332 0.407 \n", - "3 36.477 0.123 \n", - "4 34.956 0.405 \n", - "5 454.163 0.120 \n", - "6 709.795 0.333 " + "0 40.317 0.662 \n", + "1 34.441 0.217 \n", + "2 33.472 0.407 \n", + "3 33.743 0.123 \n", + "4 33.269 0.405 \n", + "5 448.517 0.120 \n", + "6 696.410 0.333 " ] }, "execution_count": 6, diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/summarize_results.py b/notebooks/hfdemo/tinytimemixer/full_benchmarking/summarize_results.py new file mode 100644 index 00000000..89b729bd --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/summarize_results.py @@ -0,0 +1,88 @@ +import argparse +import os +import re + +import pandas as pd + + +parser = argparse.ArgumentParser(description="TTM pretrain arguments.") +# Adding a positional argument +parser.add_argument( + "--result_dir", + "-rd", + type=str, + required=True, + help="Directory containing results after running benchmarking script.", +) +args = parser.parse_args() + +# Path to the main folder containing subfolders +main_folder_path = args.result_dir + +# List to collect dataframes +all_data = [] + +# Iterate through all items in the main folder +for folder_name in os.listdir(main_folder_path): + # Check if the folder name matches the pattern fl-XX_ + print(folder_name) + match = re.search(r"fl-(\d+)_", folder_name) + c_match = re.search(r"cl-(\d+)_", folder_name) + print(match, c_match) + if match: + # Extract XX from the folder name + XX = int(match.group(1)) + + cl = int(c_match.group(1)) + + print("reading", XX, cl) + folder_path = os.path.join(main_folder_path, folder_name) + + # Check if results_zero_few.csv exists in the folder + csv_file_path = os.path.join(folder_path, "results_zero_few.csv") + if os.path.exists(csv_file_path): + # Load the CSV file + df = pd.read_csv(csv_file_path) + + # Add a new column 'FL' with value XX + df["FL"] = 
XX + df["CL"] = cl + + # Append the dataframe to the list + all_data.append(df) + +# Concatenate all dataframes into one +final_df = pd.concat(all_data, ignore_index=True) + +custom_order = ["etth1", "etth2", "ettm1", "ettm2", "weather", "electricity", "traffic"] + + +final_df["dataset_sorted"] = pd.Categorical(final_df["dataset"], categories=custom_order, ordered=True) + +final_df = final_df.sort_values(by=["CL", "dataset_sorted", "FL"], ascending=[True, True, True]) + + +# Save to a new CSV file or process further +out_file_name = os.path.basename(os.path.normpath(main_folder_path)) +final_df[["dataset", "CL", "FL", "zs_mse", "fs5_mse"]].to_csv(f"combined_{out_file_name}.csv", index=False) + +final_df = final_df.drop(columns=["Unnamed: 0", "dataset_sorted", "FL"]) +cols = final_df.columns +cols_index = [ + "dataset", + "CL", +] +cols_others = [_ for _ in cols if _ not in cols_index] +cols_ord = cols_index + cols_others +final_df = final_df[cols_ord] +avg_df = final_df.groupby(["dataset", "CL"], as_index=False, sort=False).mean() +avg_df = avg_df.round(decimals=3) +avg_df["CL"] = avg_df["CL"].astype(int) +avg_df.to_csv( + f"combined_avg_{out_file_name}.csv", + index=False, +) + + +print(f"All CSV files have been combined and saved as combined_{out_file_name}.csv") +print(f"Average scores per-dataset are as combined_avg_{out_file_name}.csv") diff --git a/notebooks/hfdemo/tinytimemixer/full_benchmarking/ttm_full_benchmarking.py b/notebooks/hfdemo/tinytimemixer/full_benchmarking/ttm_full_benchmarking.py new file mode 100644 index 00000000..32539261 --- /dev/null +++ b/notebooks/hfdemo/tinytimemixer/full_benchmarking/ttm_full_benchmarking.py @@ -0,0 +1,305 @@ +""" +# TTM zero-shot and few-shot benchmarking on multiple datasets +Pre-trained TTM models will be fetched from the HuggingFace TTM Model Repositories as described below. + +1. 
TTM-Granite-R1 pre-trained models can be found here: [TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1) +2. TTM-Granite-R2 pre-trained models can be found here: [TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2) +3. TTM-Research-Use pre-trained models can be found here: [TTM-Research-Use Model Card](https://huggingface.co/ibm/ttm-research-r2) + +Every model card has a suite of TTM models. Please read the respective model cards for usage instructions. +""" + +## Imports +import math +import tempfile +import warnings + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from torch.optim import AdamW +from torch.optim.lr_scheduler import OneCycleLR +from transformers import EarlyStoppingCallback, Trainer, TrainingArguments, set_seed +from transformers.integrations import INTEGRATION_TO_CALLBACK + +from tsfm_public import TrackingCallback, count_parameters, load_dataset +from tsfm_public.models.tinytimemixer.utils import get_ttm_args +from tsfm_public.toolkit.get_model import get_model +from tsfm_public.toolkit.lr_finder import optimal_lr_finder +from tsfm_public.toolkit.visualization import plot_predictions + + +warnings.filterwarnings("ignore") + +# Arguments +args = get_ttm_args() + +# Set seed +set_seed(args.random_seed) + +## Important arguments +# Specify model parameters +CONTEXT_LENGTH = args.context_length +FORECAST_LENGTH = args.forecast_length +FREEZE_BACKBONE = True + +# Other args +EPOCHS = args.num_epochs +NUM_WORKERS = args.num_workers + +# Make sure all the datasets in the following `list_datasets` are +# saved in the `DATA_ROOT_PATH` folder. Or, change it accordingly. +# Refer to the load_datasets() function +# in notebooks/hfdemo/tinytimemixer/utils/ttm_utils.py +# to see how it is used. 
+DATA_ROOT_PATH = args.data_root_path + +# This is where results will be saved +OUT_DIR = args.save_dir + +MODEL_PATH = args.hf_model_path + +print(f"{'*' * 20} Pre-training a TTM for context len = {CONTEXT_LENGTH}, forecast len = {FORECAST_LENGTH} {'*' * 20}") + +## List of benchmark datasets (TTM was not pre-trained on any of these) +list_datasets = [ + "etth1", + "etth2", + "ettm1", + "ettm2", + "weather", + "electricity", + "traffic", +] + + +all_results = { + "dataset": [], + "zs_mse": [], + "fs5_mse": [], +} + +# Loop over data +for DATASET in list_datasets: + try: + print() + print("=" * 100) + print( + f"Running zero-shot/few-shot for TTM-{CONTEXT_LENGTH} on dataset = {DATASET}, forecast_len = {FORECAST_LENGTH}" + ) + + print(f"Model will be loaded from {MODEL_PATH}") + SUBDIR = f"{OUT_DIR}/{DATASET}" + + # Set batch size + if DATASET == "traffic": + BATCH_SIZE = 8 + elif DATASET == "electricity": + BATCH_SIZE = 32 + else: + BATCH_SIZE = 64 + + # Data prep: Get dataset + _, _, dset_test = load_dataset( + DATASET, + CONTEXT_LENGTH, + FORECAST_LENGTH, + dataset_root_path=DATA_ROOT_PATH, + use_frequency_token=args.enable_prefix_tuning, + enable_padding=False, + ) + + ############################################################# + ##### Use the pretrained model in zero-shot forecasting ##### + ############################################################# + # Load model + zeroshot_model = get_model( + model_path=MODEL_PATH, context_length=CONTEXT_LENGTH, prediction_length=FORECAST_LENGTH + ) + + # zeroshot_trainer + zeroshot_trainer = Trainer( + model=zeroshot_model, + args=TrainingArguments( + output_dir=tempfile.mkdtemp(), + per_device_eval_batch_size=BATCH_SIZE, + seed=args.random_seed, + ), + eval_dataset=dset_test, + ) + + # evaluate = zero-shot performance + print("+" * 20, "Test MSE zero-shot", "+" * 20) + zeroshot_output = zeroshot_trainer.evaluate(dset_test) + print(zeroshot_output) + print("+" * 60) + + # Plot + plot_predictions( + 
model=zeroshot_trainer.model, + dset=dset_test, + plot_dir=SUBDIR, + num_plots=10, + plot_prefix="test_zeroshot", + channel=0, + ) + plt.close() + + # write results + all_results["dataset"].append(DATASET) + all_results["zs_mse"].append(zeroshot_output["eval_loss"]) + + except Exception as e: + print(f"Reason for exception: {e}") + # write dummy results + all_results["dataset"].append(DATASET) + all_results["zs_mse"].append(np.nan) + + ################################################################ + ## Use the pretrained model in few-shot 5% and 10% forecasting # + ################################################################ + try: + for fewshot_percent in [5]: + # Set learning rate + learning_rate = None # `None` value indicates that the optimal_lr_finder() will be used + + print("-" * 20, f"Running few-shot {fewshot_percent}%", "-" * 20) + # Data prep: Get dataset + dset_train, dset_val, dset_test = load_dataset( + DATASET, + CONTEXT_LENGTH, + FORECAST_LENGTH, + fewshot_fraction=fewshot_percent / 100, + dataset_root_path=DATA_ROOT_PATH, + use_frequency_token=args.enable_prefix_tuning, + enable_padding=False, + ) + + # change head dropout to 0.7 for ett datasets + if "ett" in DATASET: + finetune_forecast_model = get_model( + model_path=MODEL_PATH, + context_length=CONTEXT_LENGTH, + prediction_length=FORECAST_LENGTH, + head_dropout=0.7, + ) + else: + finetune_forecast_model = get_model( + model_path=MODEL_PATH, + context_length=CONTEXT_LENGTH, + prediction_length=FORECAST_LENGTH, + ) + + if FREEZE_BACKBONE: + print( + "Number of params before freezing backbone", + count_parameters(finetune_forecast_model), + ) + + # Freeze the backbone of the model + for param in finetune_forecast_model.backbone.parameters(): + param.requires_grad = False + + # Count params + print( + "Number of params after freezing the backbone", + count_parameters(finetune_forecast_model), + ) + + if learning_rate is None: + learning_rate, finetune_forecast_model = optimal_lr_finder( + 
finetune_forecast_model, + dset_train, + batch_size=BATCH_SIZE, + enable_prefix_tuning=args.enable_prefix_tuning, + ) + print("OPTIMAL SUGGESTED LEARNING RATE =", learning_rate) + + print(f"Using learning rate = {learning_rate}") + + # This is to save space during exhaustive benchmarking, use specific directory if the saved models are needed + tmp_dir = tempfile.mkdtemp() + + finetune_forecast_args = TrainingArguments( + output_dir=tmp_dir, + overwrite_output_dir=True, + learning_rate=learning_rate, + num_train_epochs=EPOCHS, + do_eval=True, + evaluation_strategy="epoch", + per_device_train_batch_size=BATCH_SIZE, + per_device_eval_batch_size=BATCH_SIZE, + dataloader_num_workers=NUM_WORKERS, + report_to=None, + save_strategy="epoch", + logging_strategy="epoch", + save_total_limit=1, + logging_dir=tmp_dir, # Make sure to specify a logging directory + load_best_model_at_end=True, # Load the best model when training ends + metric_for_best_model="eval_loss", # Metric to monitor for early stopping + greater_is_better=False, # For loss + seed=args.random_seed, + ) + + # Create the early stopping callback + early_stopping_callback = EarlyStoppingCallback( + early_stopping_patience=10, # Number of epochs with no improvement after which to stop + early_stopping_threshold=0.0, # Minimum improvement required to consider as improvement + ) + tracking_callback = TrackingCallback() + + # Optimizer and scheduler + optimizer = AdamW(finetune_forecast_model.parameters(), lr=learning_rate) + scheduler = OneCycleLR( + optimizer, + learning_rate, + epochs=EPOCHS, + steps_per_epoch=math.ceil(len(dset_train) / (BATCH_SIZE)), + ) + + finetune_forecast_trainer = Trainer( + model=finetune_forecast_model, + args=finetune_forecast_args, + train_dataset=dset_train, + eval_dataset=dset_val, + callbacks=[early_stopping_callback, tracking_callback], + optimizers=(optimizer, scheduler), + ) + finetune_forecast_trainer.remove_callback(INTEGRATION_TO_CALLBACK["codecarbon"]) + + # Fine tune + 
finetune_forecast_trainer.train() + + # Evaluation + print( + "+" * 20, + f"Test MSE after few-shot {fewshot_percent}% fine-tuning", + "+" * 20, + ) + fewshot_output = finetune_forecast_trainer.evaluate(dset_test) + print(fewshot_output) + print("+" * 60) + + # Plot + plot_predictions( + model=finetune_forecast_trainer.model, + dset=dset_test, + plot_dir=SUBDIR, + num_plots=10, + plot_prefix=f"test_fewshot_{fewshot_percent}", + channel=0, + ) + plt.close() + + # write results + all_results[f"fs{fewshot_percent}_mse"].append(fewshot_output["eval_loss"]) + + except Exception as e: + print(f"Reason for exception: {e}") + # write dummy results + all_results[f"fs{fewshot_percent}_mse"].append(np.nan) + + df_out = pd.DataFrame(all_results).round(3) + print(df_out[["dataset", "zs_mse", "fs5_mse"]]) + df_out.to_csv(f"{OUT_DIR}/results_zero_few.csv") + df_out.to_csv(f"{OUT_DIR}/results_zero_few.csv") diff --git a/notebooks/hfdemo/ttm_getting_started.ipynb b/notebooks/hfdemo/ttm_getting_started.ipynb index eda55a82..a6a540b4 100644 --- a/notebooks/hfdemo/ttm_getting_started.ipynb +++ b/notebooks/hfdemo/ttm_getting_started.ipynb @@ -14,19 +14,11 @@ "1. **Zero-shot**: The pre-trained TTM will be directly used to evaluate on the `test` split of the target data. Note that the TTM was NOT pre-trained on the target data.\n", "2. **Few-shot**: The pre-trained TTM will be quickly fine-tuned on only 5% of the `train` split of the target data, and subsequently, evaluated on the `test` part of the target data.\n", "\n", - "Note: Alternatively, this notebook can be modified to try the TTM-1024-96 or TTM-1536-96 model.\n", + "Note: Alternatively, this notebook can be modified to try any other TTM model from a suite of TTM models. 
For details, visit the [Hugging Face TTM Model Repository](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2).\n", "\n", - "Pre-trained TTM models will be fetched from the [Hugging Face TTM Model Repository](ibm-granite/granite-timeseries-ttm-r2).\n", - "\n", - "1. TTM-R1 pre-trained models can be found here: [TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024_96_v1\"`\n", - "2. TTM-R2 pre-trained models can be found here: [TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", - " 1. For 512-96 model set `TTM_MODEL_REVISION=\"main\"`\n", - " 2. For 1024-96 model set `TTM_MODEL_REVISION=\"1024-96-r2\"`\n", - " 3. For 1536-96 model set `TTM_MODEL_REVISION=\"1536-96-r2\"`\n", - "\n", - "Details about the revisions (R1 and R2) can be found [here](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)." + "1. IBM Granite TTM-R1 pre-trained models can be found here: [Granite-TTM-R1 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1)\n", + "2. IBM Granite TTM-R2 pre-trained models can be found here: [Granite-TTM-R2 Model Card](https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2)\n", + "3. 
Research-use (non-commercial use only) TTM-R2 pre-trained models can be found here: [Research-Use-TTM-R2](https://huggingface.co/ibm/ttm-research-r2)" ] }, { @@ -60,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "f63ae353-96df-4380-89f6-1e6cebf684fb", "metadata": {}, "outputs": [ @@ -68,10 +60,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-19 05:27:55.601887: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", - "2024-10-19 05:27:55.640724: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-11-04 10:42:59.846438: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-11-04 10:42:59.884441: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-19 05:27:57.789885: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "2024-11-04 10:43:01.679490: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "/dccstor/dnn_forecasting/conda_envs/envs/fm/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n", " warn(f\"Failed to load image Python extension: {e}\")\n" ] @@ -87,14 +79,15 @@ "from transformers import EarlyStoppingCallback, Trainer, TrainingArguments, set_seed\n", "from transformers.integrations import INTEGRATION_TO_CALLBACK\n", "\n", - "from tsfm_public import TinyTimeMixerForPrediction, 
TrackingCallback, count_parameters, load_dataset\n", + "from tsfm_public import TrackingCallback, count_parameters, load_dataset\n", + "from tsfm_public.toolkit.get_model import get_model\n", "from tsfm_public.toolkit.lr_finder import optimal_lr_finder\n", "from tsfm_public.toolkit.visualization import plot_predictions" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "894ac389-94e4-4956-8d09-6509d9d452e6", "metadata": {}, "outputs": [], @@ -117,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "a826c4f3-1c6c-4088-b6af-f430f45fd380", "metadata": {}, "outputs": [], @@ -126,11 +119,13 @@ "SEED = 42\n", "set_seed(SEED)\n", "\n", - "# TTM Revision (1 or 2)\n", - "TTM_REVISION = 2\n", + "# TTM Model path. The default model path is Granite-R2. Below, you can choose other TTM releases.\n", + "TTM_MODEL_PATH = \"ibm-granite/granite-timeseries-ttm-r2\"\n", + "# TTM_MODEL_PATH = \"ibm-granite/granite-timeseries-ttm-r1\"\n", + "# TTM_MODEL_PATH = \"ibm/ttm-research-r2\"\n", "\n", "# Context length, Or Length of the history.\n", - "# Currently supported values are: 512/1024/1536 for TTM-R-2, and 512/1024 for TTM-R1\n", + "# Currently supported values are: 512/1024/1536 for Granite-TTM-R2 and Research-Use-TTM-R2, and 512/1024 for Granite-TTM-R1\n", "CONTEXT_LENGTH = 512\n", "\n", "# Dataset\n", @@ -146,68 +141,6 @@ "OUT_DIR = \"ttm_finetuned_models/\"" ] }, - { - "cell_type": "markdown", - "id": "0e255508-17c3-468b-8feb-6dcb80c67503", - "metadata": {}, - "source": [ - "#### Automatically set TTM_MODEL_PATH and TTM_MODEL_REVISION" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "915ea800-83d8-49dd-9f49-e4b0e209552a", - "metadata": {}, - "outputs": [], - "source": [ - "# ----- TTM model path -----\n", - "if TTM_REVISION == 1:\n", - " TTM_MODEL_PATH = \"ibm-granite/granite-timeseries-ttm-r1\"\n", - " # ----- TTM model branch -----\n", - " # For R1 models\n", - " if CONTEXT_LENGTH == 
512:\n", - " TTM_MODEL_REVISION = \"main\"\n", - " elif CONTEXT_LENGTH == 1024:\n", - " TTM_MODEL_REVISION = \"1024_96_v1\"\n", - " else:\n", - " raise ValueError(f\"Unsupported CONTEXT_LENGTH for TTM_MODEL_PATH={TTM_MODEL_PATH}\")\n", - "elif TTM_REVISION == 2:\n", - " TTM_MODEL_PATH = \"ibm-granite/granite-timeseries-ttm-r2\"\n", - " # ----- TTM model branch -----\n", - " # For R2 models\n", - " if CONTEXT_LENGTH == 512:\n", - " TTM_MODEL_REVISION = \"main\"\n", - " elif CONTEXT_LENGTH == 1024:\n", - " TTM_MODEL_REVISION = \"1024-96-r2\"\n", - " elif CONTEXT_LENGTH == 1536:\n", - " TTM_MODEL_REVISION = \"1536-96-r2\"\n", - " else:\n", - " raise ValueError(f\"Unsupported CONTEXT_LENGTH for TTM_MODEL_PATH={TTM_MODEL_PATH}\")\n", - "else:\n", - " raise ValueError(\"Wrong TTM_REVISION. Stay tuned for future models.\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "1525dea1-e4f1-40ee-bfcc-5cb3bc00644b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Chosen TTM model:\n", - "ibm-granite/granite-timeseries-ttm-r2, revision = main\n" - ] - } - ], - "source": [ - "print(\"Chosen TTM model:\")\n", - "print(f\"{TTM_MODEL_PATH}, revision = {TTM_MODEL_REVISION}\")" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -219,19 +152,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "id": "7935d099", "metadata": {}, "outputs": [], "source": [ - "def zeroshot_eval(dataset_name, batch_size, context_length=512, forecast_length=96, prediction_filter_length=None):\n", - " if prediction_filter_length is not None:\n", - " if prediction_filter_length >= forecast_length:\n", - " raise ValueError(\n", - " \"`prediction_filter_length` should be less than the original `forecast_length` of the pre-trained TTM model.\"\n", - " )\n", - " forecast_length = forecast_length - prediction_filter_length\n", - "\n", + "def zeroshot_eval(dataset_name, batch_size, context_length=512, 
forecast_length=96):\n", " # Get data\n", " _, _, dset_test = load_dataset(\n", " dataset_name=dataset_name,\n", @@ -242,17 +168,8 @@ " )\n", "\n", " # Load model\n", - " if prediction_filter_length is None:\n", - " zeroshot_model = TinyTimeMixerForPrediction.from_pretrained(TTM_MODEL_PATH, revision=TTM_MODEL_REVISION)\n", - " else:\n", - " if prediction_filter_length <= forecast_length:\n", - " zeroshot_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " TTM_MODEL_PATH,\n", - " revision=TTM_MODEL_REVISION,\n", - " prediction_filter_length=prediction_filter_length,\n", - " )\n", - " else:\n", - " raise ValueError(\"`prediction_filter_length` should be <= `forecast_length\")\n", + " zeroshot_model = get_model(TTM_MODEL_PATH, context_length=context_length, prediction_length=forecast_length)\n", + "\n", " temp_dir = tempfile.mkdtemp()\n", " # zeroshot_trainer\n", " zeroshot_trainer = Trainer(\n", @@ -291,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "078c945a-9da7-4729-a95d-43cd615d0934", "metadata": {}, "outputs": [], @@ -306,19 +223,11 @@ " freeze_backbone=True,\n", " num_epochs=50,\n", " save_dir=OUT_DIR,\n", - " prediction_filter_length=None,\n", "):\n", " out_dir = os.path.join(save_dir, dataset_name)\n", "\n", " print(\"-\" * 20, f\"Running few-shot {fewshot_percent}%\", \"-\" * 20)\n", "\n", - " if prediction_filter_length is not None:\n", - " if prediction_filter_length >= forecast_length:\n", - " raise ValueError(\n", - " \"`prediction_filter_length` should be less than the original `forecast_length` of the pre-trained TTM model.\"\n", - " )\n", - " forecast_length = forecast_length - prediction_filter_length\n", - "\n", " # Data prep: Get dataset\n", " dset_train, dset_val, dset_test = load_dataset(\n", " dataset_name,\n", @@ -330,33 +239,14 @@ "\n", " # change head dropout to 0.7 for ett datasets\n", " if \"ett\" in dataset_name:\n", - " if prediction_filter_length is None:\n", - " 
finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " TTM_MODEL_PATH, revision=TTM_MODEL_REVISION, head_dropout=0.7\n", - " )\n", - " elif prediction_filter_length <= forecast_length:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " TTM_MODEL_PATH,\n", - " revision=TTM_MODEL_REVISION,\n", - " head_dropout=0.7,\n", - " prediction_filter_length=prediction_filter_length,\n", - " )\n", - " else:\n", - " raise ValueError(\"`prediction_filter_length` should be <= `forecast_length\")\n", + " finetune_forecast_model = get_model(\n", + " TTM_MODEL_PATH, context_length=context_length, prediction_length=forecast_length, head_dropout=0.7\n", + " )\n", " else:\n", - " if prediction_filter_length is None:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " TTM_MODEL_PATH,\n", - " revision=TTM_MODEL_REVISION,\n", - " )\n", - " elif prediction_filter_length <= forecast_length:\n", - " finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(\n", - " TTM_MODEL_PATH,\n", - " revision=TTM_MODEL_REVISION,\n", - " prediction_filter_length=prediction_filter_length,\n", - " )\n", - " else:\n", - " raise ValueError(\"`prediction_filter_length` should be <= `forecast_length\")\n", + " finetune_forecast_model = get_model(\n", + " TTM_MODEL_PATH, context_length=context_length, prediction_length=forecast_length\n", + " )\n", + "\n", " if freeze_backbone:\n", " print(\n", " \"Number of params before freezing backbone\",\n", @@ -471,7 +361,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "6a84d458-76ca-4e2a-a756-59981e9847f1", "metadata": {}, "outputs": [ @@ -479,9 +369,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-1398676:t-22879593206528:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 96\n", - "INFO:p-1398676:t-22879593206528:data_handling.py:load_dataset:Data lengths: train = 8033, val = 
2785, test = 2785\n", - "WARNING:p-1398676:t-22879593206528:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -513,7 +402,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3628121316432953, 'eval_model_preparation_time': 0.0024, 'eval_runtime': 2.1915, 'eval_samples_per_second': 1270.832, 'eval_steps_per_second': 20.078}\n" + "{'eval_loss': 0.3628121316432953, 'eval_model_preparation_time': 0.0019, 'eval_runtime': 0.9077, 'eval_samples_per_second': 3068.323, 'eval_steps_per_second': 48.476}\n" ] }, { @@ -528,7 +417,7 @@ } ], "source": [ - "zeroshot_eval(dataset_name=TARGET_DATASET, context_length=CONTEXT_LENGTH, batch_size=64)" + "zeroshot_eval(dataset_name=TARGET_DATASET, context_length=CONTEXT_LENGTH, forecast_length=96, batch_size=64)" ] }, { @@ -542,7 +431,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "id": "b145fead-50fb-4e3e-89fc-a0c238755e64", "metadata": {}, "outputs": [ @@ -550,8 +439,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-1398676:t-22879593206528:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 96\n", - "INFO:p-1398676:t-22879593206528:data_handling.py:load_dataset:Data lengths: train = 311, val = 2785, test = 
2785\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { @@ -565,7 +453,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:p-1398676:t-22879593206528:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -584,7 +472,7 @@ "
\n", " \n", " \n", - " [ 70/250 00:15 < 00:41, 4.31 it/s, Epoch 14/50]\n", + " [ 70/250 00:15 < 00:39, 4.51 it/s, Epoch 14/50]\n", "
\n", "
etth10.3630.3621.5531.02448.0761.5170.82340.3170.662
etth20.2760.2730.5980.98440.7090.6070.83234.4410.217
ettm10.3380.3412.4601.28036.3322.4061.11633.4720.407
ettm20.1760.1762.4841.30336.4772.7711.15533.7430.123
weather0.1500.1503.2801.43834.9563.6311.29333.2690.405
electricity0.1800.14513.8643.205454.16314.0193.198448.5170.120
traffic0.5180.41023.5324.759709.79523.5844.671696.4100.333
\n", " \n", @@ -679,7 +567,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 0.5175502811159406 seconds, Total Train Time = 17.007731676101685\n", + "[TrackingCallback] Mean Epoch Time = 0.4760245255061558 seconds, Total Train Time = 16.35079002380371\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -705,7 +593,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.36187952756881714, 'eval_runtime': 0.6357, 'eval_samples_per_second': 4381.114, 'eval_steps_per_second': 69.217, 'epoch': 14.0}\n", + "{'eval_loss': 0.36187952756881714, 'eval_runtime': 0.6159, 'eval_samples_per_second': 4522.198, 'eval_steps_per_second': 71.446, 'epoch': 14.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -722,7 +610,12 @@ ], "source": [ "fewshot_finetune_eval(\n", - " dataset_name=TARGET_DATASET, context_length=CONTEXT_LENGTH, batch_size=64, fewshot_percent=5, learning_rate=0.001\n", + " dataset_name=TARGET_DATASET,\n", + " context_length=CONTEXT_LENGTH,\n", + " forecast_length=96,\n", + " batch_size=64,\n", + " fewshot_percent=5,\n", + " learning_rate=0.001,\n", ")" ] }, @@ -732,11 +625,10 @@ "id": "cee2dcc1-bcb8-47ee-8ba7-ff3104159ed6", "metadata": {}, "source": [ - "## Example: Automatically truncating the forecast horizon\n", + "## Example: TTM for other forecast horizon lengths\n", "\n", "Here, we demonstrate that a pre-trained 512-96 TTM model (i.e., context length = 512, forecast horizon = 96) \n", "can be used for a task having forecast horizon less than 96 time points.\n", - "We need to specify the argument `prediction_filter_length` while loading the model. That's it!\n", "\n", "Note that the model performance might be sacrificed by some margin while truncating the model forecast. 
It is recommended to try \n", "this feature in your validation data for your experiment, to verify if the model performance is in the acceptable threshold. \n", @@ -756,7 +648,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "id": "b4eff2e1-acfd-4c5b-8463-e084ba831cdf", "metadata": {}, "outputs": [ @@ -764,9 +656,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-1398676:t-22879593206528:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 48\n", - "INFO:p-1398676:t-22879593206528:data_handling.py:load_dataset:Data lengths: train = 8081, val = 2833, test = 2833\n", - "WARNING:p-1398676:t-22879593206528:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n", + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. 
It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -798,7 +689,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.34208083152770996, 'eval_model_preparation_time': 0.002, 'eval_runtime': 0.7317, 'eval_samples_per_second': 3871.608, 'eval_steps_per_second': 61.497}\n" + "{'eval_loss': 0.34208083152770996, 'eval_model_preparation_time': 0.0021, 'eval_runtime': 0.6053, 'eval_samples_per_second': 4680.197, 'eval_steps_per_second': 74.341}\n" ] }, { @@ -813,7 +704,7 @@ } ], "source": [ - "zeroshot_eval(dataset_name=TARGET_DATASET, context_length=CONTEXT_LENGTH, batch_size=64, prediction_filter_length=48)" + "zeroshot_eval(dataset_name=TARGET_DATASET, context_length=CONTEXT_LENGTH, forecast_length=48, batch_size=64)" ] }, { @@ -827,7 +718,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "id": "4b56cd24-bae6-4cc6-9a3c-52f965014eb0", "metadata": {}, "outputs": [ @@ -835,29 +726,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-1398676:t-22879593206528:data_handling.py:load_dataset:Dataset name: etth1, context length: 512, prediction length 48\n", - "INFO:p-1398676:t-22879593206528:data_handling.py:load_dataset:Data lengths: train = 359, val = 2833, test = 2833\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-------------------- Running few-shot 5% --------------------\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:p-1398676:t-22879593206528:lr_finder.py:optimal_lr_finder:LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.\n", - "INFO:p-1398676:t-22879593206528:lr_finder.py:optimal_lr_finder:LR Finder: Using GPU:0.\n" + "WARNING:/dccstor/dnn_forecasting/arindam/FM/HF/public_tsfm/tsfm/tsfm_public/toolkit/get_model.py:Requested `prediction_length` is not exactly equal to any of the available TTM prediction lengths. 
Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "-------------------- Running few-shot 5% --------------------\n", "Number of params before freezing backbone 805280\n", "Number of params after freezing the backbone 289696\n" ] @@ -866,8 +742,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:p-1398676:t-22879593206528:lr_finder.py:optimal_lr_finder:LR Finder: Suggested learning rate = 0.0013219411484660286\n", - "WARNING:p-1398676:t-22879593206528:other.py:check_os_kernel:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" + "WARNING:accelerate.utils.other:Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n" ] }, { @@ -885,7 +760,7 @@ "
\n", " \n", " \n", - " [ 72/300 00:13 < 00:44, 5.16 it/s, Epoch 12/50]\n", + " [ 72/300 00:13 < 00:42, 5.36 it/s, Epoch 12/50]\n", "
\n", "
\n", " \n", @@ -970,7 +845,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TrackingCallback] Mean Epoch Time = 0.465019166469574 seconds, Total Train Time = 13.872522592544556\n", + "[TrackingCallback] Mean Epoch Time = 0.4244766632715861 seconds, Total Train Time = 13.371108055114746\n", "++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++\n" ] }, @@ -996,7 +871,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'eval_loss': 0.3415098488330841, 'eval_runtime': 0.6799, 'eval_samples_per_second': 4166.882, 'eval_steps_per_second': 66.188, 'epoch': 12.0}\n", + "{'eval_loss': 0.3415098488330841, 'eval_runtime': 0.6066, 'eval_samples_per_second': 4670.135, 'eval_steps_per_second': 74.181, 'epoch': 12.0}\n", "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" ] }, @@ -1015,8 +890,8 @@ "fewshot_finetune_eval(\n", " dataset_name=TARGET_DATASET,\n", " context_length=CONTEXT_LENGTH,\n", + " forecast_length=48,\n", " batch_size=64,\n", - " prediction_filter_length=48,\n", " fewshot_percent=5,\n", " learning_rate=None,\n", ")" diff --git a/tests/toolkit/test_get_model.py b/tests/toolkit/test_get_model.py new file mode 100644 index 00000000..8292a307 --- /dev/null +++ b/tests/toolkit/test_get_model.py @@ -0,0 +1,68 @@ +# Copyright contributors to the TSFM project +# + +"""Tests get_model""" + +import tempfile + +from tsfm_public.toolkit.get_model import get_model + + +def test_get_model(): + mp = "ibm-granite/granite-timeseries-ttm-r2" + cl = 512 + fl = 10 + model = get_model(model_path=mp, context_length=cl, prediction_length=fl, dropout=0.4, decoder_num_layers=1) + assert model.config.prediction_length == 96 + assert model.config.context_length == cl + assert model.config.d_model == 192 + + tmp_dir = tempfile.mkdtemp() + model.save_pretrained(tmp_dir) + model = get_model(tmp_dir) + assert model.config.d_model == 192 + + mp = "ibm-granite/granite-timeseries-ttm-r2" + cl = 1536 + fl = 200 + model = 
get_model(model_path=mp, context_length=cl, prediction_length=fl, decoder_adaptive_patching_levels=2) + assert model.config.prediction_length == 336 + assert model.config.context_length == cl + assert model.config.d_model == 384 + + mp = "ibm-granite/granite-timeseries-ttm-r1" + cl = 1024 + fl = 56 + model = get_model(model_path=mp, context_length=cl, prediction_length=fl, head_dropout=0.3) + assert model.config.prediction_length == 96 + assert model.config.context_length == cl + assert model.config.d_model == 192 + + mp = "ibm/TTM" + cl = 512 + fl = 90 + model = get_model(model_path=mp, context_length=cl, prediction_length=fl) + assert model.config.prediction_length == 96 + assert model.config.context_length == cl + assert model.config.d_model == 192 + + mp = "ibm-granite/granite-timeseries-ttm-r1" + for cl in [512, 1024]: + for fl in [96]: + model = get_model(model_path=mp, context_length=cl, prediction_length=fl) + assert model.config.prediction_length == fl + assert model.config.context_length == cl + + mp = "ibm-granite/granite-timeseries-ttm-r2" + for cl in [512, 1024, 1536]: + for fl in [96, 192, 336, 720]: + model = get_model(model_path=mp, context_length=cl, prediction_length=fl) + assert model.config.prediction_length == fl + assert model.config.context_length == cl + + mp = "ibm/ttm-research-r2" + for cl in [512, 1024, 1536]: + for fl in [96, 192, 336, 720]: + model = get_model(model_path=mp, context_length=cl, prediction_length=fl) + assert model.config.prediction_length == fl + assert model.config.context_length == cl diff --git a/tsfm_public/models/tinytimemixer/utils/ttm_args.py b/tsfm_public/models/tinytimemixer/utils/ttm_args.py index c4ef3ed6..c9b1ff1d 100644 --- a/tsfm_public/models/tinytimemixer/utils/ttm_args.py +++ b/tsfm_public/models/tinytimemixer/utils/ttm_args.py @@ -138,11 +138,28 @@ def get_ttm_args(): default=1, help="Whether to freeze the backbone during few-shot finetuning.", ) + parser.add_argument( + "--enable_prefix_tuning", + 
"-ept", + type=int, + required=False, + default=0, + help="Enable prefix tuning in TTM.", + ) + parser.add_argument( + "--hf_model_path", + "-hmp", + type=str, + required=False, + default="ibm-granite/granite-timeseries-ttm-r2", + help="Hugginface model card path.", + ) # Parsing the arguments args = parser.parse_args() args.early_stopping = int_to_bool(args.early_stopping) args.freeze_backbone = int_to_bool(args.freeze_backbone) + args.enable_prefix_tuning = int_to_bool(args.enable_prefix_tuning) args.d_model = args.patch_length * args.d_model_scale args.decoder_d_model = args.patch_length * args.decoder_d_model_scale diff --git a/tsfm_public/resources/model_paths_config/__init__.py b/tsfm_public/resources/model_paths_config/__init__.py new file mode 100644 index 00000000..4f85bd0b --- /dev/null +++ b/tsfm_public/resources/model_paths_config/__init__.py @@ -0,0 +1,2 @@ +# Copyright contributors to the TSFM project +# diff --git a/tsfm_public/resources/model_paths_config/ttm.yaml b/tsfm_public/resources/model_paths_config/ttm.yaml new file mode 100644 index 00000000..14accdd1 --- /dev/null +++ b/tsfm_public/resources/model_paths_config/ttm.yaml @@ -0,0 +1,160 @@ +ibm-granite-models: + r1-512-96-nofreq: + release: r1 + model_card: ibm-granite/granite-timeseries-ttm-r1 + revision: main + context_length: 512 + prediction_length: 96 + r1-1024-96-nofreq: + release: r1 + model_card: ibm-granite/granite-timeseries-ttm-r1 + revision: 1024_96_v1 + context_length: 1024 + prediction_length: 96 + r2-512-96-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: main + context_length: 512 + prediction_length: 96 + r2-512-192-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 512-192-r2 + context_length: 512 + prediction_length: 192 + r2-512-336-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 512-336-r2 + context_length: 512 + prediction_length: 336 + r2-512-720-nofreq: + 
release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 512-720-r2 + context_length: 512 + prediction_length: 720 + r2-1024-96-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 1024-96-r2 + context_length: 1024 + prediction_length: 96 + r2-1024-192-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 1024-192-r2 + context_length: 1024 + prediction_length: 192 + r2-1024-336-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 1024-336-r2 + context_length: 1024 + prediction_length: 336 + r2-1024-720-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 1024-720-r2 + context_length: 1024 + prediction_length: 720 + r2-1536-96-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 1536-96-r2 + context_length: 1536 + prediction_length: 96 + r2-1536-192-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 1536-192-r2 + context_length: 1536 + prediction_length: 192 + r2-1536-336-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 1536-336-r2 + context_length: 1536 + prediction_length: 336 + r2-1536-720-nofreq: + release: r2 + model_card: ibm-granite/granite-timeseries-ttm-r2 + revision: 1536-720-r2 + context_length: 1536 + prediction_length: 720 + + +research-use-models: + r2-512-96-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: main + context_length: 512 + prediction_length: 96 + r2-512-192-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 512-192-ft-r2 + context_length: 512 + prediction_length: 192 + r2-512-336-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 512-336-ft-r2 + context_length: 512 + prediction_length: 336 + r2-512-720-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 512-720-ft-r2 + context_length: 512 + prediction_length: 720 + 
r2-1024-96-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 1024-96-ft-r2 + context_length: 1024 + prediction_length: 96 + r2-1024-192-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 1024-192-ft-r2 + context_length: 1024 + prediction_length: 192 + r2-1024-336-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 1024-336-ft-r2 + context_length: 1024 + prediction_length: 336 + r2-1024-720-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 1024-720-ft-r2 + context_length: 1024 + prediction_length: 720 + r2-1536-96-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 1536-96-ft-r2 + context_length: 1536 + prediction_length: 96 + r2-1536-192-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 1536-192-ft-r2 + context_length: 1536 + prediction_length: 192 + r2-1536-336-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 1536-336-ft-r2 + context_length: 1536 + prediction_length: 336 + r2-1536-720-freq: + release: r2 + model_card: ibm/ttm-research-r2 + revision: 1536-720-ft-r2 + context_length: 1536 + prediction_length: 720 \ No newline at end of file diff --git a/tsfm_public/toolkit/data_handling.py b/tsfm_public/toolkit/data_handling.py index bec80822..caadf084 100644 --- a/tsfm_public/toolkit/data_handling.py +++ b/tsfm_public/toolkit/data_handling.py @@ -1,3 +1,5 @@ +# Copyright contributors to the TSFM project +# """Utilities for handling datasets""" import glob @@ -25,6 +27,7 @@ def load_dataset( dataset_root_path: str = "datasets/", dataset_path: Optional[str] = None, use_frequency_token: bool = False, + enable_padding: bool = True, ): LOGGER.info(f"Dataset name: {dataset_name}, context length: {context_length}, prediction length {forecast_length}") @@ -76,6 +79,7 @@ def load_dataset( fewshot_fraction=fewshot_fraction, fewshot_location=fewshot_location, use_frequency_token=use_frequency_token, + enable_padding=enable_padding, ) LOGGER.info(f"Data lengths: train = 
{len(train_dataset)}, val = {len(valid_dataset)}, test = {len(test_dataset)}") diff --git a/tsfm_public/toolkit/get_model.py b/tsfm_public/toolkit/get_model.py new file mode 100644 index 00000000..5066e22c --- /dev/null +++ b/tsfm_public/toolkit/get_model.py @@ -0,0 +1,141 @@ +# Copyright contributors to the TSFM project +# +"""Utilities to support model loading""" + +import logging +import os +from importlib import resources + +import yaml + +from tsfm_public.models.tinytimemixer import TinyTimeMixerForPrediction + + +LOGGER = logging.getLogger(__file__) + +SUPPORTED_LENGTHS = { + 1: {"CL": [512, 1024], "FL": [96]}, + 2: { + "CL": [512, 1024, 1536], + "FL": [96, 192, 336, 720], + }, + 3: { + "CL": [512, 1024, 1536], + "FL": [96, 192, 336, 720], + }, +} + + +def check_ttm_model_path(model_path): + if ( + "ibm/TTM" in model_path + or "ibm-granite/granite-timeseries-ttm-r1" in model_path + or "ibm-granite/granite-timeseries-ttm-v1" in model_path + or "ibm-granite/granite-timeseries-ttm-1m" in model_path + ): + return 1 + elif "ibm-granite/granite-timeseries-ttm-r2" in model_path: + return 2 + elif "ibm/ttm-research-r2" in model_path: + return 3 + else: + return 0 + + +def get_model( + model_path, + model_name: str = "ttm", + context_length: int = None, + prediction_length: int = None, + freq_prefix_tuning: bool = None, + **kwargs, +): + LOGGER.info(f"Loading model from: {model_path}") + + if model_name.lower() == "ttm": + model_path_type = check_ttm_model_path(model_path) + prediction_filter_length = 0 + ttm_model_revision = None + if model_path_type != 0: + if context_length is None or prediction_length is None: + raise ValueError( + "Provide `context_length` and `prediction_length` when `model_path` is a hugginface model path." 
+ ) + + # Get right TTM model + config_dir = resources.files("tsfm_public.resources.model_paths_config") + + with open(os.path.join(config_dir, "ttm.yaml"), "r") as file: + model_revisions = yaml.safe_load(file) + + if prediction_length <= 96: + selected_prediction_length = 96 + elif prediction_length <= 192: + selected_prediction_length = 192 + elif prediction_length <= 336: + selected_prediction_length = 336 + elif prediction_length <= 720: + selected_prediction_length = 720 + else: + raise ValueError("Currently supported maximum prediction_length = 720") + + LOGGER.info(f"Selected prediction_length = {selected_prediction_length}") + + prediction_filter_length = prediction_length + + if freq_prefix_tuning is None: + # Default model preference (freq / nofreq) + if model_path_type == 1 or model_path_type == 2: # for granite use nofreq models + freq_prefix = "nofreq" + elif model_path_type == 3: # for research-use use freq models + freq_prefix = "freq" + else: + freq_prefix = None + else: + if freq_prefix_tuning: + freq_prefix = "freq" + else: + freq_prefix = "nofreq" + + try: + if model_path_type == 1 or model_path_type == 2: + ttm_model_revision = model_revisions["ibm-granite-models"][ + f"r{model_path_type}-{context_length}-{selected_prediction_length}-{freq_prefix}" + ]["revision"] + elif model_path_type == 3: + ttm_model_revision = model_revisions["research-use-models"][ + f"r2-{context_length}-{selected_prediction_length}-{freq_prefix}" + ]["revision"] + else: + raise Exception( + "Wrong model path type calculation. Possible reason: the model card path is wrong." + ) + except KeyError: + raise ValueError( + f"Model not found, possibly because of wrong context_length. 
Supported context lengths (CL) and forecast/prediction lengths (FL) for Model Card: {model_path} are {SUPPORTED_LENGTHS[model_path_type]}" + ) + + # Load model + if prediction_filter_length == 0: + model = TinyTimeMixerForPrediction.from_pretrained(model_path, revision=ttm_model_revision, **kwargs) + else: + LOGGER.warning( + f"Requested `prediction_length` ({prediction_length}) is not exactly equal to any of the available TTM prediction lengths.\n\ + Hence, TTM will forecast using the `prediction_filter_length` argument to provide the requested prediction length.\n\ + Supported context lengths (CL) and forecast/prediction lengths (FL) for Model Card: {model_path} are\n\ + {SUPPORTED_LENGTHS[model_path_type]}" + ) + model = TinyTimeMixerForPrediction.from_pretrained( + model_path, + revision=ttm_model_revision, + prediction_filter_length=prediction_filter_length, + **kwargs, + ) + LOGGER.info("Model loaded successfully!") + LOGGER.info( + f"[TTM] context_len = {model.config.context_length}, forecast_len = {model.config.prediction_length}" + ) + else: + raise ValueError("Currently supported values for `model_name` = 'ttm'.") + + return model diff --git a/tsfm_public/toolkit/visualization.py b/tsfm_public/toolkit/visualization.py index 2a42b643..658a469e 100644 --- a/tsfm_public/toolkit/visualization.py +++ b/tsfm_public/toolkit/visualization.py @@ -364,14 +364,15 @@ def plot_predictions( else: batch = dset[index] - ts_y_hat = np.arange(plot_context, plot_context + prediction_length) + feasible_plot_context = min(plot_context, batch["past_values"].shape[0]) + ts_y_hat = np.arange(feasible_plot_context, feasible_plot_context + prediction_length) y_hat = predictions_subset[i] - ts_y = np.arange(plot_context + prediction_length) + ts_y = np.arange(feasible_plot_context + prediction_length) y = batch["future_values"][:, channel].squeeze().numpy() - x = batch["past_values"][-plot_context:, channel].squeeze().numpy() + x = batch["past_values"][-feasible_plot_context:, 
channel].squeeze().numpy() y = np.concatenate((x, y), axis=0) - border = plot_context + border = feasible_plot_context plot_title = f"Example {indices[i]}" # Plot predicted values with a dashed line