breaking: remove deprecated behavior (#946)

Nixtla · Nov 26, 2024 · 133f72b
1 parent 1616de6
commit 133f72b
Show file tree

Hide file tree

Showing 65 changed files with 10,122 additions and 264,014 deletions.
diff --git a/action_files/conftest.py b/action_files/conftest.py
@@ -1,13 +1,9 @@
-import os
-
 import numpy as np
 import pandas as pd
 import pytest
 
 from statsforecast.utils import generate_series
 
-os.environ['NIXTLA_ID_AS_COL'] = '1'
-
 
 @pytest.fixture
 def n_series():

diff --git a/experiments/m3/src/experiment.py b/experiments/m3/src/experiment.py
@@ -25,14 +25,13 @@ def main(dataset: str = 'M3', group: str = 'Other') -> None:
         AutoARIMA(season_length=seasonality),
         DynamicOptimizedTheta(season_length=seasonality),
     ]
-    
+
     start = time.time()
-    fcst = StatsForecast(df=train, models=models, freq=freq, n_jobs=cpu_count())
-    forecasts = fcst.forecast(h=horizon)
+    fcst = StatsForecast(models=models, freq=freq, n_jobs=cpu_count())
+    forecasts = fcst.forecast(df=train, h=horizon)
     end = time.time()
     print(end - start)
 
-    forecasts = forecasts.reset_index()
     forecasts['StatisticalEnsemble'] = forecasts.set_index(['unique_id', 'ds']).median(axis=1).values
     forecasts.to_csv(f'data/StatisticalEnsemble-forecasts-{dataset}-{group}.csv', index=False)
 

diff --git a/experiments/mfles/m4_experiments.py b/experiments/mfles/m4_experiments.py
@@ -5,7 +5,6 @@
 from pathlib import Path
 
 os.environ['NIXTLA_NUMBA_CACHE'] = '1'
-os.environ['NIXTLA_ID_AS_COL'] = '1'
 
 import pandas as pd
 from datasetsforecast.m4 import M4, M4Info

diff --git a/experiments/tbats/experiment.py b/experiments/tbats/experiment.py
@@ -3,7 +3,6 @@
 from multiprocessing import cpu_count
 os.environ['NIXTLA_NUMBA_RELEASE_GIL'] = '1'
 os.environ['NIXTLA_NUMBA_CACHE'] = '1'
-os.environ['NIXTLA_ID_AS_COL'] = '1'
 
 import fire
 import pandas as pd
@@ -57,4 +56,4 @@ def main(dataset: str = 'M3', group: str = 'Other', model: str='AutoTBATS') -> N
     time_df.to_csv(f'data/{model}-time-{dataset}-{group}.csv', index=False)
 
 if __name__ == '__main__':
-    fire.Fire(main)
+    fire.Fire(main)
diff --git a/experiments/tbats/py_tbats.py b/experiments/tbats/py_tbats.py
@@ -4,7 +4,6 @@
 import concurrent.futures
 os.environ['NIXTLA_NUMBA_RELEASE_GIL'] = '1'
 os.environ['NIXTLA_NUMBA_CACHE'] = '1'
-os.environ['NIXTLA_ID_AS_COL'] = '1'
 
 import fire
 import pandas as pd

diff --git a/nbs/docs/distributed/dask.ipynb b/nbs/docs/distributed/dask.ipynb
@@ -34,6 +34,20 @@
     "Before running on Dask, it's recommended to test on a smaller Pandas dataset to make sure everything is working. This example also helps show the small differences when using Dask."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from statsforecast.core import StatsForecast\n",
+    "from statsforecast.models import ( \n",
+    "    AutoARIMA,\n",
+    "    AutoETS,\n",
+    ")\n",
+    "from statsforecast.utils import generate_series"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -60,38 +74,39 @@
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
+       "      <th>unique_id</th>\n",
        "      <th>ds</th>\n",
        "      <th>AutoETS</th>\n",
        "    </tr>\n",
-       "    <tr>\n",
-       "      <th>unique_id</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
+       "      <td>0</td>\n",
        "      <td>2000-08-10</td>\n",
        "      <td>5.261609</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <td>0</td>\n",
        "      <td>2000-08-11</td>\n",
        "      <td>6.196357</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>0</th>\n",
+       "      <th>2</th>\n",
+       "      <td>0</td>\n",
        "      <td>2000-08-12</td>\n",
        "      <td>0.282309</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>0</th>\n",
+       "      <th>3</th>\n",
+       "      <td>0</td>\n",
        "      <td>2000-08-13</td>\n",
        "      <td>1.264195</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>0</th>\n",
+       "      <th>4</th>\n",
+       "      <td>0</td>\n",
        "      <td>2000-08-14</td>\n",
        "      <td>2.262453</td>\n",
        "    </tr>\n",
@@ -100,13 +115,12 @@
        "</div>"
       ],
       "text/plain": [
-       "                  ds   AutoETS\n",
-       "unique_id                     \n",
-       "0         2000-08-10  5.261609\n",
-       "0         2000-08-11  6.196357\n",
-       "0         2000-08-12  0.282309\n",
-       "0         2000-08-13  1.264195\n",
-       "0         2000-08-14  2.262453"
+       "  unique_id         ds   AutoETS\n",
+       "0         0 2000-08-10  5.261609\n",
+       "1         0 2000-08-11  6.196357\n",
+       "2         0 2000-08-12  0.282309\n",
+       "3         0 2000-08-13  1.264195\n",
+       "4         0 2000-08-14  2.262453"
       ]
      },
      "execution_count": null,
@@ -115,13 +129,6 @@
     }
    ],
    "source": [
-    "from statsforecast.core import StatsForecast\n",
-    "from statsforecast.models import ( \n",
-    "    AutoARIMA,\n",
-    "    AutoETS,\n",
-    ")\n",
-    "from statsforecast.utils import generate_series\n",
-    "\n",
     "n_series = 4\n",
     "horizon = 7\n",
     "\n",
@@ -140,7 +147,7 @@
    "source": [
     "## Executing on Dask\n",
     "\n",
-    "To run the forecasts distributed on Dask, just pass in a Dask DataFrame instead. Instead of having the `unique_id` as an index, it needs to be a column because Dask handles the index differently."
+    "To run the forecasts distributed on Dask, just pass in a Dask DataFrame instead."
    ]
   },
   {
@@ -149,13 +156,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import dask.dataframe as dd\n",
-    "\n",
-    "# Make unique_id a column\n",
-    "series = series.reset_index()\n",
-    "series['unique_id'] = series['unique_id'].astype(str)\n",
-    "\n",
-    "ddf = dd.from_pandas(series, npartitions=4)"
+    "import dask.dataframe as dd"
    ]
   },
   {
@@ -193,44 +194,44 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>0</td>\n",
-       "      <td>2000-08-10</td>\n",
+       "      <td>2000-08-10 00:00:00</td>\n",
        "      <td>5.261609</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>0</td>\n",
-       "      <td>2000-08-11</td>\n",
+       "      <td>2000-08-11 00:00:00</td>\n",
        "      <td>6.196357</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>0</td>\n",
-       "      <td>2000-08-12</td>\n",
+       "      <td>2000-08-12 00:00:00</td>\n",
        "      <td>0.282309</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>0</td>\n",
-       "      <td>2000-08-13</td>\n",
+       "      <td>2000-08-13 00:00:00</td>\n",
        "      <td>1.264195</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>0</td>\n",
-       "      <td>2000-08-14</td>\n",
+       "      <td>2000-08-14 00:00:00</td>\n",
        "      <td>2.262453</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "  unique_id         ds   AutoETS\n",
-       "0         0 2000-08-10  5.261609\n",
-       "1         0 2000-08-11  6.196357\n",
-       "2         0 2000-08-12  0.282309\n",
-       "3         0 2000-08-13  1.264195\n",
-       "4         0 2000-08-14  2.262453"
+       "  unique_id                   ds   AutoETS\n",
+       "0         0  2000-08-10 00:00:00  5.261609\n",
+       "1         0  2000-08-11 00:00:00  6.196357\n",
+       "2         0  2000-08-12 00:00:00  0.282309\n",
+       "3         0  2000-08-13 00:00:00  1.264195\n",
+       "4         0  2000-08-14 00:00:00  2.262453"
       ]
      },
      "execution_count": null,
@@ -239,6 +240,8 @@
     }
    ],
    "source": [
+    "series['unique_id'] = series['unique_id'].astype(str)\n",
+    "ddf = dd.from_pandas(series, npartitions=4)\n",
     "sf.forecast(df=ddf, h=horizon).compute().head()"
    ]
   }