# model.py

#SARIMA MODEL

#STEP 1
#Importing the libraries
import csv
import warnings
import itertools
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Apply the 'fivethirtyeight' matplotlib style to the figures drawn below.
plt.style.use('fivethirtyeight')

# Only these two CSV columns are read for the forecast.
fields = ['Modal_Price', 'Price_Date']
df = pd.read_csv("pure_cotton.csv", usecols=fields, skipinitialspace=True)

# Parse the date column (values that cannot be parsed become NaT) and make
# it the index so the series can be resampled by calendar period.
df['Price_Date'] = pd.to_datetime(df['Price_Date'], errors='coerce')
df = df.set_index('Price_Date')

# Keep the loaded frame untouched and derive the series from a copy.
data = df.copy()

# 'MS' groups the rows into buckets keyed by the first day of each month;
# each bucket is reduced to its mean price.
monthly_mean = data['Modal_Price'].resample('MS').mean()

# Back-fill: a month without data takes the value of the next month that
# has one.
y = monthly_mean.bfill()

#----------------------------------------------------------------------------
#STEP 2
# Plot the monthly price series on an explicitly created axes.
# The title and axis labels are applied AFTER drawing: labels set through
# pyplot beforehand depend on pandas reusing the implicit current axes, and
# pandas may relabel the x-axis with the index name while plotting.
fig, ax = plt.subplots(figsize=(15, 6))
y.plot(ax=ax)
ax.set_title('Time Series for cotton prices')
ax.set_xlabel('Years')
ax.set_ylabel('Price in Rs.')
plt.show()
#----------------------------------------------------------------------------
#STEP 3 & 4
# Let p, d and q each take the value 0 or 1 (range(0, 2) excludes 2).
p = d = q = range(0, 2)

# Every (p, d, q) combination for the non-seasonal part of the model.
pdq = list(itertools.product(p, d, q))

# The same combinations for the seasonal part, with a period of 12.
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in itertools.product(p, d, q)]

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))


warnings.filterwarnings("ignore") # specify to ignore warning messages

# Grid search: fit every order / seasonal-order pair and keep the fit with
# the lowest AIC. `results` is what the forecasting code further down uses,
# so it must be the selected model rather than whichever fit ran last.
best_aic = np.inf
best_order = None
best_seasonal_order = None
results = None

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            candidate = mod.fit()
        except Exception as exc:
            # A combination that cannot be estimated is reported and skipped
            # instead of being swallowed silently; KeyboardInterrupt and
            # SystemExit are no longer caught.
            print('SARIMA{}x{}12 - skipped: {}'.format(param, param_seasonal, exc))
            continue

        print('SARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, candidate.aic))

        # A NaN AIC compares False here and therefore never wins.
        if candidate.aic < best_aic:
            best_aic = candidate.aic
            best_order = param
            best_seasonal_order = param_seasonal
            results = candidate

if results is None:
    raise RuntimeError('No SARIMA parameter combination could be fitted.')

print('Selected SARIMA{}x{}12 - AIC:{}'.format(best_order, best_seasonal_order, best_aic))

# plot_diagnostics returns the figure holding the diagnostic panels; title
# the figure as a whole rather than only the current subplot.
fig = results.plot_diagnostics(figsize=(15, 12))
fig.suptitle('Diagnosis')
plt.show()

#----------------------------------------------------------------------------
#Static Forecast -- Mostly used for a month prediction
# Ask for the first date to predict (for example 2020-01-01).
enter = input("Enter the date : ")
forecast_start = pd.to_datetime(enter)

# dynamic=False requests the non-dynamic prediction from that date onwards,
# labelled below as the one-step-ahead forecast.
pred = results.get_prediction(start=forecast_start, dynamic=False)
pred_ci = pred.conf_int()

# Observed series from 1990 onwards, with the forecast drawn on the same axes.
ax = y['1990':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7)

# Shade the band between the first and second confidence-interval columns.
lower_bound = pred_ci.iloc[:, 0]
upper_bound = pred_ci.iloc[:, 1]
ax.fill_between(pred_ci.index, lower_bound, upper_bound, color='k', alpha=.2)

ax.set_xlabel('Date')
ax.set_ylabel('Crop Price')
plt.legend()
plt.show()

#STEP 5
# Compare the static forecast with the observations over exactly the span
# the forecast covers. The forecast starts at the date the user entered, so
# the truth window is taken from the forecast's own index instead of a fixed
# date; a fixed date gives series of different lengths for any other input.
y_forecasted = pred.predicted_mean
y_truth = y.loc[y_forecasted.index[0]:y_forecasted.index[-1]]

#STEP 6
# Forecast accuracy: mean absolute error (in price units) and mean absolute
# percentage error. Both are printed so they are visible when this file is
# run as a script.
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_truth, y_forecasted)
# NOTE: MAPE divides by the observed price, so it is not finite if an
# observed monthly price is zero.
mape = np.mean(np.abs((y_truth - y_forecasted) / y_truth)) * 100
print('Static forecast - MAE: {:.2f}, MAPE: {:.2f}%'.format(mae, mape))

#----------------------------------------------------------------------------
#Dynamic Forecast -- Used for prediction of same
# First date of the dynamic forecast. It is defined once and reused for the
# prediction, the shaded region of the plot and the accuracy check, so the
# three cannot drift apart.
dynamic_start = pd.to_datetime('2020-01-01')

pred_dynamic = results.get_prediction(start=dynamic_start, dynamic=True, full_results=True)
pred_dynamic_ci = pred_dynamic.conf_int()

# Observed series from 1990 onwards, with the dynamic forecast on the same axes.
ax = y['1990':].plot(label='observed', figsize=(20, 15))
pred_dynamic.predicted_mean.plot(label='Dynamic Forecast', ax=ax)

# Band between the first and second confidence-interval columns.
ax.fill_between(pred_dynamic_ci.index,
                pred_dynamic_ci.iloc[:, 0],
                pred_dynamic_ci.iloc[:, 1], color='k', alpha=.25)

# Lightly shade the whole forecast window, from its start to the last observation.
ax.fill_betweenx(ax.get_ylim(), dynamic_start, y.index[-1],
                 alpha=.1, zorder=-1)

ax.set_xlabel('Date')
ax.set_ylabel('CROP PRICE')

plt.legend()
plt.show()


# Extract the predicted and true values of our time series
y_forecasted = pred_dynamic.predicted_mean
y_truth = y.loc[dynamic_start:]

# Forecast accuracy: mean absolute error (in price units) and mean absolute
# percentage error. Both are printed so they are visible when this file is
# run as a script.
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_truth, y_forecasted)
# NOTE: MAPE divides by the observed price, so it is not finite if an
# observed monthly price is zero.
mape = np.mean(np.abs((y_truth - y_forecasted) / y_truth)) * 100
print('Dynamic forecast - MAE: {:.2f}, MAPE: {:.2f}%'.format(mae, mape))
#----------------------------------------------------------------------------
#Dynamic Forecast -- Used for prediction of next years
# Forecast 20 steps beyond the end of the fitted series.
pred_uc = results.get_forecast(steps=20)

# Confidence intervals of the forecast, saved to a CSV file.
pred_ci = pred_uc.conf_int()
pred_ci.to_csv("predicted_cotton_price_dynamic.csv")

# Full observed series with the forecast and its interval band appended.
ax = y.plot(label='observed', figsize=(20, 15))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')

lower_bound, upper_bound = pred_ci.iloc[:, 0], pred_ci.iloc[:, 1]
ax.fill_between(pred_ci.index, lower_bound, upper_bound, color='k', alpha=.25)

ax.set(xlabel='Date', ylabel='CROP PRICE')

plt.legend()
plt.show()
#----------------------------------------------------------------------------