# ARIMA_45min.py
# Forked from Johnson0722/Spatial-temporal-modeling-prediction.
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import metrics
import csv
import sys
from statsmodels.tsa.arima_model import ARMA
def loadDataSet(cells_path="all_surround_cells.csv",
                traffic_path="/home/johnson/tensorflow/row Data/nj06downbsloc45min_new.csv",
                target_cell='32.05278_118.77965'):
    """Load the traffic table restricted to the surrounding cells.

    Each row of the CSV at ``cells_path`` is read as two numbers; the column
    label for that cell is built as ``"<row[1]>_<row[0]>"`` with both values
    rounded to 5 decimals.  The traffic CSV is then reduced to those columns,
    sparse columns are dropped and remaining gaps are interpolated.

    Parameters
    ----------
    cells_path : str
        CSV listing the surrounding cells (two numeric fields per row).
    traffic_path : str
        CSV with one traffic column per cell label.
    target_cell : str
        Label of the column returned on its own as the second result.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The cleaned frame and its ``target_cell`` column.

    Raises
    ------
    KeyError
        If ``target_cell`` is not among the columns that survive the
        missing-data filter.
    """
    # The csv module expects a binary handle on Python 2 and a text handle
    # opened with newline="" on Python 3.
    if sys.version_info[0] < 3:
        cells_file = open(cells_path, "rb")
    else:
        cells_file = open(cells_path, "r", newline="")

    # The context manager closes the handle; the original never closed it.
    with cells_file:
        surround_cells = [
            str(round(float(row[1]), 5)) + '_' + str(round(float(row[0]), 5))
            for row in csv.reader(cells_file)
        ]

    traffic_dataFrame = pd.read_csv(traffic_path)
    # reindex keeps the listed labels in order; labels absent from the traffic
    # file become all-NaN columns and are removed by the filter below.
    # (Original author's note for their data: 946 rows x 39 columns.)
    MyTrafficFrame = traffic_dataFrame.reindex(columns=surround_cells)

    # Missing-data handling: keep only columns with at least 935 non-NaN
    # values (author's note: 19 columns remain), then fill the remaining gaps.
    MyTrafficFrame = MyTrafficFrame.dropna(axis=1, thresh=935)
    MyTrafficFrame = MyTrafficFrame.interpolate()
    return MyTrafficFrame, MyTrafficFrame[target_cell]
def normalization(x):
    """Rescale ``x`` with a freshly fitted sklearn ``MinMaxScaler``.

    The scaler is created with its default settings, fitted on ``x`` and
    immediately applied to it; the fitted scaler itself is discarded, so the
    transform cannot be inverted later.

    ``x`` is expected to be array-like (the original note says numpy.ndarray).
    """
    scaler = preprocessing.MinMaxScaler()
    scaled = scaler.fit_transform(x)
    return scaled
def autocorrelation(x, lags):
    """Temporal correlation: sample autocorrelation of ``x`` for lags 1..``lags``.

    For each lag ``k`` the series is split into the window starting at ``k``
    and the equally long window ending ``k`` samples before the end; each
    window is centred on its own mean and scaled by its own (population)
    standard deviation.

    Returns a list with one coefficient per lag, lag 1 first.
    """
    total = len(x)
    series = np.array(x)
    coefficients = []
    for shift in range(1, lags + 1):
        lead = series[shift:]
        trail = series[:total - shift]
        # With equal-length inputs np.correlate yields a single value: the
        # sum of products of the two centred windows.
        product_sum = np.correlate(lead - lead.mean(), trail - trail.mean())[0]
        coefficients.append(
            product_sum / (lead.std() * trail.std() * (total - shift)))
    return coefficients
def testStationarity(ts):
    """Run ``adfuller`` on ``ts`` and return its output as a labelled Series.

    The first four values of the result are labelled as test statistic,
    p-value, lags used and number of observations; every entry of the
    critical-value mapping (fifth element) is appended under the label
    ``'Critical Value (<key>)'``.
    """
    adf_result = adfuller(ts)
    labels = ['Test Statistic', 'p-value', '#Lags Used',
              'Number of Observations Used']
    summary = pd.Series(adf_result[0:4], index=labels)
    critical_values = adf_result[4]
    for level, threshold in critical_values.items():
        summary['Critical Value (%s)' % level] = threshold
    return summary
def draw_acf_pacf(ts):
    """Plot the ACF (upper panel) and PACF (lower panel) of ``ts`` and show them.

    A new white-background figure is created with two stacked subplots; the
    function ends with ``plt.show()`` and returns nothing.
    """
    figure = plt.figure(facecolor='white')
    # Subplot code 211 is the top row of a 2x1 grid, 212 the bottom row.
    for position, plotter in ((211, plot_acf), (212, plot_pacf)):
        axis = figure.add_subplot(position)
        plotter(ts, ax=axis)
    plt.show()
def proper_model(data_ts, maxLag):
    """Grid-search ARMA(p, q) orders and keep the fit with the lowest BIC.

    Every combination with ``0 <= p < maxLag`` and ``0 <= q < maxLag`` is
    fitted with ``method='css'``.  Orders whose fit raises an exception are
    skipped.

    Parameters
    ----------
    data_ts : series passed unchanged to ``ARMA``.
    maxLag : int
        Exclusive upper bound for both ``p`` and ``q``.

    Returns
    -------
    (bic, p, q, fitted_model)
        Values for the best order found.  If no order could be fitted the
        result is ``(inf, 0, 0, None)``.
    """
    # float('inf') is a valid "worse than anything" start value on both
    # Python 2 and 3 (sys.maxint only exists on Python 2).
    best_bic = float('inf')
    best_p = 0
    best_q = 0
    best_model = None
    for p in np.arange(maxLag):
        for q in np.arange(maxLag):
            model = ARMA(data_ts, order=(p, q))
            try:
                results_ARMA = model.fit(disp=-1, method='css')
            except Exception:
                # Fitting failed for this order; try the next one.  Catching
                # Exception (not a bare except) lets Ctrl-C and SystemExit
                # interrupt a long search.
                continue
            bic = results_ARMA.bic
            if bic < best_bic:
                best_p = p
                best_q = q
                best_model = results_ARMA
                best_bic = bic
    return best_bic, best_p, best_q, best_model
# ---- Data preparation ------------------------------------------------------
# Only the single-cell series returned by loadDataSet() is modelled below.
_, traffic = loadDataSet()
traffic = normalization(traffic)

# Timestamps for the 946 samples, spaced 45 minutes apart.
rng = pd.date_range('6/1/2014 00:00', periods=946, freq='45Min')

# The first 700 samples are used for fitting, the remainder for evaluation.
traffic_train = pd.Series(traffic[:700], index=rng[:700])
traffic_test = pd.Series(traffic[700:], index=rng[700:])
traffic = pd.Series(traffic)
traffic_diff1 = traffic_train.diff(1)  # first difference; not used further below

# Optional diagnostics, left disabled as in the original script:
#print proper_model(traffic_train,50)
#print testStationarity(traffic)
#draw_acf_pacf(traffic)

# ---- Model fitting and prediction -------------------------------------------
model = ARMA(traffic_train, order=(4, 2))
result_arma = model.fit(disp=-1, method='css')
predict_ts_train = result_arma.predict()
# The start/end strings are the timestamps rng[700] and rng[945], i.e. the
# test period.
predict_ts_test = result_arma.predict(
    '6/22/2014 21:00:00', '6/30/2014 12:45:00', dynamic=True)

# ---- Plots: actual values in red, predictions as blue dashes -----------------
fig1 = plt.figure(1)
plt.plot(traffic_train, 'r')
plt.plot(predict_ts_train, 'b--')
fig2 = plt.figure(2)
plt.plot(traffic_test, 'r')
plt.plot(predict_ts_test, 'b--')
plt.show()

# ---- Persist results and report errors ---------------------------------------
traffic_test.to_csv('/home/johnson/tensorflow/pic/45min/result_45min/ARIMA_test_result')
predict_ts_test.to_csv('/home/johnson/tensorflow/pic/45min/result_45min/ARIMA_predict_result')
# Values recorded by the original author: MSE = 0.0157727596516,
# MAE = 0.0859137763706.
print(metrics.mean_squared_error(traffic_test, predict_ts_test))
print(metrics.mean_absolute_error(traffic_test, predict_ts_test))