De entre las series de tiempo para cada enfermedad en el periodo descrito, no todas contienen información de casos reportados durante el periodo de tiempo establecido para la investigación, por lo que se extraen las que reportan al menos la mitad del periodo (260 semanas).
# https://stackoverflow.com/a/16916611
print(f'Iniciales {len(cie)}')
# Keep only the diseases whose series covers at least half the study period
# (260 weekly observations), then rebuild the groupby over the survivors.
cie = cie.filter(lambda g: g['sem'].count() >= 260)
cie.reset_index(drop=True, inplace=True)
cie = cie.groupby('cie')
print(f'Restantes {len(cie)}')
Iniciales 138
Restantes 40
print(f'Iniciales {len(cieG)}')
# Same 260-week coverage threshold, but regrouped by the first character of
# the CIE code (i.e. by broad disease category).
cieG = cieG.filter(lambda g: g['sem'].count() >= 260)
cieG.reset_index(drop=True, inplace=True)
cieG = cieG.groupby(cieG.cie.str[0])
print(f'Restantes {len(cieG)}')
Iniciales 22
Restantes 12
Así, de 138 series de tiempo de enfermedades, se obtienen 40 en las que al menos se cuenta con datos semanales de 5 años. Para dichas enfermedades se obtienen los pesos de la regresión lineal y se obtiene la serie de tiempo sin la tendencia y las autocorrelaciones (eliminando la a92.3 porque viene vacía)
from scipy import signal
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from statsmodels.tsa.stattools import acf
ciesF = []   # per-CIE feature vectors: slope, intercept, autocorrelations, name
ciesTSt = [] # per-CIE raw time series (list of case counts)
for name, group in cie:
    # a92.3 comes through empty, so it is skipped (see prose above).
    if name == 'a92.3':
        continue
    # Remove the linear trend from the weekly case counts.
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.detrend.html
    detrended = signal.detrend(group.casos)
    # Linear regression week -> cases: slope a, intercept b, r, p-value, stderr.
    a, b, r, p, e = stats.linregress(group['sem'], group.casos)
    print("y = f(x) = {} x + {}".format(a, b))
    print("error", e)
    print("p = ", p)
    print("pendiente {:s}significativa".format("no " if p >= 0.05 else ""))
    print("R^2", r**2)
    # Original series, detrended series and fitted trend line.
    plt.figure(figsize=(12, 2))
    plt.plot(group['sem'], group.casos)
    plt.plot(group['sem'], detrended, c='black')
    plt.plot(group['sem'], (a * group['sem'] + b), label = 'y = {:.1f}x + {:.0f}'.format(a, b), color = 'red', linewidth = 3)
    # FIX: the trend line carries a label but no legend was ever drawn,
    # so the fitted equation was never visible on the plot.
    plt.legend(loc='best')
    plt.title(name)
    plt.xlabel("Semana")
    plt.ylabel("Casos normalizados")
    plt.show()
    # Cumulative cases, useful as a distance-friendly representation.
    # https://stackoverflow.com/questions/48497756/time-series-distance-metric
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.cumsum.html
    plt.figure(figsize=(12, 2))
    plt.plot(group['sem'], group.casos.cumsum(), c='green')
    plt.title(name)
    plt.xlabel("Semana")
    plt.ylabel("Acumulado de Casos normalizados")
    plt.show()
    # Autocorrelation of the detrended series up to a one-year (52-week) lag.
    # https://machinelearningmastery.com/gentle-introduction-autocorrelation-partial-autocorrelation/
    plot_acf(detrended, lags=52)
    # https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html
    plt.title(name)
    plt.xlabel("Retraso en semanas")
    plt.ylabel('Correlación')
    plt.show()
    # Feature vector: [slope, intercept, acf lag 0..52, cie name].
    temp = [a, b]
    # https://stackoverflow.com/a/3748071
    temp.extend(acf(detrended, nlags=52))
    temp.append(name)
    ciesF.append(temp)
    ciesTSt.append(list(group.casos))
y = f(x) = 4.2687498879775535e-11 x + -5.61276490864389e-09
error 2.0113605583063735e-11
p = 0.03438325581830106
pendiente significativa
R^2 0.010390302941758996
y = f(x) = -1.7653075450894884e-06 x + 0.0019754909758688734
error 1.215881678974069e-07
p = 1.0774404716157897e-37
pendiente significativa
R^2 0.3765563116502911
y = f(x) = -3.0752338979568535e-09 x + 1.2904965711605356e-05
error 8.36749265385657e-10
p = 0.0002687345236732541
pendiente significativa
R^2 0.03144815483703095
y = f(x) = -1.0785666679174943e-08 x + 6.155839207125693e-06
error 3.564598970127455e-10
p = 3.9269145731807006e-112
pendiente significativa
R^2 0.6622135065357393
y = f(x) = -9.213574424729245e-09 x + 1.2957954379028175e-05
error 6.75300549213463e-10
p = 4.863591366039641e-36
pendiente significativa
R^2 0.2798612445705242
y = f(x) = -3.831012274635742e-09 x + 4.559110665311131e-06
error 2.056660576324692e-10
p = 2.1083769747172313e-58
pendiente significativa
R^2 0.4247090552255535
y = f(x) = -1.4627725963638185e-11 x + 4.5153183905681134e-08
error 8.6165080095987e-12
p = 0.09023493838847157
pendiente no significativa
R^2 0.006068835261027791
y = f(x) = -1.7642681104091792e-10 x + 6.894953499860091e-07
error 6.403728082254567e-11
p = 0.0061121040534305735
pendiente significativa
R^2 0.01699628644380042
y = f(x) = 3.8938781393720075e-11 x + 4.4324399670136904e-08
error 2.301537019363991e-11
p = 0.09131901816285937
pendiente no significativa
R^2 0.00590350974760678
y = f(x) = 2.3434709729609935e-13 x + -3.6003147872535345e-11
error 1.478629682740248e-13
p = 0.1136746875979033
pendiente no significativa
R^2 0.005407586602284264
y = f(x) = -3.4234098194666664e-11 x + 1.8622107713377432e-08
error 7.415007265413988e-12
p = 4.945787391949576e-06
pendiente significativa
R^2 0.040422588588555235
y = f(x) = 2.725400812299939e-10 x + 1.9471946148640167e-08
error 7.110945660238779e-11
p = 0.00014458146666075644
pendiente significativa
R^2 0.031274848966871695
y = f(x) = -4.255464112013022e-09 x + 2.7470903702510135e-06
error 3.3548559373967543e-10
p = 6.366969283108536e-32
pendiente significativa
R^2 0.25462455709847065
y = f(x) = -1.0744562226275755e-10 x + 5.3269706349552e-08
error 6.010845348981309e-11
p = 0.07452781641973526
pendiente no significativa
R^2 0.007081769917450651
y = f(x) = -4.51442956629053e-11 x + 5.003109663849265e-08
error 8.825884482273087e-12
p = 4.625709147491021e-07
pendiente significativa
R^2 0.05414963353204715
y = f(x) = -2.8757621426513095e-09 x + 8.65805695809489e-06
error 2.5195362618720955e-09
p = 0.25425697912167106
pendiente no significativa
R^2 0.002603944949826924
y = f(x) = -1.4644681914017878e-07 x + 0.00011211051217541277
error 1.1228826202874234e-08
p = 2.0750705342062423e-33
pendiente significativa
R^2 0.2653193213169377
y = f(x) = -2.722125710622442e-10 x + 1.262294141156984e-07
error 1.588658069624221e-11
p = 7.30165010543899e-51
pendiente significativa
R^2 0.3985877970692943
y = f(x) = -7.433434732564669e-09 x + 6.901511580636186e-06
error 4.809110869935852e-10
p = 3.1523565347787724e-42
pendiente significativa
R^2 0.3841635208468614
y = f(x) = -1.2411343091809244e-10 x + 2.1160165305354125e-07
error 2.468137006599137e-11
p = 7.217507598355145e-07
pendiente significativa
R^2 0.05458192227303572
y = f(x) = 4.306189172696877e-11 x + 4.5311776302657916e-07
error 4.983732447188263e-11
p = 0.3880813876059074
pendiente no significativa
R^2 0.0018723188046217538
y = f(x) = -9.851452950958728e-10 x + 1.4372871687382767e-06
error 2.0422528814979408e-10
p = 2.004159852045715e-06
pendiente significativa
R^2 0.05484537751521497
y = f(x) = -4.5977874526241305e-09 x + 2.970356116391645e-06
error 1.2200834000344058e-10
p = 5.731116914266081e-137
pendiente significativa
R^2 0.7713322815069262
y = f(x) = -3.141197049928086e-08 x + 0.00011159382796415527
error 1.555599789385595e-08
p = 0.04432369209037648
pendiente significativa
R^2 0.013065683803978847
y = f(x) = -1.0673850709576887e-11 x + 4.355250762325799e-09
error 3.1880871555405942e-12
p = 0.0008764994132686745
pendiente significativa
R^2 0.02236467960180641
y = f(x) = -1.1834633525276088e-09 x + 7.100912129285122e-07
error 9.393463801467669e-11
p = 1.2559331815198554e-31
pendiente significativa
R^2 0.24928894104283042
y = f(x) = -2.6544241562890327e-10 x + 3.2049521522763566e-07
error 3.7319118941308474e-11
p = 4.610321182741757e-12
pendiente significativa
R^2 0.10270484494415096
y = f(x) = -4.556172046800565e-10 x + 3.7500114147077754e-07
error 3.423510008372152e-11
p = 1.8548314561638924e-34
pendiente significativa
R^2 0.27626166419878756
y = f(x) = -6.932314608726674e-06 x + 0.008805967823835786
error 6.162044690299544e-07
p = 7.503334430143699e-26
pendiente significativa
R^2 0.23113873276962404
y = f(x) = -4.3876585629436324e-08 x + 5.08928603405923e-05
error 4.181979380800392e-09
p = 2.3727736779313174e-23
pendiente significativa
R^2 0.1846709286259034
y = f(x) = 3.0607452607643954e-14 x + 8.632235917582137e-10
error 1.081125720208934e-12
p = 0.977427221531179
pendiente no significativa
R^2 1.8298987789071008e-06
y = f(x) = -7.142618310354189e-10 x + 1.178302593061399e-06
error 1.457944295445013e-10
p = 1.332329783777257e-06
pendiente significativa
R^2 0.04918267323938655
y = f(x) = -1.3158987800860395e-08 x + 1.643445025799908e-05
error 1.6084785128648306e-09
p = 1.0885896084621633e-14
pendiente significativa
R^2 0.19747207526346663
y = f(x) = -5.001283641779608e-08 x + 8.79623291848092e-05
error 5.714736801137942e-09
p = 3.9731738436330694e-17
pendiente significativa
R^2 0.14220420601744926
y = f(x) = -8.130421944150189e-10 x + 1.4827816461880552e-06
error 5.134961166576471e-10
p = 0.11399979312553642
pendiente no significativa
R^2 0.005185003922499099
y = f(x) = -2.3438815029578847e-11 x + 1.5962437413039917e-07
error 5.628337741371575e-11
p = 0.6772793134611802
pendiente no significativa
R^2 0.00037523732655554817
y = f(x) = -5.193298199318447e-08 x + 4.533358442590547e-05
error 1.6692637054937939e-09
p = 1.6078008523125964e-114
pendiente significativa
R^2 0.6811910446622308
y = f(x) = -9.132331371853938e-10 x + 1.3202325787207294e-06
error 1.431149104840528e-10
p = 4.359158383428425e-10
pendiente significativa
R^2 0.08264080526994147
y = f(x) = 3.638834463785146e-10 x + 1.1174278631513553e-06
error 1.1201987356227443e-10
p = 0.001257639893824037
pendiente significativa
R^2 0.025453964627936122
for name, group in cieG:
    # Remove the linear trend from the grouped weekly case counts.
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.detrend.html
    detrended = signal.detrend(group.casos)
    # Linear regression week -> cases: slope a, intercept b, r, p-value, stderr.
    a, b, r, p, e = stats.linregress(group['sem'], group.casos)
    print("y = f(x) = {} x + {}".format(a, b))
    print("error", e)
    print("p = ", p)
    print("pendiente {:s}significativa".format("no " if p >= 0.05 else ""))
    print("R^2", r**2)
    # Original series, detrended series and fitted trend line.
    plt.figure(figsize=(12, 2))
    plt.plot(group['sem'], group.casos)
    plt.plot(group['sem'], detrended, c='black')
    plt.plot(group['sem'], (a * group['sem'] + b), label = 'y = {:.1f}x + {:.0f}'.format(a, b), color = 'red', linewidth = 3)
    plt.title(name)
    plt.xlabel("Semana")
    plt.ylabel("Casos normalizados")
    plt.show()
    # Cumulative cases plot.
    # https://stackoverflow.com/questions/48497756/time-series-distance-metric
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.cumsum.html
    plt.figure(figsize=(12, 2))
    plt.plot(group['sem'], group.casos.cumsum(), c='green')
    plt.title(name)
    plt.xlabel("Semana")
    plt.ylabel("Acumulado de Casos normalizados")
    plt.show()
    # FIX: removed the bare plt.figure() that preceded plot_acf(); plot_acf
    # creates its own figure, so the extra call produced the empty
    # "<Figure size 432x288 with 0 Axes>" artifact after every iteration.
    # https://machinelearningmastery.com/gentle-introduction-autocorrelation-partial-autocorrelation/
    plot_acf(detrended, lags=52)
    # https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html
    plt.title(name)
    plt.xlabel("Retraso en semanas")
    plt.ylabel('Correlación')
    plt.show()
y = f(x) = -2.3224356070873255e-07 x + 0.00018642953885186607
error 2.458069138569354e-08
p = 4.2189376399252386e-21
pendiente significativa
R^2 0.008834750473414046
<Figure size 432x288 with 0 Axes>
y = f(x) = -1.9503026794867653e-08 x + 2.1003475197080275e-05
error 2.9914826904487065e-09
p = 7.747452841909533e-11
pendiente significativa
R^2 0.008415817644051377
<Figure size 432x288 with 0 Axes>
y = f(x) = -7.687412126573934e-08 x + 6.301895314999713e-05
error 1.1457310486128478e-07
p = 0.5026730466383222
pendiente no significativa
R^2 0.0012420716051614582
<Figure size 432x288 with 0 Axes>
y = f(x) = 4.324626671299835e-10 x + 2.04941342972253e-07
error 7.239655510808932e-11
p = 4.1887790791163235e-09
pendiente significativa
R^2 0.06134442012808605
<Figure size 432x288 with 0 Axes>
y = f(x) = 6.114012516943938e-08 x + -9.37021948204884e-06
error 5.454249521195378e-09
p = 7.943541732517958e-27
pendiente significativa
R^2 0.1593291079485521
<Figure size 432x288 with 0 Axes>
y = f(x) = -5.4762876304630354e-06 x + 0.004047543918036931
error 6.313606290646645e-07
p = 1.2891385768087959e-17
pendiente significativa
R^2 0.057202405083796086
<Figure size 432x288 with 0 Axes>
y = f(x) = 3.0607452607643954e-14 x + 8.632235917582137e-10
error 1.081125720208934e-12
p = 0.977427221531179
pendiente no significativa
R^2 1.8298987789071008e-06
<Figure size 432x288 with 0 Axes>
y = f(x) = -4.552145867367468e-08 x + 4.150055938885172e-05
error 5.28039387715845e-09
p = 1.7027198403366345e-17
pendiente significativa
R^2 0.04850098103773122
<Figure size 432x288 with 0 Axes>
y = f(x) = -5.496981234999186e-10 x + 8.572895488021172e-07
error 2.657552949170129e-10
p = 0.038856309457124164
pendiente significativa
R^2 0.004272969521096534
<Figure size 432x288 with 0 Axes>
y = f(x) = -3.11383856620073e-08 x + 3.0350821580836582e-05
error 3.4613396780786333e-09
p = 2.313921331364859e-18
pendiente significativa
R^2 0.10649538120403663
<Figure size 432x288 with 0 Axes>
y = f(x) = -9.132331371853938e-10 x + 1.3202325787207294e-06
error 1.431149104840528e-10
p = 4.359158383428425e-10
pendiente significativa
R^2 0.08264080526994147
<Figure size 432x288 with 0 Axes>
y = f(x) = 3.638834463785146e-10 x + 1.1174278631513553e-06
error 1.1201987356227443e-10
p = 0.001257639893824037
pendiente significativa
R^2 0.025453964627936122
<Figure size 432x288 with 0 Axes>
Se extraen las características de cada CIE en tanto serie de tiempo. A saber, su pendiente, ordenada en el origen y las autocorrelaciones con retraso de 1 a 52 semanas (eliminando el retraso de 0 semanas)
ciesF = pd.DataFrame(ciesF)
# Name the columns: slope m, intercept b, autocorrelations ac0..ac52, CIE code.
# https://stackoverflow.com/a/11346337
ciesF.columns = ['m', 'b'] + ['ac' + str(lag) for lag in range(53)] + ['cie']
# Lag-0 autocorrelation is always 1, so it carries no information.
ciesF = ciesF.drop(['ac0'], axis=1)
ciesF.sort_values(by=['m'], ascending=False)
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
m | b | ac1 | ac2 | ac3 | ac4 | ac5 | ac6 | ac7 | ac8 | ... | ac44 | ac45 | ac46 | ac47 | ac48 | ac49 | ac50 | ac51 | ac52 | cie | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
38 | 3.638834e-10 | 1.117428e-06 | 0.497210 | 0.371337 | 0.338770 | 0.295026 | 0.246889 | 0.170797 | 0.177532 | 0.174927 | ... | 0.095734 | 0.079953 | 0.094433 | 0.045073 | 0.020609 | 0.002565 | -0.025604 | 0.001100 | 0.014393 | z21 |
11 | 2.725401e-10 | 1.947195e-08 | 0.089659 | -0.019229 | -0.025867 | -0.002608 | -0.028985 | -0.001130 | -0.058919 | -0.019433 | ... | -0.034949 | -0.062431 | -0.002219 | 0.029734 | 0.005434 | -0.050439 | 0.078625 | 0.029168 | -0.002898 | a37 |
20 | 4.306189e-11 | 4.531178e-07 | 0.299805 | 0.225857 | 0.068495 | 0.057942 | 0.001655 | 0.017533 | 0.033656 | 0.100137 | ... | 0.050684 | 0.016318 | 0.061034 | 0.053793 | 0.021663 | 0.059753 | -0.005982 | 0.033164 | -0.047455 | b17.1 |
0 | 4.268750e-11 | -5.612765e-09 | 0.393749 | 0.122484 | 0.010328 | 0.026661 | 0.015698 | 0.021141 | -0.002905 | -0.010120 | ... | -0.017536 | -0.017607 | -0.017679 | -0.017751 | -0.017822 | -0.017893 | -0.017965 | -0.018036 | -0.018107 | a00 |
8 | 3.893878e-11 | 4.432440e-08 | 0.393243 | 0.260146 | 0.044702 | 0.032172 | 0.012605 | 0.025166 | 0.014480 | 0.009281 | ... | -0.017366 | 0.067969 | 0.005142 | 0.003701 | -0.020010 | 0.007456 | 0.042142 | 0.077540 | 0.090818 | a27 |
9 | 2.343471e-13 | -3.600315e-11 | -0.007522 | -0.007561 | -0.007600 | -0.007639 | -0.007678 | -0.007717 | -0.007756 | -0.007795 | ... | -0.000592 | -0.000605 | -0.000618 | -0.000631 | -0.000644 | -0.000657 | -0.000670 | -0.000683 | -0.000696 | a33 |
30 | 3.060745e-14 | 8.632236e-10 | 0.055952 | 0.062823 | 0.076371 | 0.006623 | 0.030863 | -0.037294 | -0.060305 | 0.059805 | ... | -0.057127 | -0.057262 | -0.035717 | -0.035852 | -0.057666 | -0.057801 | -0.057936 | -0.058070 | -0.058205 | p35.0 |
24 | -1.067385e-11 | 4.355251e-09 | 0.077117 | 0.322187 | 0.046591 | 0.036037 | -0.023600 | 0.020128 | -0.018800 | -0.026529 | ... | -0.023442 | 0.017590 | -0.018073 | -0.018183 | -0.023882 | 0.032515 | -0.001876 | 0.011733 | 0.004771 | b50 |
6 | -1.462773e-11 | 4.515318e-08 | 0.085993 | 0.048897 | 0.064574 | 0.118768 | -0.023565 | -0.026439 | 0.023959 | -0.036252 | ... | 0.064103 | 0.019108 | -0.021642 | -0.019077 | 0.015544 | 0.031099 | -0.056185 | -0.073401 | -0.004459 | a17.0 |
35 | -2.343882e-11 | 1.596244e-07 | 0.550097 | 0.512561 | 0.454005 | 0.411662 | 0.363330 | 0.254477 | 0.225435 | 0.181761 | ... | -0.032387 | -0.007867 | 0.004137 | -0.020714 | 0.015187 | -0.003246 | 0.008389 | 0.020997 | 0.018492 | u98 |
10 | -3.423410e-11 | 1.862211e-08 | -0.081445 | 0.057375 | -0.049496 | 0.012841 | 0.056102 | -0.020204 | -0.009801 | -0.032175 | ... | -0.005745 | 0.005151 | -0.043027 | 0.056617 | -0.031916 | 0.114628 | -0.035333 | 0.052850 | -0.016788 | a34 |
14 | -4.514430e-11 | 5.003110e-08 | 0.076912 | 0.149122 | 0.088945 | 0.072881 | 0.114257 | 0.074084 | 0.108017 | 0.108573 | ... | 0.036326 | 0.015114 | 0.051616 | 0.032731 | -0.039804 | 0.002215 | 0.025619 | 0.016111 | -0.024214 | a50 |
13 | -1.074456e-10 | 5.326971e-08 | 0.003941 | -0.012528 | -0.008077 | -0.013007 | -0.007537 | -0.016467 | 0.003030 | -0.001938 | ... | -0.011364 | -0.003043 | -0.003882 | 0.012084 | 0.000458 | -0.001701 | 0.007212 | -0.004964 | -0.004440 | a39.0 |
19 | -1.241134e-10 | 2.116017e-07 | 0.289203 | 0.226359 | 0.276856 | 0.274657 | 0.253692 | 0.247411 | 0.247509 | 0.206441 | ... | 0.021663 | 0.093129 | 0.023837 | 0.146601 | 0.018644 | 0.036760 | 0.043459 | -0.015475 | -0.004356 | b16 |
7 | -1.764268e-10 | 6.894953e-07 | 0.522652 | 0.363609 | 0.361372 | 0.385651 | 0.326709 | 0.325553 | 0.318398 | 0.252245 | ... | 0.196099 | 0.182101 | 0.163183 | 0.167669 | 0.235813 | 0.206676 | 0.215066 | 0.167751 | 0.183912 | a23 |
26 | -2.654424e-10 | 3.204952e-07 | 0.110847 | 0.069672 | 0.107836 | 0.080559 | 0.128797 | -0.027006 | 0.042914 | -0.009807 | ... | 0.029668 | 0.076565 | 0.089312 | 0.024519 | 0.008848 | 0.059614 | 0.099755 | 0.046717 | -0.020862 | g00-g03 |
17 | -2.722126e-10 | 1.262294e-07 | 0.488103 | 0.474707 | 0.426456 | 0.414057 | 0.317635 | 0.304248 | 0.261418 | 0.248762 | ... | 0.072010 | 0.087184 | 0.117220 | 0.163608 | 0.087383 | 0.129308 | 0.128205 | 0.131949 | 0.139945 | b06 |
27 | -4.556172e-10 | 3.750011e-07 | 0.232941 | 0.165279 | 0.155966 | 0.155952 | 0.159776 | 0.110074 | 0.261158 | 0.168664 | ... | 0.016427 | 0.004149 | -0.044051 | 0.007307 | -0.018886 | -0.029089 | 0.021237 | 0.039481 | 0.027578 | i00-i02 |
31 | -7.142618e-10 | 1.178303e-06 | 0.791010 | 0.750958 | 0.691090 | 0.610929 | 0.512450 | 0.434146 | 0.366592 | 0.258712 | ... | 0.341533 | 0.408299 | 0.435233 | 0.469282 | 0.511489 | 0.522807 | 0.498786 | 0.480615 | 0.470837 | t60 |
34 | -8.130422e-10 | 1.482782e-06 | 0.026306 | 0.067629 | 0.068971 | 0.106583 | 0.028512 | 0.007521 | 0.073457 | -0.032228 | ... | 0.055582 | 0.128333 | 0.022517 | 0.066484 | 0.006181 | 0.028253 | 0.168174 | 0.105951 | 0.083272 | u97 |
37 | -9.132331e-10 | 1.320233e-06 | 0.826727 | 0.781848 | 0.727522 | 0.675235 | 0.624828 | 0.550919 | 0.484088 | 0.401629 | ... | 0.459125 | 0.506974 | 0.547571 | 0.601010 | 0.631255 | 0.670476 | 0.649844 | 0.640989 | 0.621946 | x20 |
21 | -9.851453e-10 | 1.437287e-06 | 0.149215 | 0.272471 | 0.152501 | 0.089044 | 0.100963 | 0.106820 | 0.087191 | -0.016662 | ... | 0.075923 | 0.128809 | 0.068058 | 0.029803 | 0.013137 | -0.020469 | -0.029275 | -0.055377 | -0.035842 | b20-b24 |
25 | -1.183463e-09 | 7.100912e-07 | 0.388003 | 0.416624 | 0.313638 | 0.387392 | 0.284052 | 0.250379 | 0.164883 | 0.231499 | ... | 0.188028 | 0.150510 | 0.140413 | 0.147020 | 0.207482 | 0.171138 | 0.209809 | 0.209396 | 0.200214 | b51 |
15 | -2.875762e-09 | 8.658057e-06 | 0.895262 | 0.842421 | 0.792705 | 0.707147 | 0.622664 | 0.527720 | 0.432752 | 0.356828 | ... | 0.243528 | 0.298282 | 0.370211 | 0.423460 | 0.466082 | 0.511227 | 0.539638 | 0.552434 | 0.540235 | a90 |
2 | -3.075234e-09 | 1.290497e-05 | 0.667701 | 0.505751 | 0.436771 | 0.352219 | 0.289719 | 0.228429 | 0.172198 | 0.107678 | ... | 0.223528 | 0.189754 | 0.185963 | 0.183744 | 0.179380 | 0.151920 | 0.143924 | 0.134758 | 0.057089 | a01.0 |
5 | -3.831012e-09 | 4.559111e-06 | 0.380632 | 0.195406 | 0.107774 | 0.091505 | 0.073293 | 0.073178 | 0.027202 | 0.086867 | ... | 0.121356 | 0.134870 | 0.096912 | 0.037600 | 0.126218 | 0.126044 | 0.177925 | 0.161334 | 0.038959 | a15-a16 |
12 | -4.255464e-09 | 2.747090e-06 | 0.140500 | 0.121196 | 0.127669 | 0.099854 | 0.073198 | 0.027292 | -0.002968 | -0.016134 | ... | 0.035622 | 0.029450 | 0.045802 | 0.047865 | 0.060802 | 0.095082 | 0.062623 | 0.058167 | 0.053912 | a38 |
22 | -4.597787e-09 | 2.970356e-06 | 0.564047 | 0.398251 | 0.335638 | 0.284887 | 0.179178 | 0.078869 | 0.009511 | -0.045524 | ... | 0.070795 | 0.069166 | 0.100421 | 0.164831 | 0.185766 | 0.150027 | 0.154843 | 0.220636 | 0.151552 | b26 |
18 | -7.433435e-09 | 6.901512e-06 | 0.765747 | 0.587239 | 0.516973 | 0.418962 | 0.348350 | 0.239331 | 0.135074 | 0.112791 | ... | 0.326428 | 0.312633 | 0.307638 | 0.296214 | 0.237261 | 0.190637 | 0.124537 | 0.059086 | 0.007564 | b15 |
4 | -9.213574e-09 | 1.295795e-05 | 0.605797 | 0.479571 | 0.410353 | 0.367218 | 0.341321 | 0.311389 | 0.234742 | 0.219977 | ... | 0.255961 | 0.229158 | 0.221748 | 0.178246 | 0.191535 | 0.225850 | 0.294085 | 0.313643 | 0.295370 | a05 |
3 | -1.078567e-08 | 6.155839e-06 | 0.620012 | 0.595386 | 0.535631 | 0.462663 | 0.438088 | 0.373148 | 0.289653 | 0.300332 | ... | 0.178095 | 0.234208 | 0.255625 | 0.278374 | 0.284554 | 0.274991 | 0.301086 | 0.266200 | 0.231869 | a03 |
32 | -1.315899e-08 | 1.643445e-05 | 0.903920 | 0.826842 | 0.732156 | 0.614444 | 0.497397 | 0.376862 | 0.268490 | 0.160477 | ... | 0.183030 | 0.131180 | 0.085666 | 0.037463 | -0.008971 | -0.066770 | -0.114970 | -0.152466 | -0.191697 | t63 excepto t63.2 |
23 | -3.141197e-08 | 1.115938e-04 | 0.767118 | 0.547797 | 0.395471 | 0.291686 | 0.210417 | 0.128438 | 0.081658 | 0.051735 | ... | -0.017768 | -0.024711 | -0.028894 | -0.060319 | -0.058954 | -0.048837 | -0.020363 | -0.004847 | -0.005577 | b30 |
29 | -4.387659e-08 | 5.089286e-05 | 0.927733 | 0.858740 | 0.790416 | 0.727838 | 0.637424 | 0.545003 | 0.449916 | 0.365855 | ... | 0.569450 | 0.607758 | 0.624284 | 0.633800 | 0.647336 | 0.649295 | 0.625177 | 0.589587 | 0.547403 | j12 |
33 | -5.001284e-08 | 8.796233e-05 | 0.881185 | 0.793629 | 0.694008 | 0.596382 | 0.456681 | 0.319222 | 0.189304 | 0.062305 | ... | 0.314212 | 0.388623 | 0.451051 | 0.497185 | 0.527848 | 0.531338 | 0.510950 | 0.475725 | 0.426713 | t63.2 |
36 | -5.193298e-08 | 4.533358e-05 | 0.844756 | 0.768593 | 0.723830 | 0.666654 | 0.614451 | 0.564508 | 0.510689 | 0.441227 | ... | 0.355990 | 0.405624 | 0.456501 | 0.503929 | 0.521726 | 0.555444 | 0.542070 | 0.525043 | 0.500933 | w54 |
16 | -1.464468e-07 | 1.121105e-04 | 0.946239 | 0.892301 | 0.837622 | 0.778140 | 0.711102 | 0.629254 | 0.536098 | 0.431731 | ... | 0.530872 | 0.565769 | 0.593141 | 0.613168 | 0.623759 | 0.618582 | 0.598455 | 0.570398 | 0.530427 | b01 |
1 | -1.765308e-06 | 1.975491e-03 | 0.831565 | 0.690654 | 0.594390 | 0.516446 | 0.437316 | 0.326857 | 0.221760 | 0.104962 | ... | 0.289442 | 0.214354 | 0.137897 | 0.059001 | -0.016753 | -0.061065 | -0.108941 | -0.158795 | -0.225823 | a01-a03 |
28 | -6.932315e-06 | 8.805968e-03 | 0.853479 | 0.727355 | 0.620092 | 0.535945 | 0.445607 | 0.349639 | 0.267460 | 0.175938 | ... | 0.200076 | 0.214129 | 0.232129 | 0.213833 | 0.204222 | 0.194258 | 0.186803 | 0.151652 | 0.122209 | j00-j06 |
39 rows × 55 columns
Se agrupan las CIEs por casos dados en una semana, como series de tiempo.
ciesTS = pd.DataFrame(ciesTSt)
# Label each row with its CIE code; a92.3 was skipped when building ciesTSt
# (its series is empty), so it is removed from the group keys as well.
group_names = list(cie.groups.keys())
group_names.remove('a92.3')
ciesTS['cie'] = group_names
ciesTS.sample(3)
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | cie | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
12 | 3.413076e-06 | 2.295700e-06 | 2.031593e-06 | 2.620755e-06 | 3.331812e-06 | 3.372444e-06 | 3.331812e-06 | 4.347608e-06 | 3.392760e-06 | 3.453708e-06 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | a38 |
20 | 4.875822e-07 | 5.078982e-07 | 4.063185e-07 | 4.266345e-07 | 4.266345e-07 | 6.297937e-07 | 4.875822e-07 | 6.501096e-07 | 3.656867e-07 | 3.453708e-07 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | b17.1 |
25 | 7.110574e-07 | 7.110574e-07 | 1.015796e-07 | 9.548485e-07 | 1.422115e-07 | 3.047389e-07 | 2.031593e-07 | 2.641070e-07 | 5.891619e-07 | 1.828433e-07 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | b51 |
3 rows × 509 columns
Una vez eliminada la tendencia, se puede comprobar que las series de tiempo para cada enfermedad son estacionarias.
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries, w, name):
    """Plot rolling statistics of *timeseries* (window *w* observations) and
    print the results of the augmented Dickey-Fuller stationarity test.

    *name* is used as the plot title (the CIE code of the series).
    """
    # Rolling mean and standard deviation over a window of w weeks.
    rolling = timeseries.rolling(w)
    plt.plot(timeseries, label='Original')
    plt.plot(rolling.mean(), color='red', label='Rolling Mean')
    plt.plot(rolling.std(), color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title(name)
    plt.show(block=False)
    # Augmented Dickey-Fuller test; lag order selected by AIC.
    print('Results of Dickey-Fuller Test:')
    result = adfuller(timeseries, autolag='AIC')
    output = pd.Series(result[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for crit_level, crit_value in result[4].items():
        output['Critical Value (%s)'%crit_level] = crit_value
    print(output)
for i in range(len(ciesTS)):
    # Numeric part of row i (columns 0..507), i.e. everything but the 'cie'
    # label in the last column.
    # FIX: chain .dropna() instead of dropna(inplace=True) on the .iloc slice;
    # the slice is a copy, so the inplace call triggers pandas'
    # SettingWithCopyWarning and relies on copy semantics to work at all.
    series = ciesTS.iloc[i, :508].dropna()
    test_stationarity(series, 52, ciesTS.iloc[i, -1])
Results of Dickey-Fuller Test:
Test Statistic -1.356054e+01
p-value 2.314720e-25
#Lags Used 0.000000e+00
Number of Observations Used 4.300000e+02
Critical Value (1%) -3.445649e+00
Critical Value (5%) -2.868285e+00
Critical Value (10%) -2.570363e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -3.424746
p-value 0.010144
#Lags Used 2.000000
Number of Observations Used 348.000000
Critical Value (1%) -3.449282
Critical Value (5%) -2.869881
Critical Value (10%) -2.571214
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -5.719534e+00
p-value 6.993272e-07
#Lags Used 2.000000e+00
Number of Observations Used 4.150000e+02
Critical Value (1%) -3.446206e+00
Critical Value (5%) -2.868530e+00
Critical Value (10%) -2.570493e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -2.106851
p-value 0.241769
#Lags Used 7.000000
Number of Observations Used 461.000000
Critical Value (1%) -3.444615
Critical Value (5%) -2.867830
Critical Value (10%) -2.570120
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -3.760647
p-value 0.003337
#Lags Used 4.000000
Number of Observations Used 476.000000
Critical Value (1%) -3.444163
Critical Value (5%) -2.867631
Critical Value (10%) -2.570014
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -2.133072
p-value 0.231426
#Lags Used 10.000000
Number of Observations Used 461.000000
Critical Value (1%) -3.444615
Critical Value (5%) -2.867830
Critical Value (10%) -2.570120
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -8.720275e+00
p-value 3.400303e-14
#Lags Used 3.000000e+00
Number of Observations Used 4.700000e+02
Critical Value (1%) -3.444340e+00
Critical Value (5%) -2.867709e+00
Critical Value (10%) -2.570056e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -4.078834
p-value 0.001050
#Lags Used 5.000000
Number of Observations Used 435.000000
Critical Value (1%) -3.445473
Critical Value (5%) -2.868207
Critical Value (10%) -2.570321
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -1.063365e+01
p-value 5.137862e-19
#Lags Used 2.000000e+00
Number of Observations Used 4.810000e+02
Critical Value (1%) -3.444018e+00
Critical Value (5%) -2.867568e+00
Critical Value (10%) -2.569980e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -21.517435
p-value 0.000000
#Lags Used 0.000000
Number of Observations Used 463.000000
Critical Value (1%) -3.444553
Critical Value (5%) -2.867803
Critical Value (10%) -2.570106
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -1.468685e+01
p-value 3.093388e-27
#Lags Used 1.000000e+00
Number of Observations Used 5.060000e+02
Critical Value (1%) -3.443340e+00
Critical Value (5%) -2.867269e+00
Critical Value (10%) -2.569821e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -3.809081
p-value 0.002816
#Lags Used 11.000000
Number of Observations Used 445.000000
Critical Value (1%) -3.445131
Critical Value (5%) -2.868057
Critical Value (10%) -2.570241
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -5.053787
p-value 0.000017
#Lags Used 4.000000
Number of Observations Used 468.000000
Critical Value (1%) -3.444400
Critical Value (5%) -2.867736
Critical Value (10%) -2.570070
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -20.908062
p-value 0.000000
#Lags Used 0.000000
Number of Observations Used 449.000000
Critical Value (1%) -3.444998
Critical Value (5%) -2.867999
Critical Value (10%) -2.570210
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -2.973026
p-value 0.037491
#Lags Used 11.000000
Number of Observations Used 447.000000
Critical Value (1%) -3.445064
Critical Value (5%) -2.868028
Critical Value (10%) -2.570226
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -5.304842
p-value 0.000005
#Lags Used 12.000000
Number of Observations Used 488.000000
Critical Value (1%) -3.443821
Critical Value (5%) -2.867481
Critical Value (10%) -2.569934
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -6.501620e+00
p-value 1.158034e-08
#Lags Used 9.000000e+00
Number of Observations Used 4.630000e+02
Critical Value (1%) -3.444553e+00
Critical Value (5%) -2.867803e+00
Critical Value (10%) -2.570106e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -1.996208
p-value 0.288238
#Lags Used 11.000000
Number of Observations Used 433.000000
Critical Value (1%) -3.445543
Critical Value (5%) -2.868238
Critical Value (10%) -2.570338
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -3.403355
p-value 0.010845
#Lags Used 8.000000
Number of Observations Used 376.000000
Critical Value (1%) -3.447862
Critical Value (5%) -2.869258
Critical Value (10%) -2.570881
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -3.818212
p-value 0.002726
#Lags Used 6.000000
Number of Observations Used 433.000000
Critical Value (1%) -3.445543
Critical Value (5%) -2.868238
Critical Value (10%) -2.570338
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -9.927614e+00
p-value 2.878961e-17
#Lags Used 1.000000e+00
Number of Observations Used 3.980000e+02
Critical Value (1%) -3.446888e+00
Critical Value (5%) -2.868829e+00
Critical Value (10%) -2.570653e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -7.628780e+00
p-value 2.034981e-11
#Lags Used 2.000000e+00
Number of Observations Used 4.000000e+02
Critical Value (1%) -3.446804e+00
Critical Value (5%) -2.868793e+00
Critical Value (10%) -2.570634e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -1.383949
p-value 0.589947
#Lags Used 14.000000
Number of Observations Used 408.000000
Critical Value (1%) -3.446480
Critical Value (5%) -2.868650
Critical Value (10%) -2.570557
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -6.469065e+00
p-value 1.381678e-08
#Lags Used 1.000000e+00
Number of Observations Used 3.080000e+02
Critical Value (1%) -3.451761e+00
Critical Value (5%) -2.870970e+00
Critical Value (10%) -2.571794e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -1.049502e+01
p-value 1.121338e-18
#Lags Used 1.000000e+00
Number of Observations Used 4.900000e+02
Critical Value (1%) -3.443766e+00
Critical Value (5%) -2.867457e+00
Critical Value (10%) -2.569921e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -3.050080
p-value 0.030475
#Lags Used 8.000000
Number of Observations Used 471.000000
Critical Value (1%) -3.444310
Critical Value (5%) -2.867696
Critical Value (10%) -2.570049
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -4.194474
p-value 0.000673
#Lags Used 8.000000
Number of Observations Used 435.000000
Critical Value (1%) -3.445473
Critical Value (5%) -2.868207
Critical Value (10%) -2.570321
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -5.300617
p-value 0.000005
#Lags Used 17.000000
Number of Observations Used 448.000000
Critical Value (1%) -3.445031
Critical Value (5%) -2.868013
Critical Value (10%) -2.570218
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -5.196448
p-value 0.000009
#Lags Used 0.000000
Number of Observations Used 422.000000
Critical Value (1%) -3.445941
Critical Value (5%) -2.868413
Critical Value (10%) -2.570431
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -6.052569e+00
p-value 1.267267e-07
#Lags Used 1.300000e+01
Number of Observations Used 4.740000e+02
Critical Value (1%) -3.444221e+00
Critical Value (5%) -2.867657e+00
Critical Value (10%) -2.570028e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -1.053776e+01
p-value 8.810400e-19
#Lags Used 2.000000e+00
Number of Observations Used 4.370000e+02
Critical Value (1%) -3.445403e+00
Critical Value (5%) -2.868177e+00
Critical Value (10%) -2.570305e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -5.316712
p-value 0.000005
#Lags Used 14.000000
Number of Observations Used 451.000000
Critical Value (1%) -3.444933
Critical Value (5%) -2.867970
Critical Value (10%) -2.570195
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -4.113528
p-value 0.000920
#Lags Used 4.000000
Number of Observations Used 269.000000
Critical Value (1%) -3.454896
Critical Value (5%) -2.872345
Critical Value (10%) -2.572528
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -4.874372
p-value 0.000039
#Lags Used 11.000000
Number of Observations Used 452.000000
Critical Value (1%) -3.444900
Critical Value (5%) -2.867956
Critical Value (10%) -2.570187
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -5.375052
p-value 0.000004
#Lags Used 8.000000
Number of Observations Used 474.000000
Critical Value (1%) -3.444221
Critical Value (5%) -2.867657
Critical Value (10%) -2.570028
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -5.342202
p-value 0.000004
#Lags Used 5.000000
Number of Observations Used 458.000000
Critical Value (1%) -3.444709
Critical Value (5%) -2.867871
Critical Value (10%) -2.570142
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -1.813468
p-value 0.373796
#Lags Used 2.000000
Number of Observations Used 452.000000
Critical Value (1%) -3.444900
Critical Value (5%) -2.867956
Critical Value (10%) -2.570187
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -6.673468e+00
p-value 4.526341e-09
#Lags Used 1.100000e+01
Number of Observations Used 4.420000e+02
Critical Value (1%) -3.445232e+00
Critical Value (5%) -2.868101e+00
Critical Value (10%) -2.570265e+00
dtype: float64
Results of Dickey-Fuller Test:
Test Statistic -6.644475e+00
p-value 5.308127e-09
#Lags Used 2.000000e+00
Number of Observations Used 4.030000e+02
Critical Value (1%) -3.446681e+00
Critical Value (5%) -2.868739e+00
Critical Value (10%) -2.570605e+00
dtype: float64
Por ello es posible pronosticarlas con el método de Holt-Winters
# https://www.analyticsvidhya.com/blog/2018/02/time-series-forecasting-methods/
# https://machinelearningmastery.com/time-series-forecasting-methods-in-python-cheat-sheet/
# https://towardsdatascience.com/time-series-in-python-exponential-smoothing-and-arima-processes-2c67f2a52788
from statsmodels.tsa.api import Holt, SimpleExpSmoothing, ExponentialSmoothing

# For every disease time series: fit simple exponential smoothing on the
# first 70% of observations, forecast the remaining horizon, and keep the
# (last observed value, first forecast step) pair for later comparison.
pronosticos = []
real = []
for idx in range(len(ciesTS)):
    serie = ciesTS.iloc[idx, :508]
    serie.dropna(inplace=True)
    # https://medium.com/datadriveninvestor/how-to-build-exponential-smoothing-models-using-python-simple-exponential-smoothing-holt-and-da371189e1a1
    # Train = 0.7
    n_train = round(0.7 * len(serie))
    ajuste = ExponentialSmoothing(np.asarray(serie.iloc[0:n_train])).fit(smoothing_level=0.1)
    # https://stackoverflow.com/a/50786171
    pred = ajuste.predict(start=n_train + 1, end=len(serie))
    fcast = ajuste.forecast(len(serie) - n_train)
    plt.figure(figsize=(12, 4))
    plt.plot(serie)
    plt.plot(ajuste.fittedvalues, c='black')
    plt.plot(range(n_train, len(serie)), fcast, c='red')
    #plt.plot(range(n_train, len(serie)), pred, c='red')
    plt.title(ciesTS.iloc[idx, 508])  # presumably column 508 holds the CIE code — verify upstream
    plt.legend(["Serie de tiempo", "Ajuste de Holt-Winter", "Pronóstico de Holt-Winter"])
    plt.show()
    # https://stackoverflow.com/a/15863028
    real.append(serie.iloc[-1])
    pronosticos.append(fcast[0])
Que presenta muy buen ajuste respecto a los datos reales
# Linear regression between the last observed value of each series and its
# first Holt-Winters forecast step: a slope near 1 with high R^2 and a
# significant p-value indicates the forecasts track the real data well.
a, b, r, p, e = stats.linregress(real, pronosticos)
print("y = f(x) = {:.4f} x + {:.4f}".format(a, b))
print("error", e)
print("p = ", p)
print("pendiente {:s}significativa".format("no " if p >= 0.05 else ""))
print("R^2", r**2)
# BUG FIX: this scatter shows the RAW values, but the title/axis labels
# were copy-pasted from the logarithmic plot further below; relabel so the
# figure describes what is actually plotted.
plt.title('Pronóstico', fontsize = 20)
plt.xlabel('Último caso normalizado')
plt.ylabel('Pronóstico')
plt.scatter(real, pronosticos)
plt.show()
y = f(x) = 1.1122 x + 0.0000
error 0.019116083562680818
p = 5.559074078899032e-38
pendiente significativa
R^2 0.9891871972161727
Lo que se evidencia claramente al utilizar escala logarítmica en los datos
# Same comparison on a logarithmic scale.
plt.title('Logaritmo del pronóstico', fontsize = 20)
plt.xlabel('Logaritmo del último caso normalizado')
plt.ylabel('Pronóstico')
# FIX: some series end at 0 cases, so np.log raised a divide-by-zero
# RuntimeWarning and produced -inf coordinates that were never drawn.
# Keep only strictly positive pairs before taking the logarithm; the
# visible scatter is unchanged, the warning disappears.
real_arr = np.asarray(real, dtype=float)
pron_arr = np.asarray(pronosticos, dtype=float)
positivos = (real_arr > 0) & (pron_arr > 0)
plt.scatter(np.log(real_arr[positivos]), np.log(pron_arr[positivos]))
plt.show()
C:\Users\bena8\AppData\Local\Programs\Python\Python37-32\lib\site-packages\ipykernel_launcher.py:4: RuntimeWarning: divide by zero encountered in log
after removing the cwd from sys.path.
# https://machinelearningmastery.com/time-series-forecast-uncertainty-using-confidence-intervals-python/
# https://machinelearningmastery.com/make-sample-forecasts-arima-python/
from statsmodels.tsa.arima_model import ARIMA
def difference(dataset, interval=1):
    """Return the lag-`interval` differences of `dataset` as a pandas Series.

    Result element k equals dataset[k + interval] - dataset[k], so the
    output is `interval` values shorter than the input.
    """
    deltas = [dataset[i] - dataset[i - interval]
              for i in range(interval, len(dataset))]
    return pd.Series(deltas)
# For every series: difference it, fit an ARIMA(3,1,0) without trend on the
# differenced data, predict the held-out tail, and plot the first
# differences against the model's fitted values and forecast.
for idx in range(len(ciesTS)):
    serie = ciesTS.iloc[idx, :508]
    serie.dropna(inplace=True)
    n_train = round(0.9 * len(serie))
    # Lag equals the forecast horizon minus one — NOTE(review): confirm this
    # lag choice is intentional rather than a plain first difference.
    ts = difference(np.asarray(serie), interval=len(serie) - n_train - 1)
    modelo = ARIMA(ts, order=(3, 1, 0))
    ajuste = modelo.fit(trend='nc', disp=0)
    pred = ajuste.predict(start=n_train + 1, end=len(serie))
    primera_dif = serie - serie.shift()
    plt.figure(figsize=(16, 4))
    plt.plot(primera_dif)
    plt.plot(ajuste.fittedvalues, color='red')
    plt.plot(range(n_train, len(serie)), pred, c='black')
    plt.legend(['Diferencia', 'Valores ajustados de ARIMA', 'Pronóstico de ARIMA'])
    plt.show()