Skip to content

Latest commit

 

History

History
3341 lines (2224 loc) · 65.8 KB

P9.md

File metadata and controls

3341 lines (2224 loc) · 65.8 KB

Series de tiempo

De entre las series de tiempo para cada enfermedad en el periodo descrito, no todas contienen información de casos reportados durante el periodo de tiempo establecido para la investigación, por lo que se extraen las que reportan al menos la mitad del periodo (260 semanas).

# https://stackoverflow.com/a/16916611
# Keep only the diseases whose series report at least half the study
# period (260 weeks), then re-group by CIE code.
print('Iniciales {}'.format(len(cie)))
cie = cie.filter(lambda g: g['sem'].count() >= 260)
cie.reset_index(drop=True, inplace=True)
cie = cie.groupby('cie')
print('Restantes {}'.format(len(cie)))
Iniciales 138
Restantes 40
# Same 260-week coverage filter, applied to the grouped (chapter-level)
# series; re-group by the first character of the CIE code.
print('Iniciales {}'.format(len(cieG)))
cieG = cieG.filter(lambda g: g['sem'].count() >= 260)
cieG.reset_index(drop=True, inplace=True)
cieG = cieG.groupby(cieG.cie.str[0])
print('Restantes {}'.format(len(cieG)))
Iniciales 22
Restantes 12

Así, de 138 series de tiempo de enfermedades, se obtienen 40 en las que al menos se cuenta con datos semanales de 5 años. Para dichas enfermedades se obtienen los pesos de la regresión lineal y se obtiene la serie de tiempo sin la tendencia y las autocorrelaciones (eliminando la a92.3 porque viene vacía)

from scipy import signal
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from statsmodels.tsa.stattools import acf

ciesF = []   # per-CIE feature vectors: slope, intercept, ACF lags, code
ciesTSt = [] # per-CIE raw weekly time series

for code, grp in cie:
    # a92.3 comes through empty, so it is excluded from the analysis.
    if code == 'a92.3':
        continue

    # Remove the linear trend before computing autocorrelations.
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.detrend.html
    detrended = signal.detrend(grp.casos)

    # Linear fit: slope a, intercept b, correlation r, p-value p, std error e.
    a, b, r, p, e = stats.linregress(grp['sem'], grp.casos)
    print("y = f(x) = {} x + {}".format(a, b))
    print("error", e)
    print("p = ", p)
    print("pendiente {:s}significativa".format("no " if p >= 0.05 else ""))
    print("R^2", r**2)

    # Original series, detrended series, and fitted trend line.
    plt.figure(figsize=(12, 2))
    plt.plot(grp['sem'], grp.casos)
    plt.plot(grp['sem'], detrended, c='black')
    plt.plot(grp['sem'], (a * grp['sem'] + b), label = 'y = {:.1f}x + {:.0f}'.format(a, b), color = 'red', linewidth = 3)
    plt.title(code)
    plt.xlabel("Semana")
    plt.ylabel("Casos normalizados")
    plt.show()

    # Cumulative cases, useful for time-series distance comparisons.
    # https://stackoverflow.com/questions/48497756/time-series-distance-metric
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.cumsum.html
    plt.figure(figsize=(12, 2))
    plt.plot(grp['sem'], grp.casos.cumsum(), c='green')
    plt.title(code)
    plt.xlabel("Semana")
    plt.ylabel("Acumulado de Casos normalizados")
    plt.show()

    # Autocorrelation of the detrended series, up to one year of lag.
    # https://machinelearningmastery.com/gentle-introduction-autocorrelation-partial-autocorrelation/
    # https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html
    plot_acf(detrended, lags=52)
    plt.title(code)
    plt.xlabel("Retraso en semanas")
    plt.ylabel('Correlación')
    plt.show()

    # Feature vector: [slope, intercept, acf(0..52)..., code].
    # https://stackoverflow.com/a/3748071
    feats = [a, b]
    feats.extend(acf(detrended, nlags=52))
    feats.append(code)
    ciesF.append(feats)

    ciesTSt.append(list(grp.casos))
y = f(x) = 4.2687498879775535e-11 x + -5.61276490864389e-09
error 2.0113605583063735e-11
p =  0.03438325581830106
pendiente significativa
R^2 0.010390302941758996

png

png

png

y = f(x) = -1.7653075450894884e-06 x + 0.0019754909758688734
error 1.215881678974069e-07
p =  1.0774404716157897e-37
pendiente significativa
R^2 0.3765563116502911

png

png

png

y = f(x) = -3.0752338979568535e-09 x + 1.2904965711605356e-05
error 8.36749265385657e-10
p =  0.0002687345236732541
pendiente significativa
R^2 0.03144815483703095

png

png

png

y = f(x) = -1.0785666679174943e-08 x + 6.155839207125693e-06
error 3.564598970127455e-10
p =  3.9269145731807006e-112
pendiente significativa
R^2 0.6622135065357393

png

png

png

y = f(x) = -9.213574424729245e-09 x + 1.2957954379028175e-05
error 6.75300549213463e-10
p =  4.863591366039641e-36
pendiente significativa
R^2 0.2798612445705242

png

png

png

y = f(x) = -3.831012274635742e-09 x + 4.559110665311131e-06
error 2.056660576324692e-10
p =  2.1083769747172313e-58
pendiente significativa
R^2 0.4247090552255535

png

png

png

y = f(x) = -1.4627725963638185e-11 x + 4.5153183905681134e-08
error 8.6165080095987e-12
p =  0.09023493838847157
pendiente no significativa
R^2 0.006068835261027791

png

png

png

y = f(x) = -1.7642681104091792e-10 x + 6.894953499860091e-07
error 6.403728082254567e-11
p =  0.0061121040534305735
pendiente significativa
R^2 0.01699628644380042

png

png

png

y = f(x) = 3.8938781393720075e-11 x + 4.4324399670136904e-08
error 2.301537019363991e-11
p =  0.09131901816285937
pendiente no significativa
R^2 0.00590350974760678

png

png

png

y = f(x) = 2.3434709729609935e-13 x + -3.6003147872535345e-11
error 1.478629682740248e-13
p =  0.1136746875979033
pendiente no significativa
R^2 0.005407586602284264

png

png

png

y = f(x) = -3.4234098194666664e-11 x + 1.8622107713377432e-08
error 7.415007265413988e-12
p =  4.945787391949576e-06
pendiente significativa
R^2 0.040422588588555235

png

png

png

y = f(x) = 2.725400812299939e-10 x + 1.9471946148640167e-08
error 7.110945660238779e-11
p =  0.00014458146666075644
pendiente significativa
R^2 0.031274848966871695

png

png

png

y = f(x) = -4.255464112013022e-09 x + 2.7470903702510135e-06
error 3.3548559373967543e-10
p =  6.366969283108536e-32
pendiente significativa
R^2 0.25462455709847065

png

png

png

y = f(x) = -1.0744562226275755e-10 x + 5.3269706349552e-08
error 6.010845348981309e-11
p =  0.07452781641973526
pendiente no significativa
R^2 0.007081769917450651

png

png

png

y = f(x) = -4.51442956629053e-11 x + 5.003109663849265e-08
error 8.825884482273087e-12
p =  4.625709147491021e-07
pendiente significativa
R^2 0.05414963353204715

png

png

png

y = f(x) = -2.8757621426513095e-09 x + 8.65805695809489e-06
error 2.5195362618720955e-09
p =  0.25425697912167106
pendiente no significativa
R^2 0.002603944949826924

png

png

png

y = f(x) = -1.4644681914017878e-07 x + 0.00011211051217541277
error 1.1228826202874234e-08
p =  2.0750705342062423e-33
pendiente significativa
R^2 0.2653193213169377

png

png

png

y = f(x) = -2.722125710622442e-10 x + 1.262294141156984e-07
error 1.588658069624221e-11
p =  7.30165010543899e-51
pendiente significativa
R^2 0.3985877970692943

png

png

png

y = f(x) = -7.433434732564669e-09 x + 6.901511580636186e-06
error 4.809110869935852e-10
p =  3.1523565347787724e-42
pendiente significativa
R^2 0.3841635208468614

png

png

png

y = f(x) = -1.2411343091809244e-10 x + 2.1160165305354125e-07
error 2.468137006599137e-11
p =  7.217507598355145e-07
pendiente significativa
R^2 0.05458192227303572

png

png

png

y = f(x) = 4.306189172696877e-11 x + 4.5311776302657916e-07
error 4.983732447188263e-11
p =  0.3880813876059074
pendiente no significativa
R^2 0.0018723188046217538

png

png

png

y = f(x) = -9.851452950958728e-10 x + 1.4372871687382767e-06
error 2.0422528814979408e-10
p =  2.004159852045715e-06
pendiente significativa
R^2 0.05484537751521497

png

png

png

y = f(x) = -4.5977874526241305e-09 x + 2.970356116391645e-06
error 1.2200834000344058e-10
p =  5.731116914266081e-137
pendiente significativa
R^2 0.7713322815069262

png

png

png

y = f(x) = -3.141197049928086e-08 x + 0.00011159382796415527
error 1.555599789385595e-08
p =  0.04432369209037648
pendiente significativa
R^2 0.013065683803978847

png

png

png

y = f(x) = -1.0673850709576887e-11 x + 4.355250762325799e-09
error 3.1880871555405942e-12
p =  0.0008764994132686745
pendiente significativa
R^2 0.02236467960180641

png

png

png

y = f(x) = -1.1834633525276088e-09 x + 7.100912129285122e-07
error 9.393463801467669e-11
p =  1.2559331815198554e-31
pendiente significativa
R^2 0.24928894104283042

png

png

png

y = f(x) = -2.6544241562890327e-10 x + 3.2049521522763566e-07
error 3.7319118941308474e-11
p =  4.610321182741757e-12
pendiente significativa
R^2 0.10270484494415096

png

png

png

y = f(x) = -4.556172046800565e-10 x + 3.7500114147077754e-07
error 3.423510008372152e-11
p =  1.8548314561638924e-34
pendiente significativa
R^2 0.27626166419878756

png

png

png

y = f(x) = -6.932314608726674e-06 x + 0.008805967823835786
error 6.162044690299544e-07
p =  7.503334430143699e-26
pendiente significativa
R^2 0.23113873276962404

png

png

png

y = f(x) = -4.3876585629436324e-08 x + 5.08928603405923e-05
error 4.181979380800392e-09
p =  2.3727736779313174e-23
pendiente significativa
R^2 0.1846709286259034

png

png

png

y = f(x) = 3.0607452607643954e-14 x + 8.632235917582137e-10
error 1.081125720208934e-12
p =  0.977427221531179
pendiente no significativa
R^2 1.8298987789071008e-06

png

png

png

y = f(x) = -7.142618310354189e-10 x + 1.178302593061399e-06
error 1.457944295445013e-10
p =  1.332329783777257e-06
pendiente significativa
R^2 0.04918267323938655

png

png

png

y = f(x) = -1.3158987800860395e-08 x + 1.643445025799908e-05
error 1.6084785128648306e-09
p =  1.0885896084621633e-14
pendiente significativa
R^2 0.19747207526346663

png

png

png

y = f(x) = -5.001283641779608e-08 x + 8.79623291848092e-05
error 5.714736801137942e-09
p =  3.9731738436330694e-17
pendiente significativa
R^2 0.14220420601744926

png

png

png

y = f(x) = -8.130421944150189e-10 x + 1.4827816461880552e-06
error 5.134961166576471e-10
p =  0.11399979312553642
pendiente no significativa
R^2 0.005185003922499099

png

png

png

y = f(x) = -2.3438815029578847e-11 x + 1.5962437413039917e-07
error 5.628337741371575e-11
p =  0.6772793134611802
pendiente no significativa
R^2 0.00037523732655554817

png

png

png

y = f(x) = -5.193298199318447e-08 x + 4.533358442590547e-05
error 1.6692637054937939e-09
p =  1.6078008523125964e-114
pendiente significativa
R^2 0.6811910446622308

png

png

png

y = f(x) = -9.132331371853938e-10 x + 1.3202325787207294e-06
error 1.431149104840528e-10
p =  4.359158383428425e-10
pendiente significativa
R^2 0.08264080526994147

png

png

png

y = f(x) = 3.638834463785146e-10 x + 1.1174278631513553e-06
error 1.1201987356227443e-10
p =  0.001257639893824037
pendiente significativa
R^2 0.025453964627936122

png

png

png

# Same trend/cumulative/ACF analysis, now per CIE chapter (grouped codes).
for name, group in cieG:

    # Remove the linear trend before computing autocorrelations.
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.detrend.html
    detrended = signal.detrend(group.casos)

    # Linear fit: slope a, intercept b, correlation r, p-value p, std error e.
    a, b, r, p, e = stats.linregress(group['sem'], group.casos)
    print("y = f(x) = {} x + {}".format(a, b))
    print("error", e)
    print("p = ", p)
    print("pendiente {:s}significativa".format("no " if p >= 0.05 else ""))
    print("R^2", r**2)

    # Original series, detrended series, and fitted trend line.
    plt.figure(figsize=(12, 2))
    plt.plot(group['sem'], group.casos)
    plt.plot(group['sem'], detrended, c='black')
    plt.plot(group['sem'], (a * group['sem'] + b), label = 'y = {:.1f}x + {:.0f}'.format(a, b), color = 'red', linewidth = 3)
    plt.title(name)
    plt.xlabel("Semana")
    plt.ylabel("Casos normalizados")
    plt.show()

    # Cumulative cases, useful for time-series distance comparisons.
    # https://stackoverflow.com/questions/48497756/time-series-distance-metric
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.cumsum.html
    plt.figure(figsize=(12, 2))
    plt.plot(group['sem'], group.casos.cumsum(), c='green')
    plt.title(name)
    plt.xlabel("Semana")
    plt.ylabel("Acumulado de Casos normalizados")
    plt.show()

    # NOTE: plot_acf creates its own figure, so the extra plt.figure()
    # that used to precede it only produced an empty
    # "<Figure size 432x288 with 0 Axes>" per iteration — removed.
    # https://machinelearningmastery.com/gentle-introduction-autocorrelation-partial-autocorrelation/
    # https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html
    plot_acf(detrended, lags=52)
    plt.title(name)
    plt.xlabel("Retraso en semanas")
    plt.ylabel('Correlación')
    plt.show()
y = f(x) = -2.3224356070873255e-07 x + 0.00018642953885186607
error 2.458069138569354e-08
p =  4.2189376399252386e-21
pendiente significativa
R^2 0.008834750473414046

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = -1.9503026794867653e-08 x + 2.1003475197080275e-05
error 2.9914826904487065e-09
p =  7.747452841909533e-11
pendiente significativa
R^2 0.008415817644051377

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = -7.687412126573934e-08 x + 6.301895314999713e-05
error 1.1457310486128478e-07
p =  0.5026730466383222
pendiente no significativa
R^2 0.0012420716051614582

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = 4.324626671299835e-10 x + 2.04941342972253e-07
error 7.239655510808932e-11
p =  4.1887790791163235e-09
pendiente significativa
R^2 0.06134442012808605

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = 6.114012516943938e-08 x + -9.37021948204884e-06
error 5.454249521195378e-09
p =  7.943541732517958e-27
pendiente significativa
R^2 0.1593291079485521

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = -5.4762876304630354e-06 x + 0.004047543918036931
error 6.313606290646645e-07
p =  1.2891385768087959e-17
pendiente significativa
R^2 0.057202405083796086

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = 3.0607452607643954e-14 x + 8.632235917582137e-10
error 1.081125720208934e-12
p =  0.977427221531179
pendiente no significativa
R^2 1.8298987789071008e-06

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = -4.552145867367468e-08 x + 4.150055938885172e-05
error 5.28039387715845e-09
p =  1.7027198403366345e-17
pendiente significativa
R^2 0.04850098103773122

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = -5.496981234999186e-10 x + 8.572895488021172e-07
error 2.657552949170129e-10
p =  0.038856309457124164
pendiente significativa
R^2 0.004272969521096534

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = -3.11383856620073e-08 x + 3.0350821580836582e-05
error 3.4613396780786333e-09
p =  2.313921331364859e-18
pendiente significativa
R^2 0.10649538120403663

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = -9.132331371853938e-10 x + 1.3202325787207294e-06
error 1.431149104840528e-10
p =  4.359158383428425e-10
pendiente significativa
R^2 0.08264080526994147

png

png

<Figure size 432x288 with 0 Axes>

png

y = f(x) = 3.638834463785146e-10 x + 1.1174278631513553e-06
error 1.1201987356227443e-10
p =  0.001257639893824037
pendiente significativa
R^2 0.025453964627936122

png

png

<Figure size 432x288 with 0 Axes>

png

Se extraen las características de cada CIE en tanto serie de tiempo. A saber, su pendiente, ordenada en el origen y las autocorrelaciones con retraso de 1 a 52 semanas (eliminando el retraso de 0 semanas)

# Build the feature matrix: slope m, intercept b, autocorrelations
# ac0..ac52, and the CIE code; then drop the zero-lag column (the
# autocorrelation at lag 0 carries no information).
ciesF = pd.DataFrame(ciesF)

# https://stackoverflow.com/a/11346337
ciesF.columns = ['m', 'b'] + ['ac' + str(lag) for lag in range(53)] + ['cie']

ciesF = ciesF.drop(['ac0'], axis=1)

ciesF.sort_values(by=['m'], ascending=False)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
m b ac1 ac2 ac3 ac4 ac5 ac6 ac7 ac8 ... ac44 ac45 ac46 ac47 ac48 ac49 ac50 ac51 ac52 cie
38 3.638834e-10 1.117428e-06 0.497210 0.371337 0.338770 0.295026 0.246889 0.170797 0.177532 0.174927 ... 0.095734 0.079953 0.094433 0.045073 0.020609 0.002565 -0.025604 0.001100 0.014393 z21
11 2.725401e-10 1.947195e-08 0.089659 -0.019229 -0.025867 -0.002608 -0.028985 -0.001130 -0.058919 -0.019433 ... -0.034949 -0.062431 -0.002219 0.029734 0.005434 -0.050439 0.078625 0.029168 -0.002898 a37
20 4.306189e-11 4.531178e-07 0.299805 0.225857 0.068495 0.057942 0.001655 0.017533 0.033656 0.100137 ... 0.050684 0.016318 0.061034 0.053793 0.021663 0.059753 -0.005982 0.033164 -0.047455 b17.1
0 4.268750e-11 -5.612765e-09 0.393749 0.122484 0.010328 0.026661 0.015698 0.021141 -0.002905 -0.010120 ... -0.017536 -0.017607 -0.017679 -0.017751 -0.017822 -0.017893 -0.017965 -0.018036 -0.018107 a00
8 3.893878e-11 4.432440e-08 0.393243 0.260146 0.044702 0.032172 0.012605 0.025166 0.014480 0.009281 ... -0.017366 0.067969 0.005142 0.003701 -0.020010 0.007456 0.042142 0.077540 0.090818 a27
9 2.343471e-13 -3.600315e-11 -0.007522 -0.007561 -0.007600 -0.007639 -0.007678 -0.007717 -0.007756 -0.007795 ... -0.000592 -0.000605 -0.000618 -0.000631 -0.000644 -0.000657 -0.000670 -0.000683 -0.000696 a33
30 3.060745e-14 8.632236e-10 0.055952 0.062823 0.076371 0.006623 0.030863 -0.037294 -0.060305 0.059805 ... -0.057127 -0.057262 -0.035717 -0.035852 -0.057666 -0.057801 -0.057936 -0.058070 -0.058205 p35.0
24 -1.067385e-11 4.355251e-09 0.077117 0.322187 0.046591 0.036037 -0.023600 0.020128 -0.018800 -0.026529 ... -0.023442 0.017590 -0.018073 -0.018183 -0.023882 0.032515 -0.001876 0.011733 0.004771 b50
6 -1.462773e-11 4.515318e-08 0.085993 0.048897 0.064574 0.118768 -0.023565 -0.026439 0.023959 -0.036252 ... 0.064103 0.019108 -0.021642 -0.019077 0.015544 0.031099 -0.056185 -0.073401 -0.004459 a17.0
35 -2.343882e-11 1.596244e-07 0.550097 0.512561 0.454005 0.411662 0.363330 0.254477 0.225435 0.181761 ... -0.032387 -0.007867 0.004137 -0.020714 0.015187 -0.003246 0.008389 0.020997 0.018492 u98
10 -3.423410e-11 1.862211e-08 -0.081445 0.057375 -0.049496 0.012841 0.056102 -0.020204 -0.009801 -0.032175 ... -0.005745 0.005151 -0.043027 0.056617 -0.031916 0.114628 -0.035333 0.052850 -0.016788 a34
14 -4.514430e-11 5.003110e-08 0.076912 0.149122 0.088945 0.072881 0.114257 0.074084 0.108017 0.108573 ... 0.036326 0.015114 0.051616 0.032731 -0.039804 0.002215 0.025619 0.016111 -0.024214 a50
13 -1.074456e-10 5.326971e-08 0.003941 -0.012528 -0.008077 -0.013007 -0.007537 -0.016467 0.003030 -0.001938 ... -0.011364 -0.003043 -0.003882 0.012084 0.000458 -0.001701 0.007212 -0.004964 -0.004440 a39.0
19 -1.241134e-10 2.116017e-07 0.289203 0.226359 0.276856 0.274657 0.253692 0.247411 0.247509 0.206441 ... 0.021663 0.093129 0.023837 0.146601 0.018644 0.036760 0.043459 -0.015475 -0.004356 b16
7 -1.764268e-10 6.894953e-07 0.522652 0.363609 0.361372 0.385651 0.326709 0.325553 0.318398 0.252245 ... 0.196099 0.182101 0.163183 0.167669 0.235813 0.206676 0.215066 0.167751 0.183912 a23
26 -2.654424e-10 3.204952e-07 0.110847 0.069672 0.107836 0.080559 0.128797 -0.027006 0.042914 -0.009807 ... 0.029668 0.076565 0.089312 0.024519 0.008848 0.059614 0.099755 0.046717 -0.020862 g00-g03
17 -2.722126e-10 1.262294e-07 0.488103 0.474707 0.426456 0.414057 0.317635 0.304248 0.261418 0.248762 ... 0.072010 0.087184 0.117220 0.163608 0.087383 0.129308 0.128205 0.131949 0.139945 b06
27 -4.556172e-10 3.750011e-07 0.232941 0.165279 0.155966 0.155952 0.159776 0.110074 0.261158 0.168664 ... 0.016427 0.004149 -0.044051 0.007307 -0.018886 -0.029089 0.021237 0.039481 0.027578 i00-i02
31 -7.142618e-10 1.178303e-06 0.791010 0.750958 0.691090 0.610929 0.512450 0.434146 0.366592 0.258712 ... 0.341533 0.408299 0.435233 0.469282 0.511489 0.522807 0.498786 0.480615 0.470837 t60
34 -8.130422e-10 1.482782e-06 0.026306 0.067629 0.068971 0.106583 0.028512 0.007521 0.073457 -0.032228 ... 0.055582 0.128333 0.022517 0.066484 0.006181 0.028253 0.168174 0.105951 0.083272 u97
37 -9.132331e-10 1.320233e-06 0.826727 0.781848 0.727522 0.675235 0.624828 0.550919 0.484088 0.401629 ... 0.459125 0.506974 0.547571 0.601010 0.631255 0.670476 0.649844 0.640989 0.621946 x20
21 -9.851453e-10 1.437287e-06 0.149215 0.272471 0.152501 0.089044 0.100963 0.106820 0.087191 -0.016662 ... 0.075923 0.128809 0.068058 0.029803 0.013137 -0.020469 -0.029275 -0.055377 -0.035842 b20-b24
25 -1.183463e-09 7.100912e-07 0.388003 0.416624 0.313638 0.387392 0.284052 0.250379 0.164883 0.231499 ... 0.188028 0.150510 0.140413 0.147020 0.207482 0.171138 0.209809 0.209396 0.200214 b51
15 -2.875762e-09 8.658057e-06 0.895262 0.842421 0.792705 0.707147 0.622664 0.527720 0.432752 0.356828 ... 0.243528 0.298282 0.370211 0.423460 0.466082 0.511227 0.539638 0.552434 0.540235 a90
2 -3.075234e-09 1.290497e-05 0.667701 0.505751 0.436771 0.352219 0.289719 0.228429 0.172198 0.107678 ... 0.223528 0.189754 0.185963 0.183744 0.179380 0.151920 0.143924 0.134758 0.057089 a01.0
5 -3.831012e-09 4.559111e-06 0.380632 0.195406 0.107774 0.091505 0.073293 0.073178 0.027202 0.086867 ... 0.121356 0.134870 0.096912 0.037600 0.126218 0.126044 0.177925 0.161334 0.038959 a15-a16
12 -4.255464e-09 2.747090e-06 0.140500 0.121196 0.127669 0.099854 0.073198 0.027292 -0.002968 -0.016134 ... 0.035622 0.029450 0.045802 0.047865 0.060802 0.095082 0.062623 0.058167 0.053912 a38
22 -4.597787e-09 2.970356e-06 0.564047 0.398251 0.335638 0.284887 0.179178 0.078869 0.009511 -0.045524 ... 0.070795 0.069166 0.100421 0.164831 0.185766 0.150027 0.154843 0.220636 0.151552 b26
18 -7.433435e-09 6.901512e-06 0.765747 0.587239 0.516973 0.418962 0.348350 0.239331 0.135074 0.112791 ... 0.326428 0.312633 0.307638 0.296214 0.237261 0.190637 0.124537 0.059086 0.007564 b15
4 -9.213574e-09 1.295795e-05 0.605797 0.479571 0.410353 0.367218 0.341321 0.311389 0.234742 0.219977 ... 0.255961 0.229158 0.221748 0.178246 0.191535 0.225850 0.294085 0.313643 0.295370 a05
3 -1.078567e-08 6.155839e-06 0.620012 0.595386 0.535631 0.462663 0.438088 0.373148 0.289653 0.300332 ... 0.178095 0.234208 0.255625 0.278374 0.284554 0.274991 0.301086 0.266200 0.231869 a03
32 -1.315899e-08 1.643445e-05 0.903920 0.826842 0.732156 0.614444 0.497397 0.376862 0.268490 0.160477 ... 0.183030 0.131180 0.085666 0.037463 -0.008971 -0.066770 -0.114970 -0.152466 -0.191697 t63 excepto t63.2
23 -3.141197e-08 1.115938e-04 0.767118 0.547797 0.395471 0.291686 0.210417 0.128438 0.081658 0.051735 ... -0.017768 -0.024711 -0.028894 -0.060319 -0.058954 -0.048837 -0.020363 -0.004847 -0.005577 b30
29 -4.387659e-08 5.089286e-05 0.927733 0.858740 0.790416 0.727838 0.637424 0.545003 0.449916 0.365855 ... 0.569450 0.607758 0.624284 0.633800 0.647336 0.649295 0.625177 0.589587 0.547403 j12
33 -5.001284e-08 8.796233e-05 0.881185 0.793629 0.694008 0.596382 0.456681 0.319222 0.189304 0.062305 ... 0.314212 0.388623 0.451051 0.497185 0.527848 0.531338 0.510950 0.475725 0.426713 t63.2
36 -5.193298e-08 4.533358e-05 0.844756 0.768593 0.723830 0.666654 0.614451 0.564508 0.510689 0.441227 ... 0.355990 0.405624 0.456501 0.503929 0.521726 0.555444 0.542070 0.525043 0.500933 w54
16 -1.464468e-07 1.121105e-04 0.946239 0.892301 0.837622 0.778140 0.711102 0.629254 0.536098 0.431731 ... 0.530872 0.565769 0.593141 0.613168 0.623759 0.618582 0.598455 0.570398 0.530427 b01
1 -1.765308e-06 1.975491e-03 0.831565 0.690654 0.594390 0.516446 0.437316 0.326857 0.221760 0.104962 ... 0.289442 0.214354 0.137897 0.059001 -0.016753 -0.061065 -0.108941 -0.158795 -0.225823 a01-a03
28 -6.932315e-06 8.805968e-03 0.853479 0.727355 0.620092 0.535945 0.445607 0.349639 0.267460 0.175938 ... 0.200076 0.214129 0.232129 0.213833 0.204222 0.194258 0.186803 0.151652 0.122209 j00-j06

39 rows × 55 columns

Se agrupan las CIEs por casos dados en una semana, como series de tiempo.

# Attach each CIE code to its raw weekly series; a92.3 was skipped when
# building ciesTSt, so it must be removed from the group keys too.
ciesTS = pd.DataFrame(ciesTSt)
codes = list(cie.groups.keys())
codes.remove('a92.3')
ciesTS['cie'] = codes

ciesTS.sample(3)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
0 1 2 3 4 5 6 7 8 9 ... 499 500 501 502 503 504 505 506 507 cie
12 3.413076e-06 2.295700e-06 2.031593e-06 2.620755e-06 3.331812e-06 3.372444e-06 3.331812e-06 4.347608e-06 3.392760e-06 3.453708e-06 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN a38
20 4.875822e-07 5.078982e-07 4.063185e-07 4.266345e-07 4.266345e-07 6.297937e-07 4.875822e-07 6.501096e-07 3.656867e-07 3.453708e-07 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN b17.1
25 7.110574e-07 7.110574e-07 1.015796e-07 9.548485e-07 1.422115e-07 3.047389e-07 2.031593e-07 2.641070e-07 5.891619e-07 1.828433e-07 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN b51

3 rows × 509 columns

Pronóstico

Una vez eliminada la tendencia se puede comprobar, mediante la prueba de Dickey-Fuller, si las series de tiempo para cada enfermedad son estacionarias.

from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries, w, name):
    """Plot rolling statistics of *timeseries* and run a Dickey-Fuller test.

    Plots the series with its rolling mean and rolling standard deviation
    over a window of *w* observations, titled *name*, then prints the
    augmented Dickey-Fuller test output (the null hypothesis is that the
    series has a unit root, i.e. is non-stationary).
    """
    # Rolling statistics over a window of w observations.
    rolmean = timeseries.rolling(w).mean()
    rolstd = timeseries.rolling(w).std()

    # Plot rolling statistics (plt.plot's return values were unused, so
    # the calls are no longer assigned to locals).
    plt.plot(timeseries,label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label = 'Rolling Std')
    plt.legend(loc='best')
    plt.title(name)
    plt.show(block=False)

    # Perform Dickey-Fuller test and print its statistics.
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print(dfoutput)
    
# Run the stationarity check on every series: columns 0..507 hold the
# weekly values and the final column is the CIE code (used as the title).
for row in range(len(ciesTS)):
    series = ciesTS.iloc[row, : 508]
    series.dropna(inplace=True)
    test_stationarity(series, 52, ciesTS.iloc[row, -1])

png

Results of Dickey-Fuller Test:
Test Statistic                -1.356054e+01
p-value                        2.314720e-25
#Lags Used                     0.000000e+00
Number of Observations Used    4.300000e+02
Critical Value (1%)           -3.445649e+00
Critical Value (5%)           -2.868285e+00
Critical Value (10%)          -2.570363e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -3.424746
p-value                          0.010144
#Lags Used                       2.000000
Number of Observations Used    348.000000
Critical Value (1%)             -3.449282
Critical Value (5%)             -2.869881
Critical Value (10%)            -2.571214
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -5.719534e+00
p-value                        6.993272e-07
#Lags Used                     2.000000e+00
Number of Observations Used    4.150000e+02
Critical Value (1%)           -3.446206e+00
Critical Value (5%)           -2.868530e+00
Critical Value (10%)          -2.570493e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -2.106851
p-value                          0.241769
#Lags Used                       7.000000
Number of Observations Used    461.000000
Critical Value (1%)             -3.444615
Critical Value (5%)             -2.867830
Critical Value (10%)            -2.570120
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -3.760647
p-value                          0.003337
#Lags Used                       4.000000
Number of Observations Used    476.000000
Critical Value (1%)             -3.444163
Critical Value (5%)             -2.867631
Critical Value (10%)            -2.570014
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -2.133072
p-value                          0.231426
#Lags Used                      10.000000
Number of Observations Used    461.000000
Critical Value (1%)             -3.444615
Critical Value (5%)             -2.867830
Critical Value (10%)            -2.570120
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -8.720275e+00
p-value                        3.400303e-14
#Lags Used                     3.000000e+00
Number of Observations Used    4.700000e+02
Critical Value (1%)           -3.444340e+00
Critical Value (5%)           -2.867709e+00
Critical Value (10%)          -2.570056e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -4.078834
p-value                          0.001050
#Lags Used                       5.000000
Number of Observations Used    435.000000
Critical Value (1%)             -3.445473
Critical Value (5%)             -2.868207
Critical Value (10%)            -2.570321
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -1.063365e+01
p-value                        5.137862e-19
#Lags Used                     2.000000e+00
Number of Observations Used    4.810000e+02
Critical Value (1%)           -3.444018e+00
Critical Value (5%)           -2.867568e+00
Critical Value (10%)          -2.569980e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                 -21.517435
p-value                          0.000000
#Lags Used                       0.000000
Number of Observations Used    463.000000
Critical Value (1%)             -3.444553
Critical Value (5%)             -2.867803
Critical Value (10%)            -2.570106
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -1.468685e+01
p-value                        3.093388e-27
#Lags Used                     1.000000e+00
Number of Observations Used    5.060000e+02
Critical Value (1%)           -3.443340e+00
Critical Value (5%)           -2.867269e+00
Critical Value (10%)          -2.569821e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -3.809081
p-value                          0.002816
#Lags Used                      11.000000
Number of Observations Used    445.000000
Critical Value (1%)             -3.445131
Critical Value (5%)             -2.868057
Critical Value (10%)            -2.570241
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -5.053787
p-value                          0.000017
#Lags Used                       4.000000
Number of Observations Used    468.000000
Critical Value (1%)             -3.444400
Critical Value (5%)             -2.867736
Critical Value (10%)            -2.570070
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                 -20.908062
p-value                          0.000000
#Lags Used                       0.000000
Number of Observations Used    449.000000
Critical Value (1%)             -3.444998
Critical Value (5%)             -2.867999
Critical Value (10%)            -2.570210
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -2.973026
p-value                          0.037491
#Lags Used                      11.000000
Number of Observations Used    447.000000
Critical Value (1%)             -3.445064
Critical Value (5%)             -2.868028
Critical Value (10%)            -2.570226
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -5.304842
p-value                          0.000005
#Lags Used                      12.000000
Number of Observations Used    488.000000
Critical Value (1%)             -3.443821
Critical Value (5%)             -2.867481
Critical Value (10%)            -2.569934
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -6.501620e+00
p-value                        1.158034e-08
#Lags Used                     9.000000e+00
Number of Observations Used    4.630000e+02
Critical Value (1%)           -3.444553e+00
Critical Value (5%)           -2.867803e+00
Critical Value (10%)          -2.570106e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -1.996208
p-value                          0.288238
#Lags Used                      11.000000
Number of Observations Used    433.000000
Critical Value (1%)             -3.445543
Critical Value (5%)             -2.868238
Critical Value (10%)            -2.570338
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -3.403355
p-value                          0.010845
#Lags Used                       8.000000
Number of Observations Used    376.000000
Critical Value (1%)             -3.447862
Critical Value (5%)             -2.869258
Critical Value (10%)            -2.570881
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -3.818212
p-value                          0.002726
#Lags Used                       6.000000
Number of Observations Used    433.000000
Critical Value (1%)             -3.445543
Critical Value (5%)             -2.868238
Critical Value (10%)            -2.570338
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -9.927614e+00
p-value                        2.878961e-17
#Lags Used                     1.000000e+00
Number of Observations Used    3.980000e+02
Critical Value (1%)           -3.446888e+00
Critical Value (5%)           -2.868829e+00
Critical Value (10%)          -2.570653e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -7.628780e+00
p-value                        2.034981e-11
#Lags Used                     2.000000e+00
Number of Observations Used    4.000000e+02
Critical Value (1%)           -3.446804e+00
Critical Value (5%)           -2.868793e+00
Critical Value (10%)          -2.570634e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -1.383949
p-value                          0.589947
#Lags Used                      14.000000
Number of Observations Used    408.000000
Critical Value (1%)             -3.446480
Critical Value (5%)             -2.868650
Critical Value (10%)            -2.570557
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -6.469065e+00
p-value                        1.381678e-08
#Lags Used                     1.000000e+00
Number of Observations Used    3.080000e+02
Critical Value (1%)           -3.451761e+00
Critical Value (5%)           -2.870970e+00
Critical Value (10%)          -2.571794e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -1.049502e+01
p-value                        1.121338e-18
#Lags Used                     1.000000e+00
Number of Observations Used    4.900000e+02
Critical Value (1%)           -3.443766e+00
Critical Value (5%)           -2.867457e+00
Critical Value (10%)          -2.569921e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -3.050080
p-value                          0.030475
#Lags Used                       8.000000
Number of Observations Used    471.000000
Critical Value (1%)             -3.444310
Critical Value (5%)             -2.867696
Critical Value (10%)            -2.570049
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -4.194474
p-value                          0.000673
#Lags Used                       8.000000
Number of Observations Used    435.000000
Critical Value (1%)             -3.445473
Critical Value (5%)             -2.868207
Critical Value (10%)            -2.570321
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -5.300617
p-value                          0.000005
#Lags Used                      17.000000
Number of Observations Used    448.000000
Critical Value (1%)             -3.445031
Critical Value (5%)             -2.868013
Critical Value (10%)            -2.570218
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -5.196448
p-value                          0.000009
#Lags Used                       0.000000
Number of Observations Used    422.000000
Critical Value (1%)             -3.445941
Critical Value (5%)             -2.868413
Critical Value (10%)            -2.570431
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -6.052569e+00
p-value                        1.267267e-07
#Lags Used                     1.300000e+01
Number of Observations Used    4.740000e+02
Critical Value (1%)           -3.444221e+00
Critical Value (5%)           -2.867657e+00
Critical Value (10%)          -2.570028e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -1.053776e+01
p-value                        8.810400e-19
#Lags Used                     2.000000e+00
Number of Observations Used    4.370000e+02
Critical Value (1%)           -3.445403e+00
Critical Value (5%)           -2.868177e+00
Critical Value (10%)          -2.570305e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -5.316712
p-value                          0.000005
#Lags Used                      14.000000
Number of Observations Used    451.000000
Critical Value (1%)             -3.444933
Critical Value (5%)             -2.867970
Critical Value (10%)            -2.570195
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -4.113528
p-value                          0.000920
#Lags Used                       4.000000
Number of Observations Used    269.000000
Critical Value (1%)             -3.454896
Critical Value (5%)             -2.872345
Critical Value (10%)            -2.572528
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -4.874372
p-value                          0.000039
#Lags Used                      11.000000
Number of Observations Used    452.000000
Critical Value (1%)             -3.444900
Critical Value (5%)             -2.867956
Critical Value (10%)            -2.570187
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -5.375052
p-value                          0.000004
#Lags Used                       8.000000
Number of Observations Used    474.000000
Critical Value (1%)             -3.444221
Critical Value (5%)             -2.867657
Critical Value (10%)            -2.570028
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -5.342202
p-value                          0.000004
#Lags Used                       5.000000
Number of Observations Used    458.000000
Critical Value (1%)             -3.444709
Critical Value (5%)             -2.867871
Critical Value (10%)            -2.570142
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                  -1.813468
p-value                          0.373796
#Lags Used                       2.000000
Number of Observations Used    452.000000
Critical Value (1%)             -3.444900
Critical Value (5%)             -2.867956
Critical Value (10%)            -2.570187
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -6.673468e+00
p-value                        4.526341e-09
#Lags Used                     1.100000e+01
Number of Observations Used    4.420000e+02
Critical Value (1%)           -3.445232e+00
Critical Value (5%)           -2.868101e+00
Critical Value (10%)          -2.570265e+00
dtype: float64

png

Results of Dickey-Fuller Test:
Test Statistic                -6.644475e+00
p-value                        5.308127e-09
#Lags Used                     2.000000e+00
Number of Observations Used    4.030000e+02
Critical Value (1%)           -3.446681e+00
Critical Value (5%)           -2.868739e+00
Critical Value (10%)          -2.570605e+00
dtype: float64

Por ello es posible pronosticarlas con el método de Holt-Winter

# https://www.analyticsvidhya.com/blog/2018/02/time-series-forecasting-methods/
# https://machinelearningmastery.com/time-series-forecasting-methods-in-python-cheat-sheet/
# https://towardsdatascience.com/time-series-in-python-exponential-smoothing-and-arima-processes-2c67f2a52788

from statsmodels.tsa.api import Holt, SimpleExpSmoothing, ExponentialSmoothing

pronosticos = []  # one-step-ahead forecast per disease series
real = []         # last observed (actual) value per disease series
for i in range(len(ciesTS)):
    # Columns 0..507 hold the weekly values; column 508 holds the CIE label.
    temp = ciesTS.iloc[i, : 508]
    temp.dropna(inplace=True)

    # https://medium.com/datadriveninvestor/how-to-build-exponential-smoothing-models-using-python-simple-exponential-smoothing-holt-and-da371189e1a1
    # Train = 0.7
    train = round(0.7 * len(temp))
    f = ExponentialSmoothing(np.asarray(temp.iloc[0:train])).fit(smoothing_level = 0.1)
    # https://stackoverflow.com/a/50786171
    # NOTE(review): the original also computed f.predict(...) here, but its
    # only consumer was a commented-out plot line, so it was dead code.
    fcast = f.forecast(len(temp) - train)
    plt.figure(figsize=(12, 4))
    plt.plot(temp)
    plt.plot(f.fittedvalues, c='black')
    plt.plot(range(train, len(temp)), fcast, c='red')
    plt.title(ciesTS.iloc[i, 508])
    plt.legend(["Serie de tiempo", "Ajuste de Holt-Winter", "Pronóstico de Holt-Winter"])
    plt.show()
    
    
    # https://stackoverflow.com/a/15863028
    # Keep the actual last value and the first forecast step for the
    # goodness-of-fit regression performed in a later cell.
    real.append(temp.iloc[-1])
    pronosticos.append(fcast[0])

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

El pronóstico presenta muy buen ajuste respecto a los datos reales

# Regress the forecasts on the actual last observed values: a slope near 1,
# an intercept near 0 and a high R^2 indicate a good forecast fit.
a, b, r, p, e = stats.linregress(real, pronosticos)
print("y = f(x) = {:.4f} x + {:.4f}".format(a, b))
print("error", e)
print("p = ", p)
print("pendiente {:s}significativa".format("no " if p >= 0.05 else ""))
print("R^2", r**2)

# Fix: the original labels were copy-pasted from the logarithmic-scale cell
# below; this scatter is on a linear scale.
plt.title('Pronóstico', fontsize = 20)
plt.xlabel('Último caso normalizado')
plt.ylabel('Pronóstico')
plt.scatter(real, pronosticos)
plt.show()
y = f(x) = 1.1122 x + 0.0000
error 0.019116083562680818
p =  5.559074078899032e-38
pendiente significativa
R^2 0.9891871972161727

png

Lo que se evidencia claramente al utilizar escala logarítmica en los datos

# Log-log scatter of forecast vs. actual. Drop non-positive pairs first:
# log(0) is undefined and the original code emitted a
# "divide by zero encountered in log" RuntimeWarning.
pares = [(r_, p_) for r_, p_ in zip(real, pronosticos) if r_ > 0 and p_ > 0]
plt.title('Logaritmo del pronóstico', fontsize = 20)
plt.xlabel('Logaritmo del último caso normalizado')
# Fix: this axis shows the log of the forecast, not the raw forecast.
plt.ylabel('Logaritmo del pronóstico')
plt.scatter(np.log([r_ for r_, p_ in pares]), np.log([p_ for r_, p_ in pares]))
plt.show()
C:\Users\bena8\AppData\Local\Programs\Python\Python37-32\lib\site-packages\ipykernel_launcher.py:4: RuntimeWarning: divide by zero encountered in log
  after removing the cwd from sys.path.

png

# https://machinelearningmastery.com/time-series-forecast-uncertainty-using-confidence-intervals-python/
# https://machinelearningmastery.com/make-sample-forecasts-arima-python/
from statsmodels.tsa.arima_model import ARIMA

def difference(dataset, interval=1):
    """Return the lag-`interval` differenced series of `dataset`.

    Element i of the result equals dataset[i + interval] - dataset[i],
    so the output has len(dataset) - interval entries.
    """
    deltas = [dataset[k] - dataset[k - interval]
              for k in range(interval, len(dataset))]
    return pd.Series(deltas)

for idx in range(len(ciesTS)):
    # Weekly observations live in columns 0..507 of this row.
    serie = ciesTS.iloc[idx, : 508]
    serie.dropna(inplace=True)

    corte = round(0.9 * len(serie))
    # Difference the series at a lag equal to the forecast horizon minus one.
    diferenciada = difference(np.asarray(serie), interval=len(serie) - corte - 1)

    # ARIMA(3, 1, 0) without a trend constant, quiet fit.
    ajuste = ARIMA(diferenciada, order=(3, 1, 0)).fit(trend = 'nc', disp = 0)
    pronostico = ajuste.predict(start=corte + 1, end=len(serie))

    # First difference of the raw series, shown for visual comparison.
    lag1 = serie - serie.shift()

    plt.figure(figsize=(16, 4))
    plt.plot(lag1)
    plt.plot(ajuste.fittedvalues, color='red')
    plt.plot(range(corte, len(serie)), pronostico, c='black')
    plt.legend(['Diferencia', 'Valores ajustados de ARIMA', 'Pronóstico de ARIMA'])
    plt.show()

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png

png