forked from nds-org/jupyter-cinergi
-
Notifications
You must be signed in to change notification settings - Fork 6
/
helpers.py
180 lines (149 loc) · 6.2 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.dates as mdates
import urllib3.request, json
from datetime import datetime, date, time
from ipywidgets import widgets
from IPython import get_ipython
from matplotlib import pylab
from pylab import *
from IPython.display import clear_output
def plotTimeSeriesData(data,col_name_to_plot,graphKeyVals):
    """Plot one column from each data frame against its 'date_time' column.

    Args:
        data: Sequence of frames; each must provide a 'date_time' column and
            the column named by ``col_name_to_plot``.
        col_name_to_plot: Column drawn on the y axis; also used as the y label.
            Values that cannot be parsed as numbers are coerced to NaN.
        graphKeyVals: Legend entries, indexed in parallel with ``data``.
    """
    for idx, frame in enumerate(data):
        times = frame['date_time']
        readings = pd.to_numeric(frame[col_name_to_plot], errors='coerce')
        plt.plot(times, readings, label=str(graphKeyVals[idx]))

    # One major tick per month, labelled with the month number.
    x_axis = plt.gca().xaxis
    x_axis.set_major_formatter(mdates.DateFormatter('%m'))
    x_axis.set_major_locator(mdates.MonthLocator())

    # Font size of the tick labels.
    for tick_group in ('xtick', 'ytick'):
        plt.rc(tick_group, labelsize=15)

    # Tick geometry.
    plt.tick_params(direction='out', length=10, width=2)

    # Axis titles and legend.
    plt.xlabel('Time', fontsize=18)
    plt.ylabel(col_name_to_plot, fontsize=18)
    plt.legend(loc="best", prop={'size': 20})
    plt.show()
def simplify_column_names(df):
    """Flatten two-level (name, unit) column labels into plain labels.

    Two situations are handled:

    * The second header level is really the first data row (its first entry
      parses as an integer). That row is pushed back into the data as row 0
      and the first header level becomes the column names. This is done on a
      copy, so the frame passed in is left untouched.
    * Otherwise every tuple label ``(name, unit)`` becomes ``"name (unit)"``.
      The columns of the frame passed in are reassigned in place and the same
      frame is returned.

    Labels that are not tuples are kept as they are.

    Args:
        df: Frame whose columns are to be simplified.

    Returns:
        The frame with single-level column labels.
    """
    labels = list(df.columns.values)
    first = labels[0] if labels else None

    # Only genuine two-level labels can carry a misplaced data row; probing
    # plain strings would index into the characters of the name.
    second_level_is_data = False
    if isinstance(first, tuple) and len(first) > 1:
        try:
            int(first[1])
        except (TypeError, ValueError):
            pass
        else:
            second_level_is_data = True

    if second_level_is_data:
        df = df.copy()  # do not grow the caller's frame
        df.loc[-1] = [label[1] for label in labels]
        df.index = df.index + 1  # shift so the restored row gets index 0
        df = df.sort_index()
        df.columns = [label[0] for label in labels]

    new_header = []
    for label in df.columns.values:
        if isinstance(label, tuple):
            new_header.append('%s (%s)' % (label[0], label[1]))
        else:
            new_header.append(label)
    df.columns = new_header
    return df
def createDateTime(df):
    """Append a 'date_time' column built from the leading date/time columns.

    Columns whose first three characters contain one of 'YY', 'MM', 'DD',
    'hh' or 'mm' are counted (each distinct prefix once). That many leading
    columns are renamed, in order, to 'year', 'month', 'day', 'hour',
    'minute'; the rename is applied to the frame passed in. The renamed
    columns are then combined with ``pandas.to_datetime``.

    Years below 100 are treated as two-digit years and get 1900 added, row by
    row. The year column kept in the returned frame is not altered.

    Finally the string sentinels '99.0', '999.0', '99.00', '999' and '9999.0'
    are replaced by NaN.

    Args:
        df: Frame whose first columns hold the date/time parts.

    Returns:
        A new frame: the input columns plus 'date_time'.
    """
    matchers = ['YY', 'MM', 'DD', 'hh', 'mm']
    datetimeref = ['year', 'month', 'day', 'hour', 'minute']

    prefixes = []
    for name in df.columns.values:
        prefix = name[0:3]
        if prefix not in prefixes and any(tag in prefix for tag in matchers):
            prefixes.append(prefix)
    n_parts = len(prefixes)

    # Assign a fresh label list instead of writing into Index.values, which
    # pandas treats as immutable (and may expose as a read-only array).
    renamed = list(df.columns)
    renamed[:n_parts] = datetimeref[:n_parts]
    df.columns = renamed

    # Work on an explicit copy so the year fix-up never touches df itself.
    parts = df.iloc[:, :n_parts].copy()
    years = pd.to_numeric(parts['year'])
    parts['year'] = years.where(years >= 100, years + 1900)

    stamps = pd.DataFrame({'date_time': pd.to_datetime(parts)})
    df = pd.concat([df, stamps], axis=1)
    df = df.replace(['99.0', '999.0', '99.00', '999', '9999.0'], np.nan)
    return df
def create_dropdown_data(cinergi_url):
    """Fetch a JSON record and build dropdown entries from its links.

    The document at ``cinergi_url`` is parsed as JSON and
    ``["_source"]["links_s"]`` is read from it. If that value is a single
    string it becomes the only entry, numbered 0. Otherwise every element
    whose string form contains "filename" becomes an entry, numbered
    consecutively from 0.

    Args:
        cinergi_url: URL of the JSON record.

    Returns:
        dict mapping "<n>. <link>" labels to the link itself.

    Raises:
        KeyError: If the record lacks "_source" or "links_s".
    """
    # The module-level imports bind urllib3, not urllib, so import it here.
    import urllib.request

    with urllib.request.urlopen(cinergi_url) as response:
        record = json.loads(response.read().decode())
    links = record["_source"]["links_s"]

    if isinstance(links, str):
        return {'{0}. {1}'.format(0, links): links}

    data_url = {}
    n = 0
    for link in links:
        if "filename" in str(link):
            data_url['{0}. {1}'.format(n, link)] = link
            n += 1
    return data_url
def create_data_from_urls(data_urls):
    """Read each link as a whitespace-delimited table with a two-row header.

    Links that cannot be read are reported on stdout and skipped, so the
    returned list may be shorter than ``data_urls``.

    Args:
        data_urls: Iterable of paths or URLs accepted by ``pandas.read_csv``.

    Returns:
        list of DataFrames, one per link that was read successfully, in
        input order. The values '99.0', '999.0', '99.00', '999' and '9999.0'
        are parsed as NaN.
    """
    url_data = []
    for link in data_urls:
        print('Data url %s' % link)
        try:
            # sep=r'\s+' is the supported spelling of the deprecated
            # delim_whitespace=True.
            frame = pd.read_csv(link, sep=r'\s+', header=[0, 1],
                                na_values=['99.0', '999.0', '99.00', '999', '9999.0'],
                                compression='infer')
        except Exception:
            # Best effort: skip unreadable links, but let KeyboardInterrupt
            # and SystemExit propagate.
            print('Link not a csv datafile: %s' % link)
        else:
            url_data.append(frame)
            print(' read file: %s' % link)
    return url_data
def create_data(cinergi_url):
    """Fetch a JSON record and load the data files it links to.

    The document at ``cinergi_url`` is parsed as JSON and
    ``["_source"]["links_s"]`` is read from it. A single string is loaded
    directly; for a list, only elements whose string form contains
    "filename" are loaded. Each file is read as a whitespace-delimited table
    with a two-row header. Links that cannot be read are reported on stdout
    and skipped.

    Args:
        cinergi_url: URL of the JSON record.

    Returns:
        list of DataFrames for the links that were read successfully.

    Raises:
        KeyError: If the record lacks "_source" or "links_s".
    """
    # The module-level imports bind urllib3, not urllib, so import it here.
    import urllib.request

    def _read_table(link):
        # sep=r'\s+' is the supported spelling of delim_whitespace=True.
        return pd.read_csv(link, sep=r'\s+', header=[0, 1],
                           na_values=['99.0', '999.0', '99.00', '999', '9999.0'])

    with urllib.request.urlopen(cinergi_url) as response:
        record = json.loads(response.read().decode())
    links = record["_source"]["links_s"]

    url_data = []
    if isinstance(links, str):
        try:
            url_data.append(_read_table(links))
        except Exception:
            # Report the link that failed (the loop variable of the list
            # branch does not exist here).
            print('Link not a csv datafile: ' + links)
        return url_data

    for link in links:
        if "filename" not in str(link):
            continue
        try:
            print(link)
            url_data.append(_read_table(link))
            print(' read file: ' + str(link))
        except Exception:
            # Best effort: skip unreadable links, but let KeyboardInterrupt
            # and SystemExit propagate.
            print('Link not a csv datafile: ' + str(link))
    return url_data
# future. Some files are one line headers, others a multiple.
def skip_to(fle, line,**kwargs):
if os.stat(fle).st_size == 0:
raise ValueError("File is empty")
with open(fle) as f:
pos = 0
cur_line = f.readline()
while not cur_line.startswith(line):
pos = f.tell()
cur_line = f.readline()
f.seek(pos)
return pd.read_csv(f, **kwargs)
def create_dropdowns(data):
    """Show per-column value counts and return the plottable column names.

    For every frame a one-row table of non-null counts per column is
    displayed, captioned with the first value of the frame's 'year' column.
    A column is offered as an option when its count is non-zero in every
    frame. The date/time helper columns are never offered.

    Args:
        data: Iterable of DataFrames, each containing a 'year' column.

    Returns:
        list of column names, in the column order of the first frame.
        Empty when ``data`` is empty.
    """
    # display is not imported at module level.
    from IPython.display import display

    excluded = {"hour", "minute", "year", "day", "date_time", "month"}

    count_rows = []
    for frame in data:
        counts = pd.DataFrame(frame.count()).transpose()
        display(("%s 's data (number of variables in each column)" % (frame["year"][0])),
                counts.style)
        count_rows.append(counts)

    if not count_rows:
        return []

    # Columns holding at least one usable value, per frame.
    populated = [set(row.columns[row.any().to_numpy()]) for row in count_rows]

    # Walk the first frame's columns so the result order is stable between
    # runs (set intersection alone gives an arbitrary order).
    dropdown_options = []
    for name in dict.fromkeys(count_rows[0].columns):
        if name in excluded:
            continue
        if all(name in names for names in populated):
            dropdown_options.append(name)
    return dropdown_options