-
Notifications
You must be signed in to change notification settings - Fork 1
/
mohfw_handler.py
98 lines (81 loc) · 3.52 KB
/
mohfw_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Scrapes state-wise COVID-19 statistics for India from the MoHFW website.
import json
import logging as lg
from urllib import request, parse
from datetime import date, datetime
import requests
import pandas as pd
# Module-level logger named after this module, per logging convention.
log = lg.getLogger(__name__)
def mohfw_data_to_df():
    """Download the MoHFW India page and return its last HTML table.

    Returns:
        pandas.DataFrame holding the state-wise statistics table, or None
        if the HTTP request does not return status 200 (failure is logged).
    """
    url = 'https://www.mohfw.gov.in/index.html'
    # Browser-like User-Agent; presumably the site rejects default library
    # agents -- TODO(review): confirm this is still required.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
    # timeout added so a hung request cannot block the caller forever
    req = requests.get(url, headers=header, timeout=30)
    if req.status_code != 200:
        log.error("Could not read MoHFW website. Request status code = %s",
                  req.status_code)
        return None
    # The statistics table is the last <table> element on the page.
    table_list = pd.read_html(req.content)
    return table_list[-1]
def extract_clean_df(df):
    """Clean the raw MoHFW table and reorder its columns.

    Drops footer/summary rows (rows whose 'S. No.' is not numeric), strips
    trailing '#'/'*' footnote markers from state names, removes the added
    'Active' column, and moves the last column (total confirmed) to
    position 2.

    Args:
        df: raw DataFrame from mohfw_data_to_df(); expected layout is
            [S. No., Name of State / UT, Active, Cured, Deaths, Total]
            -- TODO(review): confirm against the live site.

    Returns:
        Cleaned, reordered copy of df (the input is not modified).
    """
    # Keep only data rows: footer rows have a non-numeric 'S. No.'.
    # .copy() makes this an independent frame -- the original mutated a
    # boolean-indexed slice, triggering SettingWithCopy problems.
    clean_df = df[pd.to_numeric(df['S. No.'], errors='coerce').notna()].copy()
    # State names carry trailing '#'/'*' footnote markers on the website.
    clean_df.iloc[:, 1] = clean_df.iloc[:, 1].str.rstrip('#*')
    # MOHFW table format changed - added active cases; drop that column.
    clean_df.drop(clean_df.columns[2], axis=1, inplace=True)
    cols = clean_df.columns.to_list()
    # [S.No, Name] + [last column (total)] + [cured, deaths]
    reorder = cols[:2] + [cols[-1]] + cols[2:4]
    return clean_df[reorder]
# def geocode(city):
# url = 'https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/findAddressCandidates'
# header = {
# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
# param_dict = {"f": "json", "singleLine": f"{city}, IND", "maxLocations": 2}
# params = parse.urlencode(param_dict).encode('UTF-8')
# req = request.Request(url, headers=header, data=params)
# try:
# response = request.urlopen(req)
# except Exception:
# log.error("Geocode request Failed", exc_info=True)
# else:
# # some other code? handle it here
# log.debug(f"Response code = {response.getcode()}")
# log.info("Adding Latitude and Longitude")
# if response.getcode() == 200:
# response_dict = json.load(response)
# if city == 'Andhra Pradesh':
# return (response_dict['candidates'][1]["location"]["x"], response_dict['candidates'][1]["location"]["y"])
# else:
# return (response_dict['candidates'][0]["location"]["x"], response_dict['candidates'][0]["location"]["y"])
def geocode(place):
    """Return (lon, lat) for an Indian state/UT from a local CSV lookup.

    Args:
        place: name matching the 'place' column of india_states_lon_lat.csv
            (read from the current working directory on every call).

    Returns:
        (longitude, latitude) tuple of floats.

    Raises:
        IndexError: if 'place' is not present in the CSV.
    """
    data = pd.read_csv('india_states_lon_lat.csv')
    # Select the first matching row explicitly: calling float() on a Series
    # is deprecated and raises when the filter matches more than one row.
    row = data.loc[data['place'] == place].iloc[0]
    return (float(row['lon']), float(row['lat']))
def add_lat_lon(df):
    """Add 'Lon' and 'Lat' columns to df by geocoding each state name.

    Note: df is modified in place (the original code aliased it rather than
    copying, so callers already observe the mutation); the same object is
    returned for convenience.

    Returns:
        df with the two added columns, or None if any geocode lookup failed
        (best-effort: the failure is logged, not raised).
    """
    try:
        # geocode yields (lon, lat); zip(*...) transposes into two columns.
        df['Lon'], df['Lat'] = zip(*(df['Name of State / UT'].map(geocode)))
    except Exception:
        log.error("adding lat & lon failed", exc_info=True)
        return None
    return df
# TODO
# Melting data -> should happen in the main file
def get_mohfw_stats(df):
    """Compute nationwide totals from a cleaned MoHFW DataFrame.

    Args:
        df: cleaned DataFrame with column order [S. No., Name, Total,
            Cured, Deaths] as produced by extract_clean_df, or None.

    Returns:
        {"in_stats": {"cases": ..., "deaths": ..., "recovered": ...}};
        all zeros when df is None or empty.
    """
    # Guard: mohfw_data_to_df() returns None on HTTP failure, which used
    # to crash here with an AttributeError.
    if df is None or df.empty:
        return {"in_stats": {'cases': 0, 'deaths': 0, 'recovered': 0}}
    cases_sum = pd.to_numeric(df.iloc[:, 2]).sum()      # column 2: total confirmed
    recovered_sum = pd.to_numeric(df.iloc[:, 3]).sum()  # column 3: cured/recovered
    deaths_sum = pd.to_numeric(df.iloc[:, 4]).sum()     # column 4: deaths
    return {"in_stats": {'cases': cases_sum, 'deaths': deaths_sum, 'recovered': recovered_sum}}