-
Notifications
You must be signed in to change notification settings - Fork 2
/
maharashtra_panvel_parser.py
101 lines (91 loc) · 3.63 KB
/
maharashtra_panvel_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import requests
import logging
from bs4 import BeautifulSoup
from dateutil import parser
from hospital import Hospital, Resource, ResourceType
from generic_html_parser import HtmlHospitalParser
# Request a default root-logger configuration at INFO level. Per the logging
# docs this is a no-op when the root logger already has handlers attached.
logging.basicConfig(level=logging.INFO)

# Called with no name, getLogger() hands back the root logger; every log call
# in this module goes through it.
logger = logging.getLogger()
class PanvelParser(HtmlHospitalParser):
    """Scrape per-hospital bed availability from the Panvel COVID bed page.

    Intended use is the fluent chain
    ``PanvelParser().read_page().parse_hospitals()``; afterwards
    ``hospitals`` holds one ``Hospital`` per row that parsed successfully.
    """

    URL = "https://covidbedpanvel.in/HospitalInfo/showindex"
    hospital_district = 'Navi Mumbai'
    hospital_city = 'Panvel'
    hospital_state = 'Maharashtra'

    # Seconds to wait for the site before giving up. Without a timeout,
    # requests would block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Category labels exactly as (and in the order) each hospital row is
    # expected to list them. Numbers are paired with labels by position, so
    # any deviation means the page layout changed and the row is rejected.
    _EXPECTED_CATEGORIES = [
        'Capacity', 'Occupied', 'Vacant', 'ICU Vacant', 'Non ICU Vacant',
        'Ventilator Available'
    ]

    # Page label -> resource type. Only these labels are exported; the
    # remaining categories are read for validation but not published.
    _RESOURCE_MAPPER = {
        'ICU Vacant': ResourceType.ICUS,
        'Non ICU Vacant': ResourceType.BEDS,
        'Ventilator Available': ResourceType.ICU_WITH_VENTILATOR
    }

    def __init__(self):
        # NOTE(review): the base-class initializer is not invoked here, as in
        # the original code - confirm HtmlHospitalParser does not require it.
        self.page_soup = None  # parsed page, populated by read_page()
        self.hospitals = []  # Hospital objects, filled by parse_hospitals()

    def read_page(self):
        """Download ``URL`` and store the parsed HTML in ``page_soup``.

        Returns:
            This parser, so calls can be chained.

        Raises:
            requests.RequestException: on connection failure, timeout, or a
                4xx/5xx response.
        """
        response = requests.get(self.URL, timeout=self.REQUEST_TIMEOUT)
        # Fail here on an HTTP error instead of parsing an error page and
        # later misreporting it as a schema change.
        response.raise_for_status()
        self.page_soup = BeautifulSoup(response.text, 'html.parser')
        return self

    def parse_hospitals(self):
        """Parse every ``div.row`` of the loaded page into ``hospitals``.

        ``read_page()`` must have been called first. Rows that cannot be
        parsed are logged at WARNING level and skipped.

        Returns:
            This parser, so calls can be chained.

        Raises:
            Exception: if no row at all yielded a hospital.
        """
        hospital_containers = self.page_soup.find_all("div", {"class": "row"})
        for container_idx, hosp_cont in enumerate(hospital_containers):
            try:
                self.load_hospital_from_row(hosp_cont)
            except Exception as e:
                # Best effort by design: one bad row must not abort the
                # whole scrape.
                logger.warning(f'Cannot parse line {container_idx}: {e}')
        if not self.hospitals:
            raise Exception(
                f'Could not pick up any hospitals - schema likely changed for {self.URL}'
            )
        logger.info(
            f'Picked up a total of {len(self.hospitals)} hospitals in {self.hospital_district} - '
            f'{self.hospital_city}')
        return self

    def load_hospital_from_row(self, container):
        """Build a ``Hospital`` from one row element and append it to ``hospitals``.

        Args:
            container: the BeautifulSoup element for a single ``div.row``.

        Raises:
            Exception: if the row has no ``h4`` hospital name.
            ValueError: if the category labels differ from the expected
                layout, the labels and numbers cannot be paired up, a number
                is not an integer, or the "Updated" timestamp is missing.
            IndexError: if a number cell has no ``<b>`` element.
        """
        name_elem = container.find('h4')
        if name_elem is None:
            raise Exception('Cannot read name of hospital')
        hospital_name = name_elem.get_text()

        categories = [
            x.get_text()
            for x in container.find_all("div", {'class': 'text-white mb-0'})
        ]
        # Explicit raises rather than ``assert``: assertions are stripped
        # under ``python -O``, which would let a changed layout silently pair
        # numbers with the wrong labels.
        if categories != self._EXPECTED_CATEGORIES:
            raise ValueError("Cant read categories")

        numbers = [
            int(x.find_all('b')[0].get_text())
            for x in container.find_all("div", {'class': 'h1 m-0'})
        ]
        if len(categories) != len(numbers):
            raise ValueError(
                f"Unable to match up numbers and categories for {hospital_name} details"
            )

        resource_dict = dict(zip(categories, numbers))
        resources = [
            Resource(resource_type, '', resource_dict[resource_name])
            for resource_name, resource_type in self._RESOURCE_MAPPER.items()
        ]

        stamp_elem = container.find('span', {'class': 'pull-right'})
        if stamp_elem is None:
            raise ValueError(
                f'Cannot read last-updated time for {hospital_name}')
        # Keep only the text after "Updated :" and append a fixed +05:30
        # offset so the parsed datetime is timezone-aware.
        # NOTE(review): presumably the site reports Indian Standard Time -
        # confirm.
        time_stamp = stamp_elem.get_text().split('Updated :')[-1] + "+05:30"

        hospital = Hospital(
            name=hospital_name,
            resources=resources,
            district=self.hospital_district,
            city=self.hospital_city,
            url=self.URL,
            state=self.hospital_state,
            # dayfirst=True: read ambiguous dates as DD-MM rather than MM-DD.
            last_updated=parser.parse(time_stamp, dayfirst=True),
            debug_text=str(container),
        )
        self.hospitals.append(hospital)
        logger.info(
            f'Parsed hospital: {hospital.name} with {len(hospital.resources)} resources'
        )

    @staticmethod
    def export_hospital_data():
        """Fetch and parse the page, returning ``{URL: [Hospital, ...]}``."""
        panvel_parser = PanvelParser().read_page().parse_hospitals()
        return {panvel_parser.URL: panvel_parser.hospitals}
def get_hospital_data():
    """Module-level entry point for the Panvel scraper.

    Delegates to ``PanvelParser.export_hospital_data`` and returns its result
    unchanged.
    """
    hospital_data = PanvelParser.export_hospital_data()
    return hospital_data