Skip to content

Commit

Permalink
Manage AttributeError explicitly
Browse files: browse the repository at this point in the history
  • Loading branch information
ninadicara committed Aug 24, 2021
1 parent 3cfb644 commit 907c523
Showing 1 changed file with 17 additions and 12 deletions.
29 changes: 17 additions & 12 deletions backend/data_collection/phw_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,14 @@ def get_url(self):
# Parse the content using Beautiful Soup
soup = BeautifulSoup(response.content, "html.parser")

# Find the element with the title that we need, and get the parent row (<tr>)
element = soup.find(title=self.dataset).find_parent("tr")
# Extract the URL from the row element (it is the first href in the row.)
url = "http://www2.nphs.wales.nhs.uk:8080" + element.find("a").get("href")
try:
# Find the element with the title that we need, and get the parent row (<tr>)
element = soup.find(title=self.dataset).find_parent("tr")
# Extract the URL from the row element (it is the first href in the row.)
url = "http://www2.nphs.wales.nhs.uk:8080" + element.find("a").get("href")
except AttributeError:
# If the element can't be found then return url as None.
url = None

return url

Expand All @@ -66,11 +70,12 @@ def save_data(self):
except Exception as e:
raise e

r = requests.get(url, allow_redirects=True)

# Save in native xlsx format
output = open(self.path, "wb")
output.write(r.content)
output.close()

logger.info("Message (phwScraper): Scraped {}".format(self.dataset))
if url is not None:
r = requests.get(url, allow_redirects=True)
# Save in native xlsx format
output = open(self.path, "wb")
output.write(r.content)
output.close()
logger.info("Successfully saved {}".format(self.dataset))
elif url is None:
logger.error("Could not get URL to download {}".format(self.dataset))

0 comments on commit 907c523

Please sign in to comment.