Skip to content

Commit

Permalink
Use html5 parser for better compatibility with MacWork; decompose str…
Browse files Browse the repository at this point in the history
…ipped tags instead of extracting them
  • Loading branch information
rdmark committed Dec 29, 2021
1 parent 4f08648 commit 2cb84a9
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions html_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from bs4 import BeautifulSoup

def transcode_html(html):
"""
Uses BeautifulSoup to transcode payloads with the text/html content type
"""
"""
Uses BeautifulSoup to transcode payloads with the text/html content type
"""
soup = BeautifulSoup(html, features="html.parser")
for tag in soup("base"):
tag["href"] = tag["href"].replace("https://", "http://")
Expand All @@ -19,8 +19,8 @@ def transcode_html(html):
except:
pass
for tag in soup(["script", "link", "style", "noscript"]):
tag.extract()
tag.decompose()
for tag in soup():
for attr in ["style", "onclick"]:
del tag[attr]
return str(soup)
return soup.prettify(formatter="html5")

0 comments on commit 2cb84a9

Please sign in to comment.