Merge pull request #2281 from camp00000/libread

freewebnovel add new mirror & remove self-promo
dipu-bd · Feb 25, 2024 · 84f301b · 84f301b
2 parents ffc8e06 + bbf31f7
commit 84f301b
Showing 1 changed file with 11 additions and 1 deletion.
diff --git a/sources/en/f/freewebnovel.py b/sources/en/f/freewebnovel.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 import unicodedata
+import re
 
 from bs4 import BeautifulSoup, Tag
 
@@ -14,7 +15,8 @@ class FreeWebNovelCrawler(SearchableSoupTemplate, ChapterOnlySoupTemplate):
         "https://bednovel.com/",
         "https://innread.com/",
         "https://innnovel.com/",
-        "https://libread.com/"
+        "https://libread.com/",
+        "https://libread.org/",
     ]
 
     def initialize(self) -> None:
@@ -99,8 +101,16 @@ def normalize_text(self, text: str) -> str:
 
     def select_chapter_body(self, soup: BeautifulSoup) -> Tag:
         body_tag = soup.select_one(".m-read .txt")
+        # The page may carry a <style> rule (p:nth-last-child(N)) that hides a paragraph, usually the last one, which contains randomised self-promo text
+        has_promo = soup.find("style", text=re.compile("p:nth-last-child\\(\\d\\)"))
         if body_tag:
             normalized_body = self.normalize_text(str(body_tag))
             normalized_soup = BeautifulSoup(normalized_body, "html.parser")
+            if has_promo:
+                # Extract the index N from the CSS selector and manually remove the N-th <p> from the end via decompose()
+                idx = int(re.match(re.compile(".+p:nth-last-child\\((\\d)\\).+"), has_promo.text)[1])  # NOTE(review): re.match is anchored at the start and '.' does not cross newlines, so a no-match yields None[1] -> TypeError; confirm the <style> text always fits
+                random_self_promo = normalized_soup.find_all("p")[-idx]  # NOTE(review): indexes all <p> descendants, not sibling children as the CSS rule does; IndexError if fewer than idx paragraphs -- confirm
+                if isinstance(random_self_promo, Tag):
+                    random_self_promo.decompose()
             return normalized_soup
         return body_tag