# -*- coding: utf-8 -*-

import logging

from lncrawl.core.crawler import Crawler

logger = logging.getLogger(__name__)


class MyDramaNovel(Crawler):
    """Crawler source for novels published on mydramanovel.com."""

    base_url = ["https://mydramanovel.com/"]
    has_manga = False
    has_mtl = True

    # Text that introduces the author's name inside the synopsis, and the
    # blank-line separator that ends that field.
    _AUTHOR_MARKER = "\n\nAuthor:"
    _FIELD_SEPARATOR = "\n\n"

    # The newest chapters are rendered as image cards (``a.td-image-wrap``)
    # rather than as regular loop modules; only this many leading cards are
    # treated as chapters.
    _PREVIEW_CHAPTER_COUNT = 5

    def read_novel_info(self):
        """Populate title, cover, synopsis, author, volume and chapter list.

        Reads the page at ``self.novel_url`` and fills ``self.novel_synopsis``,
        ``self.novel_cover``, ``self.novel_title``, ``self.novel_author``,
        ``self.volumes`` and ``self.chapters``.

        Raises:
            AttributeError: if the page has no ``h1.tdb-title-text`` heading.
        """
        soup = self.get_soup(self.novel_url)

        # Synopsis is optional: fall back to an empty string instead of
        # handing ``None`` to the cleaner.
        synopsis_tag = soup.find("div", {"class": "tagdiv-type"})
        if synopsis_tag is not None:
            self.novel_synopsis = self.cleaner.extract_contents(synopsis_tag)
        else:
            self.novel_synopsis = ""

        # Cover is optional: when the thumbnail span (or its data attribute)
        # is missing, keep whatever value the attribute already holds.
        cover_tag = soup.find("span", {"class": "entry-thumb"})
        cover_src = cover_tag.get("data-img-url") if cover_tag is not None else None
        if cover_src:
            self.novel_cover = self.absolute_url(cover_src)
        else:
            logger.debug("No cover image found on %s", self.novel_url)

        # The title is mandatory. Raise the same exception type the unguarded
        # ``.text`` lookup used to raise, but with a readable message.
        title_tag = soup.find("h1", {"class": "tdb-title-text"})
        if title_tag is None:
            raise AttributeError("No novel title found on %s" % self.novel_url)
        self.novel_title = title_tag.text.strip()

        # the synopsis may start like this :
        # "\n\nOriginal Title: 春花厌\n\nAuthor: Hei Yan\n\nRaw Link : Chun Hua Yan\n\nMal..."
        self.novel_author = self._extract_author(self.novel_synopsis)

        # The site exposes no volume structure, so everything goes into a
        # single volume named after the novel.
        self.volumes.append(
            {
                "id": 0,
                "title": self.novel_title,
            }
        )

        # Guards against listing the same chapter twice when a link shows up
        # both as a preview card and as a regular loop module.
        seen_urls = set()

        # the first five chapters are the first five a.td-image-wrap
        # The rest are normal divs
        preview_links = soup.select("a.td-image-wrap")[: self._PREVIEW_CHAPTER_COUNT]
        for link in preview_links:
            self._add_chapter(link.get("href"), link.get("title"), seen_urls)

        for module in soup.select(
            "div.tdb_module_loop.td_module_wrap.td-animation-stack.td-cpt-post"
        ):
            heading_link = module.select_one("h3.entry-title a")
            if heading_link is None:
                continue
            self._add_chapter(heading_link.get("href"), heading_link.text, seen_urls)

    @classmethod
    def _extract_author(cls, synopsis):
        """Return the author named in ``synopsis``, or ``None`` if absent.

        The name is the text between the first ``"\\n\\nAuthor:"`` marker and
        the next blank line, with surrounding whitespace removed.
        """
        _, marker, tail = synopsis.partition(cls._AUTHOR_MARKER)
        if not marker:
            return None
        return tail.split(cls._FIELD_SEPARATOR, 1)[0].strip() or None

    def _add_chapter(self, href, title, seen_urls):
        """Append one chapter to ``self.chapters`` unless it is unusable.

        Entries without an ``href`` and entries whose absolute URL is already
        in ``seen_urls`` are skipped. ``seen_urls`` is updated in place.
        """
        if not href:
            return
        url = self.absolute_url(href)
        if url in seen_urls:
            return
        seen_urls.add(url)

        chapter_id = len(self.chapters) + 1
        clean_title = title.strip() if isinstance(title, str) else ""
        self.chapters.append(
            {
                "id": chapter_id,
                "volume": 0,
                "url": url,
                # Fall back to a generated name when the link has no title.
                "title": clean_title or "Chapter %d" % chapter_id,
            }
        )

    def download_chapter_body(self, chapter):
        """Fetch ``chapter["url"]`` and return its cleaned body content.

        The body is taken from the page's ``div.tagdiv-type`` element and
        passed through ``self.cleaner.extract_contents``.
        """
        soup = self.get_soup(chapter["url"])

        content = soup.find("div", {"class": "tagdiv-type"})
        return self.cleaner.extract_contents(content)