Update 69shuba.cx.py
zGadli authored and dipu-bd committed Jul 20, 2024
1 parent 5b931a3 commit 8c8cef4
Showing 1 changed file with 2 additions and 8 deletions.
sources/zh/69shuba.cx.py (10 changes: 2 additions & 8 deletions)
@@ -37,7 +37,6 @@ class sixnineshu(Crawler):
     ]

     def initialize(self):
-        # the default lxml parser cannot handle the huge gbk encoded sites (fails after 4.3k chapters)
         self.init_parser("html.parser")
         self.init_executor(ratelimit=20)

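The comment removed in the hunk above explained why this source sticks with Python's built-in html.parser instead of lxml for the site's large GBK-encoded pages. As a rough standalone illustration of that choice (not the crawler's own code; the URL below is a placeholder assumption), parsing a GBK page with BeautifulSoup looks roughly like this:

# Standalone sketch: fetch a GBK-encoded page and parse it with html.parser,
# which the removed comment says holds up better than lxml on very large pages.
import requests
from bs4 import BeautifulSoup

resp = requests.get("https://www.69shuba.cx/book/43616/")  # placeholder URL, not taken from the diff
resp.encoding = "gbk"  # the site serves GBK rather than UTF-8
soup = BeautifulSoup(resp.text, "html.parser")  # swap in "lxml" here to compare behaviour
title = soup.title.get_text(strip=True) if soup.title else "(no <title>)"
print(title)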
@@ -49,7 +48,6 @@ def search_novel(self, query):
             headers=headers,
             data=data,
             encoding="gbk",
-            # cookies=self.cookies2,
         )

         results = []
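The search hunk passes encoding="gbk" along with the form headers and data, since the site expects GBK-encoded query strings. A minimal sketch of that kind of request with plain urllib and requests follows; the endpoint and the "searchkey" field name are assumptions for illustration, not taken from the diff:

# Standalone sketch: POST a search query as GBK-encoded form data.
# The endpoint URL and the "searchkey" field name are assumptions.
import urllib.parse
import requests

query = "诡秘之主"  # example search term
data = urllib.parse.urlencode({"searchkey": query}, encoding="gbk")
headers = {"Content-Type": "application/x-www-form-urlencoded"}
resp = requests.post("https://www.69shuba.cx/modules/article/search.php", data=data, headers=headers)
resp.encoding = "gbk"  # decode the response the same way
print(resp.status_code, len(resp.text))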
@@ -83,19 +81,15 @@ def read_novel_info(self):
         self.novel_author = possible_author.text.strip()
         logger.info("Novel Author: %s", self.novel_author)

-        # Only one category per novel on this website
         possible_tag = soup.select_one('div.booknav2 > p:nth-child(4) > a')
         if isinstance(possible_tag, Tag):
             self.novel_tags = [possible_tag.text.strip()]
             logger.info("Novel Tag: %s", self.novel_tags)

-        # https://www.69shuba.com/txt/A43616.htm -> https://www.69shuba.com/A43616/
-        # soup = self.get_soup(self.novel_url.replace("/txt/", "/").replace(".htm", "/"), encoding="gbk")

-        # manually correct their false chapter identifiers if need be
         chapter_catalog = self.get_soup(f'{self.novel_url[:-4]}/', encoding="gbk")
-        # logger.debug(chapter_catalog.select("div#catalog li"))

         chapter_list = chapter_catalog.select("div#catalog li")

         for item in reversed(chapter_list):
             chap_id = int(item["data-num"])
             vol_id = len(self.chapters) // 100 + 1

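The last hunk derives the chapter catalog URL by dropping the ".htm" suffix from the novel URL, then walks the div#catalog li entries in reverse, reading each item's data-num attribute as the chapter id and grouping chapters into volumes of 100. A rough standalone version of that logic, outside the Crawler base class (the example URL comes from the comment deleted above; the dict layout and the title/href extraction are assumptions):

# Standalone sketch of the catalog walk shown in the last hunk.
from bs4 import BeautifulSoup
import requests

novel_url = "https://www.69shuba.com/txt/A43616.htm"  # example URL from the removed comment
catalog_url = novel_url[:-4] + "/"  # same slice as f'{self.novel_url[:-4]}/' in the diff

resp = requests.get(catalog_url)
resp.encoding = "gbk"
soup = BeautifulSoup(resp.text, "html.parser")

chapters = []
for item in reversed(soup.select("div#catalog li")):  # reversed, as in the diff
    chap_id = int(item["data-num"])        # the site's own chapter number
    vol_id = len(chapters) // 100 + 1      # 100 chapters per synthetic volume
    link = item.select_one("a")
    chapters.append({
        "id": chap_id,
        "volume": vol_id,
        "title": link.get_text(strip=True) if link else "",
        "url": link["href"] if link else None,
    })
print(len(chapters), "chapters found")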