Skip to content

Commit

Permalink
Merge pull request #2243 from NilanEkanayake/dev
Browse files Browse the repository at this point in the history
Fix Syosetu and Fanstrans
  • Loading branch information
dipu-bd authored Feb 12, 2024
2 parents a35ee6e + 79c47f4 commit 6eac1c0
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 19 deletions.
3 changes: 3 additions & 0 deletions sources/en/f/fanstrans.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ def initialize(self) -> None:
r"^Get on Patreon",
r"^Check out other novels on Fan’s Translation~",
r"^to get Notification for latest Chapter Releases",
r"^Can’t wait to read more? Want to show your support? Click",
r"^to be a sponsor and get additional chapters ahead of time!",
]
)
self.cleaner.bad_tags.update(["a"])
Expand All @@ -36,6 +38,7 @@ class FansTranslations(Crawler):

def initialize(self) -> None:
    """Prepare the crawler before it is used.

    Registers ``h3`` with the cleaner's ``bad_tags`` collection and then
    calls ``init_executor(4)``.
    """
    unwanted_tags = ["h3"]
    self.cleaner.bad_tags.update(unwanted_tags)
    # NOTE(review): 4 is presumably the number of parallel workers for
    # this source -- confirm against the definition of init_executor.
    self.init_executor(4)

def search_novel(self, query):
query = query.lower().replace(" ", "+")
Expand Down
50 changes: 31 additions & 19 deletions sources/jp/s/syosetu.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ class SyosetuCrawler(Crawler):
has_mtl = True
base_url = "https://ncode.syosetu.com/"

def initialize(self) -> None:
    """Prepare the crawler by calling ``init_executor(2)``."""
    # NOTE(review): 2 is presumably a deliberately low worker count for
    # this site -- confirm against the definition of init_executor.
    executor_arg = 2
    self.init_executor(executor_arg)

def search_novel(self, query):
soup = self.get_soup(search_url % quote_plus(query))
results = []
Expand Down Expand Up @@ -45,28 +48,37 @@ def read_novel_info(self):
self.novel_author = author_tag.text.strip()

# Syosetu calls parts "chapters"
soups = []
pager_last = soup.select_one("a[class='novelview_pager-last']")
if pager_last and 'href' in pager_last.attrs:
page_num = int(pager_last["href"].split("=")[-1])
for x in range(1, page_num + 1):
soup = self.get_soup(f'{self.novel_url}?p={x}')
soups.append(soup)
else:
soups.append(soup)

volume_id = 0
chapter_id = 0
volume = {"id": 0}
self.volumes.append(volume)
for tag in soup.select(".index_box .chapter_title, .index_box .subtitle a"):
if 'chapter_title' in tag.attrs.get('class', ''):
# Part/volume (there might be none)
volume = {
"id": volume['id'] + 1,
"title": tag.text.strip(),
}
self.volumes.append(volume)
elif tag.name == "a":
# Chapter
chapter_id += 1
self.chapters.append(
{
self.volumes.append({'id': 0})
for soup in soups:
for tag in soup.select(".index_box .chapter_title, .index_box .subtitle a"):
if 'chapter_title' in tag.attrs.get('class', ''):
# Part/volume (there might be none)
volume_id += 1
self.volumes.append({
'id': volume_id,
'title': tag.text.strip(),
})
elif tag.name == "a":
# Chapter
chapter_id += 1
self.chapters.append({
"id": chapter_id,
"volume": volume['id'],
"title": tag.text.strip() or ("Chapter %d" % chapter_id),
"volume": volume_id,
"title": tag.text.strip(),
"url": self.absolute_url(tag["href"]),
}
)
})

def download_chapter_body(self, chapter):
soup = self.get_soup(chapter["url"])
Expand Down

0 comments on commit 6eac1c0

Please sign in to comment.