Skip to content

Commit

Permalink
Merge pull request #2171 from neoryd/master
Browse files Browse the repository at this point in the history
new domain for allnovelfull and Added New Source (daotranslate.com)
  • Loading branch information
dipu-bd authored Oct 21, 2023
2 parents 44626fb + 498a5e7 commit 9cdf7b6
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 1 deletion.
5 changes: 4 additions & 1 deletion sources/en/a/allnovelfull.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,7 @@


class AllNovelFullCrawler(NovelFullTemplate):
    """NovelFullTemplate-based crawler covering both AllNovelFull domains."""

    # Single assignment listing every supported domain; the earlier
    # one-element list was immediately overwritten and has been dropped.
    base_url = [
        "https://allnovelfull.com/",
        "https://allnovelfull.net/",
    ]
90 changes: 90 additions & 0 deletions sources/en/d/daotranslate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
import logging
from urllib.parse import quote_plus

from bs4 import Tag

from lncrawl.core.crawler import Crawler

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Search endpoint template; "%s" is replaced with the search query.
search_url = "https://daotranslate.com/?s=%s"


class DaoTranslateCrawler(Crawler):
    """Crawler for novels hosted on daotranslate.com.

    Provides search, novel metadata / chapter-list extraction and chapter
    body download on top of the shared ``Crawler`` helpers.
    """

    base_url = "https://daotranslate.com/"
    # NOTE(review): presumably flags this source as machine-translated;
    # confirm against the Crawler base class.
    has_mtl = True

    # Number of chapters grouped into one synthetic volume.
    chapters_per_volume = 100

    def initialize(self):
        """Configure the executor with a rate limit of 1.1.

        The unit of ``ratelimit`` is defined by ``Crawler.init_executor``.
        """
        self.init_executor(ratelimit=1.1)

    def search_novel(self, query):
        """Search the site and return at most ten matching novels.

        Args:
            query: Free-text search phrase.

        Returns:
            A list of dicts with ``title``, ``url`` and ``info`` keys.
        """
        # quote_plus still maps spaces to "+", and additionally
        # percent-encodes characters such as "&", "#" or "?" that would
        # otherwise corrupt the query string.
        soup = self.get_soup(search_url % quote_plus(query.lower()))

        results = []
        for tab in soup.select("article.maindet")[:10]:
            a = tab.select_one("h2 a")
            if not isinstance(a, Tag):
                continue

            info = []
            latest = tab.select_one(".mdinfodet .nchapter a")
            if isinstance(latest, Tag):
                info.append(latest.text.strip())

            votes = tab.select_one(".mdinfodet .mdminf")
            if isinstance(votes, Tag):
                info.append("Rating: " + votes.text.strip())

            results.append(
                {
                    "title": a.text.strip(),
                    "url": self.absolute_url(a["href"]),
                    "info": " | ".join(info),
                }
            )

        return results

    def read_novel_info(self):
        """Populate title, cover, author, synopsis, volumes and chapters.

        Reads the page at ``self.novel_url``. The title is mandatory; the
        cover, author and synopsis are set only when present on the page.

        Raises:
            ValueError: If the page has no title element.
        """
        logger.debug("Visiting %s", self.novel_url)
        soup = self.get_soup(self.novel_url)

        possible_title = soup.select_one(".infox h1")
        if not isinstance(possible_title, Tag):
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("No novel title found at %s" % self.novel_url)
        self.novel_title = possible_title.text.strip()
        logger.info("Novel title: %s", self.novel_title)

        possible_image = soup.select_one(".thumbook .thumb img")
        if isinstance(possible_image, Tag):
            # The original read only "data-src"; fall back to "src" so an
            # image without that attribute no longer raises KeyError.
            cover_src = possible_image.get("data-src") or possible_image.get("src")
            if cover_src:
                self.novel_cover = self.absolute_url(cover_src)
                logger.info("Novel cover: %s", self.novel_cover)

        possible_author = soup.select_one(
            ".info-content .spe span:nth-child(3) a"
        )
        if isinstance(possible_author, Tag):
            self.novel_author = possible_author.text.strip()
            logger.info("Novel author: %s", self.novel_author)

        possible_synopsis = soup.select(".entry-content p")
        if possible_synopsis:
            self.novel_synopsis = "".join([str(p) for p in possible_synopsis])
            logger.info("Novel synopsis: %s", self.novel_synopsis)

        # The list is walked in reverse document order.
        # NOTE(review): presumably the site lists newest chapters first.
        for a in reversed(soup.select(".eplisterfull ul li a")):
            chap_id = len(self.chapters) + 1
            vol_id = 1 + len(self.chapters) // self.chapters_per_volume
            # The first chapter of each group opens a new volume.
            if chap_id % self.chapters_per_volume == 1:
                self.volumes.append({"id": vol_id})

            # Use the dedicated title node when present; otherwise fall back
            # to the anchor's own text rather than crashing on None.
            possible_chapter_title = a.select_one(".epl-title")
            if isinstance(possible_chapter_title, Tag):
                chapter_title = possible_chapter_title.text.strip()
            else:
                chapter_title = a.text.strip()

            self.chapters.append(
                {
                    "id": chap_id,
                    "volume": vol_id,
                    "title": chapter_title,
                    "url": self.absolute_url(a["href"]),
                }
            )

    def download_chapter_body(self, chapter):
        """Return the chapter's paragraphs as one concatenated HTML string.

        Args:
            chapter: Chapter dict whose ``url`` key points at the chapter page.

        Returns:
            The ``<p>`` elements under ``.epcontent.entry-content`` joined
            without a separator; an empty string when none are found.
        """
        soup = self.get_soup(chapter["url"])
        contents = soup.select(".epcontent.entry-content p")
        return "".join([str(p) for p in contents])

0 comments on commit 9cdf7b6

Please sign in to comment.