-
-
Notifications
You must be signed in to change notification settings - Fork 300
/
noveldeglace.py
95 lines (76 loc) · 3.22 KB
/
noveldeglace.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import logging
from lncrawl.core.crawler import Crawler
from lncrawl.core.exeptions import LNException
from lncrawl.models.chapter import Chapter
from lncrawl.models.volume import Volume
logger = logging.getLogger(__name__)
class NovelDeGlace(Crawler):
    """Crawler for novels published on https://noveldeglace.com/."""

    base_url = "https://noveldeglace.com/"
    last_updated = "2024-02-22"
    has_mtl = False

    def read_novel_info(self) -> None:
        """Load the novel page and fill in title, author, cover, volumes and chapters.

        Metadata is read from the ``div.line_roman`` entries inside
        ``div.entry-content``; the chapter list is read from the ``div.su-row``
        rows inside ``div.su-tabs-panes``, one volume per row.

        Raises:
            LNException: if the details container or the chapter tabs are
                missing, or if a volume row contains no ``<ul>`` at all.
        """
        logger.debug("Visiting %s", self.novel_url)
        soup = self.get_soup(self.novel_url)

        novel_details = soup.select_one("div.entry-content")
        if not novel_details:
            raise LNException("Failed to find novel details")

        for div in novel_details.find_all("div", class_="line_roman"):
            strong = div.find("strong")
            if not strong:
                continue

            label = strong.text.strip()
            if label not in ("Titre complet :", "Auteur :"):
                continue

            # Split on the FIRST colon only (the one ending the label), so a
            # title or author name that itself contains ":" is kept whole.
            value = div.text.partition(":")[2]
            if label == "Titre complet :":
                # Drop whatever follows an "RSS" / "CMS" marker in the line.
                self.novel_title = value.split("RSS")[0].split("CMS")[0].strip()
            else:
                self.novel_author = value.strip()

        if not self.novel_title:
            logger.debug("Failed to find novel title")

        tabs = soup.select_one("div.su-tabs-panes")
        if not tabs:
            raise LNException("Failed to find chapters")

        # Volume ids are 0-based and follow the order of the rows.
        for volume_id, row in enumerate(tabs.find_all("div", class_="su-row")):
            img = row.find("img")
            if img and img.has_attr("src"):
                # Every row may carry an image; the last one found wins.
                self.novel_cover = img["src"]
            else:
                logger.debug("Failed to find novel cover")

            uls = row.find_all("ul")
            if not uls:
                raise LNException("Failed to find chapters")

            volume_span = row.find("span", class_="roman volume")
            if volume_span:
                volume_title = volume_span.text.strip()
            else:
                # A row without a title span should not abort the whole crawl.
                logger.debug("Failed to find volume title")
                volume_title = f"Volume {volume_id + 1}"

            for ul in uls:  # There is one ul for each arc
                for li in ul.find_all("li"):
                    a = li.find("a")
                    if a and a.has_attr("href"):
                        self.chapters.append(
                            Chapter(
                                id=len(self.chapters) + 1,
                                title=a.text.strip(),
                                url=a["href"],
                                volume=volume_id,
                                volume_title=volume_title,
                            )
                        )
                    else:
                        logger.debug("Failed to find chapter link")

            self.volumes.append(Volume(id=volume_id, title=volume_title))

    def download_chapter_body(self, chapter: Chapter) -> str:
        """Fetch a chapter page and return its content container as HTML.

        Looks for ``div.content-tome`` first and falls back to
        ``div.entry-content-chapitre``. The first ``<h2>`` and any
        ``div.mistape_caption`` are removed from the container.

        Raises:
            LNException: if neither container is present on the page.
        """
        logger.debug("Visiting %s", chapter.url)
        soup = self.get_soup(chapter.url)

        body = soup.select_one("div.content-tome")
        if not body:
            body = soup.select_one("div.entry-content-chapitre")

        # Validate before touching the node: dereferencing a missing body
        # would raise AttributeError instead of the intended LNException.
        if not body:
            raise LNException("Failed to find chapter content")

        # NOTE(review): the cleanup below is applied to whichever container
        # matched — confirm it was not meant for the fallback container only.
        if body.h2:
            body.h2.decompose()

        mistape_caption = body.find("div", class_="mistape_caption")
        if mistape_caption:
            mistape_caption.decompose()

        return str(body)