From bb6c23727203e32ee1b9fed1b6b1d92a61a5d222 Mon Sep 17 00:00:00 2001 From: zGadli Date: Sun, 12 Nov 2023 12:34:38 +0530 Subject: [PATCH 1/6] Update script.js --- lncrawl/assets/web/script.js | 39 +++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/lncrawl/assets/web/script.js b/lncrawl/assets/web/script.js index d39a0ccb0..c5f2b45be 100644 --- a/lncrawl/assets/web/script.js +++ b/lncrawl/assets/web/script.js @@ -7,17 +7,18 @@ window.addEventListener("keyup", function (evt) { window.location.href = href; } - switch (evt.key) { - case "ArrowLeft": - goToHref(document.querySelector("a.prev-button")); - break; - case "ArrowRight": - goToHref(document.querySelector("a.next-button")); - break; - default: - break; - } -}); + + switch (evt.key) { + case "ArrowLeft": + goToHref(document.querySelector("a.prev-button")); + break; + case "ArrowRight": + goToHref(document.querySelector("a.next-button")); + break; + default: + break; + } + }); // Handle next TOC select function addTocSelectListener() { @@ -29,18 +30,20 @@ function addTocSelectListener() { } // Handle update reading progress on scroll -window.addEventListener("scroll", function (e) { - try { +let debounceTimeout; +function debouncedUpdate(evt) { + clearTimeout(debounceTimeout); + debounceTimeout = setTimeout(() => { var scroll = window.scrollY; var height = document.body.scrollHeight - window.innerHeight + 10; var percent = Math.round((100.0 * scroll) / height); document.getElementById("readpos").innerText = percent + "%"; - } catch (err) { - // ignore - } -}); + }, 100); // 100ms delay +} + + +window.addEventListener("scroll", debouncedUpdate); -// Add element wise listeners after page load window.addEventListener("load", function (evt) { addTocSelectListener(); }); From 939218293247d386ee760ebb49cd4a993867b4cc Mon Sep 17 00:00:00 2001 From: zGadli Date: Sun, 12 Nov 2023 13:14:20 +0530 Subject: [PATCH 2/6] Update calibre.py --- lncrawl/binders/calibre.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/lncrawl/binders/calibre.py b/lncrawl/binders/calibre.py index bde18b797..f42acfd4d 100644 --- a/lncrawl/binders/calibre.py +++ b/lncrawl/binders/calibre.py @@ -1,14 +1,12 @@ import logging import os import subprocess - logger = logging.getLogger(__name__) - EBOOK_CONVERT = "ebook-convert" CALIBRE_LINK = "https://calibre-ebook.com/download" -def run_ebook_convert(*args): +def run_ebook_convert(*args) -> bool: """ Calls `ebook-convert` with given args Visit https://manual.calibre-ebook.com/generated/en/ebook-convert.html for argument list. @@ -16,37 +14,36 @@ def run_ebook_convert(*args): try: isdebug = os.getenv("debug_mode") with open(os.devnull, "w", encoding="utf8") as dumper: - subprocess.call( + subprocess.run( args=[EBOOK_CONVERT] + list(args), stdout=None if isdebug else dumper, stderr=None if isdebug else dumper, + check=True ) return True - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Failed to convert ebook with args: %s", list(args)) - + except subprocess.CalledProcessError: + logger.exception("Failed to convert ebook with args: %s", list(args)) + return False + except Exception as e: + logger.exception("An unexpected error occurred: %s", str(e)) return False -def epub_to_calibre(app, epub_file, out_fmt): +def epub_to_calibre(app, epub_file, out_fmt) -> Union[str, None]: if not os.path.exists(epub_file): return None epub_path = os.path.dirname(epub_file) epub_file_name = os.path.basename(epub_file) - file_name_without_ext = epub_file_name.replace(".epub", "") + file_name_without_ext, _ = os.path.splitext(epub_file_name) work_path = os.path.dirname(epub_path) out_path = os.path.join(work_path, out_fmt) out_file_name = file_name_without_ext + "." + out_fmt out_file = os.path.join(out_path, out_file_name) - os.makedirs(out_path, exist_ok=True) - logger.debug('Converting "%s" to "%s"', epub_file, out_file) - args = [ epub_file, out_file, @@ -82,9 +79,7 @@ def epub_to_calibre(app, epub_file, out_fmt): "--pdf-header-template", '

⦗ _TITLE_ — _SECTION_ ⦘

', ] - run_ebook_convert(*args) - if os.path.exists(out_file): logger.info("Created: %s" % out_file_name) return out_file @@ -93,17 +88,15 @@ def epub_to_calibre(app, epub_file, out_fmt): return None -def make_calibres(app, epubs, out_fmt): +def make_calibres(app, epubs, out_fmt) -> List[str]: if out_fmt == "epub" or not epubs: return epubs if not run_ebook_convert("--version"): logger.error("Install Calibre to generate %s: %s", out_fmt, CALIBRE_LINK), return - out_files = [] for epub in epubs: out = epub_to_calibre(app, epub, out_fmt) out_files += [out] - - return out_files + return out_files \ No newline at end of file From 0e6f875df8a6241347c6aace35ee105b224530fc Mon Sep 17 00:00:00 2001 From: zGadli Date: Sun, 12 Nov 2023 14:33:20 +0530 Subject: [PATCH 3/6] Update calibre.py --- lncrawl/binders/calibre.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lncrawl/binders/calibre.py b/lncrawl/binders/calibre.py index f42acfd4d..46292e68b 100644 --- a/lncrawl/binders/calibre.py +++ b/lncrawl/binders/calibre.py @@ -1,6 +1,7 @@ import logging import os import subprocess +from typing import Union, List logger = logging.getLogger(__name__) EBOOK_CONVERT = "ebook-convert" CALIBRE_LINK = "https://calibre-ebook.com/download" @@ -99,4 +100,4 @@ def make_calibres(app, epubs, out_fmt) -> List[str]: for epub in epubs: out = epub_to_calibre(app, epub, out_fmt) out_files += [out] - return out_files \ No newline at end of file + return out_files From cb17eef802f2de0c53b15663d19e4b0dd4ec4e21 Mon Sep 17 00:00:00 2001 From: zGadli Date: Sun, 12 Nov 2023 16:12:27 +0530 Subject: [PATCH 4/6] adding another link to mangarosie.py --- sources/en/m/mangarosie.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sources/en/m/mangarosie.py b/sources/en/m/mangarosie.py index 51f0934df..0203952b5 100644 --- a/sources/en/m/mangarosie.py +++ b/sources/en/m/mangarosie.py @@ -11,6 +11,7 @@ class MangaRosieCrawler(Crawler): base_url = [ "https://mangarosie.me/", "https://mangarosie.love/", + "https://toon69.com/", ] search_url = "%s?s=%s&post_type=wp-manga&author=&artist=&release=" From 8c8319e590bc64ecfc024c622e8136fce5d0cbd5 Mon Sep 17 00:00:00 2001 From: zGadli Date: Sun, 12 Nov 2023 17:12:37 +0530 Subject: [PATCH 5/6] Added new source and some more css tags in the cleaner --- lncrawl/core/cleaner.py | 2 + sources/en/w/whitemoonlightnovels.py | 74 ++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 sources/en/w/whitemoonlightnovels.py diff --git a/lncrawl/core/cleaner.py b/lncrawl/core/cleaner.py index ac74d5dfd..b0c8e131c 100644 --- a/lncrawl/core/cleaner.py +++ b/lncrawl/core/cleaner.py @@ -65,6 +65,8 @@ def __init__(self) -> None: [ # css selector to select and remove tags ".adblock-service", + ".sharedaddy", + ".saboxplugin-wrap", ".adbox", ".ads-middle", ".ads", diff --git a/sources/en/w/whitemoonlightnovels.py b/sources/en/w/whitemoonlightnovels.py new file mode 100644 index 000000000..4b8703569 --- /dev/null +++ b/sources/en/w/whitemoonlightnovels.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +import logging +import re +from lncrawl.core.crawler import Crawler + +logger = logging.getLogger(__name__) +search_url = "https://whitemoonlightnovels.com/?s=%s" + + +class NovelsEmperorCrawler(Crawler): + base_url = ["https://whitemoonlightnovels.com/"] + + def search_novel(self, query): + query = query.lower().replace(" ", "+") + soup = self.get_soup(search_url % query) + + results = [] + for tab in soup.select("div > article"): + a = tab.select_one("div > article div.mdthumb a") + title = a["href"][39:].replace("-", " ") + img = tab.select_one("img")["src"] + results.append( + { + "title": title, + "url": self.absolute_url(a["href"]), + "img": img, + } + ) + + return results + + def read_novel_info(self): + logger.debug("Visiting %s", self.novel_url) + soup = self.get_soup(self.novel_url) + + possible_title = soup.select_one("h1.entry-title") + assert possible_title, "No novel title" + self.novel_title = possible_title.text + logger.info("Novel title: %s", self.novel_title) + + author = soup.select(".serval a") + if len(author) == 2: + self.novel_author = author[0].text + " (" + author[1].text + ")" + else: + self.novel_author = author[0].text + logger.info("Novel author: %s", self.novel_author) + + self.novel_cover = self.absolute_url( + soup.select_one("img.size-post-thumbnail")["src"] + ) + logger.info("Novel cover: %s", self.novel_cover) + + for a in soup.select("div.eplister ul li a"): + ch_title = a.select_one("div.epl-title").text + ch_id = [int(x) for x in re.findall(r"\d+", ch_title)] + ch_id = ch_id[0] if len(ch_id) else len(self.chapters) + 1 + self.chapters.append( + { + "id": ch_id, + "title": ch_title, + "url": self.absolute_url(a["href"]), + } + ) + + logger.debug( + "%d chapters and %d volumes found", len(self.chapters), len(self.volumes) + ) + + def download_chapter_body(self, chapter): + soup = self.get_soup(chapter["url"]) + + contents = soup.select_one("div.epcontent") + contents = self.cleaner.extract_contents(contents) + return str(contents) From 212ae6d3089cb655e7c83734f655af81e9b5e0f6 Mon Sep 17 00:00:00 2001 From: jere344 <86294972+jere344@users.noreply.github.com> Date: Sun, 12 Nov 2023 13:49:38 -0500 Subject: [PATCH 6/6] Full quality cover --- sources/en/w/whitemoonlightnovels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/en/w/whitemoonlightnovels.py b/sources/en/w/whitemoonlightnovels.py index 4b8703569..c7670a5db 100644 --- a/sources/en/w/whitemoonlightnovels.py +++ b/sources/en/w/whitemoonlightnovels.py @@ -47,7 +47,7 @@ def read_novel_info(self): self.novel_cover = self.absolute_url( soup.select_one("img.size-post-thumbnail")["src"] - ) + ).split("?")[0] logger.info("Novel cover: %s", self.novel_cover) for a in soup.select("div.eplister ul li a"):