diff --git a/lncrawl/assets/web/script.js b/lncrawl/assets/web/script.js index d39a0ccb0..c5f2b45be 100644 --- a/lncrawl/assets/web/script.js +++ b/lncrawl/assets/web/script.js @@ -7,17 +7,18 @@ window.addEventListener("keyup", function (evt) { window.location.href = href; } - switch (evt.key) { - case "ArrowLeft": - goToHref(document.querySelector("a.prev-button")); - break; - case "ArrowRight": - goToHref(document.querySelector("a.next-button")); - break; - default: - break; - } -}); + + switch (evt.key) { + case "ArrowLeft": + goToHref(document.querySelector("a.prev-button")); + break; + case "ArrowRight": + goToHref(document.querySelector("a.next-button")); + break; + default: + break; + } + }); // Handle next TOC select function addTocSelectListener() { @@ -29,18 +30,20 @@ function addTocSelectListener() { } // Handle update reading progress on scroll -window.addEventListener("scroll", function (e) { - try { +let debounceTimeout; +function debouncedUpdate(evt) { + clearTimeout(debounceTimeout); + debounceTimeout = setTimeout(() => { var scroll = window.scrollY; var height = document.body.scrollHeight - window.innerHeight + 10; var percent = Math.round((100.0 * scroll) / height); document.getElementById("readpos").innerText = percent + "%"; - } catch (err) { - // ignore - } -}); + }, 100); // 100ms delay +} + + +window.addEventListener("scroll", debouncedUpdate); -// Add element wise listeners after page load window.addEventListener("load", function (evt) { addTocSelectListener(); }); diff --git a/lncrawl/binders/calibre.py b/lncrawl/binders/calibre.py index bde18b797..46292e68b 100644 --- a/lncrawl/binders/calibre.py +++ b/lncrawl/binders/calibre.py @@ -1,14 +1,13 @@ import logging import os import subprocess - +from typing import Union, List logger = logging.getLogger(__name__) - EBOOK_CONVERT = "ebook-convert" CALIBRE_LINK = "https://calibre-ebook.com/download" -def run_ebook_convert(*args): +def run_ebook_convert(*args) -> bool: """ Calls `ebook-convert` with given args Visit https://manual.calibre-ebook.com/generated/en/ebook-convert.html for argument list. @@ -16,37 +15,36 @@ def run_ebook_convert(*args): try: isdebug = os.getenv("debug_mode") with open(os.devnull, "w", encoding="utf8") as dumper: - subprocess.call( + subprocess.run( args=[EBOOK_CONVERT] + list(args), stdout=None if isdebug else dumper, stderr=None if isdebug else dumper, + check=True ) return True - except Exception: - if logger.isEnabledFor(logging.DEBUG): - logger.exception("Failed to convert ebook with args: %s", list(args)) - + except subprocess.CalledProcessError: + logger.exception("Failed to convert ebook with args: %s", list(args)) + return False + except Exception as e: + logger.exception("An unexpected error occurred: %s", str(e)) return False -def epub_to_calibre(app, epub_file, out_fmt): +def epub_to_calibre(app, epub_file, out_fmt) -> Union[str, None]: if not os.path.exists(epub_file): return None epub_path = os.path.dirname(epub_file) epub_file_name = os.path.basename(epub_file) - file_name_without_ext = epub_file_name.replace(".epub", "") + file_name_without_ext, _ = os.path.splitext(epub_file_name) work_path = os.path.dirname(epub_path) out_path = os.path.join(work_path, out_fmt) out_file_name = file_name_without_ext + "." + out_fmt out_file = os.path.join(out_path, out_file_name) - os.makedirs(out_path, exist_ok=True) - logger.debug('Converting "%s" to "%s"', epub_file, out_file) - args = [ epub_file, out_file, @@ -82,9 +80,7 @@ def epub_to_calibre(app, epub_file, out_fmt): "--pdf-header-template", '

⦗ _TITLE_ — _SECTION_ ⦘

', ] - run_ebook_convert(*args) - if os.path.exists(out_file): logger.info("Created: %s" % out_file_name) return out_file @@ -93,17 +89,15 @@ def epub_to_calibre(app, epub_file, out_fmt): return None -def make_calibres(app, epubs, out_fmt): +def make_calibres(app, epubs, out_fmt) -> List[str]: if out_fmt == "epub" or not epubs: return epubs if not run_ebook_convert("--version"): logger.error("Install Calibre to generate %s: %s", out_fmt, CALIBRE_LINK), return - out_files = [] for epub in epubs: out = epub_to_calibre(app, epub, out_fmt) out_files += [out] - return out_files diff --git a/lncrawl/core/cleaner.py b/lncrawl/core/cleaner.py index ac74d5dfd..b0c8e131c 100644 --- a/lncrawl/core/cleaner.py +++ b/lncrawl/core/cleaner.py @@ -65,6 +65,8 @@ def __init__(self) -> None: [ # css selector to select and remove tags ".adblock-service", + ".sharedaddy", + ".saboxplugin-wrap", ".adbox", ".ads-middle", ".ads", diff --git a/sources/en/m/mangarosie.py b/sources/en/m/mangarosie.py index 51f0934df..0203952b5 100644 --- a/sources/en/m/mangarosie.py +++ b/sources/en/m/mangarosie.py @@ -11,6 +11,7 @@ class MangaRosieCrawler(Crawler): base_url = [ "https://mangarosie.me/", "https://mangarosie.love/", + "https://toon69.com/", ] search_url = "%s?s=%s&post_type=wp-manga&author=&artist=&release=" diff --git a/sources/en/w/whitemoonlightnovels.py b/sources/en/w/whitemoonlightnovels.py new file mode 100644 index 000000000..c7670a5db --- /dev/null +++ b/sources/en/w/whitemoonlightnovels.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +import logging +import re +from lncrawl.core.crawler import Crawler + +logger = logging.getLogger(__name__) +search_url = "https://whitemoonlightnovels.com/?s=%s" + + +class NovelsEmperorCrawler(Crawler): + base_url = ["https://whitemoonlightnovels.com/"] + + def search_novel(self, query): + query = query.lower().replace(" ", "+") + soup = self.get_soup(search_url % query) + + results = [] + for tab in soup.select("div > article"): + a = tab.select_one("div > article div.mdthumb a") + title = a["href"][39:].replace("-", " ") + img = tab.select_one("img")["src"] + results.append( + { + "title": title, + "url": self.absolute_url(a["href"]), + "img": img, + } + ) + + return results + + def read_novel_info(self): + logger.debug("Visiting %s", self.novel_url) + soup = self.get_soup(self.novel_url) + + possible_title = soup.select_one("h1.entry-title") + assert possible_title, "No novel title" + self.novel_title = possible_title.text + logger.info("Novel title: %s", self.novel_title) + + author = soup.select(".serval a") + if len(author) == 2: + self.novel_author = author[0].text + " (" + author[1].text + ")" + else: + self.novel_author = author[0].text + logger.info("Novel author: %s", self.novel_author) + + self.novel_cover = self.absolute_url( + soup.select_one("img.size-post-thumbnail")["src"] + ).split("?")[0] + logger.info("Novel cover: %s", self.novel_cover) + + for a in soup.select("div.eplister ul li a"): + ch_title = a.select_one("div.epl-title").text + ch_id = [int(x) for x in re.findall(r"\d+", ch_title)] + ch_id = ch_id[0] if len(ch_id) else len(self.chapters) + 1 + self.chapters.append( + { + "id": ch_id, + "title": ch_title, + "url": self.absolute_url(a["href"]), + } + ) + + logger.debug( + "%d chapters and %d volumes found", len(self.chapters), len(self.volumes) + ) + + def download_chapter_body(self, chapter): + soup = self.get_soup(chapter["url"]) + + contents = soup.select_one("div.epcontent") + contents = self.cleaner.extract_contents(contents) + return str(contents)