Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A better version of PR #2155. #2196

Merged
merged 8 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 21 additions & 18 deletions lncrawl/assets/web/script.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,18 @@ window.addEventListener("keyup", function (evt) {
window.location.href = href;
}

switch (evt.key) {
case "ArrowLeft":
goToHref(document.querySelector("a.prev-button"));
break;
case "ArrowRight":
goToHref(document.querySelector("a.next-button"));
break;
default:
break;
}
});

switch (evt.key) {
case "ArrowLeft":
goToHref(document.querySelector("a.prev-button"));
break;
case "ArrowRight":
goToHref(document.querySelector("a.next-button"));
break;
default:
break;
}
});

// Handle next TOC select
function addTocSelectListener() {
Expand All @@ -29,18 +30,20 @@ function addTocSelectListener() {
}

// Handle update reading progress on scroll
window.addEventListener("scroll", function (e) {
try {
let debounceTimeout;
function debouncedUpdate(evt) {
clearTimeout(debounceTimeout);
debounceTimeout = setTimeout(() => {
var scroll = window.scrollY;
var height = document.body.scrollHeight - window.innerHeight + 10;
var percent = Math.round((100.0 * scroll) / height);
document.getElementById("readpos").innerText = percent + "%";
} catch (err) {
// ignore
}
});
}, 100); // 100ms delay
}


window.addEventListener("scroll", debouncedUpdate);

// Add element wise listeners after page load
window.addEventListener("load", function (evt) {
addTocSelectListener();
});
30 changes: 12 additions & 18 deletions lncrawl/binders/calibre.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,50 @@
import logging
import os
import subprocess

from typing import Union, List
logger = logging.getLogger(__name__)

EBOOK_CONVERT = "ebook-convert"
CALIBRE_LINK = "https://calibre-ebook.com/download"


def run_ebook_convert(*args):
def run_ebook_convert(*args) -> bool:
"""
Calls `ebook-convert` with given args
Visit https://manual.calibre-ebook.com/generated/en/ebook-convert.html for argument list.
"""
try:
isdebug = os.getenv("debug_mode")
with open(os.devnull, "w", encoding="utf8") as dumper:
subprocess.call(
subprocess.run(
args=[EBOOK_CONVERT] + list(args),
stdout=None if isdebug else dumper,
stderr=None if isdebug else dumper,
check=True
)

return True
except Exception:
if logger.isEnabledFor(logging.DEBUG):
logger.exception("Failed to convert ebook with args: %s", list(args))

except subprocess.CalledProcessError:
logger.exception("Failed to convert ebook with args: %s", list(args))
return False
except Exception as e:
logger.exception("An unexpected error occurred: %s", str(e))
return False


def epub_to_calibre(app, epub_file, out_fmt):
def epub_to_calibre(app, epub_file, out_fmt) -> Union[str, None]:
if not os.path.exists(epub_file):
return None

epub_path = os.path.dirname(epub_file)
epub_file_name = os.path.basename(epub_file)
file_name_without_ext = epub_file_name.replace(".epub", "")
file_name_without_ext, _ = os.path.splitext(epub_file_name)

work_path = os.path.dirname(epub_path)
out_path = os.path.join(work_path, out_fmt)
out_file_name = file_name_without_ext + "." + out_fmt
out_file = os.path.join(out_path, out_file_name)

os.makedirs(out_path, exist_ok=True)

logger.debug('Converting "%s" to "%s"', epub_file, out_file)

args = [
epub_file,
out_file,
Expand Down Expand Up @@ -82,9 +80,7 @@ def epub_to_calibre(app, epub_file, out_fmt):
"--pdf-header-template",
'<p style="text-align:center; color:#555; font-size:0.9em">⦗ _TITLE_ &mdash; _SECTION_ ⦘</p>',
]

run_ebook_convert(*args)

if os.path.exists(out_file):
logger.info("Created: %s" % out_file_name)
return out_file
Expand All @@ -93,17 +89,15 @@ def epub_to_calibre(app, epub_file, out_fmt):
return None


def make_calibres(app, epubs, out_fmt):
def make_calibres(app, epubs, out_fmt) -> List[str]:
if out_fmt == "epub" or not epubs:
return epubs

if not run_ebook_convert("--version"):
logger.error("Install Calibre to generate %s: %s", out_fmt, CALIBRE_LINK),
return

out_files = []
for epub in epubs:
out = epub_to_calibre(app, epub, out_fmt)
out_files += [out]

return out_files
2 changes: 2 additions & 0 deletions lncrawl/core/cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ def __init__(self) -> None:
[
# css selector to select and remove tags
".adblock-service",
".sharedaddy",
".saboxplugin-wrap",
".adbox",
".ads-middle",
".ads",
Expand Down
1 change: 1 addition & 0 deletions sources/en/m/mangarosie.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class MangaRosieCrawler(Crawler):
base_url = [
"https://mangarosie.me/",
"https://mangarosie.love/",
"https://toon69.com/",
]

search_url = "%s?s=%s&post_type=wp-manga&author=&artist=&release="
Expand Down
74 changes: 74 additions & 0 deletions sources/en/w/whitemoonlightnovels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
import logging
import re
from lncrawl.core.crawler import Crawler

logger = logging.getLogger(__name__)
search_url = "https://whitemoonlightnovels.com/?s=%s"


class NovelsEmperorCrawler(Crawler):
    # NOTE(review): class name appears copy-pasted from another source file;
    # this crawler actually targets whitemoonlightnovels.com. Name kept
    # unchanged for backward compatibility.
    base_url = ["https://whitemoonlightnovels.com/"]

    def search_novel(self, query):
        """Search the site for *query* and return a list of result dicts
        with keys ``title``, ``url`` and ``img``."""
        query = query.lower().replace(" ", "+")
        soup = self.get_soup(search_url % query)

        results = []
        for tab in soup.select("div > article"):
            a = tab.select_one("div > article div.mdthumb a")
            if not a or not a.has_attr("href"):
                # Skip result cards that have no usable link.
                continue
            # Derive a readable title from the URL slug instead of slicing a
            # hard-coded character offset ([39:]), which silently breaks if
            # the site's URL layout changes and leaves a trailing "/" in titles.
            slug = a["href"].rstrip("/").split("/")[-1]
            img = tab.select_one("img")
            results.append(
                {
                    "title": slug.replace("-", " "),
                    "url": self.absolute_url(a["href"]),
                    "img": img["src"] if img and img.has_attr("src") else "",
                }
            )

        return results

    def read_novel_info(self):
        """Scrape the novel page for title, author, cover and chapter list."""
        logger.debug("Visiting %s", self.novel_url)
        soup = self.get_soup(self.novel_url)

        possible_title = soup.select_one("h1.entry-title")
        assert possible_title, "No novel title"
        self.novel_title = possible_title.text.strip()
        logger.info("Novel title: %s", self.novel_title)

        # ".serval a" may hold two elements (presumably author + translator —
        # TODO confirm against live pages), one element, or none at all.
        # Guard against the empty case, which previously raised IndexError.
        author = soup.select(".serval a")
        if len(author) == 2:
            self.novel_author = "%s (%s)" % (author[0].text, author[1].text)
        elif author:
            self.novel_author = author[0].text
        logger.info("Novel author: %s", self.novel_author)

        # The cover image may be missing; the old code crashed with a
        # TypeError (None["src"]) in that case.
        cover = soup.select_one("img.size-post-thumbnail")
        if cover and cover.has_attr("src"):
            self.novel_cover = self.absolute_url(cover["src"])
        logger.info("Novel cover: %s", self.novel_cover)

        for a in soup.select("div.eplister ul li a"):
            ch_title = a.select_one("div.epl-title").text
            # Prefer the first number embedded in the chapter title; fall
            # back to a sequential id when the title contains no digits.
            numbers = [int(x) for x in re.findall(r"\d+", ch_title)]
            ch_id = numbers[0] if numbers else len(self.chapters) + 1
            self.chapters.append(
                {
                    "id": ch_id,
                    "title": ch_title,
                    "url": self.absolute_url(a["href"]),
                }
            )

        logger.debug(
            "%d chapters and %d volumes found", len(self.chapters), len(self.volumes)
        )

    def download_chapter_body(self, chapter):
        """Fetch one chapter page and return its cleaned content as HTML."""
        soup = self.get_soup(chapter["url"])

        contents = soup.select_one("div.epcontent")
        contents = self.cleaner.extract_contents(contents)
        return str(contents)