Skip to content

Commit

Permalink
Merge pull request #2360 from zGadli/scrape-fix
Browse files: browse the repository at this point in the history
fix chapter detection
  • Loading branch information
dipu-bd authored Apr 30, 2024
2 parents f4c267c + 492d642 commit 9e70723
Showing 1 changed file with 25 additions and 0 deletions.
25 changes: 25 additions & 0 deletions sources/en/l/luminarynovels.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
import logging

from bs4 import BeautifulSoup, Tag

from lncrawl.templates.madara import MadaraTemplate

logger = logging.getLogger(__name__)
Expand All @@ -13,3 +15,26 @@ class Luminarynovels(MadaraTemplate):
def initialize(self) -> None:
    """Add this site's warning-banner selector to ``self.cleaner.bad_css``."""
    # The banner only carries self-promotion and a Discord link.
    promo_banner_css = "div.chapter-warning.alert.alert-warning"
    self.cleaner.bad_css.add(promo_banner_css)

def select_chapter_tags(self, soup: BeautifulSoup):
    """Yield the novel's chapter link tags in reverse of the listed order.

    Two endpoints are tried in turn:

    1. ``<novel_url>/ajax/chapters/`` (query string and surrounding slashes
       stripped from ``self.novel_url`` first), submitted with ``retry=0``.
    2. ``<home_url>wp-admin/admin-ajax.php`` with the ``manga_get_chapters``
       action, keyed by the ``data-id`` of ``#manga-chapters-holder`` found
       in ``soup``.

    The second endpoint is used whenever the first one raises or returns
    no chapter links.

    Args:
        soup: Parsed novel page. It is only consulted by the fallback, to
            read the novel id, so it must not be overwritten by the first
            endpoint's response.

    Yields:
        The selected ``<a>`` tags, reversed (presumably because the site
        lists the newest chapter first -- confirm against the live markup).

    Raises:
        AssertionError: If the fallback is needed but ``soup`` has no
            ``#manga-chapters-holder[data-id]`` element.
    """
    try:
        clean_novel_url = self.novel_url.split("?")[0].strip("/")
        response = self.submit_form(f"{clean_novel_url}/ajax/chapters/", retry=0)
        # Parse into a separate name: rebinding ``soup`` here would make the
        # fallback below search the AJAX response instead of the novel page.
        listing = self.make_soup(response)
        chapters = listing.select(" div.page-content-listing.single-page > div > ul > li > a")
    except Exception:
        # Best effort: any failure on the first endpoint just means the
        # admin-ajax endpoint is tried instead.
        chapters = []

    if not chapters:
        nl_id = soup.select_one("#manga-chapters-holder[data-id]")
        # Explicit raise instead of ``assert`` so the check survives ``-O``.
        if not isinstance(nl_id, Tag):
            raise AssertionError(
                "Could not find #manga-chapters-holder[data-id] on the novel page"
            )
        response = self.submit_form(
            f"{self.home_url}wp-admin/admin-ajax.php",
            data={
                "action": "manga_get_chapters",
                "manga": nl_id["data-id"],
            },
        )
        chapters = self.make_soup(response).select("ul.main .wp-manga-chapter a")

    yield from reversed(chapters)

0 comments on commit 9e70723

Please sign in to comment.