Commit
Showing 8 changed files with 114 additions and 124 deletions.
@@ -1 +1 @@
-3.5.1
+3.5.2
@@ -1,137 +1,105 @@
 """
 To search for novels in selected sources
 """
 import random
 import logging
-import os
-from concurrent import futures
 from typing import Dict, List
 
-from bs4 import Tag
+from concurrent.futures import Future
 from slugify import slugify
-from tqdm import tqdm
 
-from ..core.sources import crawler_list, prepare_crawler
 from ..models import CombinedSearchResult, SearchResult
+from .sources import crawler_list, prepare_crawler
+from .taskman import TaskManager
 
 SEARCH_TIMEOUT = 60
+MAX_RESULTS = 15
 
 logger = logging.getLogger(__name__)
-executor = futures.ThreadPoolExecutor(20)
+taskman = TaskManager(10)
 
 
-def _perform_search(app, link, bar):
+def _perform_search(app, link):
     from .app import App
     assert isinstance(app, App)
     try:
         crawler = prepare_crawler(link)
         results = []
         for item in crawler.search_novel(app.user_input):
-            if not item.get("url"):
-                continue
+            if not isinstance(item, SearchResult):
+                item = SearchResult(**item)
+            if not (item.url and item.title):
+                continue
             results.append(item)
 
-        logger.debug(results)
-        logger.info("%d results from %s", len(results), link)
+        logger.info(f"{len(results)} results from {link}")
         return results
     except KeyboardInterrupt as e:
         raise e
     except Exception:
         if logger.isEnabledFor(logging.DEBUG):
             logging.exception("<!> Search Failed! << %s >>", link)
         return []
-    finally:
-        app.progress += 1
-
-
-def _combine_results(results: List[SearchResult]) -> List[CombinedSearchResult]:
-    combined: Dict[str, List[SearchResult]] = {}
-    for item in results:
-        if item.title is None:
-            logger.warn(f'Title is type None in {item}')
-            continue
-        elif isinstance(item.title, Tag):
-            logger.warn(f'Title is type Tag in {item}')
-            item.title = item.title.get_text()
-
-        key = slugify(item.title)
-
-        if len(key) <= 2:
-            continue
-
-        combined.setdefault(key, [])
-        combined[key].append(item)
-
-    processed: List[CombinedSearchResult] = []
-    for key, value in combined.items():
-        value.sort(key=lambda x: x.url)
-        processed.append(
-            CombinedSearchResult(
-                id=key,
-                title=value[0].title,
-                novels=value,
-            )
-        )
-
-    processed.sort(key=lambda x: -len(x.novels))
-    return processed[:15]  # Control the number of results
 
 
 def search_novels(app):
     from .app import App
 
     assert isinstance(app, App)
 
     if not app.crawler_links:
         return
 
     sources = app.crawler_links.copy()
-    # random.shuffle(sources)
-
-    is_debug = os.getenv("debug_mode")
-    bar = tqdm(
-        desc="Searching",
-        total=len(sources),
-        unit="source",
-        disable=is_debug,
-    )
+    random.shuffle(sources)
 
     # Add future tasks
-    checked = {}
-    futures_to_check = []
+    checked = set()
     app.progress = 0
+    futures: List[Future] = []
     for link in sources:
         crawler = crawler_list[link]
         if crawler in checked:
-            bar.update()
             continue
-        checked[crawler] = True
-        future = executor.submit(_perform_search, app, link, bar)
-        futures_to_check.append(future)
+        checked.add(crawler)
+        f = taskman.submit_task(_perform_search, app, link)
+        futures.append(f)
 
     # Resolve all futures
-    results: List[SearchResult] = []
-    for i, f in enumerate(futures_to_check):
-        assert isinstance(f, futures.Future)
-        try:
-            f.result(SEARCH_TIMEOUT)
-        except KeyboardInterrupt:
-            break
-        except TimeoutError:
-            f.cancel()
-        except Exception as e:
-            if is_debug:
-                logger.error("Failed to complete search", e)
-        finally:
-            app.progress += 1
-            bar.update()
+    try:
+        taskman.resolve_futures(
+            futures,
+            desc="Searching",
+            unit="source",
+            timeout=SEARCH_TIMEOUT,
+        )
+    except Exception:
+        if logger.isEnabledFor(logging.DEBUG):
+            logging.exception("<!> Search Failed!")
 
-    # Cancel any remaining futures
-    for f in futures_to_check:
-        assert isinstance(f, futures.Future)
-        if not f.done():
-            f.cancel()
-        elif not f.cancelled():
-            results += f.result()
+    # Combine the search results
+    combined: Dict[str, List[SearchResult]] = {}
+    for f in futures:
+        if not f or not f.done() or f.cancelled():
+            continue
+        for item in f.result() or []:
+            if not item:
+                continue
+            key = slugify(item.title)
+            if len(key) <= 2:
+                continue
+            combined.setdefault(key, [])
+            combined[key].append(item)
 
     # Process combined search results
-    app.search_results = _combine_results(results)
-    bar.close()
+    processed: List[CombinedSearchResult] = []
+    for key, value in combined.items():
+        value.sort(key=lambda x: x.url)
+        processed.append(
+            CombinedSearchResult(
+                id=key,
+                title=value[0].title,
+                novels=value,
+            )
+        )
+    processed.sort(key=lambda x: -len(x.novels))
+    app.search_results = processed[:MAX_RESULTS]
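The new code delegates all thread-pool bookkeeping to TaskManager from .taskman, which this commit only consumes, so its definition does not appear in the diff above. Below is a minimal sketch of the interface the diff relies on (submit_task, and resolve_futures with desc, unit, and timeout), assuming a plain ThreadPoolExecutor and a tqdm bar underneath; the project's actual .taskman module may differ.

# Hypothetical sketch of the TaskManager interface used above.
# NOT the actual lncrawl .taskman module -- shapes inferred from the diff.
import logging
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
from concurrent.futures import TimeoutError as FuturesTimeoutError
from typing import Any, Callable, List, Optional

from tqdm import tqdm

logger = logging.getLogger(__name__)


class TaskManager:
    def __init__(self, workers: int = 10) -> None:
        # Bounded pool, matching `taskman = TaskManager(10)` in the diff.
        self.executor = ThreadPoolExecutor(max_workers=workers)

    def submit_task(self, fn: Callable, *args: Any, **kwargs: Any) -> Future:
        # Thin wrapper over ThreadPoolExecutor.submit.
        return self.executor.submit(fn, *args, **kwargs)

    def resolve_futures(
        self,
        futures: List[Future],
        desc: Optional[str] = None,
        unit: Optional[str] = None,
        timeout: Optional[float] = None,
    ) -> None:
        # Drive one progress bar for the whole batch; on timeout, cancel
        # whatever has not finished so the caller can inspect f.done()
        # and f.cancelled() afterwards, as search_novels does.
        bar = tqdm(total=len(futures), desc=desc, unit=unit)
        try:
            for _ in as_completed(futures, timeout=timeout):
                bar.update()
        except FuturesTimeoutError:
            for f in futures:
                if not f.done():
                    f.cancel()
        finally:
            bar.close()

Whatever the real module looks like, the effect of the refactor is visible in the diff: the hand-rolled executor, tqdm bar, and per-future try/except in search_novels collapse into one submit_task loop plus a single resolve_futures call.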