Commit a79e6ad

Merge pull request #65 from ClericPy/dev
1.8.2
2 parents 138f83c + 50cf90c commit a79e6ad

9 files changed: +539 -154 lines changed

watchdogs/__init__.py

+1 -1

@@ -3,6 +3,6 @@
 from .config import Config
 from .main import init_app
 
-__version__ = '1.8.1'
+__version__ = '1.8.2'
 __all__ = ['Config', 'init_app']
 logging.getLogger('watchdogs').addHandler(logging.NullHandler())

watchdogs/app.py

+96 -5

@@ -20,12 +20,12 @@
 from . import __version__
 from .config import md5_checker
 from .crawler import crawl_once, find_next_check_time
-from .models import Task, query_tasks, tasks
+from .models import Task, query_feeds, query_tasks, tasks
 from .settings import (Config, get_host_freq_list, refresh_token, release_app,
                        set_host_freq, setup_app)
 from .utils import format_size, gen_rss
 
-description = f"Watchdogs to keep an eye on the world's change.\nRead more: [https://github.com/ClericPy/watchdogs](https://github.com/ClericPy/watchdogs)"
+description = "Watchdogs to keep an eye on the world's change.\nRead more: [https://github.com/ClericPy/watchdogs](https://github.com/ClericPy/watchdogs)"
 app = FastAPI(title="Watchdogs", description=description, version=__version__)
 sub_app.openapi_prefix = '/uniparser'
 app.mount("/uniparser", sub_app)
@@ -113,8 +113,13 @@ async def index(request: Request, tag: str = ''):
     quoted_tag = quote_plus(tag)
     rss_sign = Config.get_sign('/rss', f'tag={quoted_tag}')[1]
     lite_sign = Config.get_sign('/lite', f'tag={quoted_tag}')[1]
+    feeds_sign = Config.get_sign('/feeds', f'tag={quoted_tag}')[1]
+    rss_feeds_sign = Config.get_sign('/rss_feeds', f'tag={quoted_tag}')[1]
     kwargs['rss_url'] = f'/rss?tag={quoted_tag}&sign={rss_sign}'
     kwargs['lite_url'] = f'/lite?tag={quoted_tag}&sign={lite_sign}'
+    kwargs['feeds_url'] = f'/feeds?tag={quoted_tag}&sign={feeds_sign}'
+    kwargs[
+        'rss_feeds_url'] = f'/rss_feeds?tag={quoted_tag}&sign={rss_feeds_sign}'
     init_vars_json = dumps({
         'custom_links': Config.custom_links,
         'callback_workers': Config.callback_handler.workers,
@@ -303,7 +308,7 @@ async def crawler_rule(method: str,
         elif method == 'pop':
             _result = await Config.rule_db.pop_crawler_rule(rule)
         else:
-            raise ValueError(f'method only support add and pop')
+            raise ValueError('method only support add and pop')
         result = {'msg': 'ok', 'result': _result}
     except Exception as e:
         result = {'msg': repr(e)}
@@ -419,7 +424,7 @@ async def rss(request: Request,
     source_link = f'https://{host}'
     xml_data: dict = {
         'channel': {
-            'title': f'Watchdogs',
+            'title': 'Watchdogs',
             'description': f'Watchdog on web change, v{__version__}.',
             'link': source_link,
         },
@@ -505,7 +510,93 @@ async def lite(request: Request,
     else:
         last_page_url = ''
     context['last_page_url'] = last_page_url
-    quoted_tag = quote_plus(tag)
     rss_sign = Config.get_sign('/rss', f'tag={quoted_tag}')[1]
     context['rss_url'] = f'/rss?tag={quoted_tag}&sign={rss_sign}'
     return templates.TemplateResponse("lite.html", context=context)
+
+
+@app.get("/feeds")
+async def feeds(
+        request: Request,
+        tag: str = '',
+        # user: str = '',
+        sign: str = '',
+        page: int = 1,
+        page_size: int = Config.default_page_size,
+):
+    feeds, has_more = await query_feeds(tag=tag, page=page, page_size=page_size)
+    now = datetime.now()
+    _feeds = []
+    current_date = None
+    today = datetime.today().strftime('%Y-%m-%d')
+    for feed in feeds:
+        date = feed['ts_create'].strftime('%Y-%m-%d')
+        if date != current_date:
+            current_date = date
+            if date == today:
+                date += ' [Today]'
+            _feeds.append({'current_date': date})
+        feed['timeago'] = timeago((now - feed['ts_create']).total_seconds(),
+                                  1,
+                                  1,
+                                  short_name=True)
+        _feeds.append(feed)
+    context = {'feeds': _feeds, 'request': request}
+    context['version'] = __version__
+    quoted_tag = quote_plus(tag)
+    if has_more:
+        next_page = page + 1
+        sign = Config.get_sign('/feeds',
+                               f'tag={quoted_tag}&page={next_page}')[1]
+        next_page_url = f'/feeds?tag={quoted_tag}&page={next_page}&sign={sign}'
+    else:
+        next_page_url = ''
+    context['next_page_url'] = next_page_url
+    if page > 1:
+        last_page = page - 1
+        sign = Config.get_sign('/feeds',
+                               f'tag={quoted_tag}&page={last_page}')[1]
+        last_page_url = f'/feeds?tag={quoted_tag}&page={last_page}&sign={sign}'
+    else:
+        last_page_url = ''
+    context['last_page_url'] = last_page_url
+    rss_sign = Config.get_sign('/rss_feeds', f'tag={quoted_tag}')[1]
+    context['rss_url'] = f'/rss_feeds?tag={quoted_tag}&sign={rss_sign}'
+    return templates.TemplateResponse("feeds.html", context=context)
+
+
+@app.get("/rss_feeds")
+async def rss_feeds(request: Request,
+                    tag: str = '',
+                    sign: str = '',
+                    host: str = Header('', alias='Host')):
+    feeds, _ = await query_feeds(tag=tag)
+    source_link = f'https://{host}'
+    xml_data: dict = {
+        'channel': {
+            'title': 'Watchdogs Timeline',
+            'description': f'Watchdog on web change, v{__version__}.',
+            'link': source_link,
+        },
+        'items': []
+    }
+    for feed in feeds:
+        pubDate: str = feed['ts_create'].strftime(
+            format='%a, %d %b %Y %H:%M:%S')
+        link: str = feed['url']
+        description: str = feed['text']
+        title: str = f'{feed["name"]}#{description[:80]}'
+        item: dict = {
+            'title': title,
+            'link': link,
+            'guid': str(feed['id']),
+            'description': description,
+            'pubDate': pubDate
+        }
+        xml_data['items'].append(item)
+    xml: str = gen_rss(xml_data)
+    response = Response(
+        content=xml,
+        media_type="application/xml",
+        headers={'Content-Type': 'application/xml; charset="utf-8"'})
+    return response

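A hypothetical client-side sketch of the two new endpoints added above. The base URL and the sign value are placeholders, not real values; valid signatures come from the pre-signed feeds_url and rss_feeds_url links that the index page now embeds.

# Hypothetical usage sketch; the endpoint paths and query pairs follow the
# handlers above, while host, port, and the sign value are illustrative only.
import requests

base = 'http://127.0.0.1:9901'   # assumed local instance
sign = '<sign-from-index-page>'  # placeholder, not a real signature

html = requests.get(f'{base}/feeds', params={'tag': '', 'page': 1, 'sign': sign})
rss = requests.get(f'{base}/rss_feeds', params={'tag': '', 'sign': sign})
print(rss.headers.get('Content-Type'))  # application/xml; charset="utf-8"
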
watchdogs/config.py

+3

@@ -60,6 +60,7 @@ def get_sign(path, query):
             given_sign = value
         else:
             query_list.append(f'{key}={value}')
+    query_list.sort()
     valid_sign = md5(f'{path}?{"&".join(query_list)}')
     return given_sign, valid_sign

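Why the new query_list.sort() matters: the signature is an md5 over the reassembled query string, so without a canonical parameter order, tag=x&page=2 and page=2&tag=x would sign differently for the same request. A minimal self-contained stand-in for get_sign (plain hashlib.md5 here; the project's own md5 helper presumably also mixes in a configured salt):

# Simplified stand-in for get_sign, showing only the ordering fix.
from hashlib import md5

def get_sign(path: str, query: str):
    given_sign, query_list = '', []
    for pair in query.split('&'):
        if not pair:
            continue
        key, _, value = pair.partition('=')
        if key == 'sign':
            given_sign = value  # signature supplied by the caller
        else:
            query_list.append(f'{key}={value}')
    query_list.sort()  # the 1.8.2 fix: canonical parameter order
    valid_sign = md5(f'{path}?{"&".join(query_list)}'.encode()).hexdigest()
    return given_sign, valid_sign

# Same valid_sign regardless of how the caller orders the parameters:
assert get_sign('/feeds', 'tag=x&page=2')[1] == get_sign('/feeds', 'page=2&tag=x')[1]
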
@@ -160,7 +161,9 @@ class Config:
         'dispatch': auth_checker
     }]
     md5_cache_maxsize = 128
+    query_task_ids_cache_maxsize = 128
     query_tasks_cache_maxsize = 128
+    query_feeds_cache_maxsize = 128
     metas_cache_maxsize = 128
     sign_cache_maxsize = 128
     _md5 = _md5

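The new query_feeds_cache_maxsize knob pairs with the query_feeds.cache_clear() call in watchdogs/crawler.py below: query_feeds is evidently wrapped in an LRU-style cache, so rows inserted by save_feeds must be evicted explicitly or stale pages would be served. A minimal synchronous analogue with functools (the project's actual cache wrapper is not part of this diff):

from functools import lru_cache

calls = 0

@lru_cache(maxsize=128)  # cf. Config.query_feeds_cache_maxsize
def query_feeds_cached(tag: str = ''):
    global calls
    calls += 1
    return f'rows for tag={tag!r}'  # stand-in for the real DB query

query_feeds_cached('news')
query_feeds_cached('news')
assert calls == 1                 # second call served from the cache
query_feeds_cached.cache_clear()  # what save_feeds does after inserting
query_feeds_cached('news')
assert calls == 2                 # cleared cache forces a fresh query
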
watchdogs/crawler.py

+29 -1

@@ -3,13 +3,14 @@
 from asyncio import ensure_future, wait
 from datetime import datetime, timedelta
 from json import JSONDecodeError, dumps, loads
+from traceback import format_exc
 from typing import Optional, Tuple
 
 from torequests.utils import timeago, ttime
 from uniparser import Crawler, RuleNotFoundError
 
 from .config import Config
-from .models import Database, Task, query_tasks, tasks
+from .models import Database, Task, query_feeds, query_tasks, tasks
 from .utils import check_work_time, get_watchdog_result, solo, try_catch
 
 

@@ -260,6 +261,7 @@ async def _crawl_once(task_name: Optional[str] = None, chunk_size: int = 20):
         )
         for task in changed_tasks:
             ensure_future(try_catch(Config.callback_handler.callback, task))
+        await save_feeds(changed_tasks, db)
     else:
         logger.info(f'Crawl task_name={task_name} finished. 0 todo.')
     if CLEAR_CACHE_NEEDED:
@@ -279,3 +281,29 @@ async def crawl_once(task_name: Optional[str] = None):
     with solo:
         result = await try_catch(_crawl_once, task_name)
     return result
+
+
+async def save_feeds(tasks, db):
+    if not tasks:
+        return
+    try:
+        values = []
+        for task in tasks:
+            latest_result = loads(
+                task.latest_result) if task.latest_result else {}
+            text = latest_result.get('text') or latest_result.get('title') or ''
+            value = {
+                'task_id': task.task_id,
+                'name': task.name,
+                'text': text,
+                'url': latest_result.get('url') or task.origin_url,
+                'ts_create': datetime.now(),
+            }
+            values.append(value)
+        query = "INSERT INTO feeds (`task_id`, `name`, `text`, `url`, `ts_create`) values (:task_id, :name, :text, :url, :ts_create)"
+        result = await db.execute_many(query=query, values=values)
+        Config.logger.info(f'Insert task logs success: ({len(values)})')
+        query_feeds.cache_clear()
+        return result
+    except Exception:
+        Config.logger.error(f'Inserting task logs failed: {format_exc()}')

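The INSERT statement in save_feeds, plus the feed['id'] used as the RSS guid in /rss_feeds, implies a feeds table roughly like the sketch below. This is inferred, not the actual models.py definition; the column types and lengths are assumptions.

# Inferred sketch of the feeds table; types and lengths are guesses.
import sqlalchemy

metadata = sqlalchemy.MetaData()
feeds = sqlalchemy.Table(
    'feeds',
    metadata,
    sqlalchemy.Column('id', sqlalchemy.Integer, primary_key=True),  # RSS guid
    sqlalchemy.Column('task_id', sqlalchemy.Integer, index=True),
    sqlalchemy.Column('name', sqlalchemy.String(128)),
    sqlalchemy.Column('text', sqlalchemy.Text),
    sqlalchemy.Column('url', sqlalchemy.String(1024)),
    sqlalchemy.Column('ts_create', sqlalchemy.DateTime, index=True),  # date grouping in /feeds
)
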
0 commit comments
