forked from miaowm5/RSSGen
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspider.py
103 lines (90 loc) · 3.08 KB
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import leancloud, auth
from datetime import datetime, timedelta
from leancloud import Object, Query, LeanCloudError
from lib import PyRSS2Gen
import recipe
# LeanCloud object classes used by this module, each created via Object.extend.
# Feed ('FeedItem'): written by save()/force_save(), read by get_all_feed()
# and pruned by clear().
Feed = Object.extend('FeedItem')
# TestFeed ('TestFeedItem'): the class test_save() writes to instead of Feed.
TestFeed = Object.extend('TestFeedItem')
# FeedInfo ('FeedInfo'): looked up (or created) by name in get_info().
FeedInfo = Object.extend('FeedInfo')
# DebugLog ('DebugLog'): save_rss() stores a recipe's log entries here.
DebugLog = Object.extend('DebugLog')
def get_info(name):
    """Return the FeedInfo record for a feed, creating a new one if needed.

    Args:
        name: Value matched against the ``name`` field of ``FeedInfo``.

    Returns:
        The first ``FeedInfo`` object whose ``name`` equals *name*. When the
        lookup fails with LeanCloud error code 101, a new (unsaved)
        ``FeedInfo`` with its ``name`` field set is returned instead.

    Raises:
        LeanCloudError: Re-raised unchanged for any error code other than 101.
    """
    query = Query(FeedInfo).equal_to('name', name)
    try:
        return query.first()
    except LeanCloudError as e:
        # NOTE(review): 101 is presumably LeanCloud's "object/class not
        # found" code -- confirm against the SDK documentation.
        if e.code != 101:
            # A bare raise keeps the original traceback intact.
            raise
    info = FeedInfo()
    info.set('name', name)
    return info
def save_data(data):
    """Save *data*, reporting LeanCloud errors instead of raising them.

    Args:
        data: Any object exposing a ``save()`` method.

    Returns:
        None. A ``LeanCloudError`` raised by ``save()`` is printed and
        swallowed (best-effort save); other exceptions propagate.
    """
    try:
        data.save()
    except LeanCloudError as e:
        # print() with a single argument behaves the same on Python 2 and 3.
        print("Save feed error: %s" % str(e))
def set_feed_data(item, name, data):
    """Build one record of class *item* from a scraped entry and save it.

    Args:
        item: Callable (record class) invoked with no arguments to create the
            record.
        name: Stored in the record's ``name`` field.
        data: Indexable entry; positions 0-3 are stored as ``title``,
            ``time``, ``link`` and ``content`` respectively.
    """
    record = item()
    record.set('name', name)
    # Field order mirrors the positional layout of ``data``.
    for index, field in enumerate(('title', 'time', 'link', 'content')):
        record.set(field, data[index])
    save_data(record)
def save_rss(name, recipe, item):
    """Run one recipe and store every entry it yields.

    Args:
        name: Feed name; used for the info lookup, stored on every record and
            printed in the progress messages.
        recipe: Callable invoked as ``recipe(info=info)``; the result must
            provide ``get_item()`` and a ``log`` attribute.
        item: Record class handed to ``set_feed_data`` for each entry.

    The feed info is saved only when at least one entry was stored. If the
    recipe's ``log`` is non-empty, its (key, value) pairs are written to a new
    ``DebugLog`` record.
    """
    info = get_info(name)
    rss = recipe(info=info)
    print('Spider %s' % name)

    # ``count`` stays 0 when the recipe yields nothing.
    count = 0
    for count, entry in enumerate(rss.get_item(), 1):
        set_feed_data(item, name, entry)
    print('Spider over, add %s new feed' % count)

    if count > 0:
        save_data(info)

    if len(rss.log) == 0:
        return

    debug_record = DebugLog()
    debug_record.set('name', name)
    for log_key, log_value in rss.log:
        debug_record.set(str(log_key), log_value)
    save_data(debug_record)
def save():
    """Run ``save_rss`` into ``Feed`` for every recipe from ``rss_list()``.

    A failure in one recipe is printed and does not stop the remaining ones.
    """
    for feed_recipe in rss_list():
        try:
            save_rss(feed_recipe.name, feed_recipe, Feed)
        except Exception as error:
            failure = (feed_recipe.name, str(error))
            print('save %s fail : %s' % failure)
def force_save(feed_name=None):
    """Run ``save_rss`` into ``Feed`` for all recipes, hidden ones included.

    Args:
        feed_name: When truthy, only the recipe whose ``name`` equals it is
            processed; otherwise every recipe is.

    A failure in one recipe is printed and does not stop the remaining ones.
    """
    for feed_recipe in rss_list(all_feed=True):
        name = feed_recipe.name
        skip = feed_name and feed_name != name
        if skip:
            continue
        try:
            save_rss(feed_recipe.name, feed_recipe, Feed)
        except Exception as error:
            failure = (feed_recipe.name, str(error))
            print('save %s fail : %s' % failure)
def test_save(feed_name=None):
    """Run ``save_rss`` into ``TestFeed`` for all recipes, hidden ones included.

    Args:
        feed_name: When truthy, only the recipe whose ``name`` equals it is
            processed; otherwise every recipe is.

    Unlike ``save``/``force_save``, exceptions are not caught here and
    propagate to the caller.
    """
    for feed_recipe in rss_list(all_feed=True):
        name = feed_recipe.name
        skip = feed_name and feed_name != name
        if skip:
            continue
        save_rss(name, feed_recipe, TestFeed)
def rss_list(all_feed=False):
    """Return the set of recipes to process.

    Args:
        all_feed: When true, return every entry of ``recipe.recipe_list``;
            otherwise leave out the entries listed in ``recipe.hide_list``.

    Returns:
        A ``set`` of recipe objects.
    """
    recipes = set(recipe.recipe_list)
    if all_feed:
        return recipes
    # Set difference rather than symmetric difference (``^``): a hide_list
    # entry that is not in recipe_list must not be added to the result.
    return recipes - set(recipe.hide_list)
def get_all_feed(name):
    """Return the stored ``Feed`` records for *name*, ordered by ``time`` descending.

    Args:
        name: Value matched against the ``name`` field of ``Feed``.

    Returns:
        Whatever ``Query.find()`` returns for the filtered, ordered query.
    """
    feed_query = Query(Feed)
    feed_query.equal_to('name', name).descending("time")
    records = feed_query.find()
    return records
def show(name):
    """Render the stored entries of feed *name* as an RSS 2.0 XML document.

    Args:
        name: Feed name; used as the RSS title and to select stored entries.

    Returns:
        The result of ``RSS2.to_xml(encoding='utf-8')``.
    """
    feed = PyRSS2Gen.RSS2(
        title=name,
        link="https://github.com/miaowm5",
        description="RSSGen By Miaowm5",
    )
    for entry in get_all_feed(name):
        title = entry.get('title')
        stamp = entry.get('time')
        # Rebuild the timestamp as a naive datetime from the first six fields
        # (year..second) of its UTC time tuple.
        published = datetime(*stamp.utctimetuple()[:6])
        link = entry.get('link')
        content = entry.get('content')
        feed.items.append(
            PyRSS2Gen.RSSItem(
                title=title,
                pubDate=published,
                link=link,
                description=content,
            )
        )
    return feed.to_xml(encoding='utf-8')
def clear():
    """Delete stored ``Feed`` entries older than each recipe's retention period.

    For every recipe from ``rss_list()`` (hidden recipes excluded), entries
    whose ``time`` is earlier than ``now - recipe.oldest`` days are printed
    and destroyed.
    """
    for r in rss_list():
        name = r.name
        # NOTE(review): datetime.now() is naive local time while show() treats
        # stored times via utctimetuple() -- confirm the comparison is intended.
        oldest = datetime.now() - timedelta(days=r.oldest)
        query = Query(Feed)
        query.equal_to('name', name).less_than("time", oldest)
        # NOTE(review): find() may return only a limited page of results, so a
        # single call might not remove every expired entry -- confirm.
        for e in query.find():
            print('delete old feed: %s (%s)' % (e.get('title').encode('utf-8'), e.get('time')))
            e.destroy()