This repository has been archived by the owner on Jul 26, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 53
/
Pocket.recipe
225 lines (205 loc) · 8.81 KB
/
Pocket.recipe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
"""
Pocket Calibre Recipe v1.4
"""
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from string import Template
import json
import operator
import re
import tempfile
import urllib
import urllib2
__license__ = 'GPL v3'
__copyright__ = '''
2010, Darko Miletic <darko.miletic at gmail.com>
2011, Przemyslaw Kryger <pkryger at gmail.com>
2012-2013, tBunnyMan <Wag That Tail At Me dot com>
'''
class Pocket(BasicNewsRecipe):
title = 'Pocket'
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
description = '''Personalized news feeds. Go to getpocket.com to setup up
your news. This version displays pages of articles from
oldest to newest, with max & minimum counts, and marks
articles read after downloading.'''
publisher = 'getpocket.com'
category = 'news, custom'
#Settings people change
oldest_article = 7.0
max_articles_per_feed = 50
minimum_articles = 10
mark_as_read_after_dl = True # Set this to False for testing
sort_method = 'oldest' # MUST be either 'oldest' or 'newest'
# To filter by tag this needs to be a single tag in quotes; IE 'calibre'
only_pull_tag = None
#You don't want to change anything under
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
articles_are_obfuscated = True
apikey = '19eg0e47pbT32z4793Tf021k99Afl889'
index_url = u'https://getpocket.com'
read_api_url = index_url + u'/v3/get'
modify_api_url = index_url + u'/v3/send'
legacy_login_url = index_url + u'/l' # We use this to cheat oAuth
articles = []
def get_browser(self, *args, **kwargs):
"""
We need to pretend to be a recent version of safari for the mac to
prevent User-Agent checks Pocket api requires username and password so
fail loudly if it's missing from the config.
"""
br = BasicNewsRecipe.get_browser(self,
user_agent='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; \
en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) \
Version/5.0.3 Safari/533.19.4')
if self.username is not None and self.password is not None:
br.open(self.legacy_login_url)
br.select_form(nr=0)
br['feed_id'] = self.username
br['password'] = self.password
br.submit()
else:
self.user_error("This Recipe requires authentication")
return br
def get_auth_uri(self):
"""Quick function to return the authentication part of the url"""
uri = ""
uri = u'{0}&apikey={1!s}'.format(uri, self.apikey)
if self.username is None or self.password is None:
self.user_error("Username or password is blank.")
else:
uri = u'{0}&username={1!s}'.format(uri, self.username)
uri = u'{0}&password={1!s}'.format(uri, self.password)
return uri
def get_pull_articles_uri(self):
uri = ""
uri = u'{0}&state={1}'.format(uri, u'unread')
uri = u'{0}&contentType={1}'.format(uri, u'article')
uri = u'{0}&sort={1}'.format(uri, self.sort_method)
uri = u'{0}&count={1!s}'.format(uri, self.max_articles_per_feed)
if self.only_pull_tag is not None:
uri = u'{0}&tag={1}'.format(uri, self.only_pull_tag)
return uri
def parse_index(self):
pocket_feed = []
fetch_url = u"{0}?{1}{2}".format(
self.read_api_url,
self.get_auth_uri(),
self.get_pull_articles_uri()
)
try:
request = urllib2.Request(fetch_url)
response = urllib2.urlopen(request)
pocket_feed = json.load(response)['list']
except urllib2.HTTPError as e:
self.log.exception("Pocket returned an error: {0}".format(e.info()))
return []
except urllib2.URLError as e:
self.log.exception("Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url))
return []
if len(pocket_feed) < self.minimum_articles:
self.mark_as_read_after_dl = False
self.user_error("Only {0} articles retrieved, minimum_articles not reached".format(len(pocket_feed)))
for pocket_article in pocket_feed.iteritems():
self.articles.append({
'item_id': pocket_article[0],
'title': pocket_article[1]['resolved_title'],
'date': pocket_article[1]['time_updated'],
'url': u'{0}/a/read/{1}'.format(self.index_url, pocket_article[0]),
'real_url': pocket_article[1]['resolved_url'],
'description': pocket_article[1]['excerpt'],
'sort': pocket_article[1]['sort_id']
})
self.articles = sorted(self.articles, key=operator.itemgetter('sort'))
return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)]
def get_textview(self, url):
"""
Since Pocket's v3 API they removed access to textview. They also
redesigned their page to make it much harder to scrape their textview.
We need to pull the article, retrieve the formcheck id, then use it
to querty for the json version
This function will break when pocket hates us
"""
ajax_url = self.index_url + u'/a/x/getArticle.php'
soup = self.index_to_soup(url)
fc_tag = soup.find('script', text=re.compile("formCheck"))
fc_id = re.search(r"formCheck = \'([\d\w]+)\';", fc_tag).group(1)
article_id = url.split("/")[-1]
data = urllib.urlencode({'itemId': article_id, 'formCheck': fc_id})
try:
response = self.browser.open(ajax_url, data)
except urllib2.HTTPError as e:
self.log.exception("unable to get textview {0}".format(e.info()))
raise e
return json.load(response)['article']
def get_obfuscated_article(self, url):
"""
Our get_textview returns parsed json so prettify it to something well
parsed by calibre.
"""
article = self.get_textview(url)
template = Template('<h1>$title</h1><div class="body">$body</div>')
with tempfile.NamedTemporaryFile(delete=False) as tf:
tmpbody = article['article']
for img in article['images']:
imgdiv = '<div id="RIL_IMG_{0}" class="RIL_IMG"></div>'.format(article['images'][img]['image_id'])
imgtag = '<img src="{0}" \>'.format(article['images'][img]['src'])
tmpbody = tmpbody.replace(imgdiv, imgtag)
tf.write(template.safe_substitute(
title=article['title'],
body=tmpbody
))
return tf.name
def mark_as_read(self, mark_list):
actions_list = []
for article_id in mark_list:
actions_list.append({
'action': 'archive',
'item_id': article_id
})
mark_read_url = u'{0}?actions={1}{2}'.format(
self.modify_api_url,
json.dumps(actions_list, separators=(',', ':')),
self.get_auth_uri()
)
try:
request = urllib2.Request(mark_read_url)
response = urllib2.urlopen(request)
except urllib2.HTTPError as e:
self.log.exception('Pocket returned an error while archiving articles: {0}'.format(e))
return []
except urllib2.URLError as e:
self.log.exception("Unable to connect to getpocket.com's modify api: {0}".format(e))
return []
def cleanup(self):
if self.mark_as_read_after_dl:
self.mark_as_read([x['item_id'] for x in self.articles])
else:
pass
def default_cover(self, cover_file):
"""
Create a generic cover for recipes that don't have a cover
This override adds time to the cover
"""
try:
from calibre.ebooks import calibre_cover
title = self.title if isinstance(self.title, unicode) else \
self.title.decode('utf-8', 'replace')
date = strftime(self.timefmt)
time = strftime('[%I:%M %p]')
img_data = calibre_cover(title, date, time)
cover_file.write(img_data)
cover_file.flush()
except:
self.log.exception('Failed to generate default cover')
return False
return True
def user_error(self, error_message):
if hasattr(self, 'abort_recipe_processing'):
self.abort_recipe_processing(error_message)
else:
self.log.exception(error_message)
raise RuntimeError(error_message)
# vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4