Skip to content

Commit a6723d4

Browse files
committed
First commit
0 parents  commit a6723d4

File tree

1,001 files changed

+1582
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,001 files changed

+1582
-0
lines changed

.gitignore

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
__pycache__/
2+
*.py[cod]
3+
4+
*.mp4
5+
*.json
6+
*.jpg
7+
8+
crawler/migrate
9+
crawler/tests
10+
config.py

README.md

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Peristop - Simple Periscope archiver
2+
3+
This application will retrieve or record the most popular Periscope broadcasts on a regular basis, and make these accessible through a handy Flask web interface.
4+
5+
Popular broadcasts are recorded live as soon as they reach a viewer threshold for the last 24h, and fully retrieved if a replay is available. Chat is recorded too. Video chunk concatenation and thumbnail generation are done using ffmpeg.
6+
7+
Dependencies: Python ≥ 3.5, Flask, Python-Requests, AIOHTTP, PIL, Python-Websocket, Nginx, ffmpeg.
8+
9+
## Usage
10+
11+
You need a MySQL server running. Database schema is created through `crawler/scheme.sql`.
12+
13+
mysql -u username -p < crawler/scheme.sql
14+
15+
Fill in a `config.py` file, based on `config.sample.py`.
16+
17+
Run these two scripts in separate terminal tabs:
18+
19+
```
20+
$ cd crawler
21+
$ ./peristopd.py
22+
```
23+
24+
```
25+
$ cd webapp
26+
$ ./run.sh
27+
```
28+
29+
Then, launch nginx using the `nginx.conf` configuration file.
30+
31+
```
32+
$ sudo nginx -c nginx.conf
33+
```
34+
35+
You can then access the Peristop web interface at [http://localhost:80/](http://localhost:80/) (if you didn't change the port).

config.sample.py

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/python3
2+
3+
# MySQL account (placeholders: replace with real values in config.py).
USER = "your mysql user"
PASSWORD = "your mysql password"

# Uppercase two-letter ISO code, or no code for recording worldwide.
COUNTRY = "FR"

# Periscope cookie (base64); crawler/api.py sends it with authenticated calls.
COOKIE = "your usual base64 Periscope cookie"

crawler/api.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/python3
2+
#-*- encoding: Utf-8 -*-
3+
from traceback import format_exc
4+
from requests import get, post
5+
from json import loads
6+
from time import sleep
7+
8+
from os.path import dirname, realpath
9+
__import__('sys').path.append(dirname(realpath(__file__)) + '/..')
10+
from config import COOKIE
11+
12+
# Headers sent with the cookie-authenticated POST requests made by call().
headers = {
    "User-Agent": "tv.periscope.android/1.3.5 (1900208)",
    "package": "tv.periscope.android",
    "build": "37aaa50",
    "locale": "fr",
    "install_id": "1915a41ecaa5f41c-tv.periscope.android",
    "os": "5.1.1/22/LMY48T",
}

# Browser-like User-Agent sent with the GET requests to "Public" endpoints.
headers2 = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
}

# NOTE(review): hard-coded cookie value. call() sends config.COOKIE, not this
# name, so it looks unused in this module -- confirm before removing it.
cookie = 'WGkGYTFQWEVkd1JCeXJqZXZo5aBR3R3o_PzVqgZs7niLEeb_6jYo0XjaOhe-D9Fhag=='  # 1PXEdwRByrjevh
22+
23+
def call(endpoint, params=None):
    """Call a Periscope API v2 endpoint and return the decoded JSON reply.

    Endpoints whose name does not contain 'Public' are POSTed with the
    configured COOKIE merged into the JSON body; the others are fetched
    with a GET request and the browser-like headers.

    A reply that does not look like JSON (empty, or not starting with '['
    or '{') is retried indefinitely, sleeping 2 s at first and doubling up
    to a 30 s cap. The one exception is a body equal to "not found"
    (case-insensitive, surrounding whitespace ignored), for which an empty
    dict is returned. An OSError raised while performing the request is
    printed and the request retried after 5 s.

    endpoint -- name appended to https://api.periscope.tv/api/v2/
    params   -- mapping of request parameters (JSON body for POST, query
                string for GET); None means no parameters
    """
    # Build a fresh dict per call instead of sharing one mutable default.
    if params is None:
        params = {}

    delay_429 = 2

    while True:
        try:
            if 'Public' not in endpoint:
                resp = post('https://api.periscope.tv/api/v2/' + endpoint, json={
                    **params,
                    'cookie': COOKIE
                }, headers=headers, timeout=20)
            else:
                resp = get('https://api.periscope.tv/api/v2/' + endpoint, params,
                           timeout=20, headers=headers2)

            resp.encoding = 'utf-8'
            if not resp.text or resp.text[0] not in '[{':
                if resp.text.lower().strip() == 'not found':
                    return {}

                print('[!] %s: Failed with "%s" (%d), retrying in %d...' % (endpoint, repr(resp.text), resp.status_code, delay_429))
                sleep(delay_429)

                delay_429 = min(delay_429 * 2, 30)
            else:
                break

        except OSError:  # name or service not known?
            print(format_exc())
            sleep(5)

    return loads(resp.text)
54+
55+
def decodeUnk(str_):
    """Try to undo a wrong-charset decoding of a UTF-8 string.

    The text is re-encoded with each candidate codec in turn and the
    resulting bytes are decoded as UTF-8; the first candidate for which
    both steps succeed gives the result. Falsy input, and text that no
    candidate can convert, is returned unchanged.
    """
    if not str_:
        return str_

    for codec in ('iso-8859-2', 'cp1252', 'sjis_2004'):
        try:
            return str_.encode(codec).decode('utf8')
        except (UnicodeEncodeError, UnicodeDecodeError):
            continue

    return str_

crawler/chat.py

+257
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
#!/usr/bin/python3
2+
#-*- encoding: Utf-8 -*-
3+
from websocket._exceptions import WebSocketConnectionClosedException
4+
from collections import OrderedDict, Counter
5+
from json import loads, load, dump, dumps
6+
from websocket import WebSocketApp
7+
from traceback import format_exc
8+
from unidecode import unidecode
9+
from subprocess import Popen
10+
from threading import Timer
11+
from h264 import H264Reader
12+
from time import time
13+
from re import sub
14+
from api import *
15+
16+
from logging import getLogger, DEBUG
17+
getLogger('websocket').setLevel(DEBUG)
18+
19+
import ssl
20+
ssl_defaults = ssl.get_default_verify_paths()
21+
sslopt_ca_certs = {'ca_certs': ssl_defaults.cafile}
22+
23+
# Filled by postProcessChat(): broadcast id -> True when one of the filtered
# words is among the most frequent words of that broadcast's chat.
tofilter = {}

# "kind" values of the websocket message envelope.
CHAT, CONTROL, AUTH = 1, 2, 3

# "type" values found in the body of a chat event. CHAT (1) is also an event
# type and keeps the value assigned above.
HEART = 2
JOIN = 3
LOCATION = 4
BROADCAST_ENDED = 5
INVITE_FOLLOWERS = 6
BROADCAST_STARTED_LOCALLY = 7
BROADCASTER_UPLOADED_REPLAY = 8
TIMESTAMP = 9
LOCAL_PROMPT_TO_FOLLOW_BROADCASTER = 10
LOCAL_PROMPT_TO_SHARE_BROADCAST = 11
BROADCASTER_BLOCKED_VIEWER = 12
SUBSCRIBER_SHARED_ON_TWITTER = 13
SUBSCRIBER_BLOCKED_VIEWER = 14
SUBSCRIBER_SHARED_ON_FACEBOOK = 15
SCREENSHOT = 16
45+
46+
47+
class ChatEngine:
    """Collect the chat events of one broadcast and dump them as JSON.

    When built with an `endpoint`, the constructor connects to the chat
    websocket and blocks in connect() until the socket is closed for good.
    Without an endpoint it only downloads the viewer list; parse() accepts
    messages without a websocket.

    State kept on the instance:
      users -- OrderedDict: user id -> (username, display name, twitter
               name, profile image URL, hearts given). An event refers to
               a user by its position in this mapping.
      chat  -- list of [event type, sender index, timestamp, *event data]
    """

    def __init__(self, bcst, info, endpoint=None, token=None):
        """Start recording broadcast `bcst`.

        bcst     -- broadcast id
        info     -- mapping with 'user_id', 'username', 'user_display_name'
                    and optionally 'twitter_username', 'profile_image_url'
                    of the broadcaster, who is stored as the first user
        endpoint -- chat server base URL; when falsy no websocket is opened
        token    -- access token sent when authenticating on the websocket
        """
        self.bcst = bcst

        self.chat = []

        self.users = OrderedDict()
        self.users[info['user_id']] = (info['username'], info['user_display_name'], info.get('twitter_username'), info.get('profile_image_url'), 0)

        # Connect to websocket
        if endpoint:
            self.endpoint = endpoint
            self.token = token

            print('[[D]] Connecting %s...' % self.bcst)
            self.timeout = None
            self.connect()  # blocks until the socket is closed
            if self.timeout:
                self.timeout.cancel()

        else:
            print('[[D]] Init DL %s...' % self.bcst)
            self.getViewers()

        print('[[D]] Closed ChatEngine thread %s' % self.bcst)

    def getUser(self, userid):
        """Fetch the public profile of `userid` and store it in `users`.

        Returns True when the API reply contained a 'user' entry, False
        otherwise (nothing is stored in that case).
        """
        viewer = call('getUserPublic', {'user_id': userid})

        if 'user' in viewer:
            viewer = viewer['user']

            if viewer.get('profile_image_urls'):
                # Keep the narrowest picture available.
                small_pic = min(viewer['profile_image_urls'], key=lambda i: i['width'])['ssl_url']
            else:
                small_pic = None

            self.users[viewer['id']] = (viewer['username'], viewer['display_name'], viewer['twitter_screen_name'], small_pic, 0)
            return True
        return False

    def getViewers(self):
        """Download the viewer list of the broadcast into `users`.

        Also sets `nbViewersLive` to the number of live viewers when the
        reply lists no replay viewer.
        """
        # Download viewers info

        viewers = call('getBroadcastViewers', {'broadcast_id': self.bcst})

        # call() returns {} when the API answers "not found"; there is then
        # nothing to record, and indexing the reply would raise KeyError.
        if not viewers:
            return

        for viewer in viewers['live'] + viewers['replay']:
            if viewer.get('profile_image_urls'):
                # Keep the narrowest picture available.
                small_pic = min(viewer['profile_image_urls'], key=lambda i: i['width'])['ssl_url']
            else:
                small_pic = None

            self.users[viewer['id']] = (viewer['username'], viewer['display_name'], viewer['twitter_screen_name'], small_pic, viewer['n_hearts_given'])

        if not viewers['replay']:
            self.nbViewersLive = len(viewers['live'])

    def ratamioche(self):
        """Ask the websocket to close (timer callback, also used by parse)."""
        print('[[D]] Asking close %s...' % self.bcst)
        self.ws.close()

    def connect(self):
        """Open the chat websocket and block until it is closed.

        Arms an 80-second timer that closes the socket through
        ratamioche(); close() then decides whether to reconnect or save.
        """
        # Considered ended until authentify() has run on the new socket.
        self.ended = True
        self.hadCtrl = False
        self.reconnectTime = time()

        if self.timeout:
            self.timeout.cancel()

        ws = WebSocketApp(self.endpoint.replace('https:', 'wss:') + '/chatapi/v1/chatnow',
                          on_open=self.authentify,
                          on_message=self.parse,
                          on_error=self.error,
                          on_close=self.close, header={'User-Agent': 'ChatMan/1 (Android) '})

        self.timeout = Timer(80, self.ratamioche)
        self.timeout.daemon = True
        self.timeout.start()

        self.ws = ws
        ws.run_forever(sslopt=sslopt_ca_certs, ping_timeout=90)

    def authentify(self, ws):
        """on_open callback: authenticate and join the broadcast's room."""
        ws.send(dumps({'payload': dumps({'access_token': self.token}), 'kind': AUTH}))
        ws.send(dumps({'payload': dumps({'body': dumps({'room': self.bcst}), 'kind': CHAT}), 'kind': CONTROL}))

        # Only the broadcaster is known so far: fetch the viewer list.
        if len(self.users) <= 1:
            self.getViewers()

        self.ended = False
        print('[[D]] Have logged %s...' % self.bcst)

    def parse(self, ws=None, msg=None):
        """on_message callback: decode one message and append it to `chat`.

        ws  -- websocket the message came from, or None
        msg -- message envelope, either as a JSON string or already decoded

        CONTROL envelopes only set `hadCtrl`. For any other envelope the
        sender is registered in `users` if unknown and a row
        [type, sender index, timestamp, *event data] is appended to `chat`.
        """
        if isinstance(msg, str):
            msg = loads(msg)
        if msg['kind'] == CONTROL:
            self.hadCtrl = True
            return

        msg = loads(msg['payload'])
        body = loads(msg['body'])
        sender = msg['sender']

        if sender['user_id'] not in self.users:
            # Use the data carried by the message when it names the sender,
            # or as a fallback when the profile lookup finds nothing.
            if sender.get('username') or not self.getUser(sender['user_id']):
                self.users[sender['user_id']] = (sender.get('username'), sender.get('display_name'), sender.get('twitter_username'), sender.get('profile_image_url'), 0)

        senderId = list(self.users).index(sender['user_id'])

        # Client timestamps wider than 33 bits are divided by 1000 below.
        iOSorAndroid = bool(body['timestamp'] >> 33)

        tsServer = msg['timestamp'] / 1000000000  # currently unused
        tsClient = body['timestamp'] / 1000 if iOSorAndroid else body['timestamp']
        # NTP 32.32 fixed-point values: scale to seconds, then shift from
        # the 1900 epoch to the 1970 epoch.
        tsLive = (body['ntpForLiveFrame'] / 0x100000000 - 2208988800) if body.get('ntpForLiveFrame') else None
        tsBcster = (body['ntpForBroadcasterFrame'] / 0x100000000 - 2208988800) if body.get('ntpForBroadcasterFrame') else None
        tsOfDisplay = tsBcster or tsLive or tsClient

        evdata = []
        if body['type'] == CHAT:
            evdata = [body['body']]
        elif body['type'] == LOCATION:
            evdata = [body['lat'], body['lng'], body.get('heading')]
        elif body['type'] == INVITE_FOLLOWERS:
            evdata = [body['invited_count']]
        elif body['type'] == BROADCASTER_BLOCKED_VIEWER:
            if body['broadcasterBlockedRemoteID'] not in self.users:
                self.users[body['broadcasterBlockedRemoteID']] = (body['broadcasterBlockedUsername'], None, None, None, 0)

            evdata = [list(self.users).index(body['broadcasterBlockedRemoteID']), body.get('broadcasterBlockedMessageBody')]

        elif body['type'] == BROADCAST_ENDED and ws:
            # Mark as ended first so that close() does not reconnect.
            self.ended = True
            self.ratamioche()

        self.chat.append([body['type'], senderId, tsOfDisplay] + evdata)

    def error(self, ws, error):
        """on_error callback: flag the session as ended.

        A WebSocketConnectionClosedException is ignored, which leaves
        close() free to reconnect.
        """
        if not isinstance(error, WebSocketConnectionClosedException):
            print('[[D]] Errored %s...' % self.bcst)
            self.ended = True

    def close(self, ws=None, *close_info):
        """on_close callback: reconnect, or save what was collected.

        Reconnects when the session was not flagged as ended, a CONTROL
        message was received, and the connection was opened between 10 and
        74 seconds ago. Otherwise the chat is saved, provided at least one
        user besides the broadcaster is known.

        close_info -- close status code and message, which websocket-client
                      passes to on_close; accepted and ignored.
        """
        print('[[D]] Closed %s...' % self.bcst)
        if not self.ended and time() - 74 < self.reconnectTime < time() - 10 and self.hadCtrl:
            self.connect()
        elif len(self.users) > 1:
            print('[[D]] Saving %s...' % self.bcst)
            self.save()

    def save(self):
        """Write users and time-sorted chat to storage/chat/<bcst>.json.

        Schedules postProcessChat() for the broadcast 30 seconds later.
        """
        with open('storage/chat/%s.json' % self.bcst, 'w') as fd:
            dump({
                'users': list(self.users.values()),
                'chat': sorted(self.chat, key=lambda i: i[2]),
                # Falls back to everyone known except the broadcaster.
                'nbViewersLive': getattr(self, 'nbViewersLive', len(self.users) - 1)
            }, fd, ensure_ascii=False, separators=(',', ':'))

        print('[[D]] Dumped %s' % self.bcst)

        tmr = Timer(30, postProcessChat, (self.bcst,))
        tmr.start()
210+
211+
def postProcessChat(bcst, retry=False):
    """Enrich the saved chat of `bcst` and run the bad-word filter on it.

    Adds the 'timestamps' and 'orientations' lists taken from the
    H264Reader metadata to storage/chat/<bcst>.json, then records in
    `tofilter[bcst]` whether one of the filtered words is among the ten
    most frequent words of the chat (common words excluded).

    bcst  -- broadcast id
    retry -- True when this call is itself the retry

    Any exception is printed; unless `retry` is set, one more attempt is
    scheduled 20 minutes later.
    """
    global tofilter

    chat_path = 'storage/chat/%s.json' % bcst

    try:
        # Fails here, before the video is parsed, if the dump is missing
        # or cannot be decoded.
        with open(chat_path) as fd:
            load(fd)

        meta = H264Reader(bcst).meta

        # Read again now that the metadata is available.
        with open(chat_path) as fd:
            chat = load(fd)
        chat['timestamps'] = meta['timestamps'].copy()
        chat['orientations'] = meta['orientations'].copy()

        with open(chat_path, 'w') as fd:
            dump(chat, fd, ensure_ascii=False, separators=(',', ':'))

        # Badword filter (the list is stored reversed in the source)
        bad = set('elppin pin ssa boob xes evelne toh luc krewt ennob elleb notet nies ertnom'[::-1].split(' '))
        common = list('me sa est toi ca pas on t les en ton ou c qui peri un une le elle ta des je vous a de la va tu se ce pour lui il t\'es c\'est et te tes t\'a t\'as'.split(' '))

        # `chat` is exactly what was just written, so it is not re-read.
        words = []
        for msg in chat['chat']:
            if msg[0] == 1:  # CHAT
                text = decodeUnk(msg[3])
                for word in text.split(' '):
                    # ASCII-fold, lowercase, drop .!? and trailing "s".
                    word = sub('[.!?]', '', unidecode(word).lower()).rstrip('s')
                    if word and word[0] != '@':
                        words.append(word)
        counts = Counter(words)

        for j in common:
            counts.pop(j, None)

        # Ascending by (count, word): the last ten are the most frequent.
        ranked = sorted(counts, key=lambda x: (counts[x], x))
        letop = bad.intersection(ranked[-10:])

        tofilter[bcst] = bool(letop)

    except Exception:
        if not retry:
            Timer(20 * 60, postProcessChat, (bcst, True)).start()

        print(format_exc())

0 commit comments

Comments
 (0)