-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.py
58 lines (50 loc) · 1.9 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import requests
import json
from pprint import pprint
import re
import sys
def decode(s):
    """Return *s* with two escaped-quote sequences collapsed.

    Two literal substitutions are applied, in this order:

    1. a backslash followed by an apostrophe becomes a bare apostrophe;
    2. two backslashes followed by a double quote become one backslash
       followed by a double quote.

    Everything else in *s* is returned unchanged.

    NOTE(review): presumably this undoes the extra escaping that appears
    when a ``bytes`` object is turned into text with ``str()`` -- confirm
    against the callers.
    """
    apostrophes_restored = s.replace("\\'", "'")
    quotes_restored = apostrophes_restored.replace("\\\\\"", "\\\"")
    return quotes_restored
def scrapeInstagramData(username):
    """Fetch an Instagram profile page and read follower count and verified flag.

    The function looks for a ``<script>`` tag that assigns a JSON document
    to ``window._sharedData`` and reads the two values from
    ``entry_data -> ProfilePage[0] -> graphql -> user``.

    Args:
        username: Account name; appended to ``https://www.instagram.com/``.

    Returns:
        A ``(follower_count, is_verified)`` tuple on success. ``None`` when
        the page does not contain the expected script tag or the embedded
        JSON does not have the expected structure; in that case a message
        is printed to stderr.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            times out.
    """
    url = "https://www.instagram.com/" + username
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, timeout=10)
    # Use the decoded text. str(response.content) would be the *repr* of a
    # bytes object ("b'...'" with backslash escapes), which corrupts any
    # escaped or non-ASCII characters in the embedded JSON.
    html = response.text

    prefix = '<script type="text/javascript">window._sharedData = '
    suffix = ";</script>"
    # Escape the literal parts so "." and friends are not regex wildcards;
    # DOTALL lets the JSON span several lines.
    pattern = re.escape(prefix) + "(.*?)" + re.escape(suffix)
    match = re.search(pattern, html, re.DOTALL)
    if match is None:
        print("No data found for", username, file=sys.stderr)
        return None

    try:
        shared_data = json.loads(match[1])
        user = shared_data["entry_data"]["ProfilePage"][0]["graphql"]["user"]
        return user["edge_followed_by"]["count"], user["is_verified"]
    except (ValueError, KeyError, IndexError, TypeError):
        # Malformed JSON or a page without the profile structure (for
        # example a login wall) is reported like a missing script tag.
        print("No data found for", username, file=sys.stderr)
        return None
def scrapeFacebookData(username):
    """Fetch a Facebook page and read its like count and verified flag.

    Both values are found by searching the page text for German UI
    strings ("Gefällt ... Mal" and "Das blaue Verifizierungsabzeichen").

    NOTE(review): this only works when Facebook serves the German page;
    confirm how the locale is selected for these requests.

    Args:
        username: Page name; appended to ``https://www.facebook.com/``.

    Returns:
        A ``(like_count, verified)`` tuple. ``like_count`` is ``0`` (and a
        message is printed to stderr) when the count is not found.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            times out.
    """
    url = "https://www.facebook.com/" + username
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, timeout=10)
    # Search the decoded text. str(response.content) is the repr of a bytes
    # object, in which "ä" shows up as the escape text "\xc3\xa4", so the
    # pattern below could never match it.
    html = response.text

    verified = "Das blaue Verifizierungsabzeichen" in html
    # Require a leading digit so a run of dots alone cannot be captured
    # (which would make int() fail on an empty string).
    match = re.search(r"Gefällt (\d[\d\.]*) Mal", html)
    if match:
        # "." is the thousands separator in the German number format.
        return int(match[1].replace(".", "")), verified

    print("No data found for", username, file=sys.stderr)
    return 0, verified
def scrapeTwitterData(username):
    """Fetch a Twitter profile page and read follower count and verified flag.

    The follower count is taken from a ``title="<number> Follower"``
    attribute; the account counts as verified when the text
    ``ProfileHeaderCard-badges`` occurs anywhere in the page.

    NOTE(review): both markers depend on the markup and UI language
    Twitter serves; confirm they still match the current page.

    Args:
        username: Account name; appended to ``https://www.twitter.com/``.

    Returns:
        A ``(follower_count, verified)`` tuple. ``follower_count`` is ``0``
        (and a message is printed to stderr) when the count is not found.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            times out.
    """
    url = "https://www.twitter.com/" + username
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, timeout=10)
    # Work on the decoded text rather than str(response.content), which is
    # the repr of a bytes object ("b'...'" with backslash escapes).
    html = response.text

    verified = "ProfileHeaderCard-badges" in html
    # Require a leading digit so a run of dots alone cannot be captured
    # (which would make int() fail on an empty string).
    match = re.search(r' title="(\d[\d\.]*) Follower"', html)
    if match:
        # "." is used as the thousands separator in the matched number.
        return int(match[1].replace(".", "")), verified

    print("No data found for", username, file=sys.stderr)
    return 0, verified
if __name__ == '__main__':
    # Manual smoke run: query one hard-coded account per platform and
    # print whatever each scraper returns.
    for scrape, account in (
        (scrapeFacebookData, "B90DieGruenen"),
        (scrapeTwitterData, "Die_Gruenen"),
        (scrapeInstagramData, "die_gruenen"),
    ):
        print(scrape(account))