-
Notifications
You must be signed in to change notification settings - Fork 0
/
webscraper.py
93 lines (76 loc) · 2.45 KB
/
webscraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Local modules imports
from algorithms import PERCalculator
# Packages
from bs4 import BeautifulSoup
from functools import reduce
import re
import requests
# Prepare the xml for parsing
def cook_soup(url, data):
    """Fetch a page with a POST request and hand back its parsed markup.

    Args:
        url: Address the form data is posted to.
        data: Payload forwarded unchanged as the ``data`` argument of
            ``requests.post``.

    Returns:
        A ``BeautifulSoup`` tree built from the response text with the
        ``lxml`` parser.

    Side effects:
        Prints the HTTP status code of the response to stdout.
    """
    response = requests.post(url, data=data)
    # The status code is only echoed; the body is parsed whatever its value.
    print(response.status_code)
    markup = response.text
    return BeautifulSoup(markup, "lxml")
# Make a dictionary of {Player : Team}
# Capturing AJAX and parsing
def parse(soup, sibl_end, team=False, name=False):
    """Read one statistic column from a parsed stats table.

    Depending on the arguments this returns the value for one team, for one
    player, or a league-wide figure computed over every team row.

    Args:
        soup: Parsed document. It is called as ``soup("td", class_=...)`` to
            collect the team cells (class ``druzyna``) and the player cells
            (class ``zawodnik``).
        sibl_end: Number of ``next_sibling`` hops from the name cell to the
            cell that holds the wanted statistic.
        team: Text that must occur in the team's ``<strong>`` label. When
            truthy it takes precedence over ``name``.
        name: Text that must occur in the player's ``<strong>`` label. Only
            consulted when ``team`` is falsy.

    Returns:
        float: the team's value, the player's value (a ``mm:ss`` cell is
        converted to fractional minutes), or the league figure rounded to
        two decimals when neither ``team`` nor ``name`` is given.

    Raises:
        LookupError: if no row label contains the requested team or player.
        ValueError: if a cell is neither a number nor ``mm:ss``, or if the
            league figure is requested and there are no team rows.
    """

    def find_column(element):
        # Walk sibl_end siblings to the right of the name cell.
        for _ in range(sibl_end):
            element = element.next_sibling
        return element.string

    def find_row(rows, wanted, kind):
        # Return the first row whose <strong> label contains `wanted`.
        for row in rows:
            strong = row.find("strong")
            label = strong.string if strong is not None else None
            if label and wanted in label:
                return row
        # Falling off the loop used to reuse the last row silently, which
        # returned another team's or player's statistic.
        raise LookupError("%s %r not found in the stats table" % (kind, wanted))

    if team:
        team_row = find_row(soup("td", class_="druzyna"), team, "team")
        return float(find_column(team_row))

    if name:
        player_row = find_row(soup("td", class_="zawodnik"), name, "player")
        raw = find_column(player_row)
        try:
            return float(raw)
        except ValueError:
            # Minutes are published as mm:ss, so convert them to fractional
            # minutes.
            mo = re.search(r"(\d+):(\d+)", raw)
            if mo is None:
                raise
            return float(mo.group(1)) + float(mo.group(2)) / 60

    # League figure: convert every cell up front so one-row tables work too.
    values = [float(find_column(row)) for row in soup("td", class_="druzyna")]
    if not values:
        raise ValueError("no team rows found to compute the league figure")
    # NOTE(review): the divisor is len(values) + 1, kept from the original
    # implementation; confirm whether the extra 1 is intentional.
    average = sum(values) / (len(values) + 1)
    return round(average, 2)
# Unfortunately need to make a very similar function, but for opponent stats
def opponent_parse(soup, sibl_end, team):
    """Compute the figure for one statistic over every team row except ``team``.

    Args:
        soup: Parsed document. It is called as
            ``soup("td", class_="druzyna")`` to collect the team cells.
        sibl_end: Number of ``next_sibling`` hops from the name cell to the
            cell that holds the wanted statistic.
        team: Text identifying the team to leave out. Every row whose
            ``<strong>`` label contains it is skipped.

    Returns:
        float: sum of the remaining rows' values divided by
        ``len(rows) + 1``, rounded to two decimals.

    Raises:
        ValueError: if a cell cannot be converted to ``float`` or if no
            opponent rows remain after filtering.
    """

    def find_column(element):
        # Walk sibl_end siblings to the right of the name cell.
        for _ in range(sibl_end):
            element = element.next_sibling
        return element.string

    values = []
    for row in soup("td", class_="druzyna"):
        label = row.find("strong").string
        if team in label:
            continue
        # Convert here so a single remaining row is still a number; folding
        # the raw strings left a lone value unconverted.
        values.append(float(find_column(row)))

    if not values:
        raise ValueError("no opponent rows found for team %r" % (team,))
    # NOTE(review): the divisor is len(values) + 1, kept from the original
    # implementation; confirm whether the extra 1 is intentional.
    average = sum(values) / (len(values) + 1)
    return round(average, 2)