-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbaseball_reference.py
682 lines (546 loc) · 28.1 KB
/
baseball_reference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
"""
baseball_reference.py
Module used for scraping data from baseball-reference.com
"""
import datetime
import bs4
from beautiful_soup_helper import *
from datetime import date, timedelta
# Root of the Baseball Reference site; the URL builders in this module prepend it to page paths.
BASE_URL = "http://www.baseball-reference.com"
# Stat column labels used as the keys of the zeroed hitter stats dict and summed when
# combining regular-season and postseason career hitting stats.
HITTER_RELEVANT_STAT_KEYS = ["G", "PA", "AB", "R", "H", "2B", "3B", "HR", "RBI", "SB", "CS", "BB", "SO", "TB",
                             "GDP", "HBP", "SH", "SF", "IBB"]
class PlayerIdentifier(object):
    """Bundle of a player's name, Baseball Reference ID and team abbreviation."""

    def __init__(self, name: str, baseball_reference_id: str, team_abbrev: str):
        """
        :param name: full name of the player
        :param baseball_reference_id: Baseball Reference ID of the player
        :param team_abbrev: abbreviation of the player's team
        """
        self._team_abbrev = team_abbrev
        self._baseball_reference_id = baseball_reference_id
        self._name = name

    def get_id(self) -> str:
        """Return the Baseball Reference ID given at construction."""
        return self._baseball_reference_id

    def get_name(self) -> str:
        """Return the full name given at construction."""
        return self._name

    def get_last_name(self) -> str:
        """Return every whitespace-separated word of the name except the first, joined by spaces."""
        name_words = self._name.split()
        words_after_first = name_words[1:]
        return " ".join(words_after_first)

    def get_team(self) -> str:
        """Return the team abbreviation given at construction."""
        return self._team_abbrev
def get_hitter_empty_stats() -> dict:
    """
    Build a stats dictionary with every relevant hitter stat set to zero
    :return: dictionary mapping each label in HITTER_RELEVANT_STAT_KEYS to 0.0
    """
    return dict.fromkeys(HITTER_RELEVANT_STAT_KEYS, 0.0)
def get_season_hitter_identifiers(year: int) -> [PlayerIdentifier]:
    """
    Given a year, get the name, Baseball Reference ID and team of all hitters listed for that year
    Rows without a player link or a team link are skipped.
    :param year: year of interest
    :type year: int
    :return: list of identifiers, one per hitter row that could be parsed (empty if the batting table is missing)
    :rtype: [PlayerIdentifier]
    """
    soup = get_hitter_soup(year)
    season_hitter_ids = list()
    try:
        hitter_table = soup.find("table", {"id": "players_standard_batting"})
        hitter_table_rows = hitter_table.find("tbody").findAll("tr")
    except AttributeError:
        # No soup, no table or no table body: nothing to report
        return season_hitter_ids

    for hitter_table_row in hitter_table_rows:
        # A row without a "class" attribute yields None here; treat it as a data row instead of
        # crashing on None[0]
        row_classes = hitter_table_row.get("class")
        if row_classes and row_classes[0] == "thead":
            continue
        try:
            hitter_name_entry = hitter_table_row.find("td", {"data-stat": "player"}).find("a")
            hitter_name = hitter_name_entry.text.replace(u'\xa0', ' ')
            # The ID is the last path component of the player link, minus the ".shtml" suffix
            hitter_id = str(hitter_name_entry.get("href").split("/")[-1]).replace(".shtml", "")
            team_entry = hitter_table_row.find("td", {"data-stat": "team_ID"}).find("a")
            team_abbrev = team_entry.get("href").split("/")[2]
        except (IndexError, AttributeError):
            # Missing cell/link or an unexpected link shape: skip this row
            continue
        season_hitter_ids.append(PlayerIdentifier(hitter_name, hitter_id, team_abbrev))
    return season_hitter_ids
def get_hitter_id(full_name, team, year=None, soup=None):
    """ Get the BaseballReference ID from the players name and team
    :param full_name: the full name of the player
    :param team: the BaseballReference team abbreviation
    :param year: an integer representing the year of interest (this is particularly useful because players may
    change teams (default is the current year)
    :param soup: BeautifulSoup object of all players in the given year
    :return: string representation of the player's BaseballReference ID
    :raises PlayerNameNotFound: if the page, the batting table or a row matching both name and team is missing
    """
    if year is None:
        year = date.today().year
    if soup is None:
        soup = get_hitter_soup(year)
    if soup is None:
        raise PlayerNameNotFound(full_name)
    try:
        hitter_table = soup.find("table", {"id": "players_standard_batting"})
        hitter_table_rows = hitter_table.find("tbody").findAll("tr")
    except AttributeError:
        raise PlayerNameNotFound(full_name)

    for hitter_table_row in hitter_table_rows:
        # A row without a "class" attribute yields None here; treat it as a data row instead of
        # crashing on None[0]
        row_classes = hitter_table_row.get("class")
        if row_classes and row_classes[0] == "thead":
            continue
        try:
            hitter_entries = hitter_table_row.findAll("td")
            hitter_name_entry = hitter_entries[0].find("a")
            # Non-breaking spaces in the page text are normalised before comparing names
            name_matches = hitter_name_entry.text.replace(u'\xa0', ' ') == full_name
            if name_matches and team == hitter_entries[2].text:
                # The ID is the last path component of the player link, minus the ".shtml" suffix
                return str(hitter_name_entry.get("href").split("/")[-1]).replace(".shtml", "")
        except (IndexError, AttributeError):
            # Too few cells or no player link in this row: skip it
            continue
    raise PlayerNameNotFound(full_name)
def get_season_pitcher_identifiers(year: int) -> [PlayerIdentifier]:
    """
    Given a year, get the name, Baseball Reference ID and team of all pitchers listed for that year
    Rows without a player link or a team link are skipped.
    :param year: year of interest
    :type year: int
    :return: list of identifiers, one per pitcher row that could be parsed (empty if the pitching table is missing)
    :rtype: [PlayerIdentifier]
    """
    soup = get_pitcher_soup(year)
    season_pitcher_ids = list()
    try:
        pitcher_table = soup.find("table", {"id": "players_standard_pitching"})
        pitcher_table_rows = pitcher_table.find("tbody").findAll("tr")
    except AttributeError:
        # No soup, no table or no table body: nothing to report
        return season_pitcher_ids

    for pitcher_table_row in pitcher_table_rows:
        # A row without a "class" attribute yields None here; treat it as a data row instead of
        # crashing on None[0]
        row_classes = pitcher_table_row.get("class")
        if row_classes and row_classes[0] == "thead":
            continue
        try:
            pitcher_entries = pitcher_table_row.findAll("td")
            pitcher_name_entry = pitcher_entries[0].find("a")
            pitcher_name = pitcher_name_entry.text.replace(u'\xa0', ' ')
            # The ID is the last path component of the player link, minus the ".shtml" suffix
            pitcher_id = str(pitcher_name_entry.get("href").split("/")[-1]).replace(".shtml", "")
            team_entry = pitcher_table_row.find("td", {"data-stat": "team_ID"}).find("a")
            team_abbrev = team_entry.get("href").split("/")[2]
        except (IndexError, AttributeError):
            # Missing cell/link or an unexpected link shape: skip this row
            continue
        season_pitcher_ids.append(PlayerIdentifier(pitcher_name, pitcher_id, team_abbrev))
    return season_pitcher_ids
def get_pitcher_id(full_name, team, year=None, soup=None):
    """ Get the BaseballReference ID from the players name and team
    :param full_name: the full name of the player
    :param team: the BaseballReference team abbreviation
    :param year: an integer representing the year of interest (this is particularly useful because players may
    change teams (default is the current year)
    :param soup: BeautifulSoup object of all players in the given year
    :return: string representation of the player's ID
    :raises PlayerNameNotFound: if the page, the pitching table or a row matching both name and team is missing
    """
    if year is None:
        year = date.today().year
    if soup is None:
        soup = get_pitcher_soup(year)
    try:
        pitcher_table = soup.find("table", {"id": "players_standard_pitching"}).find("tbody")
        pitcher_table_rows = pitcher_table.findAll("tr")
    except AttributeError:
        # Also covers a soup that is still None after the fetch
        raise PlayerNameNotFound(full_name)

    for pitcher_table_row in pitcher_table_rows:
        # A row without a "class" attribute yields None here; treat it as a data row instead of
        # crashing on None[0]
        row_classes = pitcher_table_row.get("class")
        if row_classes and row_classes[0] == "thead":
            continue
        try:
            pitcher_entries = pitcher_table_row.findAll("td")
            pitcher_name_entry = pitcher_entries[0].find("a")
            # Non-breaking spaces in the page text are normalised before comparing names
            name_matches = pitcher_name_entry.text.replace(u'\xa0', ' ') == full_name
            if name_matches and team == pitcher_entries[2].text:
                # The ID is the last path component of the player link, minus the ".shtml" suffix
                return str(pitcher_name_entry.get("href").split("/")[-1]).replace(".shtml", "")
        except (IndexError, AttributeError):
            # Too few cells or no player link in this row: skip it
            continue
    raise PlayerNameNotFound(full_name)
class PlayerNameNotFound(Exception):
    """Raised when a player cannot be located in a Baseball Reference page."""

    def __init__(self, name_str):
        """
        :param name_str: name of the player that was searched for
        """
        message = "Player '%s' not found in the Baseball Reference page" % name_str
        super().__init__(message)
def get_hitter_soup(year: int = None) -> bs4.BeautifulSoup:
    """
    Fetch the league-wide standard batting page for a season
    :param year: integer representation of the year of interest (default is current year)
    :return: whatever get_comment_soup_from_url returns for the "<year>-standard-batting" league page
    """
    season = date.today().year if year is None else year
    url_parts = [BASE_URL, "/leagues/MLB/", str(season), "-standard-batting.shtml"]
    return get_comment_soup_from_url("".join(url_parts))
def get_pitcher_soup(year: int = None) -> bs4.BeautifulSoup:
    """
    Fetch the league-wide standard pitching page for a season
    :param year: integer representation of the year of interest (default is current year)
    :return: whatever get_comment_soup_from_url returns for the "<year>-standard-pitching" league page
    """
    season = date.today().year if year is None else year
    url_parts = [BASE_URL, "/leagues/MLB/", str(season), "-standard-pitching.shtml"]
    return get_comment_soup_from_url("".join(url_parts))
class TableNotFound(Exception):
    """Raised when a table is missing from a Baseball Reference page."""

    def __init__(self, table_name):
        """
        :param table_name: name of the table that was searched for
        """
        message = "Table '%s' not found in the Baseball Reference page" % table_name
        super().__init__(message)
class TableRowNotFound(Exception):
    """Raised when a row is missing from a table in a Baseball Reference page."""

    def __init__(self, table_row, table_column, table_name):
        """
        :param table_row: label of the row that was searched for
        :param table_column: label of the column in which the row label was expected
        :param table_name: name of the table that was searched
        """
        template = ("Table row '%s' not found in the column '%s' in the "
                    "table %s in the Baseball Reference page")
        super().__init__(template % (table_row, table_column, table_name))
class DidNotFacePitcher(Exception):
    """Raised when a hitter has no recorded plate appearances against a pitcher."""

    def __init__(self, hitter_name, pitcher_name):
        """
        :param hitter_name: name of the hitter
        :param pitcher_name: name of the pitcher
        """
        # Both names must be passed to "%" as one tuple; without the parentheses only hitter_name
        # was formatted and building the exception itself raised TypeError
        super(DidNotFacePitcher, self).__init__(
            "Player %s has never faced pitcher %s" % (hitter_name, pitcher_name))
def get_vs_table_row_dict(soup, batter_id, pitcher_id):
    """ Special version of get_table_row_dict for the batter vs. pitcher table. The row of interest
    is located through its batter/pitcher link rather than through a row label.
    :param soup: BeautifulSoup object containing the table HTML
    :param batter_id: the Baseball Reference ID of the relevant batter
    :param pitcher_id: the Baseball Reference ID of the relevant pitcher
    :return: a dictionary mapping every column header except the first to the matching cell text
    (0 for empty cells)
    :raises TableNotFound: if the table or its header is missing
    :raises TableRowNotFound: if the link is missing or the row width does not fit the header
    """
    # Note: we seem to need BASE_URL as a prefix during unit tests
    link_prefix = "/baseball/batter_vs_pitcher.cgi?batter="
    try:
        results_table = soup.find("table", {"id": "result_table"})
        column_names = [header_cell.text for header_cell in results_table.find("thead").findAll("th")]
        table_body = results_table.find("tbody")
    except AttributeError:
        # NOTE(review): the lookup uses id "result_table" but the error names "ajax_result_table" - confirm
        raise TableNotFound("ajax_result_table")

    matching_url = link_prefix + batter_id + "&pitcher=" + pitcher_id + "&post=0"
    try:
        # The link sits two levels below the row element
        stat_row = table_body.find("a", {"href": matching_url}).parent.parent
    except AttributeError:
        raise TableRowNotFound(matching_url, "NULL", "ajax_result_table")

    stat_entries = stat_row.findAll("td")
    # The names are now labeled as "th", so the row holds one "td" less than the header
    if len(column_names) != len(stat_entries) + 1:
        raise TableRowNotFound(matching_url, "NULL", "ajax_result_table")

    stat_dict = dict()
    for column_name, stat_entry in zip(column_names[1:], stat_entries):
        cell_text = stat_entry.text
        stat_dict[column_name] = 0 if cell_text == "" else cell_text.replace(u"\xa0", " ")
    return stat_dict
def get_all_table_row_dicts(soup: bs4.BeautifulSoup, table_name: str) -> (list, [dict]):
    """
    Get the column header labels as well as all rows in a given table in the given BeautifulSoup object
    Only rows holding exactly as many cells as the header are kept. The first cell of each row is
    left out of its dictionary; empty cells and cells that are not "td" elements are stored as 0.
    :param soup: BeautifulSoup object containing the table_name table as a child
    :type soup: bs4.BeautifulSoup
    :param table_name: name of the table of interest
    :type table_name: str
    :return: the header labels and one dictionary (header label -> cell text) per kept row
    :rtype: (list, [dict])
    :raises TableNotFound: if no table with the given id exists
    """
    results_table = soup.find("table", {"id": table_name})
    if results_table is None:
        raise TableNotFound(table_name)

    table_header_list = [header_cell.text for header_cell in results_table.find("thead").findAll("th")]
    expected_width = len(table_header_list)

    stat_dict_list = list()
    for stat_row in results_table.findAll("tr"):
        stat_entries = stat_row.findAll(["th", "td"])
        # The row does not line up with the header, move on to the next row
        if len(stat_entries) != expected_width:
            continue
        stat_dict = dict()
        # Index 0 is skipped on both sides so labels and cells stay aligned
        for label, stat_entry in zip(table_header_list[1:], stat_entries[1:]):
            if stat_entry.text != "" and stat_entry.name == "td":
                stat_dict[label] = stat_entry.text.replace(u"\xa0", " ")
            else:
                stat_dict[label] = 0
        stat_dict_list.append(stat_dict)
    return table_header_list, stat_dict_list
def get_table_row_dict(soup, table_name, table_row_label, table_column_label):
    """ Get the dictionary of the first table row whose value in a given column equals a given label
    :param soup: BeautifulSoup object containing the table HTML
    :param table_name: HTML "id" tag for the table
    :param table_row_label: bare text label for the row of interest
    :param table_column_label: bare text label for the column of interest
    :return: a dictionary representing the stats
    :raises TableRowNotFound: if the table cannot be parsed, the column is unknown or no row matches
    """
    try:
        _headers, candidate_rows = get_all_table_row_dicts(soup, table_name)
    except AttributeError:
        raise TableRowNotFound(table_row_label, table_column_label, table_name)

    try:
        for candidate_row in candidate_rows:
            if candidate_row[table_column_label] == table_row_label:
                return candidate_row
    except KeyError:
        # The requested column is not part of the row dictionaries
        raise TableRowNotFound(table_row_label, table_column_label, table_name)
    raise TableRowNotFound(table_row_label, table_column_label, table_name)
def get_table_body_row_dict(soup, table_name, table_row_label, table_column_label):
    """ Get the dictionary of the first table row whose value in a given column equals a given label
    Every cell of a row, including the first, is part of its dictionary; empty cells are stored as 0.
    :param soup: BeautifulSoup object containing the table HTML
    :param table_name: HTML "id" tag for the table
    :param table_row_label: bare text label for the row of interest
    :param table_column_label: bare text label for the column of interest
    :return: a dictionary representing the stats
    :raises TableNotFound: if no table with the given id exists
    :raises TableRowNotFound: if the header is missing, the column is unknown or no row matches
    """
    results_table = soup.find("table", {"id": table_name})
    if results_table is None:
        raise TableNotFound(table_name)
    try:
        header_cells = results_table.find("thead").findAll("th")
    except AttributeError:
        raise TableRowNotFound(table_row_label, table_column_label, table_name)

    column_names = [header_cell.text for header_cell in header_cells]
    expected_width = len(column_names)
    for stat_row in results_table.findAll("tr"):
        stat_entries = stat_row.findAll(["th", "td"])
        # The row does not line up with the header, move on to the next row
        if len(stat_entries) != expected_width:
            continue
        stat_dict = {
            column_name: (0 if stat_entry.text == "" else stat_entry.text.replace(u"\xa0", " "))
            for column_name, stat_entry in zip(column_names, stat_entries)
        }
        try:
            if stat_dict[table_column_label] == table_row_label:
                return stat_dict
        except KeyError:
            raise TableRowNotFound(table_row_label, table_column_label, table_name)
    raise TableRowNotFound(table_row_label, table_column_label, table_name)
def get_career_regular_season_hitting_soup(hitter_id: str) -> BeautifulSoup:
    """
    Fetch the player page of a hitter through get_soup_from_url
    :param hitter_id: Baseball Reference ID of the hitter; its first character is the directory in the URL
    :return: the result of get_soup_from_url for the player page
    """
    url_parts = [BASE_URL, "/players/", hitter_id[0], "/", str(hitter_id), ".shtml"]
    return get_soup_from_url("".join(url_parts))
def get_career_postseason_hitting_soup(hitter_id: str) -> BeautifulSoup:
    """
    Fetch the player page of a hitter through get_comment_soup_from_url
    :param hitter_id: Baseball Reference ID of the hitter; its first character is the directory in the URL
    :return: the result of get_comment_soup_from_url for the player page
    """
    url_parts = [BASE_URL, "/players/", hitter_id[0], "/", str(hitter_id), ".shtml"]
    return get_comment_soup_from_url("".join(url_parts))
def get_hitting_stats_table(soup: BeautifulSoup, table_id: str) -> dict:
    """
    Read the summary row in the footer of a hitting stats table
    :param soup: BeautifulSoup object containing the table
    :param table_id: HTML "id" of the table
    :return: dictionary mapping header labels to the summary row's cell text (0 for empty cells), or
    the zeroed stats from get_hitter_empty_stats() when the footer has no summary row
    :raises TableNotFound: if the table, its footer or its header is missing
    """
    results_table = soup.find("table", {"id": table_id})
    if results_table is None:
        raise TableNotFound(table_id)

    # The footer is checked before the header; either one missing is reported as a missing table
    sections = dict()
    for section_tag in ("tfoot", "thead"):
        section = results_table.find(section_tag)
        if section is None:
            raise TableNotFound(table_id)
        sections[section_tag] = section

    career_row_header = sections["tfoot"].find("th", {"data-stat": "player_stats_summary_explain"})
    # If there is no data, then return zeros for all categories
    if career_row_header is None:
        return get_hitter_empty_stats()

    career_row = career_row_header.parent
    # The summary label spans several header columns, so that many header labels are skipped
    column_span = int(career_row_header["colspan"])
    stat_labels = [header_cell.text for header_cell in sections["thead"].findAll("th")[column_span:-2]]
    stat_values = [stat_cell.text for stat_cell in career_row.findAll("td")[:-2]]
    return {
        stat_label: (stat_values[position] if stat_values[position] != "" else 0)
        for position, stat_label in enumerate(stat_labels)
    }
def get_career_regular_season_hitting_stats(baseball_reference_id, soup=None):
    """ Get a dictionary of the summary row of the "batting_standard" table for the given hitter id
    :param baseball_reference_id: unique BaseballReference ID for this hitter
    :param soup: BeautifulSoup object of the hitter career stats page (default is the URL for the given hitter)
    :return: dictionary returned by get_hitting_stats_table for the "batting_standard" table
    """
    career_soup = soup if soup is not None else get_career_regular_season_hitting_soup(baseball_reference_id)
    return get_hitting_stats_table(career_soup, "batting_standard")
def get_career_postseason_hitting_stats(hitter_id: str, soup=None):
    """ Get a dictionary of the summary row of the "batting_postseason" table for the given hitter id
    :param hitter_id: unique BaseballReference ID for this hitter
    :param soup: BeautifulSoup object of the hitter career stats page (default is the URL for the given hitter)
    :return: dictionary returned by get_hitting_stats_table for the "batting_postseason" table
    """
    postseason_soup = soup if soup is not None else get_career_postseason_hitting_soup(hitter_id)
    return get_hitting_stats_table(postseason_soup, "batting_postseason")
def get_career_hitting_stats(baseball_reference_id: str, soup: BeautifulSoup = None) -> dict:
    """
    Add up the regular-season and postseason career hitting stats of a hitter
    :param baseball_reference_id: unique BaseballReference ID for this hitter
    :param soup: accepted but not used by this function; both pages are always fetched by ID
    :return: dictionary mapping each label in HITTER_RELEVANT_STAT_KEYS to the integer sum of both values
    """
    regular_season_stats = get_career_regular_season_hitting_stats(baseball_reference_id)
    postseason_stats = get_career_postseason_hitting_stats(baseball_reference_id)
    combined_stats = dict()
    for stat_label in HITTER_RELEVANT_STAT_KEYS:
        combined_stats[stat_label] = int(regular_season_stats[stat_label]) + int(postseason_stats[stat_label])
    return combined_stats
def get_vs_hand_hitting_stats(baseball_reference_id, hand_value, soup=None):
    """ Get a dictionary representation of the hitter stats against the given pitcher hand
    :param baseball_reference_id: BaseballReference unique ID for this hitter
    :param hand_value: "L" for left, "R" for right
    :param soup: BeautifulSoup object of the hitter career splits page (default is the URL for the given hitter)
    :return: dictionary of the "vs LHP" / "vs RHP" row of the "plato" table, or None (after printing a
    message) when hand_value is neither "L" nor "R"
    """
    # The page is fetched before hand_value is validated
    if soup is None:
        soup = get_comment_soup_from_url(
            BASE_URL + "/players/split.fcgi?id=" + str(baseball_reference_id) + "&year=Career&t=b")

    hand = "vs LHP" if hand_value == "L" else ("vs RHP" if hand_value == "R" else None)
    if hand is None:
        print("Invalid hand enum %s." % hand_value)
        return None
    return get_table_row_dict(soup, "plato", hand, "Split")
def get_recent_hitting_stats(baseball_reference_id, soup=None):
    """ Get a dictionary representation of the hitter's stats from the last 7 days
    :param baseball_reference_id: BaseballReference unique ID for this hitter
    :param soup: BeautifulSoup object of the hitter's career splits page (default is the URL for the given hitter)
    :return: dictionary of the "Last 7 days" row of the "total" table
    """
    splits_soup = soup
    if splits_soup is None:
        splits_soup = get_comment_soup_from_url(
            BASE_URL + "/players/split.fcgi?id=" + str(baseball_reference_id) + "&year=Career&t=b")
    return get_table_row_dict(splits_soup, "total", "Last 7 days", "Split")
def get_season_hitting_stats(baseball_reference_id, year=None, soup=None):
    """ Get a dictionary representation of the hitter's stats for a season
    :param baseball_reference_id: BaseballReference unique ID for the given hitter
    :param year: integer representation of the year of interest (default is current year)
    :param soup: BeautifulSoup representation of the hitter's splits page (default is the URL for the given hitter)
    :return: dictionary of the "<year> Totals" row of the "total" table
    """
    season = date.today().year if year is None else year
    if soup is None:
        url = "".join([BASE_URL, "/players/split.fcgi?id=", str(baseball_reference_id),
                       "&year=", str(season), "&t=b"])
        # The URL is printed whenever the page has to be fetched
        print(url)
        soup = get_comment_soup_from_url(url)
    totals_label = str(season) + " Totals"
    return get_table_body_row_dict(soup, "total", totals_label, "Split")
def get_vs_pitcher_stats(batter_id, pitcher_id, soup=None):
    """ Get a dictionary representation of the hitter's stats against the given pitcher
    :param batter_id: BaseballReference unique ID for the hitter of interest
    :param pitcher_id: BaseballReference unique ID for the pitcher of interest
    :param soup: BeautifulSoup representation of the hitter's vs. pitcher page (default is the URL for the given hitter)
    :return: dictionary returned by get_vs_table_row_dict for this batter/pitcher pair
    """
    vs_soup = soup
    if vs_soup is None:
        url = "".join(["https://stathead.com/baseball/batter_vs_pitcher.cgi?batter=", str(batter_id),
                       "&utm_medium=br&utm_source=player-finder-links&utm_campaign=baseball"])
        # The URL is printed whenever the page has to be fetched
        print(url)
        vs_soup = get_soup_from_url(url)
    return get_vs_table_row_dict(vs_soup, batter_id, pitcher_id)
def get_hitter_page_career_soup(baseball_reference_id):
    """ Get the BeautifulSoup object for the hitter's career splits page
    :param baseball_reference_id: BaseballReference unique ID for the hitter of interest
    :return: the result of get_comment_soup_from_url for the career batting splits URL
    """
    url = "".join([BASE_URL, "/players/split.fcgi?id=", str(baseball_reference_id), "&year=Career&t=b"])
    return get_comment_soup_from_url(url)
def get_career_pitching_stats(baseball_reference_id, soup=None):
    """ Get a dictionary representation of the career stats for the given pitcher
    :param baseball_reference_id: BaseballReference unique ID for the pitcher of interest
    :param soup: BeautifulSoup object of the pitcher's career splits page
    :return: dictionary of the "Career Totals" row of the "total_extra" table
    """
    splits_soup = soup
    if splits_soup is None:
        splits_soup = get_comment_soup_from_url(
            BASE_URL + "/players/split.fcgi?id=" + str(baseball_reference_id) + "&year=Career&t=p")
    return get_table_row_dict(splits_soup, "total_extra", "Career Totals", "Split")
def get_pitcher_page_career_soup(baseball_reference_id):
    """ Get the BeautifulSoup object for the pitcher's career splits page
    :param baseball_reference_id: BaseballReference ID of the pitcher of interest
    :return: the result of get_comment_soup_from_url for the career pitching splits URL
    """
    career_url = "".join([BASE_URL, "/players/split.fcgi?id=", str(baseball_reference_id), "&year=Career&t=p"])
    # The URL is printed before the page is fetched
    print(career_url)
    return get_comment_soup_from_url(career_url)
def get_season_pitcher_stats(baseball_reference_id, year=None, soup=None):
    """ Get the season stats for the given pitcher
    :param baseball_reference_id: BaseballReference unique ID for the pitcher of interest
    :param year: integer representation of the year (default is current year)
    :param soup: BeautifulSoup of the splits page for the given year (default is the URL for this year)
    :return: dictionary of the "<year> Totals" row of the "total_extra" table
    """
    season = date.today().year if year is None else year
    if soup is None:
        url = "".join([BASE_URL, "/players/split.fcgi?id=", str(baseball_reference_id),
                       "&year=", str(season), "&t=p"])
        # The URL is printed whenever the page has to be fetched
        print(url)
        soup = get_comment_soup_from_url(url)
    totals_label = str(season) + " Totals"
    return get_table_body_row_dict(soup, "total_extra", totals_label, "Split")
def get_recent_pitcher_stats(baseball_reference_id, soup=None):
    """ Get a dictionary representation of the pitcher stats for the last 14 days
    :param baseball_reference_id: BaseballReference unique ID for the pitcher of interest
    :param soup: BeautifulSoup object of the pitcher's career splits page (default is the URL for the given ID)
    :return: dictionary of the "Last 14 days" row of the "total_extra" table
    """
    if soup is None:
        soup = get_comment_soup_from_url(
            BASE_URL + "/players/split.fcgi?id=" + str(baseball_reference_id) + "&year=Career&t=p")
    try:
        return get_table_row_dict(soup, "total_extra", "Last 14 days", "Split")
    except TableRowNotFound:
        # Row not found in the current soup: fetch the career splits page once more and retry;
        # a second failure propagates to the caller
        refetched_soup = get_comment_soup_from_url(
            BASE_URL + "/players/split.fcgi?id=" + str(baseball_reference_id) + "&year=Career&t=p")
        return get_table_row_dict(refetched_soup, "total_extra", "Last 14 days", "Split")
def get_season_hitting_game_logs(baseball_reference_id: str, year: int) -> (dict, [dict]):
    """
    Get the hitting game logs for an entire season for a given player
    :param baseball_reference_id: BaseballReference ID for a particular player
    :type baseball_reference_id: str
    :param year: year of interest
    :type year: int
    :return: result of get_all_table_row_dicts for the "batting_gamelogs" table (header labels and row dictionaries)
    :rtype: (dict, [dict])
    """
    game_log_url = "".join([BASE_URL, "/players/gl.fcgi?id=", str(baseball_reference_id),
                            "&t=b&year=", str(year)])
    return get_all_table_row_dicts(get_soup_from_url(game_log_url), "batting_gamelogs")
def get_hitting_game_log(baseball_reference_id, soup=None, game_date=None):
    """ Get a dictionary representation of hitting stats for a particular player on a particular day
    :param baseball_reference_id: BaseballReference unique ID for the hitter of interest
    :param soup: BeautifulSoup object of the hitter game log (default is the URL for the game log of the given ID)
    :param game_date: date of the game of interest (default is today)
    :return: dictionary of the matching "batting_gamelogs" row, or None (after printing the error) when
    the table or the row is not found
    """
    if game_date is None:
        game_date = date.today()
    if soup is None:
        soup = get_soup_from_url("".join([BASE_URL, "/players/gl.fcgi?id=", str(baseball_reference_id),
                                          "&t=b&year=", str(game_date.year)]))
    # Rows are looked up by "<month abbreviation> <day>" in the "Date" column
    date_label = date_abbreviations[game_date.month] + " " + str(game_date.day)
    try:
        return get_table_row_dict(soup, "batting_gamelogs", date_label, "Date")
    except (TableNotFound, TableRowNotFound) as lookup_error:
        print(lookup_error)
        return None
def get_season_pitching_game_logs(baseball_reference_id: str, year: int) -> (dict, [dict]):
    """
    Get the pitching game logs for an entire season for a given player
    :param baseball_reference_id: BaseballReference ID for a particular player
    :type baseball_reference_id: str
    :param year: year of interest
    :type year: int
    :return: result of get_all_table_row_dicts for the "pitching_gamelogs" table (header labels and row dictionaries)
    :rtype: (dict, [dict])
    """
    game_log_url = "".join([BASE_URL, "/players/gl.fcgi?id=", str(baseball_reference_id),
                            "&t=p&year=", str(year)])
    return get_all_table_row_dicts(get_soup_from_url(game_log_url), "pitching_gamelogs")
def get_pitching_game_log(baseball_reference_id, soup=None, game_date=None):
    """ Get a dictionary representation of the game log stats from the given date
    :param baseball_reference_id: BaseballReference unique ID for the pitcher of interest
    :param soup: BeautifulSoup object of the pitcher game log (default is the URL for the game log of the given ID)
    :param game_date: the game date of interest, an object exposing year, month and day (default is today)
    :return: dictionary of the matching "pitching_gamelogs" row, or None (after printing the error) when
    the table or the row is not found
    """
    if game_date is None:
        game_date = date.today()
    if soup is None:
        soup = get_soup_from_url("".join([BASE_URL, "/players/gl.fcgi?id=", str(baseball_reference_id),
                                          "&t=p&year=", str(game_date.year)]))
    # Rows are looked up by "<month abbreviation> <day>" in the "Date" column
    date_label = date_abbreviations[game_date.month] + " " + str(game_date.day)
    try:
        return get_table_row_dict(soup, "pitching_gamelogs", date_label, "Date")
    except (TableNotFound, TableRowNotFound) as lookup_error:
        print(lookup_error)
        return None
def get_team_info(team_name, year_of_interest=None, team_soup=None):
    """ Get the BaseballReference hitter/pitcher factors for the given team
    :param team_name: BaseballReference abbreviation of the team of interest (used verbatim in the team URL)
    :param year_of_interest: integer representation of the year of interest (default is current year)
    :param team_soup: BeautifulSoup object for the team information page (default is the URL for the team/year)
    :return: hitter factor, pitcher factor tuple for the given team, or (None, None) when the factors
    cannot be located or parsed
    """
    # href of the park-adjustment link next to which the factors are published. It is kept separate
    # from the team page URL: sharing one variable made the lookup search for the team URL whenever
    # the page had to be fetched, so the factors were never found on that path.
    park_adjust_href = "/about/parkadjust.shtml"
    if year_of_interest is None:
        year_of_interest = date.today().year
    if team_soup is None:
        team_url = BASE_URL + "/teams/" + team_name + "/" + str(year_of_interest) + ".shtml"
        team_soup = get_soup_from_url(team_url)
    try:
        sub_nodes = team_soup.find("a", {"href": park_adjust_href}).parent.parent.findAll("strong")
    except AttributeError:
        # No page, no park-adjustment link, or the link has no grandparent element
        return None, None

    for sub_node in sub_nodes:
        for content in sub_node.contents:
            if content is None:
                continue
            try:
                if "multi-year:" in content.lower():
                    # The text after the label holds two comma-separated "<name> - <number>" parts
                    factor_string = sub_node.next_sibling.split(",")
                    hitter_factor = int(factor_string[0].split("-")[1].strip().split(" ")[0])
                    pitcher_factor = int(factor_string[1].split("-")[1].strip().split(" ")[0])
                    return hitter_factor, pitcher_factor
            except (TypeError, AttributeError, IndexError, ValueError):
                # Non-text content or factor text in an unexpected shape: keep looking
                continue
    return None, None