Skip to content

Commit 8ec26a0

Browse files
committed
Add rawboxscore.xml to stats retrieval, fixes #30
1 parent 27b04a1 commit 8ec26a0

File tree

2 files changed

+123
-38
lines changed

2 files changed

+123
-38
lines changed

mlbgame/data.py

+10
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,16 @@ def get_box_score(game_id):
5151
except HTTPError:
5252
raise ValueError('Could not find a game with that id.')
5353

54+
def get_raw_box_score(game_id):
    """Fetch the raw box score file for the game with the given id.

    The year, month and day are obtained from `get_date_from_game_id`
    and substituted into `GAME_URL` together with the game id and the
    file name 'rawboxscore.xml'.

    Returns the object produced by `urlopen` for that URL.
    Raises ValueError when the request fails with an HTTPError.
    """
    year, month, day = get_date_from_game_id(game_id)
    # build the address first so the try block only guards the request
    url = GAME_URL.format(year, month, day, game_id, 'rawboxscore.xml')
    try:
        return urlopen(url)
    except HTTPError:
        raise ValueError('Could not find a game with that id.')
63+
5464

5565
def get_game_events(game_id):
5666
"""Return the game events file of a game with matching id."""

mlbgame/stats.py

+113-38
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@
88

99
import lxml.etree as etree
1010

11-
1211
def __player_stats_info(data, name):
1312
home = []
1413
away = []
1514
for y in data:
16-
# loops through pitchers
15+
# loops through pitchers and batters
1716
for x in y.findall(name):
1817
stats = {}
1918
# loop through and save stats
@@ -26,65 +25,133 @@ def __player_stats_info(data, name):
2625
away.append(stats)
2726
return (home, away)
2827

28+
def __raw_player_stats_info(data):
    """Collect per-player stats from a parsed raw box score.

    Arguments:
    data -- element whose direct 'team' children are read. Each team
            must carry a 'team_flag' attribute; a value of 'home'
            selects the home side and any other value is treated as
            the away side.

    Returns a tuple (home, away). Each item is a dictionary with the
    keys 'pitchers' and 'batters', each holding a list with one
    dictionary of XML attributes per 'pitcher' / 'batter' element.

    A team without a 'pitching' or 'batting' child contributes no
    players for that section instead of raising AttributeError.
    """
    def section_players(team, section_tag, player_tag):
        # find() returns None when the section is absent
        section = team.find(section_tag)
        if section is None:
            return []
        # copy the attributes so callers get plain dictionaries
        return [dict(player.attrib) for player in section.findall(player_tag)]

    home = {'pitchers': [], 'batters': []}
    away = {'pitchers': [], 'batters': []}
    for team in data.findall('team'):
        side = home if team.attrib['team_flag'] == 'home' else away
        side['pitchers'].extend(section_players(team, 'pitching', 'pitcher'))
        side['batters'].extend(section_players(team, 'batting', 'batter'))
    return (home, away)
2959

3060
def player_stats(game_id):
    """Return a dictionary of individual player stats for a game id.

    Data is read from both box score files. The 'additional' entries
    come from the raw box score and hold mostly the same stats, plus
    some useful extras such as groundouts/flyouts per pitcher (go/ao).
    MLB publishes two box score files, so the data of both is returned.
    """
    # fetch both files from the data module
    box_score = mlbgame.data.get_box_score(game_id)
    raw_box_score = mlbgame.data.get_raw_box_score(game_id)
    # parse each XML document down to its root element
    box_score_root = etree.parse(box_score).getroot()
    raw_root = etree.parse(raw_box_score).getroot()
    # regular stats, already split into (home, away)
    home_pitching, away_pitching = __player_stats_info(
        box_score_root.findall('pitching'), 'pitcher')
    home_batting, away_batting = __player_stats_info(
        box_score_root.findall('batting'), 'batter')
    # additional stats from the raw box score, also (home, away)
    additional_home, additional_away = __raw_player_stats_info(raw_root)
    return {
        'home_pitching': home_pitching,
        'away_pitching': away_pitching,
        'home_batting': home_batting,
        'away_batting': away_batting,
        'home_additional_pitching': additional_home['pitchers'],
        'away_additional_pitching': additional_away['pitchers'],
        'home_additional_batting': additional_home['batters'],
        'away_additional_batting': additional_away['batters'],
    }
5097

51-
52-
def team_stats(game_id):
53-
"""Return team stats of a game with matching id."""
54-
# get data from data module
55-
data = mlbgame.data.get_box_score(game_id)
56-
# parse XML
57-
parsed = etree.parse(data)
58-
root = parsed.getroot()
59-
# get pitching and batting ingo
60-
pitching = root.findall('pitching')
61-
batting = root.findall('batting')
62-
# dictionary for output
63-
output = {}
64-
# loop through pitching info
65-
for x in pitching:
98+
def __team_stats_info(data, output, output_key):
    """Store each team's stats from `data` in `output`.

    Arguments:
    data -- iterable of elements, each exposing an `attrib` mapping.
    output -- dictionary that is updated in place.
    output_key -- suffix for the stored key, e.g. 'batting' yields
                  'home_batting' and 'away_batting'.

    Elements whose 'team_flag' attribute is missing, or is neither
    'home' nor 'away', are skipped. A missing attribute previously
    raised KeyError, although other flag values were already ignored.

    Returns the same `output` dictionary.
    """
    for element in data:
        flag = element.attrib.get('team_flag')
        if flag in ('home', 'away'):
            # Example: 'home_batting' when output_key is 'batting'
            output[flag + '_' + output_key] = dict(element.attrib)
    return output
111+
112+
def __raw_team_stats_info(data, output):
    """Add team-level stats from a parsed raw box score to `output`.

    Arguments:
    data -- element whose direct 'team' children are read. Each team
            must carry a 'team_flag' attribute; 'home' selects the
            home side and any other value is treated as the away side.
    output -- dictionary that is updated in place.

    For every team, the attributes of its 'pitching' and 'batting'
    children are stored under '<side>_additional_pitching' and
    '<side>_additional_batting'. A missing child is stored as an empty
    dictionary instead of raising AttributeError.

    Returns the same `output` dictionary.
    """
    for team in data.findall('team'):
        side = 'home' if team.attrib['team_flag'] == 'home' else 'away'
        for section_tag in ('pitching', 'batting'):
            # find() returns None when the team has no such section
            section = team.find(section_tag)
            stats = {} if section is None else dict(section.attrib)
            output[side + '_additional_' + section_tag] = stats
    return output
87133

134+
def team_stats(game_id):
    """Return a dictionary of team stats for a game with matching id.

    The 'additional' pitching/batting entries hold mostly the same
    stats. MLB publishes two box score files, so the data of both is
    returned.
    """
    # fetch both files from the data module
    box_score = mlbgame.data.get_box_score(game_id)
    raw_box_score = mlbgame.data.get_raw_box_score(game_id)
    # parse each XML document down to its root element
    box_score_root = etree.parse(box_score).getroot()
    raw_root = etree.parse(raw_box_score).getroot()
    # regular pitching and batting info from the box score
    output = {}
    for section in ('pitching', 'batting'):
        output = __team_stats_info(
            box_score_root.findall(section), output, section)
    # additional info from the raw box score
    return __raw_team_stats_info(raw_root, output)
88155

89156
class Stats(object):
90157
"""Hold stats information for a game.
@@ -95,6 +162,10 @@ class Stats(object):
95162
game_id
96163
home_batting
97164
home_pitching
165+
away_additional_pitching
166+
away_additional_batting
167+
home_additional_pitching
168+
home_additional_batting
98169
"""
99170

100171
def __init__(self, data, game_id, player):
@@ -104,7 +175,8 @@ def __init__(self, data, game_id, player):
104175
"""
105176
self.game_id = game_id
106177
output = {'home_pitching': [], 'away_pitching': [], 'home_batting': [],
107-
'away_batting': []}
178+
'away_batting': [], 'home_additional_pitching': [], 'home_additional_batting': [],
179+
'away_additional_pitching': [], 'away_additional_batting': []}
108180
for y in data:
109181
# create objects for all data
110182
if player:
@@ -118,7 +190,10 @@ def __init__(self, data, game_id, player):
118190
self.away_pitching = output['away_pitching']
119191
self.home_batting = output['home_batting']
120192
self.away_batting = output['away_batting']
121-
193+
self.home_additional_pitching = output['home_additional_pitching']
194+
self.away_additional_pitching = output['away_additional_pitching']
195+
self.home_additional_batting = output['home_additional_batting']
196+
self.away_additional_batting = output['away_additional_batting']
122197

123198
class PlayerStats(mlbgame.object.Object):
124199
"""Holds stats information for a player.

0 commit comments

Comments
 (0)