Skip to content

Commit

Permalink
Merge pull request #58 from ajbowler/raw-box-score
Browse files Browse the repository at this point in the history
Add rawboxscore.xml to stats retrieval, fixes #30 and #57
  • Loading branch information
panzarino authored Jan 30, 2018
2 parents 27b04a1 + 6a11125 commit 00775ce
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 42 deletions.
10 changes: 10 additions & 0 deletions mlbgame/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,16 @@ def get_box_score(game_id):
except HTTPError:
raise ValueError('Could not find a game with that id.')

def get_raw_box_score(game_id):
"""Return the raw box score file of a game with matching id."""
year, month, day = get_date_from_game_id(game_id)
try:
return urlopen(GAME_URL.format(year, month, day,
game_id,
'rawboxscore.xml'))
except HTTPError:
raise ValueError('Could not find a game with that id.')


def get_game_events(game_id):
"""Return the game events file of a game with matching id."""
Expand Down
156 changes: 114 additions & 42 deletions mlbgame/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,83 +8,147 @@

import lxml.etree as etree


def __player_stats_info(data, name):
home = []
away = []
for y in data:
# loops through pitchers
# loops through pitchers and batters
for x in y.findall(name):
stats = {}
# loop through and save stats
for i in x.attrib:
stats[i] = x.attrib[i]
# apply to correct list
if y.attrib['team_flag'] == 'home':
home.append(stats)
elif not home:
away.append(stats)
home.append(stats) if y.attrib['team_flag'] == 'home' else away.append(stats)
return (home, away)

def __raw_player_stats_info(data):
home_pitchers = []
away_pitchers = []
home_batters = []
away_batters = []

for team in data.findall('team'):
home_flag = team.attrib['team_flag'] == 'home'
pitching = team.find('pitching')
for pitcher in pitching.findall('pitcher'):
stats = {}
for i in pitcher.attrib:
stats[i] = pitcher.attrib[i]
home_pitchers.append(stats) if home_flag else away_pitchers.append(stats)

batting = team.find('batting')
for batter in batting.findall('batter'):
stats = {}
for i in batter.attrib:
stats[i] = batter.attrib[i]
home_batters.append(stats) if home_flag else away_batters.append(stats)
home = {
'pitchers': home_pitchers,
'batters': home_batters
}

away = {
'pitchers': away_pitchers,
'batters': away_batters
}
return (home, away)

def player_stats(game_id):
"""Return dictionary of individual stats of a game with matching id."""
"""Return dictionary of individual stats of a game with matching id.
The additional pitching/batting is mostly the same stats, except it contains
some useful stats such as groundouts/flyouts per pitcher (go/ao). MLB decided
to have two box score files, thus we return the data from both.
"""
# get data from data module
data = mlbgame.data.get_box_score(game_id)
box_score = mlbgame.data.get_box_score(game_id)
raw_box_score = mlbgame.data.get_raw_box_score(game_id)
# parse XML
parsed = etree.parse(data)
root = parsed.getroot()
box_score_tree = etree.parse(box_score).getroot()
raw_box_score_tree = etree.parse(raw_box_score).getroot()
# get pitching and batting info
pitching = root.findall('pitching')
batting = root.findall('batting')
pitching = box_score_tree.findall('pitching')
batting = box_score_tree.findall('batting')
# get parsed stats
pitching_info = __player_stats_info(pitching, 'pitcher')
batting_info = __player_stats_info(batting, 'batter')
# get parsed additional stats
additional_stats = __raw_player_stats_info(raw_box_score_tree)
addl_home_pitching = additional_stats[0]['pitchers']
addl_home_batting = additional_stats[0]['batters']
addl_away_pitching = additional_stats[1]['pitchers']
addl_away_batting = additional_stats[1]['batters']

output = {
'home_pitching': pitching_info[0],
'away_pitching': pitching_info[1],
'home_batting': batting_info[0],
'away_batting': batting_info[1]
'away_batting': batting_info[1],
'home_additional_pitching': addl_home_pitching,
'away_additional_pitching': addl_away_pitching,
'home_additional_batting': addl_home_batting,
'away_additional_batting': addl_away_batting
}
return output


def team_stats(game_id):
"""Return team stats of a game with matching id."""
# get data from data module
data = mlbgame.data.get_box_score(game_id)
# parse XML
parsed = etree.parse(data)
root = parsed.getroot()
# get pitching and batting ingo
pitching = root.findall('pitching')
batting = root.findall('batting')
# dictionary for output
output = {}
# loop through pitching info
for x in pitching:
def __team_stats_info(data, output, output_key):
for x in data:
stats = {}
# loop through stats and save
for y in x.attrib:
stats[y] = x.attrib[y]
# apply to correct team
if x.attrib['team_flag'] == 'home':
output['home_pitching'] = stats
# Example: 'home_batting' when output_key is 'batting'
output['home_' + output_key] = stats
elif x.attrib['team_flag'] == 'away':
output['away_pitching'] = stats
# loop through pitching info
for x in batting:
output['away_' + output_key] = stats
return output

def __raw_team_stats_info(data, output):
for team in data.findall('team'):
home_flag = team.attrib['team_flag'] == 'home'
pitching = team.find('pitching')
stats = {}
# loop through stats and save
for y in x.attrib:
stats[y] = x.attrib[y]
# apply to correct team
if x.attrib['team_flag'] == 'home':
output['home_batting'] = stats
elif x.attrib['team_flag'] == 'away':
output['away_batting'] = stats
for stat in pitching.attrib:
stats[stat] = pitching.attrib[stat]
if home_flag:
output['home_additional_pitching'] = stats
else:
output['away_additional_pitching'] = stats

stats = {}
batting = team.find('batting')
for stat in batting.attrib:
stats[stat] = batting.attrib[stat]
if home_flag:
output['home_additional_batting'] = stats
else:
output['away_additional_batting'] = stats
return output

def team_stats(game_id):
"""Return team stats of a game with matching id.
The additional pitching/batting is mostly the same stats. MLB decided
to have two box score files, thus we return the data from both.
"""
# get data from data module
box_score = mlbgame.data.get_box_score(game_id)
raw_box_score = mlbgame.data.get_raw_box_score(game_id)
# parse XML
box_score_tree = etree.parse(box_score).getroot()
raw_box_score_tree = etree.parse(raw_box_score).getroot()
# get pitching and batting ingo
pitching = box_score_tree.findall('pitching')
batting = box_score_tree.findall('batting')
# dictionary for output
output = {}
output = __team_stats_info(pitching, output, 'pitching')
output = __team_stats_info(batting, output, 'batting')
output = __raw_team_stats_info(raw_box_score_tree, output)
return output

class Stats(object):
"""Hold stats information for a game.
Expand All @@ -95,6 +159,10 @@ class Stats(object):
game_id
home_batting
home_pitching
away_additional_pitching
away_additional_batting
home_additional_pitching
home_additional_batting
"""

def __init__(self, data, game_id, player):
Expand All @@ -104,7 +172,8 @@ def __init__(self, data, game_id, player):
"""
self.game_id = game_id
output = {'home_pitching': [], 'away_pitching': [], 'home_batting': [],
'away_batting': []}
'away_batting': [], 'home_additional_pitching': [], 'home_additional_batting': [],
'away_additional_pitching': [], 'away_additional_batting': []}
for y in data:
# create objects for all data
if player:
Expand All @@ -118,7 +187,10 @@ def __init__(self, data, game_id, player):
self.away_pitching = output['away_pitching']
self.home_batting = output['home_batting']
self.away_batting = output['away_batting']

self.home_additional_pitching = output['home_additional_pitching']
self.away_additional_pitching = output['away_additional_pitching']
self.home_additional_batting = output['home_additional_batting']
self.away_additional_batting = output['away_additional_batting']

class PlayerStats(mlbgame.object.Object):
"""Holds stats information for a player.
Expand Down

0 comments on commit 00775ce

Please sign in to comment.