Skip to content

Commit

Permalink
add star player plot to README
Browse files Browse the repository at this point in the history
  • Loading branch information
gsverhoeven committed Jun 12, 2022
1 parent c3c9a85 commit d43016b
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 6 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,10 @@ See https://gsverhoeven.github.io/post/blood-bowl-fumbbl-dataset/ for a data pap

The folder `analysis/` contains Rmarkdown and Jupyter notebooks with additional analyses.

# Example analysis: matches played on FUMBBL by division



# Example analysis: BB2020 Top 10 star player usage by week

![](star_players_by_week.png)
65 changes: 65 additions & 0 deletions analysis/star_player_usage_by_week_by_star.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import pandas as pd
import numpy as np
import plotnine as p9

# Directory that holds the HDF5 datasets; point this at your local copy.
# File names are appended by plain string concatenation, so the trailing
# slash is required.
path_to_datasets = 'datasets/current/'

# FUMBBL matches
df_matches = pd.read_hdf(path_to_datasets + 'df_matches.h5')

# FUMBBL matches by team
# NOTE(review): loaded here but not referenced by the rest of this script.
df_mbt = pd.read_hdf(path_to_datasets + 'df_mbt.h5')

# FUMBBL inducements
inducements = pd.read_hdf(path_to_datasets + 'inducements.h5')

# Star-player inducement rows for matches in the 'Competitive' division,
# annotated with each match's division_name and week_date. All aggregations
# below start from this frame, so the merge and the filter run once instead
# of being repeated for every aggregation.
star_games = (
    inducements
    .merge(df_matches[['match_id', 'division_name', 'week_date']],
           how='left', on='match_id')
    .query("star_player == 1 and division_name == 'Competitive'")
)

# top 10 star players in BB2020: the ten `inducements` values with the
# highest row count over the whole period
top10 = (
    star_games
    .groupby(['inducements'])
    .agg(n_games=('match_id', 'count'))
    .reset_index()
    .sort_values('n_games', ascending=False)
    .head(10)['inducements']
)

# weekly counts, restricted to the top 10 star players
res = (
    star_games
    .query("inducements in @top10")
    .groupby(['inducements', 'week_date'])
    .agg(n_games=('match_id', 'count'))
    .reset_index()
)

# week totals over all star players; the label is overwritten with 'total'
# so these rows form one extra series next to the individual star players
res2 = (
    star_games
    .assign(inducements='total')
    .groupby(['inducements', 'week_date'])
    .agg(n_games=('match_id', 'count'))
    .reset_index()
)

# stack the per-star-player rows and the 'total' rows into one long frame
res = pd.concat([res, res2], axis=0)

# Weekly counts from `res`, one coloured and grouped series per
# `inducements` value.
usage_aes = p9.aes(x='week_date', y='n_games',
                   group='factor(inducements)', color='factor(inducements)')

plot_layers = (
    p9.geom_point(),
    p9.geom_line(),
    # make sure the y axis range includes 0 and 1
    p9.expand_limits(y=[0, 1]),
    # NOTE(review): no size aesthetic is mapped above, so this scale looks
    # unused - confirm before removing.
    p9.scale_size_area(),
    # red vertical reference line; the significance of the date is not
    # stated in this script
    p9.geom_vline(xintercept='2021-09-01', color="red"),
    p9.ggtitle("FUMBBL BB2020 Star player usage over time"),
    p9.theme(figure_size=(10, 6)),
    p9.ylab("matches"),
)

# Add the layers left to right, in the same order as a chained `+` expression.
my_plot = p9.ggplot(data=res, mapping=usage_aes)
for plot_layer in plot_layers:
    my_plot = my_plot + plot_layer

# Write the figure as a 10 x 6 inch PNG into the current working directory.
my_plot.save(filename='star_players_by_week.png', height=6, width=10, units='in')
11 changes: 5 additions & 6 deletions fumbbl_dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1405,12 +1405,11 @@
"* Scraping the players (only most recent version, so no player development history)\n",
"* Scraping the rulesets (for example to identify resurrection tournaments where players choose skills and use tiers)\n",
"* Switch to Feather or Parquet data format\n",
"* catch exception: \n",
"**PM we cannot deal yet with the situation HTTPSConnectionPool(host='fumbbl.com', port=443): Max retries exceeded with url: /api/match/get/4221820 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f4acff12be0>: Failed to establish a new connection: [Errno 110] Connection timed out',))**\n",
"*PM we now have tournament id as well, possibly this allows to at least pinpoint when rulesets might have changed**\n",
"**PM we see that (NAF) matches played previously under ruleset 2228 are now labeled as ruleset 2310?\n",
"this has a few changes (tier, gold, crossleague)\n",
"Do we also see this in the XML API**\n"
"* catch exception: **PM we cannot deal yet with the situation HTTPSConnectionPool(host='fumbbl.com', port=443): Max retries exceeded with url: /api/match/get/4221820 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f4acff12be0>: Failed to establish a new connection: [Errno 110] Connection timed out',))**\n",
"* PM we now have tournament id as well, possibly this allows us to at least pinpoint when rulesets might have changed\n",
"* PM we see that (NAF) matches played previously under ruleset 2228 are now labeled as ruleset 2310? this has a few changes (tier, gold, crossleague)\n",
"* Do we also see this in the XML API?\n",
"* cr_bin variable is gone?\n"
]
}
],
Expand Down
Binary file added star_players_by_week.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit d43016b

Please sign in to comment.