diff --git a/README.md b/README.md index ae709c5..a7a8920 100644 --- a/README.md +++ b/README.md @@ -13,3 +13,10 @@ See https://gsverhoeven.github.io/post/blood-bowl-fumbbl-dataset/ for a data pap The folder `analysis/` contains Rmarkdown and Jupyter notebooks with additional analyses. +# Example analysis: matches played on FUMBBL by division + + + +# Example analysis: BB2020 Top 10 star player usage by week + +![](star_players_by_week.png) \ No newline at end of file diff --git a/analysis/star_player_usage_by_week_by_star.py b/analysis/star_player_usage_by_week_by_star.py new file mode 100644 index 0000000..6b0edf8 --- /dev/null +++ b/analysis/star_player_usage_by_week_by_star.py @@ -0,0 +1,65 @@ +import pandas as pd +import numpy as np +import plotnine as p9 + +# point this to the location of the HDF5 datasets +path_to_datasets = 'datasets/current/' + +# FUMBBL matches +target = 'df_matches.h5' +df_matches = pd.read_hdf(path_to_datasets + target) + +# FUMBBL matches by team +target = 'df_mbt.h5' +df_mbt = pd.read_hdf(path_to_datasets + target) + +# FUMBBL inducements +target = 'inducements.h5' +inducements = pd.read_hdf(path_to_datasets + target) + +# top 10 star players in BB2020 +top10 = (inducements +.merge(df_matches[['match_id', 'division_name', 'week_date']], how='left', on='match_id') +.query("star_player == 1 and division_name == 'Competitive'") +.groupby(['inducements']) +.agg( + n_games = ('match_id', 'count') +) +.reset_index() +.sort_values('n_games',ascending = False) +.head(10)['inducements']) + +res = (inducements +.merge(df_matches[['match_id', 'division_name', 'week_date']], how='left', on='match_id') +.query("star_player == 1 and division_name == 'Competitive' and inducements in @top10") +.groupby(['inducements', 'week_date']) +.agg( + n_games = ('match_id', 'count') +) +.reset_index()) + +# week totals over all star players +res2 = (inducements +.merge(df_matches[['match_id', 'division_name', 'week_date']], how='left', on='match_id') +.assign(inducements = 'total') +.query("star_player == 1 and division_name == 'Competitive'") +.groupby(['inducements', 'week_date']) +.agg( + n_games = ('match_id', 'count') +) +.reset_index()) + +res = pd.concat([res, res2], axis = 0) + +my_plot = (p9.ggplot(data = res, mapping = p9.aes(x = 'week_date', y = 'n_games', +group = 'factor(inducements)', color = 'factor(inducements)')) + + p9.geom_point() + + p9.geom_line() + + p9.expand_limits(y=[0,1]) + + p9.scale_size_area() + + p9.geom_vline(xintercept = '2021-09-01', color = "red") + + p9.ggtitle("FUMBBL BB2020 Star player usage over time") + + p9.theme(figure_size = (10, 6)) + + p9.ylab("matches")) + +my_plot.save(filename = 'star_players_by_week.png', height=6, width=10, units = 'in') \ No newline at end of file diff --git a/fumbbl_dataset.ipynb b/fumbbl_dataset.ipynb index 1ba06fb..5f086f1 100644 --- a/fumbbl_dataset.ipynb +++ b/fumbbl_dataset.ipynb @@ -1405,12 +1405,11 @@ "* Scraping the players (only most recent version, so no player development history)\n", "* Scraping the rulesets (for example to identify resurrection tournaments where players choose skills and use tiers)\n", "* Switch to feather or Parquet dataformat\n", - "* catch exception: \n", - "**PM we cannot deal yet with the situation HTTPSConnectionPool(host='fumbbl.com', port=443): Max retries exceeded with url: /api/match/get/4221820 (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 110] Connection timed out',))**\n", - "*PM we now have tournament id as well, possibly this allows to at least pinpoint when rulesets might have changed**\n", - "**PM we see that (NAF) matches played previously under ruleset 2228 are now labeled as ruleset 2310?\n", - "this has a few changes (tier, gold, crossleague)\n", - "Do we also see this in the XML API**\n" + "* catch exception: **PM we cannot deal yet with the situation HTTPSConnectionPool(host='fumbbl.com', port=443): Max retries exceeded with url: /api/match/get/4221820 (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 110] Connection timed out',))**\n", + "* PM we now have tournament id as well, possibly this allows to at least pinpoint when rulesets might have changed\n", + "* PM we see that (NAF) matches played previously under ruleset 2228 are now labeled as ruleset 2310? this has a few changes (tier, gold, crossleague)\n", + "* Do we also see this in the XML API\n", + "* cr_bin variable is gone?\n" ] } ], diff --git a/star_players_by_week.png b/star_players_by_week.png new file mode 100644 index 0000000..4256629 Binary files /dev/null and b/star_players_by_week.png differ