From 3c38ee872226ed4ae550e468efd8d2c498c3cc05 Mon Sep 17 00:00:00 2001
From: kegl
Date: Mon, 18 Nov 2019 15:33:07 +0000
Subject: [PATCH] fixing leaderboards

---
 .../ramp_database/tools/leaderboard.py        | 242 ++++++++++--------
 .../tools/tests/test_leaderboard.py           |  57 +++--
 .../ramp_frontend/views/leaderboard.py        |   6 +-
 3 files changed, 172 insertions(+), 133 deletions(-)

diff --git a/ramp-database/ramp_database/tools/leaderboard.py b/ramp-database/ramp_database/tools/leaderboard.py
index da41ff843..97f686ec3 100644
--- a/ramp-database/ramp_database/tools/leaderboard.py
+++ b/ramp-database/ramp_database/tools/leaderboard.py
@@ -1,5 +1,3 @@
-from itertools import product
-
 import numpy as np
 import pandas as pd
 
@@ -18,9 +16,9 @@
 pd.set_option('display.max_colwidth', -1)
 
 
-def _compute_leaderboard(session, submissions, leaderboard_type, event_name,
-                         with_links=True):
-    """Format the leaderboard.
+def _compute_public_leaderboard(session, submissions,
+                                event_name, with_links=True):
+    """Format the public leaderboard.
 
     Parameters
     ----------
@@ -28,8 +26,6 @@ def _compute_leaderboard(session, submissions, leaderboard_type, event_name,
         The session to directly perform the operation on the database.
     submissions : list of :class:`ramp_database.model.Submission`
         The submission to report in the leaderboard.
-    leaderboard_type : {'public', 'private'}
-        The type of leaderboard to built.
     event_name : str
         The name of the event.
     with_links : bool
@@ -40,104 +36,145 @@ def _compute_leaderboard(session, submissions, leaderboard_type, event_name,
     leaderboard : dataframe
         The leaderboard in a dataframe format.
     """
-    record_score = []
     event = session.query(Event).filter_by(name=event_name).one()
     map_score_precision = {score_type.name: score_type.precision
                            for score_type in event.score_types}
+    leaderboard_df = pd.DataFrame()
     for sub in submissions:
-        # take only max n bag
+        # one row per submission; the key insertion order below also fixes
+        # the column order of the final dataframe
+        row = pd.Series()
+        row['team'] = sub.team.name
+        row['submission'] = sub.name_with_link if with_links else sub.name
+
+        # bagging returns "learning curves", here we only need the last bag
         df_scores_bag = get_bagged_scores(session, sub.id)
-        highest_level = df_scores_bag.index.get_level_values('n_bag').max()
-        df_scores_bag = df_scores_bag.loc[(slice(None), highest_level), :]
+        n_bag = df_scores_bag.index.get_level_values('n_bag').max()
+        df_scores_bag = df_scores_bag.loc[(slice(None), n_bag), :]
         df_scores_bag.index = df_scores_bag.index.droplevel('n_bag')
         df_scores_bag = df_scores_bag.round(map_score_precision)
+        for col in df_scores_bag.columns:
+            precision = map_score_precision[col]
+            row[col] = round(df_scores_bag[col].loc['valid'], precision)
 
-        df_scores = get_scores(session, sub.id)
-        df_scores = df_scores.round(map_score_precision)
+        row['contributivity'] = int(round(100 * sub.contributivity))
+        row['historical contributivity'] = int(round(
+            100 * sub.historical_contributivity))
 
         df_time = get_time(session, sub.id)
         df_time = df_time.stack().to_frame()
         df_time.index = df_time.index.set_names(['fold', 'step'])
         df_time = df_time.rename(columns={0: 'time'})
-        df_time = df_time.astype('int')
-
-        df = pd.concat([df_scores, df_time], axis=1)
-        df_mean = df.groupby('step').mean()
-        df_std = df.groupby('step').std()
-
-        # select only the validation and testing steps and rename them to
-        # public and private
-        map_renaming = {'valid': 'public', 'test': 'private'}
-        df_mean = (df_mean.loc[list(map_renaming.keys())]
-                   .rename(index=map_renaming)
-                   .stack().to_frame().T)
-        df_std = (df_std.loc[list(map_renaming.keys())]
-                  .rename(index=map_renaming)
-                  .stack().to_frame().T)
-        df_scores_bag = (df_scores_bag.rename(index=map_renaming)
-                         .stack().to_frame().T)
-
-        df = pd.concat([df_scores_bag, df_mean, df_std], axis=1,
-                       keys=['bag', 'mean', 'std'])
-
-        df.columns = df.columns.set_names(['stat', 'set', 'score'])
-
-        # change the multi-index into a stacked index
-        df.columns = df.columns.map(lambda x: " ".join(x))
-
-        df['team'] = sub.team.name
-        df['submission'] = sub.name_with_link if with_links else sub.name
-        df['contributivity'] = int(round(100 * sub.contributivity))
-        df['historical contributivity'] = int(round(
-            100 * sub.historical_contributivity))
-        df['max RAM [MB]'] = get_submission_max_ram(session, sub.id)
-        df['submitted at (UTC)'] = pd.Timestamp(sub.submission_timestamp)
-        record_score.append(df)
-
-    # stack all the records
-    df = pd.concat(record_score, axis=0, ignore_index=True, sort=False)
-
-    # keep only second precision for the time stamp
-    df['submitted at (UTC)'] = df['submitted at (UTC)'].astype('datetime64[s]')
-    # rename the column of the time
-    df = df.rename(columns={'mean public time': 'train time [s]',
-                            'std public time': 'train time std [s]',
-                            'mean private time': 'test time [s]',
-                            'std private time': 'test time std [s]'})
-
-    # reordered the column
-    stats_order = (['bag', 'mean', 'std'] if leaderboard_type == 'private'
-                   else ['bag'])
-    dataset_order = (['public', 'private'] if leaderboard_type == 'private'
-                     else ['public'])
-    score_order = ([event.official_score_name] +
-                   [score_type.name for score_type in event.score_types
-                    if score_type.name != event.official_score_name])
-    score_list = [
-        '{} {} {}'.format(stat, dataset, score)
-        for stat, dataset, score in product(stats_order, dataset_order,
-                                            score_order)
-    ]
-    col_ordered = (
-        ['team', 'submission'] +
-        score_list +
-        ['contributivity', 'historical contributivity',
-         'train time [s]', 'test time [s]',
-         'max RAM [MB]', 'submitted at (UTC)']
+        df_time_mean = df_time.groupby('step').mean()
+
+        row['train time [s]'] = df_time_mean['time'].loc['train'].round()
+        row['valid time [s]'] = df_time_mean['time'].loc['valid'].round()
+        row['max RAM [MB]'] = round(get_submission_max_ram(session, sub.id))
+        row['submitted at (UTC)'] = pd.Timestamp(sub.submission_timestamp)
+        leaderboard_df = leaderboard_df.append(row, ignore_index=True)
+    leaderboard_df = leaderboard_df[row.index]  # reordering columns
+
+    # Formatting time and integer columns
+    timestamp_cols = ['submitted at (UTC)']
+    leaderboard_df[timestamp_cols] = leaderboard_df[timestamp_cols].astype(
+        'datetime64[s]')
+    int_cols = ['train time [s]', 'valid time [s]', 'max RAM [MB]',
+                'contributivity', 'historical contributivity']
+    leaderboard_df[int_cols] = leaderboard_df[int_cols].astype(int)
+
+    # Sorting according to the official score, best on the top
+    leaderboard_df = leaderboard_df.sort_values(
+        event.official_score_name,
+        ascending=event.get_official_score_type(session).is_lower_the_better
     )
-    df = df[col_ordered]
+    return leaderboard_df
+
+
+def _compute_private_leaderboard(session, submissions,
+                                 event_name, with_links=True):
+    """Format the private leaderboard.
+
+    Parameters
+    ----------
+    session : :class:`sqlalchemy.orm.Session`
+        The session to directly perform the operation on the database.
+    submissions : list of :class:`ramp_database.model.Submission`
+        The submissions to report in the leaderboard.
+    event_name : str
+        The name of the event.
+    with_links : bool
+        Whether or not the submission name should be clickable.
+
+    Returns
+    -------
+    leaderboard : dataframe
+        The leaderboard in a dataframe format.
+    """
+    event = session.query(Event).filter_by(name=event_name).one()
+    map_score_precision = {score_type.name: score_type.precision
+                           for score_type in event.score_types}
+    leaderboard_df = pd.DataFrame()
+    for sub in submissions:
+        # one row per submission; the key insertion order below also fixes
+        # the column order of the final dataframe
+        row = pd.Series()
+        row['team'] = sub.team.name
+        row['submission'] = sub.name_with_link if with_links else sub.name
+
+        # bagging returns "learning curves", here we only need the last bag
+        df_scores_bag = get_bagged_scores(session, sub.id)
+        n_bag = df_scores_bag.index.get_level_values('n_bag').max()
+        df_scores_bag = df_scores_bag.loc[(slice(None), n_bag), :]
+        df_scores_bag.index = df_scores_bag.index.droplevel('n_bag')
+        df_scores = get_scores(session, sub.id)
+        df_scores_mean = df_scores.groupby('step').mean()
+        df_scores_std = df_scores.groupby('step').std()
+        for col in df_scores_bag.columns:
+            precision = map_score_precision[col]
+            row['bagged test ' + col] = round(
+                df_scores_bag[col].loc['test'], precision)
+            row['mean test ' + col] = round(
+                df_scores_mean[col].loc['test'], precision)
+            row['std test ' + col] = round(
+                df_scores_std[col].loc['test'], precision + 1)
+            row['bagged valid ' + col] = round(
+                df_scores_bag[col].loc['valid'], precision)
+            row['mean valid ' + col] = round(
+                df_scores_mean[col].loc['valid'], precision)
+            row['std valid ' + col] = round(
+                df_scores_std[col].loc['valid'], precision + 1)
+        row['contributivity'] = int(round(100 * sub.contributivity))
+        row['historical contributivity'] = int(round(
+            100 * sub.historical_contributivity))
-    df = df.sort_values(
-        "bag {} {}".format(leaderboard_type, event.official_score_name),
+
+        df_time = get_time(session, sub.id)
+        df_time = df_time.stack().to_frame()
+        df_time.index = df_time.index.set_names(['fold', 'step'])
+        df_time = df_time.rename(columns={0: 'time'})
+        df_time_mean = df_time.groupby('step').mean()
+
+        row['train time [s]'] = df_time_mean['time'].loc['train'].round()
+        row['valid time [s]'] = df_time_mean['time'].loc['valid'].round()
+        row['test time [s]'] = df_time_mean['time'].loc['test'].round()
+        row['max RAM [MB]'] = get_submission_max_ram(session, sub.id)
+        row['submitted at (UTC)'] = pd.Timestamp(sub.submission_timestamp)
+        leaderboard_df = leaderboard_df.append(row, ignore_index=True)
+    leaderboard_df = leaderboard_df[row.index]  # reordering columns
+
+    # Formatting time and integer columns
+    timestamp_cols = ['submitted at (UTC)']
+    leaderboard_df[timestamp_cols] = leaderboard_df[timestamp_cols].astype(
+        'datetime64[s]')
+    int_cols = ['train time [s]', 'valid time [s]', 'test time [s]',
+                'max RAM [MB]', 'contributivity', 'historical contributivity']
+    leaderboard_df[int_cols] = leaderboard_df[int_cols].astype(int)
+
+    # Sorting according to the official score, best on the top
+    leaderboard_df = leaderboard_df.sort_values(
+        'bagged test {}'.format(event.official_score_name),
         ascending=event.get_official_score_type(session).is_lower_the_better
     )
-
-    # rename the column name for the public leaderboard
-    if leaderboard_type == 'public':
-        df = df.rename(columns={
-            key: value for key, value in zip(score_list, score_order)
-        })
-    return df
+    return leaderboard_df
 
 
@@ -164,18 +201,18 @@ def _compute_competition_leaderboard(session, submissions, leaderboard_type,
     score_type = event.get_official_score_type(session)
     score_name = event.official_score_name
 
-    private_leaderboard = _compute_leaderboard(session, submissions, 'private',
-                                               event_name, with_links=False)
+    private_leaderboard = _compute_private_leaderboard(
+        session, submissions, event_name, with_links=False)
 
     col_selected_private = (['team', 'submission'] +
-                            ['bag private ' + score_name,
-                             'bag public ' + score_name] +
-                            ['train time [s]', 'test time [s]',
+                            ['bagged test ' + score_name,
+                             'bagged valid ' + score_name] +
+                            ['train time [s]', 'valid time [s]',
                              'submitted at (UTC)'])
     leaderboard_df = private_leaderboard[col_selected_private]
     leaderboard_df = leaderboard_df.rename(
-        columns={'bag private ' + score_name: 'private ' + score_name,
-                 'bag public ' + score_name: 'public ' + score_name}
+        columns={'bagged test ' + score_name: 'private ' + score_name,
+                 'bagged valid ' + score_name: 'public ' + score_name}
     )
 
     # select best submission for each team
@@ -226,8 +263,8 @@ def _compute_competition_leaderboard(session, submissions, leaderboard_type,
 
     col_selected = [
         leaderboard_type + ' rank', 'team', 'submission',
-        leaderboard_type + ' ' + score_name, 'train time [s]', 'test time [s]',
-        'submitted at (UTC)'
+        leaderboard_type + ' ' + score_name, 'train time [s]',
+        'valid time [s]', 'submitted at (UTC)'
     ]
     if leaderboard_type == 'private':
         col_selected.insert(1, 'move')
@@ -288,11 +325,12 @@ def get_leaderboard(session, leaderboard_type, event_name, user_name=None,
     if not submissions:
         return None
 
-    if leaderboard_type in ['public', 'private']:
-        df = _compute_leaderboard(
-            session, submissions, leaderboard_type, event_name,
-            with_links=with_links
-        )
+    if leaderboard_type == 'public':
+        df = _compute_public_leaderboard(
+            session, submissions, event_name, with_links=with_links)
+    elif leaderboard_type == 'private':
+        df = _compute_private_leaderboard(
+            session, submissions, event_name, with_links=with_links)
     elif leaderboard_type in ['new', 'failed']:
         columns = ['team',
                    'submission',
diff --git a/ramp-database/ramp_database/tools/tests/test_leaderboard.py b/ramp-database/ramp_database/tools/tests/test_leaderboard.py
index 205d7c69b..c5a6e28bf 100644
--- a/ramp-database/ramp_database/tools/tests/test_leaderboard.py
+++ b/ramp-database/ramp_database/tools/tests/test_leaderboard.py
@@ -191,40 +191,41 @@ def test_get_leaderboard(session_toy_db):
 
     # check the difference between the public and private leaderboard
     assert leaderboard_private.count('<td>') > leaderboard_public.count('<td>')
-    for private_term in ['bag', 'mean', 'std', 'private']:
+    for private_term in ['bagged', 'mean', 'std', 'test time']:
         assert private_term not in leaderboard_public
         assert private_term in leaderboard_private
 
     # check the column name in each leaderboard
     assert """team
 submission
-bag public acc
-bag public error
-bag public nll
-bag public f1_70
-bag private acc
-bag private error
-bag private nll
-bag private f1_70
-mean public acc
-mean public error
-mean public nll
-mean public f1_70
-mean private acc
-mean private error
-mean private nll
-mean private f1_70
-std public acc
-std public error
-std public nll
-std public f1_70
-std private acc
-std private error
-std private nll
-std private f1_70
+bagged test acc
+mean test acc
+std test acc
+bagged valid acc
+mean valid acc
+std valid acc
+bagged test error
+mean test error
+std test error
+bagged valid error
+mean valid error
+std valid error
+bagged test nll
+mean test nll
+std test nll
+bagged valid nll
+mean valid nll
+std valid nll
+bagged test f1_70
+mean test f1_70
+std test f1_70
+bagged valid f1_70
+mean valid f1_70
+std valid f1_70
 contributivity
 historical contributivity
 train time [s]
+valid time [s]
 test time [s]
 max RAM [MB]
 submitted at (UTC)""" in leaderboard_private
@@ -237,7 +238,7 @@ def test_get_leaderboard(session_toy_db):
 contributivity
 historical contributivity
 train time [s]
-test time [s]
+valid time [s]
 max RAM [MB]
 submitted at (UTC)""" in leaderboard_public
     assert """team
@@ -251,7 +252,7 @@ def test_get_leaderboard(session_toy_db):
 submission
 acc
 train time [s]
-test time [s]
+valid time [s]
 submitted at (UTC)""" in competition_public
     assert """rank
 move
@@ -259,5 +260,5 @@ def test_get_leaderboard(session_toy_db):
 submission
 acc
 train time [s]
-test time [s]
+valid time [s]
 submitted at (UTC)""" in competition_private
diff --git a/ramp-frontend/ramp_frontend/views/leaderboard.py b/ramp-frontend/ramp_frontend/views/leaderboard.py
index 04c142a17..f89f939e9 100644
--- a/ramp-frontend/ramp_frontend/views/leaderboard.py
+++ b/ramp-frontend/ramp_frontend/views/leaderboard.py
@@ -72,7 +72,7 @@ def my_submissions(event_name):
                            leaderboard=leaderboard_html,
                            failed_leaderboard=failed_leaderboard_html,
                            new_leaderboard=new_leaderboard_html,
-                           sorting_column_index=4,
+                           sorting_column_index=2,
                            sorting_direction=sorting_direction,
                            event=event,
                            admin=admin)
@@ -115,7 +115,7 @@ def leaderboard(event_name):
     leaderboard_kwargs = dict(
         leaderboard=leaderboard_html,
         leaderboard_title='Leaderboard',
-        sorting_column_index=4,
+        sorting_column_index=2,
         sorting_direction=sorting_direction,
         event=event
     )
@@ -228,7 +228,7 @@ def private_leaderboard(event_name):
         'leaderboard.html',
         leaderboard_title='Leaderboard',
         leaderboard=leaderboard_html,
-        sorting_column_index=5,
+        sorting_column_index=2,
         sorting_direction=sorting_direction,
         event=event,
         private=True,
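
For reviewers who want to exercise the result: the split keeps get_leaderboard as the single entry point, so callers only see the new column names. Below is a minimal sketch of driving both code paths against a deployed RAMP instance; the config.yml path and the iris_test event name are placeholders, not part of this patch:

    from ramp_utils import read_config
    from ramp_database.utils import session_scope
    from ramp_database.tools.leaderboard import get_leaderboard

    # Placeholders: point these at a real deployment config and event.
    database_config = read_config('config.yml', filter_section='sqlalchemy')

    with session_scope(database_config) as session:
        # public leaderboard: one bagged validation column per score type
        public_html = get_leaderboard(session, 'public', 'iris_test')
        # private leaderboard: bagged/mean/std columns on both the valid
        # and the test splits, sorted on 'bagged test <official score name>'
        private_html = get_leaderboard(session, 'private', 'iris_test',
                                       with_links=False)

Both calls return the leaderboard rendered as an HTML table, which is what the frontend views above pass through as leaderboard_html.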