From 3c38ee872226ed4ae550e468efd8d2c498c3cc05 Mon Sep 17 00:00:00 2001
From: kegl
Date: Mon, 18 Nov 2019 15:33:07 +0000
Subject: [PATCH] fixing leaderboards

---
 .../ramp_database/tools/leaderboard.py        | 242 ++++++++++--------
 .../tools/tests/test_leaderboard.py           |  57 +++--
 .../ramp_frontend/views/leaderboard.py        |   6 +-
 3 files changed, 172 insertions(+), 133 deletions(-)

diff --git a/ramp-database/ramp_database/tools/leaderboard.py b/ramp-database/ramp_database/tools/leaderboard.py
index da41ff843..97f686ec3 100644
--- a/ramp-database/ramp_database/tools/leaderboard.py
+++ b/ramp-database/ramp_database/tools/leaderboard.py
@@ -1,5 +1,3 @@
-from itertools import product
-
 import numpy as np
 import pandas as pd
 
@@ -18,9 +16,9 @@
 pd.set_option('display.max_colwidth', -1)
 
 
-def _compute_leaderboard(session, submissions, leaderboard_type, event_name,
-                         with_links=True):
-    """Format the leaderboard.
+def _compute_public_leaderboard(session, submissions,
+                                event_name, with_links=True):
+    """Format the public leaderboard.
 
     Parameters
     ----------
@@ -28,8 +26,6 @@ def _compute_leaderboard(session, submissions, leaderboard_type, event_name,
         The session to directly perform the operation on the database.
     submissions : list of :class:`ramp_database.model.Submission`
         The submission to report in the leaderboard.
-    leaderboard_type : {'public', 'private'}
-        The type of leaderboard to built.
     event_name : str
         The name of the event.
     with_links : bool
@@ -40,104 +36,145 @@ def _compute_leaderboard(session, submissions, leaderboard_type, event_name,
     leaderboard : dataframe
         The leaderboard in a dataframe format.
     """
-    record_score = []
     event = session.query(Event).filter_by(name=event_name).one()
     map_score_precision = {score_type.name: score_type.precision
                            for score_type in event.score_types}
+    leaderboard_df = pd.DataFrame()
     for sub in submissions:
-        # take only max n bag
+        # one row per submission; the key insertion order below also fixes
+        # the column order of the final dataframe
+        row = pd.Series()
+        row['team'] = sub.team.name
+        row['submission'] = sub.name_with_link if with_links else sub.name
+
+        # bagging returns "learning curves", here we only need the last bag
         df_scores_bag = get_bagged_scores(session, sub.id)
-        highest_level = df_scores_bag.index.get_level_values('n_bag').max()
-        df_scores_bag = df_scores_bag.loc[(slice(None), highest_level), :]
+        n_bag = df_scores_bag.index.get_level_values('n_bag').max()
+        df_scores_bag = df_scores_bag.loc[(slice(None), n_bag), :]
         df_scores_bag.index = df_scores_bag.index.droplevel('n_bag')
         df_scores_bag = df_scores_bag.round(map_score_precision)
+        for col in df_scores_bag.columns:
+            precision = map_score_precision[col]
+            row[col] = round(df_scores_bag[col].loc['valid'], precision)
 
-        df_scores = get_scores(session, sub.id)
-        df_scores = df_scores.round(map_score_precision)
+        row['contributivity'] = int(round(100 * sub.contributivity))
+        row['historical contributivity'] = int(round(
+            100 * sub.historical_contributivity))
 
         df_time = get_time(session, sub.id)
         df_time = df_time.stack().to_frame()
         df_time.index = df_time.index.set_names(['fold', 'step'])
         df_time = df_time.rename(columns={0: 'time'})
-        df_time = df_time.astype('int')
-
-        df = pd.concat([df_scores, df_time], axis=1)
-        df_mean = df.groupby('step').mean()
-        df_std = df.groupby('step').std()
-
-        # select only the validation and testing steps and rename them to
-        # public and private
-        map_renaming = {'valid': 'public', 'test': 'private'}
-        df_mean = (df_mean.loc[list(map_renaming.keys())]
-                   .rename(index=map_renaming)
-                   .stack().to_frame().T)
-        df_std = (df_std.loc[list(map_renaming.keys())]
-                  .rename(index=map_renaming)
-                  .stack().to_frame().T)
-        df_scores_bag = (df_scores_bag.rename(index=map_renaming)
-                         .stack().to_frame().T)
-
-        df = pd.concat([df_scores_bag, df_mean, df_std], axis=1,
-                       keys=['bag', 'mean', 'std'])
-
-        df.columns = df.columns.set_names(['stat', 'set', 'score'])
-
-        # change the multi-index into a stacked index
-        df.columns = df.columns.map(lambda x: " ".join(x))
-
-        df['team'] = sub.team.name
-        df['submission'] = sub.name_with_link if with_links else sub.name
-        df['contributivity'] = int(round(100 * sub.contributivity))
-        df['historical contributivity'] = int(round(
-            100 * sub.historical_contributivity))
-        df['max RAM [MB]'] = get_submission_max_ram(session, sub.id)
-        df['submitted at (UTC)'] = pd.Timestamp(sub.submission_timestamp)
-        record_score.append(df)
-
-    # stack all the records
-    df = pd.concat(record_score, axis=0, ignore_index=True, sort=False)
-
-    # keep only second precision for the time stamp
-    df['submitted at (UTC)'] = df['submitted at (UTC)'].astype('datetime64[s]')
-    # rename the column of the time
-    df = df.rename(columns={'mean public time': 'train time [s]',
-                            'std public time': 'train time std [s]',
-                            'mean private time': 'test time [s]',
-                            'std private time': 'test time std [s]'})
-
-    # reordered the column
-    stats_order = (['bag', 'mean', 'std'] if leaderboard_type == 'private'
-                   else ['bag'])
-    dataset_order = (['public', 'private'] if leaderboard_type == 'private'
-                     else ['public'])
-    score_order = ([event.official_score_name] +
-                   [score_type.name for score_type in event.score_types
-                    if score_type.name != event.official_score_name])
-    score_list = [
-        '{} {} {}'.format(stat, dataset, score)
-        for stat, dataset, score in product(stats_order, dataset_order,
-                                            score_order)
-    ]
-    col_ordered = (
-        ['team', 'submission'] +
-        score_list +
-        ['contributivity', 'historical contributivity',
-         'train time [s]', 'test time [s]',
-         'max RAM [MB]', 'submitted at (UTC)']
+        df_time_mean = df_time.groupby('step').mean()
+
+        row['train time [s]'] = df_time_mean['time'].loc['train'].round()
+        row['valid time [s]'] = df_time_mean['time'].loc['valid'].round()
+        row['max RAM [MB]'] = round(get_submission_max_ram(session, sub.id))
+        row['submitted at (UTC)'] = pd.Timestamp(sub.submission_timestamp)
+        leaderboard_df = leaderboard_df.append(row, ignore_index=True)
+    leaderboard_df = leaderboard_df[row.index]  # reordering columns
+
+    # Formatting time and integer columns
+    timestamp_cols = ['submitted at (UTC)']
+    leaderboard_df[timestamp_cols] = leaderboard_df[timestamp_cols].astype(
+        'datetime64[s]')
+    int_cols = ['train time [s]', 'valid time [s]', 'max RAM [MB]',
+                'contributivity', 'historical contributivity']
+    leaderboard_df[int_cols] = leaderboard_df[int_cols].astype(int)
+
+    # Sorting according to the official score, best on the top
+    leaderboard_df = leaderboard_df.sort_values(
+        event.official_score_name,
+        ascending=event.get_official_score_type(session).is_lower_the_better
     )
-    df = df[col_ordered]
+    return leaderboard_df
+
+
+def _compute_private_leaderboard(session, submissions,
+                                 event_name, with_links=True):
+    """Format the private leaderboard.
+
+    Parameters
+    ----------
+    session : :class:`sqlalchemy.orm.Session`
+        The session to directly perform the operation on the database.
+    submissions : list of :class:`ramp_database.model.Submission`
+        The submissions to report in the leaderboard.
+    event_name : str
+        The name of the event.
+    with_links : bool
+        Whether or not the submission name should be clickable.
+
+    Returns
+    -------
+    leaderboard : dataframe
+        The leaderboard in a dataframe format.
+    """
+    event = session.query(Event).filter_by(name=event_name).one()
+    map_score_precision = {score_type.name: score_type.precision
+                           for score_type in event.score_types}
+    leaderboard_df = pd.DataFrame()
+    for sub in submissions:
+        # one row per submission; the key insertion order below also fixes
+        # the column order of the final dataframe
+        row = pd.Series()
+        row['team'] = sub.team.name
+        row['submission'] = sub.name_with_link if with_links else sub.name
+
+        # bagging returns "learning curves", here we only need the last bag
+        df_scores_bag = get_bagged_scores(session, sub.id)
+        n_bag = df_scores_bag.index.get_level_values('n_bag').max()
+        df_scores_bag = df_scores_bag.loc[(slice(None), n_bag), :]
+        df_scores_bag.index = df_scores_bag.index.droplevel('n_bag')
+        df_scores = get_scores(session, sub.id)
+        df_scores_mean = df_scores.groupby('step').mean()
+        df_scores_std = df_scores.groupby('step').std()
+        for col in df_scores_bag.columns:
+            precision = map_score_precision[col]
+            row['bagged test ' + col] = round(
+                df_scores_bag[col].loc['test'], precision)
+            row['mean test ' + col] = round(
+                df_scores_mean[col].loc['test'], precision)
+            row['std test ' + col] = round(
+                df_scores_std[col].loc['test'], precision + 1)
+            row['bagged valid ' + col] = round(
+                df_scores_bag[col].loc['valid'], precision)
+            row['mean valid ' + col] = round(
+                df_scores_mean[col].loc['valid'], precision)
+            row['std valid ' + col] = round(
+                df_scores_std[col].loc['valid'], precision + 1)
+        row['contributivity'] = int(round(100 * sub.contributivity))
+        row['historical contributivity'] = int(round(
+            100 * sub.historical_contributivity))
-    df = df.sort_values(
-        "bag {} {}".format(leaderboard_type, event.official_score_name),
+
+        df_time = get_time(session, sub.id)
+        df_time = df_time.stack().to_frame()
+        df_time.index = df_time.index.set_names(['fold', 'step'])
+        df_time = df_time.rename(columns={0: 'time'})
+        df_time_mean = df_time.groupby('step').mean()
+
+        row['train time [s]'] = df_time_mean['time'].loc['train'].round()
+        row['valid time [s]'] = df_time_mean['time'].loc['valid'].round()
+        row['test time [s]'] = df_time_mean['time'].loc['test'].round()
+        row['max RAM [MB]'] = get_submission_max_ram(session, sub.id)
+        row['submitted at (UTC)'] = pd.Timestamp(sub.submission_timestamp)
+        leaderboard_df = leaderboard_df.append(row, ignore_index=True)
+    leaderboard_df = leaderboard_df[row.index]  # reordering columns
+
+    # Formatting time and integer columns
+    timestamp_cols = ['submitted at (UTC)']
+    leaderboard_df[timestamp_cols] = leaderboard_df[timestamp_cols].astype(
+        'datetime64[s]')
+    int_cols = ['train time [s]', 'valid time [s]', 'test time [s]',
+                'max RAM [MB]', 'contributivity', 'historical contributivity']
+    leaderboard_df[int_cols] = leaderboard_df[int_cols].astype(int)
+
+    # Sorting according to the official score, best on the top
+    leaderboard_df = leaderboard_df.sort_values(
+        'bagged test {}'.format(event.official_score_name),
         ascending=event.get_official_score_type(session).is_lower_the_better
     )
-
-    # rename the column name for the public leaderboard
-    if leaderboard_type == 'public':
-        df = df.rename(columns={
-            key: value for key, value in zip(score_list, score_order)
-        })
-    return df
+    return leaderboard_df
 
 
@@ -164,18 +201,18 @@ def _compute_competition_leaderboard(session, submissions, leaderboard_type,
     score_type = event.get_official_score_type(session)
     score_name = event.official_score_name
 
-    private_leaderboard = _compute_leaderboard(session, submissions, 'private',
-                                               event_name, with_links=False)
+    private_leaderboard = _compute_private_leaderboard(
+        session, submissions, event_name, with_links=False)
 
     col_selected_private = (['team', 'submission'] +
-                            ['bag private ' + score_name,
-                             'bag public ' + score_name] +
-                            ['train time [s]', 'test time [s]',
+                            ['bagged test ' + score_name,
+                             'bagged valid ' + score_name] +
+                            ['train time [s]', 'valid time [s]',
                              'submitted at (UTC)'])
     leaderboard_df = private_leaderboard[col_selected_private]
     leaderboard_df = leaderboard_df.rename(
-        columns={'bag private ' + score_name: 'private ' + score_name,
-                 'bag public ' + score_name: 'public ' + score_name}
+        columns={'bagged test ' + score_name: 'private ' + score_name,
+                 'bagged valid ' + score_name: 'public ' + score_name}
     )
 
     # select best submission for each team
@@ -226,8 +263,8 @@ def _compute_competition_leaderboard(session, submissions, leaderboard_type,
 
     col_selected = [
         leaderboard_type + ' rank', 'team', 'submission',
-        leaderboard_type + ' ' + score_name, 'train time [s]', 'test time [s]',
-        'submitted at (UTC)'
+        leaderboard_type + ' ' + score_name, 'train time [s]',
+        'valid time [s]', 'submitted at (UTC)'
     ]
     if leaderboard_type == 'private':
         col_selected.insert(1, 'move')
@@ -288,11 +325,12 @@ def get_leaderboard(session, leaderboard_type, event_name, user_name=None,
     if not submissions:
         return None
 
-    if leaderboard_type in ['public', 'private']:
-        df = _compute_leaderboard(
-            session, submissions, leaderboard_type, event_name,
-            with_links=with_links
-        )
+    if leaderboard_type == 'public':
+        df = _compute_public_leaderboard(
+            session, submissions, event_name, with_links=with_links)
+    elif leaderboard_type == 'private':
+        df = _compute_private_leaderboard(
+            session, submissions, event_name, with_links=with_links)
     elif leaderboard_type in ['new', 'failed']:
         columns = ['team',
                    'submission',
diff --git a/ramp-database/ramp_database/tools/tests/test_leaderboard.py b/ramp-database/ramp_database/tools/tests/test_leaderboard.py
index 205d7c69b..c5a6e28bf 100644
--- a/ramp-database/ramp_database/tools/tests/test_leaderboard.py
+++ b/ramp-database/ramp_database/tools/tests/test_leaderboard.py
@@ -191,40 +191,41 @@ def test_get_leaderboard(session_toy_db):
 
     # check the difference between the public and private leaderboard
     assert leaderboard_private.count('<td>') > leaderboard_public.count('<td>')
-    for private_term in ['bag', 'mean', 'std', 'private']:
+    for private_term in ['bagged', 'mean', 'std', 'test time']:
         assert private_term not in leaderboard_public
         assert private_term in leaderboard_private
 
     # check the column name in each leaderboard
     assert """team
 submission
-bag public acc
-bag public error
-bag public nll
-bag public f1_70
-bag private acc
-bag private error
-bag private nll
-bag private f1_70
-mean public acc
-mean public error
-mean public nll
-mean public f1_70
-mean private acc
-mean private error
-mean private nll
-mean private f1_70
-std public acc
-std public error
-std public nll
-std public f1_70
-std private acc
-std private error
-std private nll
-std private f1_70
+bagged test acc
+mean test acc
+std test acc
+bagged valid acc
+mean valid acc
+std valid acc
+bagged test error
+mean test error
+std test error
+bagged valid error
+mean valid error
+std valid error
+bagged test nll
+mean test nll
+std test nll
+bagged valid nll
+mean valid nll
+std valid nll
+bagged test f1_70
+mean test f1_70
+std test f1_70
+bagged valid f1_70
+mean valid f1_70
+std valid f1_70
 contributivity
 historical contributivity
 train time [s]
+valid time [s]
 test time [s]
 max RAM [MB]
 submitted at (UTC)""" in leaderboard_private
@@ -237,7 +238,7 @@ def test_get_leaderboard(session_toy_db):
 contributivity
 historical contributivity
 train time [s]
-test time [s]
+valid time [s]
 max RAM [MB]
 submitted at (UTC)""" in leaderboard_public
     assert """team
@@ -251,7 +252,7 @@ def test_get_leaderboard(session_toy_db):
 submission
 acc
 train time [s]
-test time [s]
+valid time [s]
 submitted at (UTC)""" in competition_public
     assert """rank
 move
@@ -259,5 +260,5 @@ def test_get_leaderboard(session_toy_db):
 submission
 acc
 train time [s]
-test time [s]
+valid time [s]
 submitted at (UTC)""" in competition_private
diff --git a/ramp-frontend/ramp_frontend/views/leaderboard.py b/ramp-frontend/ramp_frontend/views/leaderboard.py
index 04c142a17..f89f939e9 100644
--- a/ramp-frontend/ramp_frontend/views/leaderboard.py
+++ b/ramp-frontend/ramp_frontend/views/leaderboard.py
@@ -72,7 +72,7 @@ def my_submissions(event_name):
                            leaderboard=leaderboard_html,
                            failed_leaderboard=failed_leaderboard_html,
                            new_leaderboard=new_leaderboard_html,
-                           sorting_column_index=4,
+                           sorting_column_index=2,
                            sorting_direction=sorting_direction,
                            event=event,
                            admin=admin)
@@ -115,7 +115,7 @@ def leaderboard(event_name):
     leaderboard_kwargs = dict(
         leaderboard=leaderboard_html,
         leaderboard_title='Leaderboard',
-        sorting_column_index=4,
+        sorting_column_index=2,
         sorting_direction=sorting_direction,
         event=event
     )
@@ -228,7 +228,7 @@ def private_leaderboard(event_name):
         'leaderboard.html',
         leaderboard_title='Leaderboard',
         leaderboard=leaderboard_html,
-        sorting_column_index=5,
+        sorting_column_index=2,
         sorting_direction=sorting_direction,
         event=event,
         private=True,
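
For reviewers who want to exercise the result: the split keeps get_leaderboard as the single entry point, so callers only see the new column names. Below is a minimal sketch of driving both code paths against a deployed RAMP instance; the config.yml path and the iris_test event name are placeholders, not part of this patch:

    from ramp_utils import read_config
    from ramp_database.utils import session_scope
    from ramp_database.tools.leaderboard import get_leaderboard

    # Placeholders: point these at a real deployment config and event.
    database_config = read_config('config.yml', filter_section='sqlalchemy')

    with session_scope(database_config) as session:
        # public leaderboard: one bagged validation column per score type
        public_html = get_leaderboard(session, 'public', 'iris_test')
        # private leaderboard: bagged/mean/std columns on both the valid
        # and the test splits, sorted on 'bagged test <official score name>'
        private_html = get_leaderboard(session, 'private', 'iris_test',
                                       with_links=False)

Both calls return the leaderboard rendered as an HTML table, which is what the frontend views above pass through as leaderboard_html.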