Skip to content

Commit

Permalink
Merge pull request #1536 from microsoft/andreas/doc_fixes
Browse files Browse the repository at this point in the history
Fix docstrings in evaluation
  • Loading branch information
anargyri authored Sep 22, 2021
2 parents bd6c48f + c0fc991 commit 09e31a5
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 18 deletions.
26 changes: 11 additions & 15 deletions recommenders/evaluation/python_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ def merge_ranking_true_pred(
Returns:
pandas.DataFrame, pandas.DataFrame, int: DataFrame of recommendation hits, sorted by `col_user` and `rank`
DataFrmae of hit counts vs actual relevant items per user number of unique user ids
DataFrame of hit counts vs. actual relevant items per user, and number of unique user ids
"""

# Make sure the prediction and true data frames have the same set of users
Expand Down Expand Up @@ -1070,19 +1070,17 @@ def user_diversity(
Y.C. Zhang, D.Ó. Séaghdha, D. Quercia and T. Jambor, Auralist:
introducing serendipity into music recommendation, WSDM 2012
Args:
train_df (pandas.DataFrame): Data set with historical data for users and items they
have interacted with; contains col_user, col_item. Assumed to not contain any duplicate rows.
reco_df (pandas.DataFrame): Recommender's prediction output, containing col_user, col_item,
col_relevance (optional). Assumed to not contain any duplicate user-item pairs.
Args:
train_df (pandas.DataFrame): Data set with historical data for users and items they have interacted with; contains col_user, col_item. Assumed to not contain any duplicate rows.
reco_df (pandas.DataFrame): Recommender's prediction output, containing col_user, col_item, col_relevance (optional). Assumed to not contain any duplicate user-item pairs.
item_feature_df (pandas.DataFrame): (Optional) It is required only when item_sim_measure='item_feature_vector'. It contains two columns: col_item and features (a feature vector).
item_sim_measure (str): (Optional) Indicates which item similarity measure to use. Available measures include item_cooccurrence_count (default choice) and item_feature_vector.
col_item_features (str): item feature column name.
col_user (str): User id column name.
col_item (str): Item id column name.
col_sim (str): This column indicates the column name for item similarity.
col_relevance (str): This column indicates whether the recommended item is actually
relevant to the user or not.
col_relevance (str): This column indicates whether the recommended item is actually relevant to the user or not.
Returns:
pandas.DataFrame: A dataframe with the following columns: col_user, user_diversity.
"""
Expand Down Expand Up @@ -1122,19 +1120,17 @@ def diversity(
):
"""Calculate average diversity of recommendations across all users.
Args:
train_df (pandas.DataFrame): Data set with historical data for users and items they
have interacted with; contains col_user, col_item. Assumed to not contain any duplicate rows.
reco_df (pandas.DataFrame): Recommender's prediction output, containing col_user, col_item,
col_relevance (optional). Assumed to not contain any duplicate user-item pairs.
Args:
train_df (pandas.DataFrame): Data set with historical data for users and items they have interacted with; contains col_user, col_item. Assumed to not contain any duplicate rows.
reco_df (pandas.DataFrame): Recommender's prediction output, containing col_user, col_item, col_relevance (optional). Assumed to not contain any duplicate user-item pairs.
item_feature_df (pandas.DataFrame): (Optional) It is required only when item_sim_measure='item_feature_vector'. It contains two columns: col_item and features (a feature vector).
item_sim_measure (str): (Optional) Indicates which item similarity measure to use. Available measures include item_cooccurrence_count (default choice) and item_feature_vector.
col_item_features (str): item feature column name.
col_user (str): User id column name.
col_item (str): Item id column name.
col_sim (str): This column indicates the column name for item similarity.
col_relevance (str): This column indicates whether the recommended item is actually
relevant to the user or not.
col_relevance (str): This column indicates whether the recommended item is actually relevant to the user or not.
Returns:
float: diversity.
"""
Expand Down
5 changes: 2 additions & 3 deletions recommenders/evaluation/spark_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def exp_var(self):


class SparkRankingEvaluation:
"""SparkRankingEvaluation"""
"""Spark Ranking Evaluator"""

def __init__(
self,
Expand Down Expand Up @@ -487,7 +487,7 @@ def _get_relevant_items_by_timestamp(


class SparkDiversityEvaluation:
"""Spark Diversity Evaluator"""
"""Spark Evaluator for diversity, coverage, novelty, serendipity"""

def __init__(
self,
Expand All @@ -508,7 +508,6 @@ def __init__(
1. catalog_coverage, which measures the proportion of items that get recommended from the item catalog;
2. distributional_coverage, which measures how unequally different items are recommended in the
recommendations to all users.
* Novelty - A more novel item indicates it is less popular, i.e. it gets recommended less frequently.
* Diversity - The dissimilarity of items being recommended.
* Serendipity - The "unusualness" or "surprise" of recommendations to a user. When 'col_relevance' is used, it indicates how much of a "pleasant surprise" the recommendations are to a user.
Expand Down

0 comments on commit 09e31a5

Please sign in to comment.