-
Notifications
You must be signed in to change notification settings - Fork 3
/
metric.py
249 lines (213 loc) · 8.65 KB
/
metric.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
import sys
import numpy as np
import pandas as pd
# from functools import wraps
# from sklearn.preprocessing import minmax_scale
# Canonical column names used by the ranking metrics in this module.
# Callers can override them per-call via the col_* keyword arguments.
DEFAULT_USER_COL = "userID"
DEFAULT_ITEM_COL = "itemID"
DEFAULT_RATING_COL = "rating"
DEFAULT_LABEL_COL = "label"
DEFAULT_RELEVANCE_COL = "relevance"
DEFAULT_TIMESTAMP_COL = "timestamp"
DEFAULT_PREDICTION_COL = "prediction"
DEFAULT_SIMILARITY_COL = "sim"
DEFAULT_ITEM_FEATURES_COL = "features"
DEFAULT_ITEM_SIM_MEASURE = "item_cooccurrence_count"
# Convenience bundle of the four column names most metrics take,
# suitable for **COL_DICT unpacking at call sites.
COL_DICT = {
    "col_user": DEFAULT_USER_COL,
    "col_item": DEFAULT_ITEM_COL,
    "col_rating": DEFAULT_RATING_COL,
    "col_prediction": DEFAULT_PREDICTION_COL,
}
# Filtering variables
DEFAULT_K = 10  # default cutoff for top-k metrics
DEFAULT_THRESHOLD = 10  # default cutoff when relevancy_method="by_threshold"
# Other
SEED = 42  # default random seed for reproducibility
def merge_ranking_true_pred(
    rating_true,
    rating_pred,
    col_user,
    col_item,
    col_rating,
    col_prediction,
    relevancy_method,
    k=DEFAULT_K,
    threshold=DEFAULT_THRESHOLD,
):
    """Filter truth and prediction data frames on common users and rank the hits.

    Args:
        rating_true (pandas.DataFrame): True DataFrame
        rating_pred (pandas.DataFrame): Predicted DataFrame
        col_user (str): column name for user
        col_item (str): column name for item
        col_rating (str): column name for rating
        col_prediction (str): column name for prediction
        relevancy_method (str): method for determining relevancy ['top_k', 'by_threshold', None]. None means that the
            top k items are directly provided, so there is no need to compute the relevancy operation.
        k (int): number of top k items per user (optional)
        threshold (float): threshold of top items per user (optional)

    Returns:
        pandas.DataFrame, pandas.DataFrame, int: DataFrame of recommendation hits, sorted by `col_user` and `rank`,
        DataFrame of hit counts vs actual relevant items per user, and the number of unique user ids.

    Raises:
        NotImplementedError: if `relevancy_method` is not one of the supported values.
    """
    # Make sure the prediction and true data frames have the same set of users
    common_users = set(rating_true[col_user]).intersection(set(rating_pred[col_user]))
    rating_true_common = rating_true[rating_true[col_user].isin(common_users)]
    rating_pred_common = rating_pred[rating_pred[col_user].isin(common_users)]
    n_users = len(common_users)

    # Return hit items in prediction data frame with ranking information. This is used for calculating NDCG and MAP.
    # Use first to generate unique ranking values for each item. This is to align with the implementation in
    # Spark evaluation metrics, where index of each recommended items (the indices are unique to items) is used
    # to calculate penalized precision of the ordered items.
    if relevancy_method == "top_k":
        top_k = k
    elif relevancy_method == "by_threshold":
        top_k = threshold
    elif relevancy_method is None:
        top_k = None
    else:
        raise NotImplementedError("Invalid relevancy_method")
    df_hit = get_top_k_items(
        dataframe=rating_pred_common,
        col_user=col_user,
        col_rating=col_prediction,
        k=top_k,
    )
    # Keep only the recommended items that appear in the ground truth for the same user
    df_hit = pd.merge(df_hit, rating_true_common, on=[col_user, col_item])[
        [col_user, col_item, "rank"]
    ]

    # Count the number of hits vs actual relevant items per user.
    # BUG FIX: the original used dict-of-renamers aggregation
    # (.agg({"hit": "count"}) on a SeriesGroupBy), which was deprecated in
    # pandas 0.20 and raises SpecificationError since pandas 1.0.
    # groupby(...).size() is the equivalent, version-stable way to count rows.
    df_hit_count = pd.merge(
        df_hit.groupby(col_user).size().reset_index(name="hit"),
        rating_true_common.groupby(col_user).size().reset_index(name="actual"),
        on=col_user,
    )
    return df_hit, df_hit_count, n_users
def recall_at_k(
    rating_true,
    rating_pred,
    col_user=DEFAULT_USER_COL,
    col_item=DEFAULT_ITEM_COL,
    col_rating=DEFAULT_RATING_COL,
    col_prediction=DEFAULT_PREDICTION_COL,
    relevancy_method="top_k",
    k=DEFAULT_K,
    threshold=DEFAULT_THRESHOLD,
):
    """Recall at K.

    Args:
        rating_true (pandas.DataFrame): True DataFrame
        rating_pred (pandas.DataFrame): Predicted DataFrame
        col_user (str): column name for user
        col_item (str): column name for item
        col_rating (str): column name for rating
        col_prediction (str): column name for prediction
        relevancy_method (str): method for determining relevancy ['top_k', 'by_threshold', None]. None means that the
            top k items are directly provided, so there is no need to compute the relevancy operation.
        k (int): number of top k items per user
        threshold (float): threshold of top items per user (optional)

    Returns:
        float: recall at k (min=0, max=1). The maximum value is 1 even when fewer than
        k items exist for a user in rating_true.
    """
    df_hit, df_hit_count, n_users = merge_ranking_true_pred(
        rating_true=rating_true,
        rating_pred=rating_pred,
        col_user=col_user,
        col_item=col_item,
        col_rating=col_rating,
        col_prediction=col_prediction,
        relevancy_method=relevancy_method,
        k=k,
        threshold=threshold,
    )
    # No overlap between recommendations and ground truth: recall is zero.
    if df_hit.empty:
        return 0.0
    # Per-user recall = hits / relevant items, averaged over all common users.
    per_user_recall = df_hit_count["hit"] / df_hit_count["actual"]
    return per_user_recall.sum() / n_users
def ndcg_at_k(
    rating_true,
    rating_pred,
    col_user=DEFAULT_USER_COL,
    col_item=DEFAULT_ITEM_COL,
    col_rating=DEFAULT_RATING_COL,
    col_prediction=DEFAULT_PREDICTION_COL,
    relevancy_method="top_k",
    k=DEFAULT_K,
    threshold=DEFAULT_THRESHOLD,
):
    """Normalized Discounted Cumulative Gain (nDCG).

    Info: https://en.wikipedia.org/wiki/Discounted_cumulative_gain

    Args:
        rating_true (pandas.DataFrame): True DataFrame
        rating_pred (pandas.DataFrame): Predicted DataFrame
        col_user (str): column name for user
        col_item (str): column name for item
        col_rating (str): column name for rating
        col_prediction (str): column name for prediction
        relevancy_method (str): method for determining relevancy ['top_k', 'by_threshold', None]. None means that the
            top k items are directly provided, so there is no need to compute the relevancy operation.
        k (int): number of top k items per user
        threshold (float): threshold of top items per user (optional)

    Returns:
        float: nDCG at k (min=0, max=1).
    """
    df_hit, df_hit_count, n_users = merge_ranking_true_pred(
        rating_true=rating_true,
        rating_pred=rating_pred,
        col_user=col_user,
        col_item=col_item,
        col_rating=col_rating,
        col_prediction=col_prediction,
        relevancy_method=relevancy_method,
        k=k,
        threshold=threshold,
    )
    # No overlap between recommendations and ground truth: nDCG is zero.
    if df_hit.empty:
        return 0.0
    # Discounted gain of each hit; relevance is binary here, so the gain is
    # simply 1 / log(1 + rank). The log base cancels in the DCG/IDCG ratio.
    per_hit = df_hit.assign(dcg=1 / np.log1p(df_hit["rank"]))
    # Sum the discounted gains per user to get each user's DCG.
    user_dcg = per_hit.groupby(col_user, as_index=False, sort=False).agg({"dcg": "sum"})
    # Ideal DCG: all relevant items (capped at k) ranked at the top positions.
    df_ndcg = pd.merge(user_dcg, df_hit_count, on=[col_user])
    df_ndcg["idcg"] = df_ndcg["actual"].apply(
        lambda n_rel: sum(1 / np.log1p(np.arange(1, min(n_rel, k) + 1)))
    )
    # nDCG is DCG normalized by IDCG, averaged over all common users.
    return (df_ndcg["dcg"] / df_ndcg["idcg"]).sum() / n_users
def get_top_k_items(
    dataframe, col_user=DEFAULT_USER_COL, col_rating=DEFAULT_RATING_COL, k=DEFAULT_K
):
    """Get the input customer-item-rating tuple in the format of Pandas
    DataFrame, output a Pandas DataFrame in the dense format of top k items
    for each user.

    Note:
        If it is implicit rating, just append a column of constants to be
        ratings.

    Args:
        dataframe (pandas.DataFrame): DataFrame of rating data (in the format
            customerID-itemID-rating)
        col_user (str): column name for user
        col_rating (str): column name for rating
        k (int or None): number of items for each user; None means that the input has already been
            filtered out top k items and sorted by ratings and there is no need to do that again.

    Returns:
        pandas.DataFrame: DataFrame of top k items for each user, sorted by `col_user` and `rank`
    """
    # Sort dataframe by col_user and (top k) col_rating
    if k is None:
        # BUG FIX: the original aliased the input (top_k_items = dataframe), so
        # the "rank" column assignment below mutated the caller's DataFrame.
        # Copy to keep this function side-effect free.
        top_k_items = dataframe.copy()
    else:
        # nlargest keeps the k highest-rated rows per user (ties broken by
        # first occurrence), already in descending rating order within a user.
        top_k_items = (
            dataframe.groupby(col_user, as_index=False)
            .apply(lambda x: x.nlargest(k, col_rating))
            .reset_index(drop=True)
        )
    # Add ranks: 1..k per user, following the row order produced above.
    top_k_items["rank"] = top_k_items.groupby(col_user, sort=False).cumcount() + 1
    return top_k_items