-
Notifications
You must be signed in to change notification settings - Fork 323
/
calculate_metric.py
51 lines (40 loc) · 1.26 KB
/
calculate_metric.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Compute the distance between two stores based on the definition in the paper.
import pickle
import random
import numpy
# NOTE: pickle.load can execute arbitrary code while unpickling; only run this
# script against pickle files that come from a trusted source.
# Load the training features and targets. The context manager closes the file
# as soon as the data is read instead of leaking the handle.
with open('feature_train_data.pickle', 'rb') as f:
    (X, y) = pickle.load(f)

# One dict per store index, mapping the tuple of feature columns 2..6 to the
# sale recorded for that row. A later row with the same key overwrites an
# earlier one.
# 1115 must be larger than every store index found in X
# (presumably the number of stores in the data set -- confirm).
dictlist = [{} for _ in range(1115)]
for feature, sale in zip(X, y):
    store = feature[1]
    dictlist[store][tuple(feature[2:7])] = sale

with open("embeddings.pickle", 'rb') as f:
    embeddings = pickle.load(f)

# The first element of the pickled object is used as the store embeddings.
store_embeddings = embeddings[0]
def distance(store_pairs, dictlist):
    """Return the mean absolute sale difference between two stores.

    This is the distance as defined in the paper: only keys that are present
    for both stores are compared.

    Args:
        store_pairs: A pair ``(a, b)`` of indices into ``dictlist``.
        dictlist: Sequence of dicts, one per store, mapping a key to a sale.

    Returns:
        The average of ``abs(dictlist[a][key] - dictlist[b][key])`` over all
        keys the two stores share.

    Raises:
        ZeroDivisionError: If the two stores have no key in common, so the
            average is undefined.
    """
    a, b = store_pairs
    sales_a = dictlist[a]
    sales_b = dictlist[b]
    absdiffs = [
        abs(sale - sales_b[key])
        for key, sale in sales_a.items()
        if key in sales_b
    ]
    if not absdiffs:
        # Same exception type the bare division would raise, but with a
        # message that identifies the offending pair.
        raise ZeroDivisionError(
            f"stores {a} and {b} share no keys; distance is undefined"
        )
    return sum(absdiffs) / len(absdiffs)
def embed_distance(store_pairs, em):
    """Return the distance between two stores in the embedding space.

    Args:
        store_pairs: A pair ``(a, b)`` of indices (or keys) into ``em``.
        em: Indexable collection of embedding vectors. Each vector may be a
            numpy array or any array-like such as a list or tuple of numbers.

    Returns:
        ``numpy.linalg.norm`` of the difference between the two vectors; for
        1-D vectors this is the Euclidean distance.
    """
    a, b = store_pairs
    # asarray leaves ndarrays untouched and lets plain sequences be
    # subtracted element-wise.
    a_vec = numpy.asarray(em[a])
    b_vec = numpy.asarray(em[b])
    return numpy.linalg.norm(a_vec - b_vec)
# Generate n random store pairs
n = 10000
pairs = set()
while len(pairs) < n:
    # Keep each pair in ascending order so (a, b) and (b, a) are the same
    # entry. Sorting the draw, rather than rejecting descending draws, is
    # still uniform over unordered pairs but does not throw away half of the
    # samples.
    a, b = sorted(random.sample(range(1115), 2))
    pairs.add((a, b))
# Calculate both distances for every sampled pair and write them to
# distances.csv, one space-separated "<distance> <embedding distance>" line
# per pair.
with open('distances.csv', 'w') as f:
    for pair in pairs:
        d, d_em = distance(pair, dictlist), embed_distance(pair, store_embeddings)
        print(d, d_em, file=f)