-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
45 lines (32 loc) · 1.53 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import itertools
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from get_locations import load_and_preprocess_locations
class CharCountVectorizer(CountVectorizer):
    """Character n-gram ``CountVectorizer`` that returns dense arrays.

    The analyzer is fixed to ``"char"`` and the n-gram range is derived from
    ``n_gram`` and ``n_gram_upper``. Both ``transform`` and ``fit_transform``
    call ``.toarray()`` on the parent's result before returning it.

    Args:
        n_gram: Lower bound of the character n-gram range.
        n_gram_upper: Upper bound of the range. When ``None``, ``n_gram`` is
            used for both bounds, so only n-grams of exactly that size are
            counted.
    """

    def __init__(self, n_gram, n_gram_upper=None):
        # scikit-learn's get_params() inspects this __init__ signature and
        # reads each argument back via getattr(self, name). Without these two
        # attributes, repr(), clone(), set_params() and fit-time parameter
        # validation fail with AttributeError.
        self.n_gram = n_gram
        self.n_gram_upper = n_gram_upper
        # NOTE: ngram_range is computed only here; changing n_gram or
        # n_gram_upper afterwards does not update it.
        super().__init__(
            analyzer="char",
            ngram_range=(n_gram, n_gram if n_gram_upper is None else n_gram_upper),
        )

    def transform(self, raw_documents):
        """Return the parent's ``transform`` result converted with ``.toarray()``."""
        return super().transform(raw_documents).toarray()

    def fit_transform(self, raw_documents, y=None):
        """Return the parent's ``fit_transform`` result converted with ``.toarray()``."""
        return super().fit_transform(raw_documents, y).toarray()
class Suggester:
    """Callable that returns the locations closest to a query word.

    On construction the locations are loaded with
    ``load_and_preprocess_locations()`` and ``pipeline`` is fitted on them;
    the resulting feature matrix is kept in ``self.X``.

    Args:
        pipeline: Object exposing ``fit_transform`` and ``transform``.
        out_size: Maximum number of suggestions returned per call.
    """

    def __init__(self, pipeline, out_size=10):
        self.pipeline = pipeline
        self.out_size = out_size
        loaded = load_and_preprocess_locations()
        self.locations = loaded
        # Fit once up front so that each call only has to transform the query.
        features = pipeline.fit_transform(loaded)
        self.X = features

    def __call__(self, word):
        """Return a list of at most ``out_size`` locations, nearest first."""
        ranked = closest_locations_by_vector_distance(
            self.X, word, self.pipeline, self.locations
        )
        # The ranking is a lazy generator; take only the leading entries.
        top = itertools.islice(ranked, self.out_size)
        return list(top)
def suggest_locations(pipeline, out_size=20):
    """Generator-based suggester driven with ``send()``.

    Loads the locations with ``load_and_preprocess_locations()`` and fits
    ``pipeline`` on them once, then loops forever: every value sent in is
    treated as a query word and the matching suggestions are yielded back.

    The generator must be primed with ``next()`` first; that initial step
    yields ``None``.

    Args:
        pipeline: Object exposing ``fit_transform`` and ``transform``.
        out_size: Maximum number of suggestions yielded per query. Defaults
            to 20, the value that was previously hard-coded.

    Yields:
        ``None`` on the priming step, then a list of at most ``out_size``
        locations for each word sent in.
    """
    locations = load_and_preprocess_locations()
    X = pipeline.fit_transform(locations)
    suggestions = None
    while True:
        # Hand back the previous result and wait for the next query word.
        word = yield suggestions
        ranked = closest_locations_by_vector_distance(X, word, pipeline, locations)
        suggestions = list(itertools.islice(ranked, out_size))
def closest_locations_by_vector_distance(X, word, transformer, locations):
    """Yield locations ordered by Euclidean distance to ``word``'s vector.

    ``word`` is vectorized with ``transformer.transform([word])`` and compared
    against every row of ``X`` using the L2 norm of the row-wise difference.

    Args:
        X: Feature matrix; row ``i`` is paired with ``locations[i]``.
            Assumed to be a dense array that broadcasts against the
            transformed query (TODO confirm for non-dense transformers).
        word: Query string to vectorize.
        transformer: Object whose ``transform`` accepts a list of strings.
        locations: Indexable collection of locations.

    Yields:
        Entries of ``locations`` from nearest to farthest. Entries at equal
        distance keep their relative order from ``locations``.
    """
    query_vec = transformer.transform([word])
    distances = np.linalg.norm(query_vec - X, axis=1)
    # argsort's default algorithm is not stable, which leaves the order of
    # equidistant locations unspecified; a stable sort makes the ranking
    # deterministic.
    for idx in np.argsort(distances, kind="stable"):
        yield locations[idx]