-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathw2v_scraper.py
98 lines (67 loc) · 2.4 KB
/
w2v_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import requests
import json
# Root endpoint of the explosion.ai sense2vec API; get() appends "word|sense" to it.
BASE_URL = "https://api.explosion.ai/sense2vec/"
def getmostsimilar(word, sense, n_items=10):
    """
    Fetch the leading similar words for a word/sense pair, with their scores.

    params:
        word : the word to look up
        sense : sense tag of the word, for example -- NOUN <NOTE: DO NOT USE 'AUTO' OR 'auto'>
        n_items : maximum number of results to keep <Default value -- 10>
    returns:
        A list of (text, score) tuples, in the order the API listed them.
        The same list is also printed to stdout.
    """
    payload = get(word, sense)
    # Keep only the first n_items entries of the response's result list.
    top_hits = payload['results'][:n_items]
    similar = [(hit["text"], hit["score"]) for hit in top_hits]
    print(similar)
    return similar
def get_with_threshold(word, sense, threshold=0.75):
    """
    Fetch the similar words whose score is at least the given threshold.

    params:
        word : the word to look up
        sense : sense tag of the word, for example -- NOUN <NOTE: DO NOT USE 'AUTO' OR 'auto'>
        threshold : minimum score (inclusive) a result needs to be kept <Default value : 0.75>
    returns:
        A list of (text, score) tuples, in the order the API listed them.
        The same list is also printed to stdout.
    """
    payload = get(word, sense)
    similar = [
        (hit['text'], hit['score'])
        for hit in payload['results']
        if hit['score'] >= threshold
    ]
    print(similar)
    return similar
def get_all(word, sense):
    """
    Fetch every similar word the API returns for a word/sense pair.

    params:
        word : the word to look up
        sense : sense tag of the word, for example -- NOUN <NOTE: DO NOT USE 'AUTO' OR 'auto'>
    returns:
        A list of (text, score) tuples, in the order the API listed them.
        The same list is also printed to stdout.
    """
    payload = get(word, sense)
    similar = [(hit['text'], hit['score']) for hit in payload['results']]
    print(similar)
    return similar
def get(word, sense, timeout=30):
    """
    Query the explosion sense2vec API for words similar to the given word/sense.

    params:
        word : the word to look up
        sense : sense tag of the word, for example -- NOUN <NOTE: DO NOT USE 'AUTO' OR 'auto'>
        timeout : seconds to wait for the server before requests gives up and
                  raises a timeout error <Default value : 30>
    returns:
        The JSON response body decoded into Python objects. Callers in this
        file expect a dict with a 'results' entry.
    raises:
        requests.exceptions.RequestException : on connection failure or timeout
        json.JSONDecodeError : if the response body is not valid JSON
    """
    # The endpoint is addressed as <BASE_URL><word>|<sense>.
    request_url = BASE_URL + word + "|" + sense
    # Send the request exactly once; a timeout keeps a stalled server from
    # blocking the caller indefinitely.
    response = requests.get(request_url, timeout=timeout)
    return json.loads(response.text)
# Demo lookup that runs whenever this module is executed or imported: fetches
# words similar to "Hello" as a NOUN with the default threshold and prints them.
get_with_threshold(word="Hello", sense="NOUN")
# get_most_similar_googlenews("hello")