-
Notifications
You must be signed in to change notification settings - Fork 900
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Hai Liang Wang
committed
Sep 21, 2020
1 parent
1fdfaae
commit 0dbe1ec
Showing
9 changed files
with
493 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,4 +12,4 @@ synonyms.egg-info | |
.vscode/ | ||
build/ | ||
.env | ||
synonyms/data/words.vector | ||
synonyms/data/words.vector* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,34 @@ | ||
# synonyms 分数评测 [(v3.11.0)](https://pypi.python.org/pypi/synonyms/3.11.0) | ||
| 词1 | 词2 | synonyms | 人工评定 | | ||
| --- | --- | --- | --- | | ||
| 轿车 | 汽车 | 0.892 | 0.98 | | ||
| 宝石 | 宝物 | 1.0 | 0.96 | | ||
| 旅游 | 游历 | 0.649 | 0.96 | | ||
| 男孩子 | 小伙子 | 0.77 | 0.94 | | ||
| 海岸 | 海滨 | 0.889 | 0.925 | | ||
| 庇护所 | 精神病院 | 0.211 | 0.9025 | | ||
| 魔术师 | 巫师 | 0.95 | 0.875 | | ||
| 中午 | 正午 | 0.9 | 0.855 | | ||
| 火炉 | 炉灶 | 0.889 | 0.7775 | | ||
| 食物 | 水果 | 0.363 | 0.77 | | ||
| 鸟 | 公鸡 | 0.895 | 0.7625 | | ||
| 鸟 | 鹤 | 1.0 | 0.7425 | | ||
| 工具 | 器械 | 0.881 | 0.7375 | | ||
| 兄弟 | 和尚 | 0.139 | 0.705 | | ||
| 起重机 | 器械 | 0.195 | 0.42 | | ||
| 小伙子 | 兄弟 | 0.703 | 0.415 | | ||
| 旅行 | 轿车 | 0.088 | 0.29 | | ||
| 和尚 | 圣贤 | 0.222 | 0.275 | | ||
| 墓地 | 林地 | 0.874 | 0.2375 | | ||
| 食物 | 公鸡 | 0.151 | 0.2225 | | ||
| 海岸 | 丘陵 | 0.248 | 0.2175 | | ||
| 森林 | 墓地 | 0.14 | 0.21 | | ||
| 岸边 | 林地 | 0.193 | 0.1575 | | ||
| 和尚 | 奴隶 | 0.059 | 0.1375 | | ||
| 海岸 | 森林 | 0.23 | 0.105 | | ||
| 小伙子 | 巫师 | 0.182 | 0.105 | | ||
| 琴弦 | 微笑 | 0.089 | 0.0325 | | ||
| 玻璃 | 魔术师 | 0.02 | 0.0275 | | ||
| 中午 | 绳子 | 0.049 | 0.02 | | ||
| 公鸡 | 航行 | 0.0 | 0.02 | | ||
# synonyms 分数评测 [(v3.12.0)](https://pypi.python.org/pypi/synonyms/3.12.0) | ||
|
||
| 词 1 | 词 2 | synonyms | 人工评定 | | ||
| ------ | -------- | -------- | -------- | | ||
| 轿车 | 汽车 | 0.892 | 0.98 | | ||
| 宝石 | 宝物 | 1.0 | 0.96 | | ||
| 旅游 | 游历 | 0.649 | 0.96 | | ||
| 男孩子 | 小伙子 | 0.77 | 0.94 | | ||
| 海岸 | 海滨 | 0.889 | 0.925 | | ||
| 庇护所 | 精神病院 | 0.211 | 0.9025 | | ||
| 魔术师 | 巫师 | 0.95 | 0.875 | | ||
| 中午 | 正午 | 0.9 | 0.855 | | ||
| 火炉 | 炉灶 | 0.889 | 0.7775 | | ||
| 食物 | 水果 | 0.363 | 0.77 | | ||
| 鸟 | 公鸡 | 0.895 | 0.7625 | | ||
| 鸟 | 鹤 | 1.0 | 0.7425 | | ||
| 工具 | 器械 | 0.881 | 0.7375 | | ||
| 兄弟 | 和尚 | 0.139 | 0.705 | | ||
| 起重机 | 器械 | 0.195 | 0.42 | | ||
| 小伙子 | 兄弟 | 0.703 | 0.415 | | ||
| 旅行 | 轿车 | 0.088 | 0.29 | | ||
| 和尚 | 圣贤 | 0.222 | 0.275 | | ||
| 墓地 | 林地 | 0.874 | 0.2375 | | ||
| 食物 | 公鸡 | 0.151 | 0.2225 | | ||
| 海岸 | 丘陵 | 0.248 | 0.2175 | | ||
| 森林 | 墓地 | 0.14 | 0.21 | | ||
| 岸边 | 林地 | 0.193 | 0.1575 | | ||
| 和尚 | 奴隶 | 0.059 | 0.1375 | | ||
| 海岸 | 森林 | 0.23 | 0.105 | | ||
| 小伙子 | 巫师 | 0.182 | 0.105 | | ||
| 琴弦 | 微笑 | 0.089 | 0.0325 | | ||
| 玻璃 | 魔术师 | 0.02 | 0.0275 | | ||
| 中午 | 绳子 | 0.049 | 0.02 | | ||
| 公鸡 | 航行 | 0.0 | 0.02 | |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,7 @@ | |
__copyright__ = "Copyright (c) (2017-2020) Chatopera Inc. All Rights Reserved" | ||
__author__ = "Hu Ying Xi<>, Hai Liang Wang<[email protected]>" | ||
__date__ = "2017-09-27" | ||
__version__ = "3.11.0" | ||
__version__ = "3.12.0" | ||
|
||
import os | ||
import sys | ||
|
@@ -56,6 +56,7 @@ | |
from .utils import is_digit | ||
import jieba | ||
from .jieba import posseg as _tokenizer | ||
import wget | ||
|
||
''' | ||
globals | ||
|
@@ -119,19 +120,28 @@ def _segment_words(sen): | |
word embedding | ||
''' | ||
# vectors | ||
_f_model = os.path.join(curdir, 'data', 'words.vector') | ||
_f_url = os.environ.get("SYNONYMS_WORD2VEC_BIN_URL_ZH_CN", "https://static-public.chatopera.com/ml/synonyms/words.vector.gz") | ||
_f_model = os.path.join(curdir, 'data', 'words.vector.gz') | ||
_download_model = not os.path.exists(_f_model) | ||
if "SYNONYMS_WORD2VEC_BIN_MODEL_ZH_CN" in ENVIRON: | ||
_f_model = ENVIRON["SYNONYMS_WORD2VEC_BIN_MODEL_ZH_CN"] | ||
_download_model = False | ||
|
||
def _load_w2v(model_file=_f_model, binary=True): | ||
''' | ||
load word2vec model | ||
''' | ||
if not os.path.exists(model_file): | ||
print("os.path : ", os.path) | ||
if not os.path.exists(model_file) and _download_model: | ||
print("\n[Synonyms] downloading data from %s to %s ... \n this only happens if SYNONYMS_WORD2VEC_BIN_URL_ZH_CN is not present and Synonyms initialization for the first time. \n It would take minutes that depends on network." % (_f_url, model_file)) | ||
wget.download(_f_url, out = model_file) | ||
print("\n[Synonyms] download is done.\n") | ||
elif not os.path.exists(model_file): | ||
print("[Synonyms] os.path : ", os.path) | ||
raise Exception("Model file [%s] does not exist." % model_file) | ||
|
||
return KeyedVectors.load_word2vec_format( | ||
model_file, binary=binary, unicode_errors='ignore') | ||
print(">> Synonyms on loading vectors [%s] ..." % _f_model) | ||
print("[Synonyms] on loading vectors [%s] ..." % _f_model) | ||
_vectors = _load_w2v(model_file=_f_model) | ||
|
||
def _get_wv(sentence, ignore=False): | ||
|
Oops, something went wrong.