From 20494f6e11b246c269c35027f0182a182f264940 Mon Sep 17 00:00:00 2001 From: MelodicTechno <617179025@qq.com> Date: Fri, 26 Jul 2024 22:48:15 +0800 Subject: [PATCH 1/3] Modify: enable UTF-8 encoding --- .../similarity-analogy_origin.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter_natural-language-processing-pretraining/similarity-analogy_origin.md b/chapter_natural-language-processing-pretraining/similarity-analogy_origin.md index 93a6cd1f7..91449867e 100644 --- a/chapter_natural-language-processing-pretraining/similarity-analogy_origin.md +++ b/chapter_natural-language-processing-pretraining/similarity-analogy_origin.md @@ -81,7 +81,7 @@ class TokenEmbedding: data_dir = d2l.download_extract(embedding_name) # GloVe website: https://nlp.stanford.edu/projects/glove/ # fastText website: https://fasttext.cc/ - with open(os.path.join(data_dir, 'vec.txt'), 'r') as f: + with open(os.path.join(data_dir, 'vec.txt'), 'r', encoding='utf-8') as f: for line in f: elems = line.rstrip().split(' ') token, elems = elems[0], [float(elem) for elem in elems[1:]] From 7e285a9e63f8da92974469be1905ca8fac65c66b Mon Sep 17 00:00:00 2001 From: MelodicTechno <617179025@qq.com> Date: Fri, 26 Jul 2024 23:10:56 +0800 Subject: [PATCH 2/3] Modify: enable UTF-8 encoding in mxnet & paddle --- d2l/mxnet.py | 2 +- d2l/paddle.py | 2 +- d2l/tensorflow.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/d2l/mxnet.py b/d2l/mxnet.py index 25fc6eaf3..80e6af6d8 100644 --- a/d2l/mxnet.py +++ b/d2l/mxnet.py @@ -2071,7 +2071,7 @@ def _load_embedding(self, embedding_name): data_dir = d2l.download_extract(embedding_name) # GloVe网站:https://nlp.stanford.edu/projects/glove/ # fastText网站:https://fasttext.cc/ - with open(os.path.join(data_dir, 'vec.txt'), 'r') as f: + with open(os.path.join(data_dir, 'vec.txt'), 'r', encoding='utf-8') as f: for line in f: elems = line.rstrip().split(' ') token, elems = elems[0], [float(elem) for elem in elems[1:]] diff --git 
a/d2l/paddle.py b/d2l/paddle.py index 6c5813aeb..fc03a9a8a 100644 --- a/d2l/paddle.py +++ b/d2l/paddle.py @@ -2201,7 +2201,7 @@ def _load_embedding(self, embedding_name): data_dir = d2l.download_extract(embedding_name) # GloVe网站:https://nlp.stanford.edu/projects/glove/ # fastText网站:https://fasttext.cc/ - with open(os.path.join(data_dir, 'vec.txt'), 'r') as f: + with open(os.path.join(data_dir, 'vec.txt'), 'r' , encoding='utf-8') as f: for line in f: elems = line.rstrip().split(' ') token, elems = elems[0], [float(elem) for elem in elems[1:]] diff --git a/d2l/tensorflow.py b/d2l/tensorflow.py index 85705dceb..7b8895388 100644 --- a/d2l/tensorflow.py +++ b/d2l/tensorflow.py @@ -1,4 +1,4 @@ -DATA_HUB = dict() +gitDATA_HUB = dict() DATA_URL = 'http://d2l-data.s3-accelerate.amazonaws.com/' import numpy as np From 53da05a4bcc8d8cfea5432a4c9b5e4ad3c07ce33 Mon Sep 17 00:00:00 2001 From: MelodicTechno <617179025@qq.com> Date: Fri, 26 Jul 2024 23:13:12 +0800 Subject: [PATCH 3/3] delete the 'git' that was added accidentally --- d2l/tensorflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/d2l/tensorflow.py b/d2l/tensorflow.py index 7b8895388..85705dceb 100644 --- a/d2l/tensorflow.py +++ b/d2l/tensorflow.py @@ -1,4 +1,4 @@ -gitDATA_HUB = dict() +DATA_HUB = dict() DATA_URL = 'http://d2l-data.s3-accelerate.amazonaws.com/' import numpy as np