Skip to content
This repository was archived by the owner on Jan 15, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion env/cpu/py2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ dependencies:
- regex
- pip:
- pylint-quotes<0.2
- mxnet-mkl>=1.4.1
- mxnet-mkl==1.4.1
- sentencepiece<0.2
2 changes: 1 addition & 1 deletion env/cpu/py3-master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ dependencies:
- regex
- pip:
- pylint-quotes<0.2
- mxnet-mkl>=1.5.0b20190407
- mxnet-mkl==1.4.1
- sacremoses
- sentencepiece<0.2
2 changes: 1 addition & 1 deletion env/cpu/py3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies:
- regex
- pip:
- pylint-quotes<0.2
- mxnet-mkl>=1.4.1
- mxnet-mkl==1.4.1
- sacremoses
- sentencepiece<0.2
- https://github.com/mli/mx-theme/tarball/v0.3.9
Expand Down
2 changes: 1 addition & 1 deletion env/docker/py3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies:
- regex
- pip:
- pylint-quotes<0.2
- mxnet-cu101mkl>=1.4.1
- mxnet-cu101mkl==1.4.1
- sacremoses
- sentencepiece<0.2
- https://github.com/mli/mx-theme/tarball/v0.3.9
Expand Down
2 changes: 1 addition & 1 deletion env/gpu/py2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ dependencies:
- regex
- pip:
- pylint-quotes<0.2
- mxnet-cu92mkl>=1.4.1
- mxnet-cu101mkl==1.4.1
- sentencepiece<0.2
2 changes: 1 addition & 1 deletion env/gpu/py3-master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies:
- regex
- pip:
- pylint-quotes<0.2
- mxnet-cu92mkl>=1.5.0b20190407
- mxnet-cu101mkl==1.4.1
- sacremoses
- sentencepiece<0.2
- https://github.com/mli/mx-theme/tarball/v0.3.9
Expand Down
2 changes: 1 addition & 1 deletion env/gpu/py3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies:
- regex
- pip:
- pylint-quotes<0.2
- mxnet-cu92mkl>=1.4.1
- mxnet-cu101mkl==1.4.1
- sacremoses
- sentencepiece<0.2
- https://github.com/mli/mx-theme/tarball/v0.3.9
Expand Down
2 changes: 1 addition & 1 deletion scripts/parsing/parser/biaffine_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def flatten_numpy(arr):
# seq_len x batch_size

if is_train or arc_targets is not None:
correct = np.equal(arc_preds.asnumpy(), arc_targets)
correct = np.equal(arc_preds.asnumpy(), arc_targets) # pylint: disable=assignment-from-no-return
arc_correct = correct.astype(np.float32) * mask
arc_accuracy = np.sum(arc_correct) / num_tokens
targets_1D = flatten_numpy(arc_targets)
Expand Down
2 changes: 1 addition & 1 deletion scripts/parsing/parser/evaluate/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def evaluate_official_script(parser, vocab, num_buckets_test, test_batch_size,
gold_arc = gold_arc[1:length + 1]
gold_rel = gold_rel[1:length + 1]

arc_mask = np.equal(pred_arc, gold_arc)
arc_mask = np.equal(pred_arc, gold_arc) # pylint: disable=assignment-from-no-return
uc += np.sum(arc_mask)
total += length

Expand Down
2 changes: 1 addition & 1 deletion scripts/tests/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ def test_finetune_inference(dataset):
@pytest.mark.remote_required
@pytest.mark.integration
@pytest.mark.parametrize('dataset', ['XNLI', 'LCQMC', 'ChnSentiCorp'])
@pytest.mark.skipif(datetime.date.today() < datetime.date(2019, 7, 18),
@pytest.mark.skipif(datetime.date.today() < datetime.date(2019, 8, 18),
reason='Disabled for 4 weeks due to DNS error.')
def test_finetune_chinese_inference(dataset):
arguments = ['--log_interval', '100', '--epsilon', '1e-8', '--optimizer',
Expand Down
2 changes: 2 additions & 0 deletions src/gluonnlp/vocab/vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ def __init__(self, counter=None, max_size=None, min_freq=1, unknown_token=C.UNK_

if token_to_idx:
self._sort_index_according_to_user_specification(token_to_idx)
if unknown_token:
self._token_to_idx._default = self._token_to_idx[unknown_token]


def _index_counter_keys(self, counter, unknown_token, special_tokens, max_size,
Expand Down
2 changes: 1 addition & 1 deletion tests/unittest/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def test_pretrained_bert_models(disable_missing_parameters):
assert len(vocab) == vocab_size[dataset]
for token in special_tokens:
assert token in vocab, "Token %s not found in the vocab" % token
assert vocab['RandomWordByHaibin'] == 0
assert vocab['RandomWordByHaibin'] == vocab[vocab.unknown_token]
assert vocab.padding_token == '[PAD]'
assert vocab.unknown_token == '[UNK]'
assert vocab.bos_token is None
Expand Down
17 changes: 17 additions & 0 deletions tests/unittest/test_vocab_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -1442,3 +1442,20 @@ def test_vocab_backwards_compatibility_prior_v0_7_corrupted_index_bug():
assert v.idx_to_token[2] == '<bos>'
assert v.idx_to_token[3] == '<eos>'
assert v.idx_to_token[4] == 'token'


@pytest.mark.parametrize('unknown_token', ['<unk>', '<UNK>'])
@pytest.mark.parametrize('padding_token', ['<pad>', '<eos>', None])
@pytest.mark.parametrize('eos_token', ['<eos>', None])
@pytest.mark.parametrize('reserved_tokens', [['<tok>'], []])
def test_vocab_remapped_unknown_token_idx(unknown_token, padding_token, eos_token, reserved_tokens,
                                          counter):
    """Out-of-vocabulary lookups must follow the unknown token's (possibly remapped) index.

    Bug fix in test coverage: `reserved_tokens` was parametrized but never
    forwarded to the Vocab constructor, so both parametrized cases exercised
    identical code paths. It is now passed through.
    """
    Vocab = functools.partial(nlp.Vocab, counter, max_size=None, min_freq=1,
                              unknown_token=unknown_token, padding_token=padding_token,
                              bos_token=None, eos_token=eos_token,
                              reserved_tokens=reserved_tokens)

    # Default layout places the unknown token at index 0; OOV words map there.
    v = Vocab()
    assert v['UNKNOWNWORD'] == 0

    # After remapping the unknown token to index 1 via token_to_idx,
    # OOV lookups must resolve to the new index.
    v = Vocab(token_to_idx={unknown_token: 1})
    assert v['UNKNOWNWORD'] == 1