diff --git a/env/cpu/py2.yml b/env/cpu/py2.yml
deleted file mode 100644
index a41131b0e2..0000000000
--- a/env/cpu/py2.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-channels:
-  - conda-forge
-dependencies:
-  - python=2.7
-  - pip=18.1
-  - perl
-  - pylint=1.9.2
-  - flake8
-  - sphinx=1.7.7
-  - spacy
-  - nltk
-  - pytest=4.5.0
-  - flaky=3.5.3
-  - pytest-cov=2.7.1
-  - mock<3
-  - pytest-xdist<2
-  - regex
-  - pip:
-    - pylint-quotes<0.2
-    - mxnet-mkl>=1.4.1
-    - sentencepiece<0.2
diff --git a/env/cpu/py3-master.yml b/env/cpu/py3-master.yml
index 758078eb95..e112cded68 100644
--- a/env/cpu/py3-master.yml
+++ b/env/cpu/py3-master.yml
@@ -4,7 +4,7 @@ dependencies:
   - python=3.6
   - pip=18.1
   - perl
-  - pylint=1.9.2
+  - pylint=2.3.1
   - flake8
   - sphinx=1.7.7
   - spacy
@@ -16,7 +16,7 @@ dependencies:
   - pytest-xdist<2
   - regex
   - pip:
-    - pylint-quotes<0.2
+    - pylint-quotes==0.2.1
     - mxnet-mkl>=1.5.0b20190407
     - sacremoses
     - sentencepiece<0.2
diff --git a/env/cpu/py3.yml b/env/cpu/py3.yml
index e3e14cc31b..35701d0afa 100644
--- a/env/cpu/py3.yml
+++ b/env/cpu/py3.yml
@@ -4,7 +4,7 @@ dependencies:
   - python=3.6
   - pip=18.1
   - perl
-  - pylint=1.9.2
+  - pylint=2.3.1
   - flake8
   - sphinx=1.7.7
   - spacy
@@ -26,7 +26,7 @@ dependencies:
   - ipykernel
   - regex
   - pip:
-    - pylint-quotes<0.2
+    - pylint-quotes==0.2.1
     - mxnet-mkl>=1.4.1
     - sacremoses
     - sentencepiece<0.2
diff --git a/env/gpu/py2.yml b/env/gpu/py2.yml
deleted file mode 100644
index 2e8b04bb4a..0000000000
--- a/env/gpu/py2.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-channels:
-  - conda-forge
-dependencies:
-  - python=2.7
-  - pip=18.1
-  - perl
-  - pylint=1.9.2
-  - flake8
-  - sphinx=1.7.7
-  - spacy
-  - nltk
-  - pytest=4.5.0
-  - flaky=3.5.3
-  - pytest-cov=2.7.1
-  - mock<3
-  - pytest-xdist<2
-  - regex
-  - pip:
-    - pylint-quotes<0.2
-    - mxnet-cu92mkl>=1.4.1
-    - sentencepiece<0.2
diff --git a/env/gpu/py3-master.yml b/env/gpu/py3-master.yml
index 84669b83d1..e5e1bfef18 100644
--- a/env/gpu/py3-master.yml
+++ b/env/gpu/py3-master.yml
@@ -4,7 +4,7 @@ dependencies:
   - python=3.6
   - pip=18.1
   - perl
-  - pylint=1.9.2
+  - pylint=2.3.1
   - flake8
   - sphinx=1.7.7
   - spacy
diff --git a/env/gpu/py3.yml b/env/gpu/py3.yml
index 536d1ec51f..6643387782 100644
--- a/env/gpu/py3.yml
+++ b/env/gpu/py3.yml
@@ -4,7 +4,7 @@ dependencies:
   - python=3.6
   - pip=18.1
   - perl
-  - pylint=1.9.2
+  - pylint=2.3.1
   - flake8
   - sphinx=1.7.7
   - spacy
@@ -26,7 +26,7 @@ dependencies:
   - ipykernel
   - regex
   - pip:
-    - pylint-quotes<0.2
+    - pylint-quotes==0.2.1
     - mxnet-cu92mkl>=1.4.1
     - sacremoses
     - sentencepiece<0.2
diff --git a/scripts/bert/create_pretraining_data.py b/scripts/bert/create_pretraining_data.py
index f1230e4608..6d81cdcbe0 100644
--- a/scripts/bert/create_pretraining_data.py
+++ b/scripts/bert/create_pretraining_data.py
@@ -33,7 +33,7 @@
 from gluonnlp.data import BERTTokenizer
 
 
-class TrainingInstance(object):
+class TrainingInstance:
     """A single training instance (sentence pair)."""
 
     def __init__(self, tokens, segment_ids, masked_lm_positions,
diff --git a/scripts/bert/data/classification.py b/scripts/bert/data/classification.py
index 3a8443a8b7..5549e06522 100644
--- a/scripts/bert/data/classification.py
+++ b/scripts/bert/data/classification.py
@@ -30,7 +30,7 @@
     from baidu_ernie_data import BaiduErnieXNLI, BaiduErnieLCQMC, BaiduErnieChnSentiCorp
 
 
-class GlueTask(object):
+class GlueTask:
     """Abstract GLUE task class.
 
     Parameters
diff --git a/scripts/bert/data/dataloader.py b/scripts/bert/data/dataloader.py
index 2bb890ffca..594955908c 100644
--- a/scripts/bert/data/dataloader.py
+++ b/scripts/bert/data/dataloader.py
@@ -24,7 +24,7 @@
 import multiprocessing
 from gluonnlp.data.stream import _PathDataset
 
-class DatasetFn(object):
+class DatasetFn:
     """Callable object to generate a gluon.data.Dataset given a url.
 
     Subclasses should override the __call__ method.
@@ -32,7 +32,7 @@ class DatasetFn(object):
     def __call__(self, dataset_url):
         raise NotImplementedError
 
-class SamplerFn(object):
+class SamplerFn:
     """Callable object to generate a gluon.data.sampler.Sampler given a dataset.
 
     Subclasses should override the __call__ method.
@@ -40,7 +40,7 @@ class SamplerFn(object):
     def __call__(self, dataset):
         raise NotImplementedError
 
-class DataLoaderFn(object):
+class DataLoaderFn:
     """Callable object to generate a DataLoader object given a dataset and sampler.
 
     Subclasses should override the __call__ method.
@@ -48,7 +48,7 @@ class DataLoaderFn(object):
     def __call__(self, dataset, sampler):
         raise NotImplementedError
 
-class SimpleDataLoaderFn(object):
+class SimpleDataLoaderFn:
     """A simple callable object that geneartes a data loader by applying
     dataloader_cls(dataset, batch_sampler=sampler, **dataset_params)
     """
@@ -77,7 +77,7 @@ def _worker_fn(url, dataset_fn, sampler_fn):
     sampler = sampler_fn(dataset)
     return (dataset, sampler)
 
-class _MultiWorkerIter(object):
+class _MultiWorkerIter:
     """Internal multi-worker iterator for DataLoader."""
     def __init__(self, worker_pool, worker_fn, dataset, file_sampler,
                  dataset_fn, sampler_fn, dataloader_fn, prefetch):
@@ -165,7 +165,7 @@ def __iter__(self):
         return self
 
 
-class DatasetLoader(object):
+class DatasetLoader:
     """Loads data from a list of datasets and returns mini-batches of data.
 
     One dataset is loaded at a time.
diff --git a/scripts/bert/data/ner.py b/scripts/bert/data/ner.py
index 0de373db04..801d2d85c5 100644
--- a/scripts/bert/data/ner.py
+++ b/scripts/bert/data/ner.py
@@ -188,7 +188,7 @@ def load_segment(file_path, bert_tokenizer):
     return subword_sentences
 
 
-class BERTTaggingDataset(object):
+class BERTTaggingDataset:
     """
 
     Parameters
diff --git a/scripts/bert/data/qa.py b/scripts/bert/data/qa.py
index 4e335c756e..27ae89397b 100644
--- a/scripts/bert/data/qa.py
+++ b/scripts/bert/data/qa.py
@@ -23,7 +23,7 @@
 
 __all__ = ['SQuADTransform', 'preprocess_dataset']
 
-class SquadExample(object):
+class SquadExample:
     """A single training/test example for SQuAD question.
 
        For examples without an answer, the start and end position are -1.
@@ -86,7 +86,7 @@ def preprocess_dataset(dataset, transform, num_workers=8):
     return dataset, dataset_len
 
 
-class SQuADFeature(object):
+class SQuADFeature:
     """Single feature of a single example transform of the SQuAD question.
 
     """
@@ -120,7 +120,7 @@ def __init__(self,
         self.is_impossible = is_impossible
 
 
-class SQuADTransform(object):
+class SQuADTransform:
     """Dataset Transformation for BERT-style QA.
 
     The transformation is processed in the following steps:
diff --git a/scripts/bert/data/transform.py b/scripts/bert/data/transform.py
index 35966b017b..07acfeaea1 100644
--- a/scripts/bert/data/transform.py
+++ b/scripts/bert/data/transform.py
@@ -21,7 +21,7 @@
 import numpy as np
 from gluonnlp.data import BERTSentenceTransform
 
-class BERTDatasetTransform(object):
+class BERTDatasetTransform:
     """Dataset transformation for BERT-style sentence classification or regression.
 
     Parameters
diff --git a/scripts/bert/embedding.py b/scripts/bert/embedding.py
index ad7ed41c0a..f680583a3a 100644
--- a/scripts/bert/embedding.py
+++ b/scripts/bert/embedding.py
@@ -55,7 +55,7 @@ def to_unicode(s):
 logger = logging.getLogger(__name__)
 
 
-class BertEmbedding(object):
+class BertEmbedding:
     """
     Encoding from BERT model.
 
diff --git a/scripts/bert/fp16_utils.py b/scripts/bert/fp16_utils.py
index b7651adc46..ec5a15d121 100644
--- a/scripts/bert/fp16_utils.py
+++ b/scripts/bert/fp16_utils.py
@@ -107,7 +107,7 @@ def group_by_ctx(arr_list):
     return total_norm, chosen_scale, is_finite
 
 
-class FP16Trainer(object):
+class FP16Trainer:
     """ Trainer for mixed precision training.
 
     Parameters
@@ -182,7 +182,7 @@ def step(self, batch_size, max_norm=None):
         # update scale based on overflow information
         self._scaler.update_scale(overflow)
 
-class LossScaler(object):
+class LossScaler:
     """Abstract loss scaler"""
     def has_overflow(self, params):
         """ detect inf and nan """
@@ -208,7 +208,6 @@ def __init__(self, init_scale=1):
 
     def update_scale(self, overflow):
         """update loss scale"""
-        pass
 
 class DynamicLossScaler(LossScaler):
     """Class that manages dynamic loss scaling.
diff --git a/scripts/bert/pretraining_utils.py b/scripts/bert/pretraining_utils.py
index 6c2ec1c9b2..c5965d684c 100644
--- a/scripts/bert/pretraining_utils.py
+++ b/scripts/bert/pretraining_utils.py
@@ -24,6 +24,7 @@
 import argparse
 import random
 import multiprocessing
+import functools
 
 import numpy as np
 
@@ -258,7 +259,7 @@ def __call__(self, dataset, sampler):
                                     num_workers=self._num_ctxes)
         return dataloader
 
-class BERTLoaderTransform(object):
+class BERTLoaderTransform:
     """Create dataloader for a BERT dataset. """
 
     def __init__(self, use_avg_len, batch_size, shuffle, num_ctxes, num_buckets):
@@ -281,7 +282,8 @@ def get_pretrain_data_npz(data, batch_size, num_ctxes, shuffle, use_avg_len,
         'Number of training files must be greater than the number of partitions. ' \
         'Only found %d files at %s'%(num_files, data)
     split_sampler = nlp.data.SplitSampler(num_files, num_parts=num_parts, part_index=part_idx)
-    stream = nlp.data.SimpleDatasetStream(nlp.data.NumpyDataset, data, split_sampler)
+    NumpyDataset = functools.partial(nlp.data.NumpyDataset, allow_pickle=True)
+    stream = nlp.data.SimpleDatasetStream(NumpyDataset, data, split_sampler)
     stream = nlp.data.PrefetchingStream(stream, worker_type='process')
 
     # create data loader based on the dataset
diff --git a/scripts/machine_translation/bleu.py b/scripts/machine_translation/bleu.py
index a54afc526a..6312afb80e 100644
--- a/scripts/machine_translation/bleu.py
+++ b/scripts/machine_translation/bleu.py
@@ -110,7 +110,7 @@ def _tokenize_mteval_13a(segment):
     return norm
 
 
-class UnicodeRegex(object):
+class UnicodeRegex:
     """Ad-hoc hack to recognize all punctuation and symbols.
     """
     def __init__(self):
diff --git a/scripts/machine_translation/dataprocessor.py b/scripts/machine_translation/dataprocessor.py
index 86f047ff8e..44a032fec5 100644
--- a/scripts/machine_translation/dataprocessor.py
+++ b/scripts/machine_translation/dataprocessor.py
@@ -65,7 +65,7 @@ def _load_cached_dataset(prefix):
         return None
 
 
-class TrainValDataTransform(object):
+class TrainValDataTransform:
     """Transform the machine translation dataset.
 
     Clip source and the target sentences to the maximum length. For the source sentence, append the
@@ -176,7 +176,7 @@ def load_translation_data(dataset, bleu, args):
         fetch_tgt_sentence = lambda src, tgt: tgt.split()
         val_tgt_sentences = list(data_val.transform(fetch_tgt_sentence))
         test_tgt_sentences = list(data_test.transform(fetch_tgt_sentence))
-    elif bleu == '13a' or bleu == 'intl':
+    elif bleu in ('13a', 'intl'):
         fetch_tgt_sentence = lambda src, tgt: tgt
         if dataset == 'WMT2016BPE':
             val_text = nlp.data.WMT2016('newstest2013', src_lang=src_lang, tgt_lang=tgt_lang)
@@ -185,7 +185,7 @@ def load_translation_data(dataset, bleu, args):
             val_text = nlp.data.WMT2014('newstest2013', src_lang=src_lang, tgt_lang=tgt_lang)
             test_text = nlp.data.WMT2014('newstest2014', src_lang=src_lang, tgt_lang=tgt_lang,
                                          full=args.full)
-        elif dataset == 'IWSLT2015' or dataset == 'TOY':
+        elif dataset in ('IWSLT2015', 'TOY'):
             val_text = data_val
             test_text = data_test
         else:
diff --git a/scripts/machine_translation/translation.py b/scripts/machine_translation/translation.py
index 8cb4271cf3..b3d0e97e00 100644
--- a/scripts/machine_translation/translation.py
+++ b/scripts/machine_translation/translation.py
@@ -25,7 +25,7 @@
 import mxnet as mx
 from gluonnlp.model import BeamSearchScorer, BeamSearchSampler
 
-class BeamSearchTranslator(object):
+class BeamSearchTranslator:
     """Beam Search Translator
 
     Parameters
diff --git a/scripts/parsing/common/data.py b/scripts/parsing/common/data.py
index 2032ebbff5..fb9bbbd3f6 100644
--- a/scripts/parsing/common/data.py
+++ b/scripts/parsing/common/data.py
@@ -27,7 +27,7 @@
 from .savable import Savable
 
 
-class ConllWord(object):
+class ConllWord:
     """CoNLL format template, see http://anthology.aclweb.org/W/W06/W06-2920.pdf
 
     Parameters
@@ -76,7 +76,7 @@ def __str__(self):
         return '\t'.join(['_' if v is None else v for v in values])
 
 
-class ConllSentence(object):
+class ConllSentence:
     """A list of ConllWord
 
     Parameters
@@ -365,7 +365,7 @@ def rel_size(self):
         return len(self._id2rel)
 
 
-class DataLoader(object):
+class DataLoader:
     """
     Load CoNLL data
     Adopted from https://github.com/jcyk/Dynet-Biaffine-dependency-parser with some modifications
diff --git a/scripts/parsing/common/k_means.py b/scripts/parsing/common/k_means.py
index 2d8d134ec4..632cfe3481 100755
--- a/scripts/parsing/common/k_means.py
+++ b/scripts/parsing/common/k_means.py
@@ -21,7 +21,7 @@
 import numpy as np
 
 
-class KMeans(object):
+class KMeans:
     """
     Cluster sentences by their lengths
 
@@ -101,7 +101,6 @@ def __init__(self, k, len_cntr):
         # print('%d) Final splits: %s; Final mass: %d' % (i, self._splits, self.get_mass()))
 
         self._reindex()
-        return
 
     def _recenter(self):
         """
diff --git a/scripts/parsing/common/savable.py b/scripts/parsing/common/savable.py
index 29179f1b8c..52bf88814c 100644
--- a/scripts/parsing/common/savable.py
+++ b/scripts/parsing/common/savable.py
@@ -21,7 +21,7 @@
 import pickle
 
 
-class Savable(object):
+class Savable:
     """
     A super class for save/load operations.
     """
diff --git a/scripts/parsing/common/tarjan.py b/scripts/parsing/common/tarjan.py
index 9fe296d715..75a98c6f33 100755
--- a/scripts/parsing/common/tarjan.py
+++ b/scripts/parsing/common/tarjan.py
@@ -78,7 +78,6 @@ def strongconnect(self, v, index, stack):
             w = stack.pop()
             self._onstack[w] = False
             self._SCCs[-1].add(w)
-        return
 
     # ======================
     @property
diff --git a/scripts/parsing/common/utils.py b/scripts/parsing/common/utils.py
index 749502ae67..6d94493112 100644
--- a/scripts/parsing/common/utils.py
+++ b/scripts/parsing/common/utils.py
@@ -33,7 +33,7 @@
 from .tarjan import Tarjan
 
 
-class Progbar(object):
+class Progbar:
     """Progbar class copied from keras (https://github.com/fchollet/keras/)
 
     Displays a progress bar.
diff --git a/scripts/parsing/parser/dep_parser.py b/scripts/parsing/parser/dep_parser.py
index 053aea3c98..73a9b5d3ac 100644
--- a/scripts/parsing/parser/dep_parser.py
+++ b/scripts/parsing/parser/dep_parser.py
@@ -33,7 +33,7 @@
 from scripts.parsing.parser.evaluate import evaluate_official_script
 
 
-class DepParser(object):
+class DepParser:
     """User interfaces for biaffine dependency parser.
 
     It wraps a biaffine model inside, provides training, evaluating and parsing.
diff --git a/scripts/question_answering/data_pipeline.py b/scripts/question_answering/data_pipeline.py
index b454333569..0cd0b8b6ba 100644
--- a/scripts/question_answering/data_pipeline.py
+++ b/scripts/question_answering/data_pipeline.py
@@ -38,7 +38,7 @@
 from gluonnlp.data import SQuAD
 
 
-class SQuADDataPipeline(object):
+class SQuADDataPipeline:
     """Main data processing pipeline class, which encapsulate all preprocessing logic. The class
     process the data in multiprocessing mode using Pool. It can save/load the result of processing,
     but since it happens in a single thread, it is usually faster to just process data from scratch.
@@ -432,7 +432,7 @@ def _partition(mapped_values):
         return partitioned_data.items()
 
 
-class SQuADDataTokenizer(object):
+class SQuADDataTokenizer:
     """SQuAD data tokenizer, that encapsulate the splitting logic of each entry of SQuAD dataset"""
     spacy_tokenizer = nlp.data.SpacyTokenizer()
 
@@ -574,7 +574,7 @@ def _get_token_spans(text, tokens):
         return spans
 
 
-class SQuADDataFilter(object):
+class SQuADDataFilter:
     """Filter an example based on the specified conditions"""
 
     def __init__(self, para_limit, ques_limit, ans_limit):
@@ -612,7 +612,7 @@ def filter(self, example):
                (example['y2s'][0] - example['y1s'][0]) <= self._ans_limit
 
 
-class SQuADAsyncVocabMapper(object):
+class SQuADAsyncVocabMapper:
     """A multiprocessing implementation of a Mapper for tokens counting"""
 
     def __init__(self, iterate_over_example=False):
@@ -663,7 +663,7 @@ def __call__(self, example):
         return list(counter.items())
 
 
-class SQuADAsyncVocabReducer(object):
+class SQuADAsyncVocabReducer:
     """A multiprocessing implementation of a Reducing for tokens counting"""
 
     def run_async(self, items, pool):
@@ -701,7 +701,7 @@ def __call__(self, item):
         return token, sum(counts)
 
 
-class SQuADDataFeaturizer(object):
+class SQuADDataFeaturizer:
     """Class that converts tokenized examples into featurized"""
 
     def __init__(self, word_vocab, char_vocab, para_limit, ques_limit, char_limit,
@@ -892,7 +892,7 @@ def get_record_by_idx(self, rec_idx):
         return self._record_idx_to_record[rec_idx]['rec']
 
 
-class SQuADDataLoaderTransformer(object):
+class SQuADDataLoaderTransformer:
     """Thin wrapper on SQuADQADataset that removed non-numeric values from the record. The output of
     that transformer can be provided to a DataLoader"""
 
diff --git a/scripts/sentiment_analysis/process_data.py b/scripts/sentiment_analysis/process_data.py
index 962935b073..dd9ec892b7 100644
--- a/scripts/sentiment_analysis/process_data.py
+++ b/scripts/sentiment_analysis/process_data.py
@@ -61,7 +61,7 @@ def _load_file(data_name):
 
 
 def _clean_str(string, data_name):
-    if data_name == 'SST-1' or data_name == 'SST-2':
+    if data_name in ('SST-1', 'SST-2'):
         string = re.sub(r'[^A-Za-z0-9(),!?\'\`]', ' ', string)
         string = re.sub(r'\s{2,}', ' ', string)
         return string.strip().lower()
@@ -122,7 +122,7 @@ def _preprocess_dataset(dataset, vocab, max_len):
 
 def load_dataset(data_name):
     """Load sentiment dataset."""
-    if data_name == 'MR' or data_name == 'Subj' or data_name == 'CR' or data_name == 'MPQA':
+    if data_name in ('MR', 'Subj', 'CR', 'MPQA'):
         train_dataset, output_size = _load_file(data_name)
         vocab, max_len = _build_vocab(data_name, train_dataset, [], [])
         train_dataset, train_data_lengths = _preprocess_dataset(train_dataset, vocab, max_len)
diff --git a/scripts/sentiment_analysis/text_cnn.py b/scripts/sentiment_analysis/text_cnn.py
index 662ec44f77..96098890ae 100644
--- a/scripts/sentiment_analysis/text_cnn.py
+++ b/scripts/sentiment_analysis/text_cnn.py
@@ -71,7 +71,7 @@ def init(textCNN, vocab, model_mode, context):
         textCNN.embedding.weight.set_data(vocab.embedding.idx_to_vec)
     if model_mode == 'multichannel':
         textCNN.embedding_extend.weight.set_data(vocab.embedding.idx_to_vec)
-    if model_mode == 'static' or model_mode == 'multichannel':
+    if model_mode in ('static', 'multichannel'):
         # Parameters of textCNN.embedding are not updated during training.
         textCNN.embedding.collect_params().setattr('grad_req', 'null')
     trainer = gluon.Trainer(textCNN.collect_params(), 'adadelta', {'rho':0.95, 'clip_gradient':3})
diff --git a/scripts/text_generation/sequence_sampling.py b/scripts/text_generation/sequence_sampling.py
index 2039c77d03..2373f20767 100644
--- a/scripts/text_generation/sequence_sampling.py
+++ b/scripts/text_generation/sequence_sampling.py
@@ -86,7 +86,7 @@
 
 # Define the decoder function, we use log_softmax to map the output scores to log-likelihoods
 # Also, we transform the layout to NTC
-class LMDecoder(object):
+class LMDecoder:
     def __init__(self, net):
         self.net = net
 
diff --git a/src/gluonnlp/base.py b/src/gluonnlp/base.py
index 7196175699..a34c55a24d 100644
--- a/src/gluonnlp/base.py
+++ b/src/gluonnlp/base.py
@@ -45,9 +45,9 @@ def numba_jitclass(spec):
         # pylint: disable=unused-argument
         return identity
 
-    class NumbaTypes(object):
+    class NumbaTypes:
         """Shim for numba.types"""
-        class NumbaType(object):
+        class NumbaType:
             """Shim for numba.types.type"""
             def __getitem__(self, x):
                 # pylint: disable=unused-argument
diff --git a/src/gluonnlp/data/batchify/batchify.py b/src/gluonnlp/data/batchify/batchify.py
index dad5e5e118..c18728224d 100644
--- a/src/gluonnlp/data/batchify/batchify.py
+++ b/src/gluonnlp/data/batchify/batchify.py
@@ -93,7 +93,7 @@ def _stack_arrs(arrs, use_shared_mem, dtype):
             return mx.nd.array(out, dtype=dtype)
 
 
-class Stack(object):
+class Stack:
     r"""Stack the input data samples to construct the batch.
 
     The N input samples must have the same shape/length and will be stacked to construct a batch.
@@ -157,7 +157,7 @@ def __call__(self, data):
         return _stack_arrs(data, True, self._dtype)
 
 
-class Pad(object):
+class Pad:
     """Return a callable that pads and stacks data.
 
     Parameters
@@ -275,7 +275,7 @@ def __call__(self, data):
             raise NotImplementedError
 
 
-class Tuple(object):
+class Tuple:
     """Wrap multiple batchify functions together. The input functions will be applied
     to the corresponding input fields.
 
@@ -342,7 +342,7 @@ def __call__(self, data):
             ret.append(ele_fn([ele[i] for ele in data]))
         return tuple(ret)
 
-class List(object):
+class List:
     """Simply forward the list of input data.
 
     This is particularly useful when the Dataset contains textual data
diff --git a/src/gluonnlp/data/batchify/embedding.py b/src/gluonnlp/data/batchify/embedding.py
index bd6d247616..22128cfdc5 100644
--- a/src/gluonnlp/data/batchify/embedding.py
+++ b/src/gluonnlp/data/batchify/embedding.py
@@ -39,7 +39,7 @@ def numba_njit(func):
         return func
 
 
-class EmbeddingCenterContextBatchify(object):
+class EmbeddingCenterContextBatchify:
     """Helper to create batches of center and contexts words.
 
     Batches are created lazily on a optionally shuffled version of the Dataset.
diff --git a/src/gluonnlp/data/batchify/language_model.py b/src/gluonnlp/data/batchify/language_model.py
index f908b1f6ef..8e3a4286cd 100644
--- a/src/gluonnlp/data/batchify/language_model.py
+++ b/src/gluonnlp/data/batchify/language_model.py
@@ -32,7 +32,7 @@
 from ..utils import slice_sequence, _slice_pad_length
 from ..stream import DataStream
 
-class CorpusBatchify(object):
+class CorpusBatchify:
     """Transform the dataset into N independent sequences, where N is the batch size.
 
     Parameters
@@ -70,7 +70,7 @@ def __call__(self, data):
                     self._batch_size, -1).T)
 
 
-class CorpusBPTTBatchify(object):
+class CorpusBPTTBatchify:
     """Transform the dataset into batches of numericalized samples, in the way
     that the recurrent states from last batch connects with the current batch
     for each sample.
@@ -154,7 +154,7 @@ def _split_data_label(x):
     return x[:-1, :], x[1:, :]
 
 
-class StreamBPTTBatchify(object):
+class StreamBPTTBatchify:
     """Transform a Stream of CorpusDataset to BPTT batches.
 
     The corpus is transformed into batches of numericalized samples, in the way that the
diff --git a/src/gluonnlp/data/dataloader.py b/src/gluonnlp/data/dataloader.py
index d8c83eaad6..edb213af45 100644
--- a/src/gluonnlp/data/dataloader.py
+++ b/src/gluonnlp/data/dataloader.py
@@ -59,7 +59,7 @@ def _thread_worker_fn(samples, batchify_fn, dataset):
         batch = batchify_fn([dataset[i] for i in samples])
     return batch
 
-class _MultiWorkerIter(object):
+class _MultiWorkerIter:
     """Internal multi-worker iterator for DataLoader."""
     def __init__(self, worker_pool, batchify_fn, batch_sampler, pin_memory=False,
                  worker_fn=_worker_fn, prefetch=0, dataset=None):
@@ -112,7 +112,7 @@ def __iter__(self):
         return self
 
 
-class ShardedDataLoader(object):
+class ShardedDataLoader:
     """Loads data from a dataset and returns mini-batches of data.
 
     Parameters
diff --git a/src/gluonnlp/data/dataset.py b/src/gluonnlp/data/dataset.py
index cab6601ea3..6a67557f0d 100644
--- a/src/gluonnlp/data/dataset.py
+++ b/src/gluonnlp/data/dataset.py
@@ -253,14 +253,16 @@ class NumpyDataset(ArrayDataset):
     ----------
     filename : str
         Path to the .npy or .npz file.
+    kwargs
+        Keyword arguments are passed to np.load.
 
     Properties
     ----------
     keys: list of str or None
         The list of keys loaded from the .npz file.
     """
-    def __init__(self, filename):
-        arrs = np.load(filename)
+    def __init__(self, filename, **kwargs):
+        arrs = np.load(filename, **kwargs)
         keys = None
         data = []
         if filename.endswith('.npy'):
diff --git a/src/gluonnlp/data/glue.py b/src/gluonnlp/data/glue.py
index 8cfbc170d8..c7408da050 100644
--- a/src/gluonnlp/data/glue.py
+++ b/src/gluonnlp/data/glue.py
@@ -657,7 +657,7 @@ def _generate(self, segment):
         download(self._repo_dir() + raw_name, path=raw_path, sha1_hash=raw_hash)
         data_path = os.path.join(self._root, data_name)
 
-        if segment == 'train' or segment == 'dev':
+        if segment in ('train', 'dev'):
             if os.path.isfile(data_path) and check_sha1(data_path, data_hash):
                 return
 
diff --git a/src/gluonnlp/data/sampler.py b/src/gluonnlp/data/sampler.py
index 73d008d0ef..6f839c71ec 100644
--- a/src/gluonnlp/data/sampler.py
+++ b/src/gluonnlp/data/sampler.py
@@ -71,7 +71,7 @@ def _bucket_stats(bucket_sample_ids, seq_lengths):
     return (bucket_average_lengths, bucket_length_stds)
 
 
-class BucketScheme(object):
+class BucketScheme:
     r"""Base class for generating bucket keys."""
     def __call__(self, max_lengths, min_lengths, num_buckets):
         """Generate bucket keys based on the lengths of sequences and number of buckets.
diff --git a/src/gluonnlp/data/stream.py b/src/gluonnlp/data/stream.py
index ffb847d1fb..68150c207e 100644
--- a/src/gluonnlp/data/stream.py
+++ b/src/gluonnlp/data/stream.py
@@ -47,7 +47,7 @@
     'DataStream', 'SimpleDataStream', 'DatasetStream', 'SimpleDatasetStream',
     'PrefetchingStream']
 
-class DataStream(object):
+class DataStream:
     """Abstract Data Stream Interface.
 
     DataStreams are useful to avoid loading big datasets to memory. A
@@ -232,7 +232,7 @@ def __iter__(self):
             yield self._dataset(filename, **self._kwargs)
 
 
-class _Prefetcher(object):
+class _Prefetcher:
     """Internal shared prefetcher logic."""
     _dataq = None  # Data queue transmits prefetched elements
     _controlq = None  # Control queue to instruct thread / process shutdown
@@ -303,8 +303,7 @@ def __next__(self):
             self._controlq.put(None)
             if isinstance(next_error[0], StopIteration):
                 raise StopIteration
-            else:
-                return self._reraise(*next_error)
+            return self._reraise(*next_error)
 
     def _reraise(self, e, tb):
         print('Reraising exception from Prefetcher', file=sys.stderr)
diff --git a/src/gluonnlp/data/transforms.py b/src/gluonnlp/data/transforms.py
index edacadcbfd..fbf6bec31c 100644
--- a/src/gluonnlp/data/transforms.py
+++ b/src/gluonnlp/data/transforms.py
@@ -49,7 +49,7 @@
 from ..base import get_home_dir
 
 
-class ClipSequence(object):
+class ClipSequence:
     """Clip the sequence to have length no more than `length`.
 
     Parameters
@@ -82,7 +82,7 @@ def __call__(self, sample):
         return sample[:min(len(sample), self._length)]
 
 
-class PadSequence(object):
+class PadSequence:
     """Pad the sequence.
 
     Pad the sequence to the given `length` by inserting `pad_val`. If `clip` is set,
@@ -157,7 +157,7 @@ def __call__(self, sample):
                     'mxnet.NDArray, received type=%s' % str(type(sample)))
 
 
-class NLTKMosesTokenizer(object):
+class NLTKMosesTokenizer:
     """Apply the Moses Tokenizer implemented in NLTK.
 
     Users of this class are required to install `NLTK <https://www.nltk.org/install.html>`_
@@ -219,7 +219,7 @@ def __call__(self, sample, return_str=False):
         return self._tokenizer.tokenize(sample, return_str=return_str)
 
 
-class SacreMosesTokenizer(object):
+class SacreMosesTokenizer:
     """Apply the Moses Tokenizer implemented in sacremoses.
 
     Users of this class are required to install
@@ -285,7 +285,7 @@ def __call__(self, sample, return_str=False):
         return self._tokenizer.tokenize(sample, return_str=return_str)
 
 
-class SpacyTokenizer(object):
+class SpacyTokenizer:
     """Apply the Spacy Tokenizer.
 
     Users of this class are required to install `spaCy <https://spacy.io/usage/>`_
@@ -348,7 +348,7 @@ def __call__(self, sample):
         return [tok.text for tok in self._nlp(sample)]
 
 
-class NLTKMosesDetokenizer(object):
+class NLTKMosesDetokenizer:
     r"""Apply the Moses Detokenizer implemented in NLTK.
 
     Users of this class are required to `install NLTK <https://www.nltk.org/install.html>`_
@@ -410,7 +410,7 @@ def __call__(self, sample, return_str=False):
         return self._detokenizer.detokenize(sample, return_str=return_str)
 
 
-class SacreMosesDetokenizer(object):
+class SacreMosesDetokenizer:
     r"""Apply the Moses Detokenizer implemented in sacremoses.
 
     Users of this class are required to `install sacremoses
@@ -490,7 +490,7 @@ def __call__(self, sample, return_str=None):
         return self._detokenizer.detokenize(sample, return_str=ret_str)
 
 
-class JiebaTokenizer(object):
+class JiebaTokenizer:
     r"""Apply the jieba Tokenizer.
 
     Users of this class are required to `install jieba <https://github.com/fxsjy/jieba>`_
@@ -537,11 +537,11 @@ def __call__(self, sample):
         # we use default cutting mode provided by jieba, i.e., accurate mode
         return [
             tok for tok in self._tokenizer.cut(sample)
-            if tok != ' ' and tok != ''
+            if tok not in (' ', '')
         ]
 
 
-class NLTKStanfordSegmenter(object):
+class NLTKStanfordSegmenter:
     r"""Apply the Stanford Chinese Word Segmenter implemented in NLTK.
 
     Users of this class are required to install Java, NLTK and download Stanford Word Segmenter
@@ -651,7 +651,7 @@ def __call__(self, sample):
         return [tok for tok in self._tokenizer.segment(sample).strip().split()]
 
 
-class _SentencepieceProcessor(object):
+class _SentencepieceProcessor:
     def __init__(self, path):
         try:
             import sentencepiece
@@ -774,7 +774,7 @@ def __call__(self, sample):
         return self._processor.DecodePieces(sample)
 
 
-class BERTBasicTokenizer(object):
+class BERTBasicTokenizer:
     r"""Runs basic tokenization
 
     performs invalid character removal (e.g. control chars) and whitespace.
@@ -885,13 +885,10 @@ def _is_chinese_char(self, cp):
         # as is Japanese Hiragana and Katakana. Those alphabets are used to write
         # space-separated words, so they are not treated specially and handled
         # like the all of the other languages.
-        if ((cp >= 0x4E00 and cp <= 0x9FFF) or (cp >= 0x3400 and cp <= 0x4DBF)
-                or (cp >= 0x20000 and cp <= 0x2A6DF)
-                or (cp >= 0x2A700 and cp <= 0x2B73F)
-                or (cp >= 0x2B740 and cp <= 0x2B81F)
-                or (cp >= 0x2B820 and cp <= 0x2CEAF)
-                or (cp >= 0xF900 and cp <= 0xFAFF)
-                or (cp >= 0x2F800 and cp <= 0x2FA1F)):
+        if ((0x4E00 <= cp <= 0x9FFF) or (0x3400 <= cp <= 0x4DBF) or (0x20000 <= cp <= 0x2A6DF)
+                or (0x2A700 <= cp <= 0x2B73F) or (0x2B740 <= cp <= 0x2B81F)
+                or (0x2B820 <= cp <= 0x2CEAF) or (0xF900 <= cp <= 0xFAFF)
+                or (0x2F800 <= cp <= 0x2FA1F)):
             return True
 
         return False
@@ -934,10 +931,10 @@ def _is_punctuation(self, char):
         # Characters such as "^", "$", and "`" are not in the Unicode
         # Punctuation class but we treat them as punctuation anyways, for
         # consistency.
-        group0 = cp >= 33 and cp <= 47
-        group1 = cp >= 58 and cp <= 64
-        group2 = cp >= 91 and cp <= 96
-        group3 = cp >= 123 and cp <= 126
+        group0 = 33 <= cp <= 47
+        group1 = 58 <= cp <= 64
+        group2 = 91 <= cp <= 96
+        group3 = 123 <= cp <= 126
         if (group0 or group1 or group2 or group3):
             return True
         cat = unicodedata.category(char)
@@ -963,7 +960,7 @@ def _whitespace_tokenize(self, text):
         return tokens
 
 
-class BERTTokenizer(object):
+class BERTTokenizer:
     r"""End-to-end tokenization for BERT models.
 
     Parameters
@@ -1220,7 +1217,7 @@ def is_first_subword(token):
         return token.startswith(BERTSPTokenizer._special_prefix)
 
 
-class BERTSentenceTransform(object):
+class BERTSentenceTransform:
     r"""BERT style data transformation.
 
     Parameters
@@ -1366,7 +1363,7 @@ def _truncate_seq_pair(self, tokens_a, tokens_b, max_length):
             else:
                 tokens_b.pop()
 
-class _GPT2BPE(object):
+class _GPT2BPE:
     """Base class for GPT-2 BPE tokenizer and detokenizer."""
     def __init__(self):
         codes = list(range(ord(u'!'), ord(u'~') + 1)) +\
diff --git a/src/gluonnlp/data/utils.py b/src/gluonnlp/data/utils.py
index d403473bfa..925c2c3991 100644
--- a/src/gluonnlp/data/utils.py
+++ b/src/gluonnlp/data/utils.py
@@ -406,7 +406,7 @@ def whitespace_splitter(s):
     return s.split()
 
 
-class Splitter(object):
+class Splitter:
     """Split a string based on a separator.
 
     Parameters
diff --git a/src/gluonnlp/embedding/evaluation.py b/src/gluonnlp/embedding/evaluation.py
index cf689b8465..47a47d0786 100644
--- a/src/gluonnlp/embedding/evaluation.py
+++ b/src/gluonnlp/embedding/evaluation.py
@@ -32,12 +32,10 @@
 
 class _WordEmbeddingEvaluationFunction(HybridBlock):  # pylint: disable=abstract-method
     """Base class for word embedding evaluation functions."""
-    pass
 
 
 class WordEmbeddingSimilarityFunction(_WordEmbeddingEvaluationFunction):  # pylint: disable=abstract-method
     """Base class for word embedding similarity functions."""
-    pass
 
 
 class WordEmbeddingAnalogyFunction(_WordEmbeddingEvaluationFunction):  # pylint: disable=abstract-method
@@ -52,7 +50,6 @@ class WordEmbeddingAnalogyFunction(_WordEmbeddingEvaluationFunction):  # pylint:
     eps : float, optional, default=1e-10
         A small constant for numerical stability.
     """
-    pass
 
 
 ###############################################################################
diff --git a/src/gluonnlp/embedding/token_embedding.py b/src/gluonnlp/embedding/token_embedding.py
index 1b2fd9eb95..5adbc697d9 100644
--- a/src/gluonnlp/embedding/token_embedding.py
+++ b/src/gluonnlp/embedding/token_embedding.py
@@ -142,7 +142,7 @@ def list_sources(embedding_name=None):
                 for embedding_name, embedding_cls in registry.get_registry(TokenEmbedding).items()}
 
 
-class TokenEmbedding(object):
+class TokenEmbedding:
     """Token embedding base class.
 
     To load token embedding from an externally hosted pre-trained token embedding file, such as
@@ -724,10 +724,9 @@ def __setitem__(self, tokens, new_embedding):
                                     ' unknown token, please explicitly include "{}" as the '
                                     '`unknown_token` in `tokens`. This is to avoid unintended '
                                     'updates.').format(token, self.unknown_token))
-                else:
-                    raise KeyError(('Token "{}" is unknown. Updating the embedding vector for an '
-                                    'unknown token is not allowed because `unknown_token` is not '
-                                    'specified.').format(token))
+                raise KeyError(('Token "{}" is unknown. Updating the embedding vector for an '
+                                'unknown token is not allowed because `unknown_token` is not '
+                                'specified.').format(token))
 
         self._idx_to_vec[nd.array(indices)] = new_embedding
 
@@ -743,10 +742,10 @@ def _check_source(cls, source_file_hash, source):
         """
         embedding_name = cls.__name__.lower()
         if source not in source_file_hash:
-            raise KeyError('Cannot find pre-trained source {} for token embedding {}. '
-                           'Valid pre-trained file names for embedding {}: {}'.format(
-                               source, embedding_name, embedding_name,
-                               ', '.join(source_file_hash.keys())))
+            raise KeyError('Cannot find pre-trained source {source} for token embedding {name}. '
+                           'Valid pre-trained file names for embedding {name}: {values}'.format(
+                               source=source, name=embedding_name,
+                               values=', '.join(source_file_hash.keys())))
 
     @staticmethod
     def from_file(file_path, elem_delim=' ', encoding=ENCODING, **kwargs):
diff --git a/src/gluonnlp/metric/masked_accuracy.py b/src/gluonnlp/metric/masked_accuracy.py
index 65fc446777..31d3fea708 100644
--- a/src/gluonnlp/metric/masked_accuracy.py
+++ b/src/gluonnlp/metric/masked_accuracy.py
@@ -23,7 +23,7 @@
 
 __all__ = ['EvalMetric', 'MaskedAccuracy']
 
-class EvalMetric(object):
+class EvalMetric:
     """Base class for all evaluation metrics.
 
     .. note::
diff --git a/src/gluonnlp/model/block.py b/src/gluonnlp/model/block.py
index ef42c66a83..2b6d83780e 100644
--- a/src/gluonnlp/model/block.py
+++ b/src/gluonnlp/model/block.py
@@ -39,7 +39,7 @@ class RNNCellLayer(Block):
     def __init__(self, rnn_cell, layout='TNC', **kwargs):
         super(RNNCellLayer, self).__init__(**kwargs)
         self.cell = rnn_cell
-        assert layout == 'TNC' or layout == 'NTC', \
+        assert layout in ('TNC', 'NTC'), \
             'Invalid layout %s; must be one of ["TNC" or "NTC"]'%layout
         self._layout = layout
         self._axis = layout.find('T')
@@ -109,7 +109,7 @@ def __init__(self, **kwargs):
         super(GELU, self).__init__(**kwargs)
         self._support_erf = False
         try:
-            self._support_erf = True if ndarray.erf else False
+            self._support_erf = bool(ndarray.erf)
         except AttributeError:
             warnings.warn('`erf` operator support is not found. '
                           'Please consider upgrading to mxnet >= 1.4')
diff --git a/src/gluonnlp/model/sequence_sampler.py b/src/gluonnlp/model/sequence_sampler.py
index a233a4b5db..a60a2be6e6 100644
--- a/src/gluonnlp/model/sequence_sampler.py
+++ b/src/gluonnlp/model/sequence_sampler.py
@@ -448,7 +448,7 @@ def hybrid_forward(self, F, samples, valid_length, outputs, scores, beam_alive_m
                chosen_word_ids, beam_alive_mask, new_states
 
 
-class BeamSearchSampler(object):
+class BeamSearchSampler:
     r"""Draw samples from the decoder by beam search.
 
     Parameters
@@ -715,7 +715,7 @@ def _else_func():
             F.contrib.cond(F.sum(new_beam_alive_mask) == 0, _then_func, _else_func)
         return new_samples, new_scores, new_new_valid_length
 
-class SequenceSampler(object):
+class SequenceSampler:
     r"""Draw samples from the decoder according to the step-wise distribution.
 
     Parameters
diff --git a/src/gluonnlp/utils/files.py b/src/gluonnlp/utils/files.py
index 3cca21556c..b2eb1af301 100644
--- a/src/gluonnlp/utils/files.py
+++ b/src/gluonnlp/utils/files.py
@@ -68,7 +68,7 @@ def mkdir(dirname):
             if e.errno != 17:
                 raise e
 
-class _TempFilePath(object):
+class _TempFilePath:
     """A TempFilePath that provides a path to a temporarily file, and automatically
     cleans up the temp file at exit.
     """
diff --git a/src/gluonnlp/utils/parallel.py b/src/gluonnlp/utils/parallel.py
index 62799b383b..edcaba7ea2 100644
--- a/src/gluonnlp/utils/parallel.py
+++ b/src/gluonnlp/utils/parallel.py
@@ -25,7 +25,7 @@
 
 __all__ = ['Parallelizable', 'Parallel']
 
-class Parallelizable(object):
+class Parallelizable:
     """Base class for parallelizable unit of work, which can be invoked by `Parallel`.
     The subclass must implement the `forward_backward` method, and be used
     together with `Parallel`. For example::
@@ -59,7 +59,7 @@ def forward_backward(self, x):
         """ Forward and backward computation. """
         raise NotImplementedError()
 
-class Parallel(object):
+class Parallel:
     """Class for parallel processing with `Parallelizable`s. It invokes a
     `Parallelizable` with multiple Python threads. For example::
 
@@ -100,7 +100,7 @@ def forward_backward(self, x):
         multiple threads may cause unexpected behavior.
     """
 
-    class _StopSignal(object):
+    class _StopSignal:
         """Internal class to signal stop. """
         def __init__(self, msg):
             self._msg = msg
diff --git a/src/gluonnlp/utils/parameter.py b/src/gluonnlp/utils/parameter.py
index 44abf8f0bf..8dac90f940 100644
--- a/src/gluonnlp/utils/parameter.py
+++ b/src/gluonnlp/utils/parameter.py
@@ -144,10 +144,9 @@ def load_parameters(model, filename, ctx=None, allow_missing=False,
     if cast_dtype is not None:
         if mx.__version__ < '1.5.0':
             raise NotImplementedError('cast_dtype option requires MXNet 1.5.0')
-        else:
-            _s3_compatible_save_load(False, model.load_parameters, filename, ctx=ctx,
-                                     allow_missing=allow_missing, ignore_extra=ignore_extra,
-                                     cast_dtype=cast_dtype)
+        _s3_compatible_save_load(False, model.load_parameters, filename, ctx=ctx,
+                                 allow_missing=allow_missing, ignore_extra=ignore_extra,
+                                 cast_dtype=cast_dtype)
     else:
         _s3_compatible_save_load(False, model.load_parameters, filename, ctx=ctx,
                                  allow_missing=allow_missing, ignore_extra=ignore_extra)
diff --git a/src/gluonnlp/vocab/elmo.py b/src/gluonnlp/vocab/elmo.py
index b30af8aae3..c0d29f21d2 100644
--- a/src/gluonnlp/vocab/elmo.py
+++ b/src/gluonnlp/vocab/elmo.py
@@ -22,7 +22,7 @@
 
 __all__ = ['ELMoCharVocab']
 
-class ELMoCharVocab(object):
+class ELMoCharVocab:
     r"""ELMo special character vocabulary
 
     The vocab aims to map individual tokens to sequences of character ids, compatible with ELMo.
diff --git a/src/gluonnlp/vocab/subwords.py b/src/gluonnlp/vocab/subwords.py
index 2203a66cb4..b8647a0d06 100644
--- a/src/gluonnlp/vocab/subwords.py
+++ b/src/gluonnlp/vocab/subwords.py
@@ -53,7 +53,7 @@ def list_subword_functions():
     return list(reg.keys())
 
 
-class SubwordFunction(object):
+class SubwordFunction:
     """A SubwordFunction maps words to lists of subword indices.
 
     This class is abstract and to be subclassed. Use
diff --git a/src/gluonnlp/vocab/vocab.py b/src/gluonnlp/vocab/vocab.py
index 61947b5a04..1d6450a964 100644
--- a/src/gluonnlp/vocab/vocab.py
+++ b/src/gluonnlp/vocab/vocab.py
@@ -39,7 +39,7 @@
 UNK_IDX = 0
 
 
-class Vocab(object):
+class Vocab:
     """Indexing and embedding attachment for text tokens.
 
     Parameters
diff --git a/tests/unittest/test_token_embedding.py b/tests/unittest/test_token_embedding.py
index eb82c65dcc..baf92b97fe 100644
--- a/tests/unittest/test_token_embedding.py
+++ b/tests/unittest/test_token_embedding.py
@@ -27,7 +27,7 @@
 from gluonnlp.base import _str_types
 
 
-class NaiveUnknownLookup(object):
+class NaiveUnknownLookup:
     def __init__(self, embsize):
         self.embsize = embsize
 
diff --git a/tests/unittest/test_vocab_embed.py b/tests/unittest/test_vocab_embed.py
index b9dd92badc..e26cd9f04c 100644
--- a/tests/unittest/test_vocab_embed.py
+++ b/tests/unittest/test_vocab_embed.py
@@ -761,7 +761,7 @@ def test_vocab_set_embedding_with_subword_lookup_only_token_embedding(
         allow_extend, unknown_token, vocab_unknown_token, initialize):
     embsize = 5
 
-    class NaiveLookup(object):
+    class NaiveLookup:
         def __contains__(self, token):
             return True
 
@@ -915,7 +915,7 @@ def test_token_embedding_from_S3_fasttext_with_ngrams(load_ngrams):
 def test_token_embedding_unknown_lookup(setinconstructor, lookup,
                                         initializetokenembedding,
                                         unknown_token, allow_extend, tmpdir):
-    class NaiveLookup(object):
+    class NaiveLookup:
         dim = 5  # Must match _mk_my_pretrain_file
 
         def __contains__(self, token):
@@ -927,7 +927,7 @@ def __getitem__(self, tokens):
             else:
                 return nd.ones((len(tokens), self.dim))
 
-    class IncapableLookup(object):
+    class IncapableLookup:
         def __contains__(self, token):
             return False
 
@@ -1090,7 +1090,7 @@ def test_word_embedding_evaluation_registry():
     with pytest.raises(RuntimeError):
 
         @nlp.embedding.evaluation.register
-        class InvalidEvaluationFunction(object):
+        class InvalidEvaluationFunction:
             pass
 
     with pytest.raises(KeyError):