From 4035779f66597e1ef47b389ae8a790831229cbb6 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Tue, 24 Sep 2019 08:13:13 +0000 Subject: [PATCH 1/9] Deprecate Vocab padding, bos and eos_token posargs and defaults --- src/gluonnlp/vocab/vocab.py | 63 ++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/src/gluonnlp/vocab/vocab.py b/src/gluonnlp/vocab/vocab.py index 5943b90d6b..a0f2227b56 100644 --- a/src/gluonnlp/vocab/vocab.py +++ b/src/gluonnlp/vocab/vocab.py @@ -16,7 +16,6 @@ # under the License. # pylint: disable=consider-iterating-dictionary - """Vocabulary.""" __all__ = ['Vocab'] @@ -33,6 +32,9 @@ from ..data.utils import Counter, DefaultLookupDict, count_tokens UNK_IDX = 0 +_DEPR_PAD = object() +_DEPR_BOS = object() +_DEPR_EOS = object() class Vocab: @@ -60,12 +62,6 @@ class Vocab: `None`, looking up any token that is not part of the vocabulary and thus considered unknown will return the index of `unknown_token`. If None, looking up an unknown token will result in `KeyError`. - padding_token - The representation for the special token of padding token. - bos_token - The representation for the special token of beginning-of-sequence token. - eos_token - The representation for the special token of end-of-sequence token. reserved_tokens A list specifying additional tokens to be added to the vocabulary. `reserved_tokens` must not contain the value of `unknown_token` or @@ -91,6 +87,30 @@ class Vocab: specified tokens are listed together with reserved tokens in the `reserved_tokens` attribute of the vocabulary object. + Deprecated Parameters + --------------------- + deprecated_padding_token + The representation for the special token of padding token. + Default: '' + .. deprecated:: 0.9 + Specifying padding_token as positional argument is deprecated and + support will be removed. Specify it as keyword argument instead + (see documentation of **kwargs above) + deprecated_bos_token + The representation for the special token of beginning-of-sequence token. + Default: '' + .. deprecated:: 0.9 + Specifying bos_token as positional argument is deprecated and + support will be removed. Specify it as keyword argument instead + (see documentation of **kwargs above) + deprecated_eos_token + The representation for the special token of end-of-sequence token. + Default: '' + .. deprecated:: 0.9 + Specifying eos_token as positional argument is deprecated and + support will be removed. Specify it as keyword argument instead + (see documentation of **kwargs above) + Attributes ---------- embedding : instance of :class:`gluonnlp.embedding.TokenEmbedding` @@ -173,15 +193,34 @@ class Vocab: def __init__(self, counter: Optional[Counter] = None, max_size: Optional[int] = None, min_freq: int = 1, unknown_token: Optional[Hashable] = C.UNK_TOKEN, + deprecated_padding_token: Optional[Hashable] = _DEPR_PAD, + deprecated_bos_token: Optional[Hashable] = _DEPR_BOS, + deprecated_eos_token: Optional[Hashable] = _DEPR_EOS, + reserved_tokens: Optional[List[Hashable]] = None, + token_to_idx: Optional[Dict[Hashable, int]] = None, *, padding_token: Optional[Hashable] = C.PAD_TOKEN, bos_token: Optional[Hashable] = C.BOS_TOKEN, - eos_token: Optional[Hashable] = C.EOS_TOKEN, - reserved_tokens: Optional[List[Hashable]] = None, - token_to_idx: Optional[Dict[Hashable, int]] = None, **kwargs): + eos_token: Optional[Hashable] = C.EOS_TOKEN, **kwargs): # Sanity checks. assert min_freq > 0, '`min_freq` must be set to a positive value.' + # Deprecation checks and warnings + combs = ((deprecated_padding_token, 'padding_token', _DEPR_PAD, padding_token), + (deprecated_bos_token, 'bos_token', _DEPR_BOS, bos_token), + (deprecated_eos_token, 'eos_token', _DEPR_EOS, eos_token)) + for depr_pos_arg, name, indicator, value in combs: + if depr_pos_arg != indicator: + warnings.warn( + 'Specifying `{n}` as positional argument is deprecated and support will be removed. ' + 'Please specify `{n}` as keyword argument instead, ' + 'for example `Vocab(counter, {n}={v})`'.format(n=name, v=depr_pos_arg), + DeprecationWarning) + # Store positional argument value in kwargs + kwargs[name] = depr_pos_arg + elif name not in kwargs: # Store keyword argument value in kwargs + kwargs[name] = value + # Set up idx_to_token and token_to_idx based on presence of unknown token self._unknown_token = unknown_token self._idx_to_token = [unknown_token] if unknown_token else [] @@ -190,10 +229,6 @@ def __init__(self, counter: Optional[Counter] = None, max_size: Optional[int] = else: self._token_to_idx = {} - kwargs['padding_token'] = padding_token - kwargs['bos_token'] = bos_token - kwargs['eos_token'] = eos_token - # Handle special tokens special_tokens = [] for special_token_name, special_token in kwargs.items(): From 41edc09dbfc344a0a1ab5fd179a961bfed06f569 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Tue, 24 Sep 2019 20:08:25 +0000 Subject: [PATCH 2/9] Enable 'default' display of DeprecationWarnings raised by gluonnlp DeprecationWarnings are hidden by default warning filter. This commit explicitly sets the filter to 'default' for DeprecationWarnings raised in GluonNLP code. https://www.python.org/dev/peps/pep-0565/ --- src/gluonnlp/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gluonnlp/__init__.py b/src/gluonnlp/__init__.py index 9d81c73607..b6f2fa36ac 100644 --- a/src/gluonnlp/__init__.py +++ b/src/gluonnlp/__init__.py @@ -18,6 +18,9 @@ # pylint: disable=wildcard-import """NLP toolkit.""" +import warnings +warnings.filterwarnings(module='gluonnlp', action='default', category=DeprecationWarning) + from . import loss from . import data from . import embedding From 261fc3ad01d90355ade39c1b099a6798c2a5ae9a Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Wed, 25 Sep 2019 13:26:24 +0000 Subject: [PATCH 3/9] Fix lint --- src/gluonnlp/__init__.py | 3 ++- src/gluonnlp/vocab/vocab.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gluonnlp/__init__.py b/src/gluonnlp/__init__.py index b6f2fa36ac..7faf51d690 100644 --- a/src/gluonnlp/__init__.py +++ b/src/gluonnlp/__init__.py @@ -19,7 +19,6 @@ """NLP toolkit.""" import warnings -warnings.filterwarnings(module='gluonnlp', action='default', category=DeprecationWarning) from . import loss from . import data @@ -44,3 +43,5 @@ 'optimizer', 'utils', 'metric'] + +warnings.filterwarnings(module='gluonnlp', action='default', category=DeprecationWarning) diff --git a/src/gluonnlp/vocab/vocab.py b/src/gluonnlp/vocab/vocab.py index a0f2227b56..6284919c05 100644 --- a/src/gluonnlp/vocab/vocab.py +++ b/src/gluonnlp/vocab/vocab.py @@ -212,8 +212,8 @@ def __init__(self, counter: Optional[Counter] = None, max_size: Optional[int] = for depr_pos_arg, name, indicator, value in combs: if depr_pos_arg != indicator: warnings.warn( - 'Specifying `{n}` as positional argument is deprecated and support will be removed. ' - 'Please specify `{n}` as keyword argument instead, ' + 'Specifying `{n}` as positional argument is deprecated and ' + 'support will be removed. Please specify `{n}` as keyword argument instead, ' 'for example `Vocab(counter, {n}={v})`'.format(n=name, v=depr_pos_arg), DeprecationWarning) # Store positional argument value in kwargs From ef5d016e0f73767880ba5da9595c55bccb155703 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Fri, 4 Oct 2019 22:55:17 +0000 Subject: [PATCH 4/9] Remove ..derpecated:: annotation due to 'Unexpected indentation' error --- src/gluonnlp/vocab/vocab.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/src/gluonnlp/vocab/vocab.py b/src/gluonnlp/vocab/vocab.py index 6284919c05..5885bf19f6 100644 --- a/src/gluonnlp/vocab/vocab.py +++ b/src/gluonnlp/vocab/vocab.py @@ -90,26 +90,20 @@ class Vocab: Deprecated Parameters --------------------- deprecated_padding_token - The representation for the special token of padding token. - Default: '' - .. deprecated:: 0.9 - Specifying padding_token as positional argument is deprecated and - support will be removed. Specify it as keyword argument instead - (see documentation of **kwargs above) + The representation for the special token of padding token. Default: + ''. Specifying padding_token as positional argument is deprecated + and support will be removed. Specify it as keyword argument instead + (see documentation of **kwargs above) deprecated_bos_token - The representation for the special token of beginning-of-sequence token. - Default: '' - .. deprecated:: 0.9 - Specifying bos_token as positional argument is deprecated and - support will be removed. Specify it as keyword argument instead - (see documentation of **kwargs above) + The representation for the special token of beginning-of-sequence + token. Default: ''. Specifying bos_token as positional argument is + deprecated and support will be removed. Specify it as keyword argument + instead (see documentation of **kwargs above) deprecated_eos_token The representation for the special token of end-of-sequence token. - Default: '' - .. deprecated:: 0.9 - Specifying eos_token as positional argument is deprecated and - support will be removed. Specify it as keyword argument instead - (see documentation of **kwargs above) + Default: ''. Specifying eos_token as positional argument is + deprecated and support will be removed. Specify it as keyword argument + instead (see documentation of **kwargs above) Attributes ---------- From 65e086f1c6d313d10aa90e6f19e60c7f57209c17 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Fri, 11 Oct 2019 00:21:58 +0000 Subject: [PATCH 5/9] Fix doc lint: "Inline strong start-string without end-string." `**` is interpreted as start-string for strong format. --- src/gluonnlp/vocab/vocab.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gluonnlp/vocab/vocab.py b/src/gluonnlp/vocab/vocab.py index 5885bf19f6..20b18dfad6 100644 --- a/src/gluonnlp/vocab/vocab.py +++ b/src/gluonnlp/vocab/vocab.py @@ -75,7 +75,7 @@ class Vocab: example, it is valid to only set the `unknown_token` index to 10 (instead of the default of 0) with `token_to_idx = {'': 10}`, assuming that there are at least 10 tokens in the vocabulary. - **kwargs + ``**kwargs`` Keyword arguments of the format `xxx_token` can be used to specify further special tokens that will be exposed as attribute of the vocabulary and associated with an index. From 311fa2e5cde32af7bacbfa928f01bc38bf5e1dcc Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Mon, 14 Oct 2019 19:08:47 +0000 Subject: [PATCH 6/9] Escape all `**kwargs` correctly --- src/gluonnlp/vocab/vocab.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gluonnlp/vocab/vocab.py b/src/gluonnlp/vocab/vocab.py index 20b18dfad6..e9880c6142 100644 --- a/src/gluonnlp/vocab/vocab.py +++ b/src/gluonnlp/vocab/vocab.py @@ -75,7 +75,7 @@ class Vocab: example, it is valid to only set the `unknown_token` index to 10 (instead of the default of 0) with `token_to_idx = {'': 10}`, assuming that there are at least 10 tokens in the vocabulary. - ``**kwargs`` + `**kwargs` Keyword arguments of the format `xxx_token` can be used to specify further special tokens that will be exposed as attribute of the vocabulary and associated with an index. @@ -93,17 +93,17 @@ class Vocab: The representation for the special token of padding token. Default: ''. Specifying padding_token as positional argument is deprecated and support will be removed. Specify it as keyword argument instead - (see documentation of **kwargs above) + (see documentation of `**kwargs` above) deprecated_bos_token The representation for the special token of beginning-of-sequence token. Default: ''. Specifying bos_token as positional argument is deprecated and support will be removed. Specify it as keyword argument - instead (see documentation of **kwargs above) + instead (see documentation of `**kwargs` above) deprecated_eos_token The representation for the special token of end-of-sequence token. Default: ''. Specifying eos_token as positional argument is deprecated and support will be removed. Specify it as keyword argument - instead (see documentation of **kwargs above) + instead (see documentation of `**kwargs` above) Attributes ---------- From f367f7239f7de146e5839bd6803a92118e268133 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Mon, 14 Oct 2019 19:09:10 +0000 Subject: [PATCH 7/9] Declare sphinx doc build dependencies --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 0170b82f78..8d4366af2d 100644 --- a/setup.py +++ b/setup.py @@ -76,6 +76,8 @@ def find_version(*file_paths): 'recommonmark', 'sphinx-gallery', 'sphinx_rtd_theme', + 'mxtheme', + 'sphinx-autodoc-typehints', 'nbsphinx', 'flaky', ], From 9c64d5b43fca9b8836ed424c2d2563831e7bf4bd Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Mon, 14 Oct 2019 23:29:21 +0000 Subject: [PATCH 8/9] Disable Simverb test again --- tests/unittest/test_datasets.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unittest/test_datasets.py b/tests/unittest/test_datasets.py index 880970607b..91cc616647 100644 --- a/tests/unittest/test_datasets.py +++ b/tests/unittest/test_datasets.py @@ -268,8 +268,9 @@ def test_simlex999(): @flaky(max_runs=2, min_passes=1) @pytest.mark.serial @pytest.mark.remote_required -@pytest.mark.skipif(datetime.date.today() < datetime.date(2019, 10, 14), - reason="simverb temporarily unavailable.") +@pytest.mark.skipif(datetime.date.today() < datetime.date(2019, 10, 21), + reason="simverb temporarily unavailable" + "contacted author on 10/14") def test_simverb3500(): data = nlp.data.SimVerb3500() assert len(data) == 3500 From c4b52623f8fb678ba26d41123f840956b67b7c03 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Mon, 14 Oct 2019 23:34:25 +0000 Subject: [PATCH 9/9] Remove 'Deprecated Parameters' section Sphinx doesn't render it correctly --- src/gluonnlp/vocab/vocab.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gluonnlp/vocab/vocab.py b/src/gluonnlp/vocab/vocab.py index e9880c6142..7138fca370 100644 --- a/src/gluonnlp/vocab/vocab.py +++ b/src/gluonnlp/vocab/vocab.py @@ -86,9 +86,6 @@ class Vocab: just as if it was listed in the `reserved_tokens` argument. The specified tokens are listed together with reserved tokens in the `reserved_tokens` attribute of the vocabulary object. - - Deprecated Parameters - --------------------- deprecated_padding_token The representation for the special token of padding token. Default: ''. Specifying padding_token as positional argument is deprecated