
Commit

Fix Sphinx python docstring error: text contrib module (apache#12949)
frankfliu authored and Jose Luis Contreras committed Nov 13, 2018
1 parent e72c583 commit b2c5360
Showing 2 changed files with 8 additions and 82 deletions.
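The errors stem from the docstring section name: Sphinx's numpydoc extension recognizes "Attributes" as a section header but not "Properties", so the base-class section is renamed and the duplicated "Properties" blocks in the subclasses are dropped, with per-property documentation moving onto the property getters. Below is a minimal sketch of the resulting docstring layout; it uses a hypothetical ExampleEmbedding class for illustration rather than the actual MXNet classes.

# Illustrative only: a hypothetical class showing the numpydoc layout this commit
# adopts -- an "Attributes" section in the class docstring plus a one-line
# docstring on each property getter.
class ExampleEmbedding(object):
    """Toy token container used only to illustrate the docstring layout.

    Attributes
    ----------
    token_to_idx : dict mapping str to int
        A dict mapping each token to its index integer.
    idx_to_token : list of strs
        A list of indexed tokens where the list indices and the token indices are aligned.
    """

    def __init__(self, tokens):
        self._idx_to_token = list(tokens)
        self._token_to_idx = {token: idx for idx, token in enumerate(self._idx_to_token)}

    @property
    def token_to_idx(self):
        """dict mapping str to int: A dict mapping each token to its index integer."""
        return self._token_to_idx

    @property
    def idx_to_token(self):
        """list of strs: A list of indexed tokens aligned with the token indices."""
        return self._idx_to_token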
78 changes: 1 addition & 77 deletions python/mxnet/contrib/text/embedding.py
@@ -161,7 +161,7 @@ class _TokenEmbedding(vocab.Vocabulary):
pre-trained token embedding file, are taken as the indexed tokens of the embedding.
- Properties
+ Attributes
----------
token_to_idx : dict mapping str to int
A dict mapping each token to its index integer.
@@ -506,25 +506,6 @@ class GloVe(_TokenEmbedding):
embedding vectors, such as loaded from a pre-trained token embedding file. If None, all the
tokens from the loaded embedding vectors, such as loaded from a pre-trained token embedding
file, will be indexed.
- Properties
- ----------
- token_to_idx : dict mapping str to int
- A dict mapping each token to its index integer.
- idx_to_token : list of strs
- A list of indexed tokens where the list indices and the token indices are aligned.
- unknown_token : hashable object
- The representation for any unknown token. In other words, any unknown token will be indexed
- as the same representation.
- reserved_tokens : list of strs or None
- A list of reserved tokens that will always be indexed.
- vec_len : int
- The length of the embedding vector for each token.
- idx_to_vec : mxnet.ndarray.NDArray
- For all the indexed tokens in this embedding, this NDArray maps each token's index to an
- embedding vector. The largest valid index maps to the initialized embedding vector for every
- reserved token, such as an unknown_token token and a padding token.
"""

# Map a pre-trained token embedding archive file and its SHA-1 hash.
@@ -610,25 +591,6 @@ class FastText(_TokenEmbedding):
embedding vectors, such as loaded from a pre-trained token embedding file. If None, all the
tokens from the loaded embedding vectors, such as loaded from a pre-trained token embedding
file, will be indexed.
- Properties
- ----------
- token_to_idx : dict mapping str to int
- A dict mapping each token to its index integer.
- idx_to_token : list of strs
- A list of indexed tokens where the list indices and the token indices are aligned.
- unknown_token : hashable object
- The representation for any unknown token. In other words, any unknown token will be indexed
- as the same representation.
- reserved_tokens : list of strs or None
- A list of reserved tokens that will always be indexed.
- vec_len : int
- The length of the embedding vector for each token.
- idx_to_vec : mxnet.ndarray.NDArray
- For all the indexed tokens in this embedding, this NDArray maps each token's index to an
- embedding vector. The largest valid index maps to the initialized embedding vector for every
- reserved token, such as an unknown_token token and a padding token.
"""

# Map a pre-trained token embedding archive file and its SHA-1 hash.
@@ -687,25 +649,6 @@ class CustomEmbedding(_TokenEmbedding):
embedding vectors, such as loaded from a pre-trained token embedding file. If None, all the
tokens from the loaded embedding vectors, such as loaded from a pre-trained token embedding
file, will be indexed.
- Properties
- ----------
- token_to_idx : dict mapping str to int
- A dict mapping each token to its index integer.
- idx_to_token : list of strs
- A list of indexed tokens where the list indices and the token indices are aligned.
- unknown_token : hashable object
- The representation for any unknown token. In other words, any unknown token will be indexed
- as the same representation.
- reserved_tokens : list of strs or None
- A list of reserved tokens that will always be indexed.
- vec_len : int
- The length of the embedding vector for each token.
- idx_to_vec : mxnet.ndarray.NDArray
- For all the indexed tokens in this embedding, this NDArray maps each token's index to an
- embedding vector. The largest valid index maps to the initialized embedding vector for every
- reserved token, such as an unknown_token token and a padding token.
"""

def __init__(self, pretrained_file_path, elem_delim=' ', encoding='utf8',
@@ -735,25 +678,6 @@ class CompositeEmbedding(_TokenEmbedding):
token_embeddings : instance or list of `mxnet.contrib.text.embedding._TokenEmbedding`
One or multiple pre-trained token embeddings to load. If it is a list of multiple
embeddings, these embedding vectors will be concatenated for each token.
- Properties
- ----------
- token_to_idx : dict mapping str to int
- A dict mapping each token to its index integer.
- idx_to_token : list of strs
- A list of indexed tokens where the list indices and the token indices are aligned.
- unknown_token : hashable object
- The representation for any unknown token. In other words, any unknown token will be indexed
- as the same representation.
- reserved_tokens : list of strs or None
- A list of reserved tokens that will always be indexed.
- vec_len : int
- The length of the embedding vector for each token.
- idx_to_vec : mxnet.ndarray.NDArray
- For all the indexed tokens in this embedding, this NDArray maps each token's index to an
- embedding vector. The largest valid index maps to the initialized embedding vector for every
- reserved token, such as an unknown_token token and a padding token.
"""
def __init__(self, vocabulary, token_embeddings):

12 changes: 7 additions & 5 deletions python/mxnet/contrib/text/vocab.py
@@ -63,12 +63,8 @@ class Vocabulary(object):
`reserved_tokens` must be of the same hashable type. Examples: str, int, and tuple.
- Properties
+ Attributes
----------
- token_to_idx : dict mapping str to int
- A dict mapping each token to its index integer.
- idx_to_token : list of strs
- A list of indexed tokens where the list indices and the token indices are aligned.
unknown_token : hashable object
The representation for any unknown token. In other words, any unknown token will be indexed
as the same representation.
@@ -143,10 +139,16 @@ def __len__(self):

@property
def token_to_idx(self):
"""
dict mapping str to int: A dict mapping each token to its index integer.
"""
return self._token_to_idx

@property
def idx_to_token(self):
"""
list of strs: A list of indexed tokens where the list indices and the token indices are aligned.
"""
return self._idx_to_token

@property
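For context (not part of the commit), here is a brief usage sketch of the two properties documented above. It assumes the MXNet 1.x contrib.text API, where a Vocabulary is built from a collections.Counter and accepts unknown_token and reserved_tokens keyword arguments.

# Illustrative usage only; assumes mxnet 1.x with the contrib.text API installed.
from collections import Counter

from mxnet.contrib.text.vocab import Vocabulary

counter = Counter(['hello', 'world', 'hello', 'nice', 'world'])
vocab = Vocabulary(counter, unknown_token='<unk>', reserved_tokens=['<pad>'])

# token_to_idx: dict mapping each token to its integer index.
print(vocab.token_to_idx['world'])
# idx_to_token: list of tokens whose positions match the indices above.
print(vocab.idx_to_token[vocab.token_to_idx['world']])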
