diff --git a/python/mxnet/contrib/text/embedding.py b/python/mxnet/contrib/text/embedding.py index 277f78222922..e2a05c841afd 100644 --- a/python/mxnet/contrib/text/embedding.py +++ b/python/mxnet/contrib/text/embedding.py @@ -161,7 +161,7 @@ class _TokenEmbedding(vocab.Vocabulary): pre-trained token embedding file, are taken as the indexed tokens of the embedding. - Properties + Attributes ---------- token_to_idx : dict mapping str to int A dict mapping each token to its index integer. @@ -506,25 +506,6 @@ class GloVe(_TokenEmbedding): embedding vectors, such as loaded from a pre-trained token embedding file. If None, all the tokens from the loaded embedding vectors, such as loaded from a pre-trained token embedding file, will be indexed. - - - Properties - ---------- - token_to_idx : dict mapping str to int - A dict mapping each token to its index integer. - idx_to_token : list of strs - A list of indexed tokens where the list indices and the token indices are aligned. - unknown_token : hashable object - The representation for any unknown token. In other words, any unknown token will be indexed - as the same representation. - reserved_tokens : list of strs or None - A list of reserved tokens that will always be indexed. - vec_len : int - The length of the embedding vector for each token. - idx_to_vec : mxnet.ndarray.NDArray - For all the indexed tokens in this embedding, this NDArray maps each token's index to an - embedding vector. The largest valid index maps to the initialized embedding vector for every - reserved token, such as an unknown_token token and a padding token. """ # Map a pre-trained token embedding archive file and its SHA-1 hash. @@ -610,25 +591,6 @@ class FastText(_TokenEmbedding): embedding vectors, such as loaded from a pre-trained token embedding file. If None, all the tokens from the loaded embedding vectors, such as loaded from a pre-trained token embedding file, will be indexed. - - - Properties - ---------- - token_to_idx : dict mapping str to int - A dict mapping each token to its index integer. - idx_to_token : list of strs - A list of indexed tokens where the list indices and the token indices are aligned. - unknown_token : hashable object - The representation for any unknown token. In other words, any unknown token will be indexed - as the same representation. - reserved_tokens : list of strs or None - A list of reserved tokens that will always be indexed. - vec_len : int - The length of the embedding vector for each token. - idx_to_vec : mxnet.ndarray.NDArray - For all the indexed tokens in this embedding, this NDArray maps each token's index to an - embedding vector. The largest valid index maps to the initialized embedding vector for every - reserved token, such as an unknown_token token and a padding token. """ # Map a pre-trained token embedding archive file and its SHA-1 hash. @@ -687,25 +649,6 @@ class CustomEmbedding(_TokenEmbedding): embedding vectors, such as loaded from a pre-trained token embedding file. If None, all the tokens from the loaded embedding vectors, such as loaded from a pre-trained token embedding file, will be indexed. - - - Properties - ---------- - token_to_idx : dict mapping str to int - A dict mapping each token to its index integer. - idx_to_token : list of strs - A list of indexed tokens where the list indices and the token indices are aligned. - unknown_token : hashable object - The representation for any unknown token. In other words, any unknown token will be indexed - as the same representation. - reserved_tokens : list of strs or None - A list of reserved tokens that will always be indexed. - vec_len : int - The length of the embedding vector for each token. - idx_to_vec : mxnet.ndarray.NDArray - For all the indexed tokens in this embedding, this NDArray maps each token's index to an - embedding vector. The largest valid index maps to the initialized embedding vector for every - reserved token, such as an unknown_token token and a padding token. """ def __init__(self, pretrained_file_path, elem_delim=' ', encoding='utf8', @@ -735,25 +678,6 @@ class CompositeEmbedding(_TokenEmbedding): token_embeddings : instance or list of `mxnet.contrib.text.embedding._TokenEmbedding` One or multiple pre-trained token embeddings to load. If it is a list of multiple embeddings, these embedding vectors will be concatenated for each token. - - - Properties - ---------- - token_to_idx : dict mapping str to int - A dict mapping each token to its index integer. - idx_to_token : list of strs - A list of indexed tokens where the list indices and the token indices are aligned. - unknown_token : hashable object - The representation for any unknown token. In other words, any unknown token will be indexed - as the same representation. - reserved_tokens : list of strs or None - A list of reserved tokens that will always be indexed. - vec_len : int - The length of the embedding vector for each token. - idx_to_vec : mxnet.ndarray.NDArray - For all the indexed tokens in this embedding, this NDArray maps each token's index to an - embedding vector. The largest valid index maps to the initialized embedding vector for every - reserved token, such as an unknown_token token and a padding token. """ def __init__(self, vocabulary, token_embeddings): diff --git a/python/mxnet/contrib/text/vocab.py b/python/mxnet/contrib/text/vocab.py index 9e44acb10199..ede2ca535712 100644 --- a/python/mxnet/contrib/text/vocab.py +++ b/python/mxnet/contrib/text/vocab.py @@ -63,12 +63,8 @@ class Vocabulary(object): `reserved_tokens` must be of the same hashable type. Examples: str, int, and tuple. - Properties + Attributes ---------- - token_to_idx : dict mapping str to int - A dict mapping each token to its index integer. - idx_to_token : list of strs - A list of indexed tokens where the list indices and the token indices are aligned. unknown_token : hashable object The representation for any unknown token. In other words, any unknown token will be indexed as the same representation. @@ -143,10 +139,16 @@ def __len__(self): @property def token_to_idx(self): + """ + dict mapping str to int: A dict mapping each token to its index integer. + """ return self._token_to_idx @property def idx_to_token(self): + """ + list of strs: A list of indexed tokens where the list indices and the token indices are aligned. + """ return self._idx_to_token @property