Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update dependencies #8156

Merged
merged 3 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/nlp/nemo_megatron/retro/retro_model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ Following is the retro memory map index data format:
- chunk id address in byte (int64 array)
-

:sup:`1` 1: np.uint8, 2: np.int8, 3: np.int16, 4: np.int32, 5: np.int64, 6: np.float, 7: np.double, 8: np.uint16
:sup:`1` 1: np.uint8, 2: np.int8, 3: np.int16, 4: np.int32, 5: np.int64, 6: np.float64, 7: np.double, 8: np.uint16

:sup:`2` When building the indexed dataset, we pad each sentence to be a multiple of ``chunk_size`` with ``pad_id`` from the tokenizer.
The number of tokens for each sentence includes the padded token ids. For retrieval data, there is an extra ``chunk_size`` padding at
Expand Down
2 changes: 1 addition & 1 deletion nemo/collections/asr/parts/utils/numba_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def phase_vocoder(D: np.ndarray, rate: float, phi_advance: np.ndarray, scale_buf
Returns:
Complex64 ndarray of shape [d, t / rate, complex=2]
"""
time_steps = np.arange(0, D.shape[1], rate, dtype=np.float)
time_steps = np.arange(0, D.shape[1], rate, dtype=np.float64)

# Create an empty output array
d_stretch = np.zeros((D.shape[0], len(time_steps)), D.dtype, order='F')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_loss_mask[idx]),
np.array(self.all_subtokens_mask[idx]),
self.all_intents[idx],
Expand Down Expand Up @@ -326,7 +326,7 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_loss_mask[idx]),
np.array(self.all_subtokens_mask[idx]),
)
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def __getitem__(self, idx: int):
np.array(ex.example_id_num[-1]), # service_id
np.array(ex.utterance_ids),
np.array(ex.utterance_segment),
np.array(ex.utterance_mask, dtype=np.long),
np.array(ex.utterance_mask, dtype=np.longlong),
np.array(ex.intent_status, dtype=np.float32),
np.array(ex.requested_slot_status, dtype=np.float32),
np.array(ex.categorical_slot_status),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def __getitem__(self, idx):
return (
np.array(feature.input_ids),
np.array(feature.segment_ids),
np.array(feature.input_mask, dtype=np.long),
np.array(feature.input_mask, dtype=np.longlong),
np.array(feature.label_id),
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def construct_input(self, token_ids1, max_seq_length, token_ids2=None):
num_nonpad_tokens = len(bert_input)

input_ids[:num_nonpad_tokens] = bert_input
input_ids = np.array(input_ids, dtype=np.long)
input_ids = np.array(input_ids, dtype=np.longlong)
input_mask = input_ids != self.tokenizer.pad_id
input_type_ids = np.ones_like(input_ids)
input_type_ids[:sentence1_length] = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_loss_mask[idx]),
np.array(self.all_subtokens_mask[idx]),
self.all_intents[idx],
Expand Down Expand Up @@ -291,7 +291,7 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_loss_mask[idx]),
np.array(self.all_subtokens_mask[idx]),
)
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,10 @@ def truncate_seq_pair(a, b, max_num_tokens):

input_ids, output_mask = self.mask_ids(output_ids)

input_mask = np.zeros(self.max_seq_length, dtype=np.long)
input_mask = np.zeros(self.max_seq_length, dtype=np.longlong)
input_mask[: len(input_ids)] = 1

input_type_ids = np.zeros(self.max_seq_length, dtype=np.int)
input_type_ids = np.zeros(self.max_seq_length, dtype=np.int64)
input_type_ids[len(a_document) + 2 : len(output_ids) + 1] = 1

padding_length = max(0, self.max_seq_length - len(input_ids))
Expand All @@ -257,7 +257,7 @@ def truncate_seq_pair(a, b, max_num_tokens):
return (
np.array(input_ids),
input_type_ids,
np.array(input_mask, dtype=np.long),
np.array(input_mask, dtype=np.longlong),
np.array(output_ids),
np.array(output_mask, dtype=np.float32),
is_next,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def pad_batches(self, ids):

batches = []
for batch_elem_len, batch_sent_ids in zip(self.batch_elem_lengths, self.batch_sent_ids):
batch = self.tokenizer.pad_id * np.ones((len(batch_sent_ids), batch_elem_len), dtype=np.int)
batch = self.tokenizer.pad_id * np.ones((len(batch_sent_ids), batch_elem_len), dtype=np.int64)
for i, sentence_idx in enumerate(batch_sent_ids):
batch[i][: len(ids[sentence_idx])] = ids[sentence_idx]
batches.append(batch)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ def pad_batches(self, src_ids, tgt_ids, batch_indices):
for batch_idx, b in enumerate(batch_indices):
src_len = max([len(src_ids[i]) for i in b])
tgt_len = max([len(tgt_ids[i]) for i in b])
src_ids_ = self.src_pad_id * np.ones((len(b), src_len), dtype=np.int)
tgt_ids_ = self.tgt_pad_id * np.ones((len(b), tgt_len), dtype=np.int)
src_ids_ = self.src_pad_id * np.ones((len(b), src_len), dtype=np.int64)
tgt_ids_ = self.tgt_pad_id * np.ones((len(b), tgt_len), dtype=np.int64)
for i, sentence_idx in enumerate(b):
src_ids_[i][: len(src_ids[sentence_idx])] = src_ids[sentence_idx]
tgt_ids_[i][: len(tgt_ids[sentence_idx])] = tgt_ids[sentence_idx]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1647,7 +1647,7 @@ def _form_batches(
- ``'input_mask'``: a boolean numpy array;
- ``'loss_mask'``: a boolean numpy array.
If ``waveforms`` is not ``None``, then a batch also contains items
- ``features``: a ``np.float`` numpy array.
- ``features``: a ``np.float64`` numpy array.
- ``features_length`` a ``np.int32`` numpy array.
If ``audio_filepaths`` is not ``None``, then a batch also contains items
- ``audio_filepaths`` a list of strings.
Expand Down Expand Up @@ -1677,7 +1677,7 @@ def _form_batches(
"capit_labels": item[3].astype(np.int64),
}
if self.use_audio and self.preload_audios:
batch['features'] = item[4].astype(np.float)
batch['features'] = item[4].astype(np.float64)
batch['features_length'] = item[5]
elif self.use_audio and not self.preload_audios:
batch['audio_filepaths'] = item[6]
Expand Down Expand Up @@ -1730,7 +1730,7 @@ def _pack_into_batches(
- ``'input_mask'``: a boolean numpy array;
- ``'loss_mask'``: a boolean numpy array.
If ``waveforms`` is not ``None``, then a batch also contains items
- ``features``: a ``np.float`` numpy array.
- ``features``: a ``np.float64`` numpy array.
- ``features_length`` a ``np.int32`` numpy array.
If ``audio_filepaths`` is not ``None``, then a batch also contains items
- ``audio_filepaths`` a list of strings.
Expand Down Expand Up @@ -1785,7 +1785,7 @@ def _pack_into_batches(
if self.use_audio and self.preload_audios:
batch['features'] = pad(
waveforms[start : start + size], max(audio_lengths[start : start + size]), 0.0
).astype(np.float)
).astype(np.float64)
batch['features_length'] = audio_lengths[start : start + size]
elif self.use_audio and not self.preload_audios:
batch['audio_filepaths'] = audio_filepaths[start : start + size]
Expand Down Expand Up @@ -1993,8 +1993,8 @@ def __getitem__(self, idx: int) -> Dict[str, np.ndarray]:
computed for corresponding token. See more in description of constructor parameters
``ignore_start_end``, ``ignore_extra_tokens`` (if ``self.add_masks_and_segment_ids_to_batch`` is
``False``, then these items are missing).
- ``'features'`` (:obj:`numpy.ndarray`) :obj:`np.float` array of waveforms of audio if ``self.preload_audio`` is set to ``True`` else empty.
- ``'features_length'`` (:obj:`numpy.ndarray`) :obj:`np.long` array of number of samples per audio.
- ``'features'`` (:obj:`numpy.ndarray`) :obj:`np.float64` array of waveforms of audio if ``self.preload_audio`` is set to ``True`` else empty.
- ``'features_length'`` (:obj:`numpy.ndarray`) :obj:`np.longlong` array of number of samples per audio.
- ``'audio_filepaths'`` (:obj:`List`) :obj:`str` contains paths of audio files if ``self.preload_audio`` set to ``False``
"""
return self.batches[idx]
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,6 @@ def __getitem__(
self.all_query_ids[idx],
self.all_is_first[idx],
self.all_is_last[idx],
np.array(self.all_audio_queries[idx], dtype=np.float),
np.array(self.all_audio_queries[idx], dtype=np.float64),
self.all_audio_lengths[idx],
)
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_subtokens_mask[idx]),
np.array(self.all_loss_mask[idx]),
np.array(self.all_labels[idx]),
Expand Down Expand Up @@ -348,6 +348,6 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_subtokens_mask[idx]),
)
2 changes: 1 addition & 1 deletion nemo/collections/vision/data/megatron/autoaugment.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def __init__(
"translateY": np.linspace(0, 150 / 331, num_levels),
"rotate": np.linspace(0, 30, num_levels),
"color": np.linspace(0.0, 0.9, num_levels),
"posterize": np.round(np.linspace(8, 4, num_levels), 0).astype(np.int),
"posterize": np.round(np.linspace(8, 4, num_levels), 0).astype(np.int64),
"solarize": np.linspace(256, 0, num_levels), # range [0, 256]
"contrast": np.linspace(0.0, 0.9, num_levels),
"sharpness": np.linspace(0.0, 0.9, num_levels),
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
huggingface_hub
numba
numpy>=1.22,<1.24
numpy>=1.22
onnx>=1.7.0
python-dateutil
ruamel.yaml
Expand Down
1 change: 0 additions & 1 deletion requirements/requirements_common.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
datasets
inflect
pandas
pydantic<2 # remove after inflect supports Pydantic 2.0+
sacremoses>=0.0.43
sentencepiece<1.0.0
youtokentome>=1.0.5
Loading