Skip to content

Commit

Permalink
Update dependencies (#8156)
Browse files Browse the repository at this point in the history
* Update dependencies

Signed-off-by: smajumdar <[email protected]>

* Update numpy.long to numpy.longlong (np.long deprecated and removed in numpy 1.24)

Signed-off-by: smajumdar <[email protected]>

* Update all deprecated numpy types

Signed-off-by: smajumdar <[email protected]>

---------

Signed-off-by: smajumdar <[email protected]>
Signed-off-by: Pablo Garay <[email protected]>
  • Loading branch information
titu1994 authored and pablo-garay committed Mar 19, 2024
1 parent 90600f1 commit 973a7ec
Show file tree
Hide file tree
Showing 16 changed files with 26 additions and 27 deletions.
2 changes: 1 addition & 1 deletion docs/source/nlp/nemo_megatron/retro/retro_model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ Following is the retro memory map index data format:
- chunk id address in byte (int64 array)
-

:sup:`1` 1: np.uint8, 2: np.int8, 3: np.int16, 4: np.int32, 5: np.int64, 6: np.float, 7: np.double, 8: np.uint16
:sup:`1` 1: np.uint8, 2: np.int8, 3: np.int16, 4: np.int32, 5: np.int64, 6: np.float64, 7: np.double, 8: np.uint16

:sup:`2` When building the indexed dataset, we pad each sentence to be a multiple of ``chunk_size`` with ``pad_id`` from the tokenizer.
The number of tokens for each sentence includes the padded token ids. For retrieval data, there is an extra ``chunk_size`` padding at
Expand Down
2 changes: 1 addition & 1 deletion nemo/collections/asr/parts/utils/numba_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def phase_vocoder(D: np.ndarray, rate: float, phi_advance: np.ndarray, scale_buf
Returns:
Complex64 ndarray of shape [d, t / rate, complex=2]
"""
time_steps = np.arange(0, D.shape[1], rate, dtype=np.float)
time_steps = np.arange(0, D.shape[1], rate, dtype=np.float64)

# Create an empty output array
d_stretch = np.zeros((D.shape[0], len(time_steps)), D.dtype, order='F')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_loss_mask[idx]),
np.array(self.all_subtokens_mask[idx]),
self.all_intents[idx],
Expand Down Expand Up @@ -326,7 +326,7 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_loss_mask[idx]),
np.array(self.all_subtokens_mask[idx]),
)
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def __getitem__(self, idx: int):
np.array(ex.example_id_num[-1]), # service_id
np.array(ex.utterance_ids),
np.array(ex.utterance_segment),
np.array(ex.utterance_mask, dtype=np.long),
np.array(ex.utterance_mask, dtype=np.longlong),
np.array(ex.intent_status, dtype=np.float32),
np.array(ex.requested_slot_status, dtype=np.float32),
np.array(ex.categorical_slot_status),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def __getitem__(self, idx):
return (
np.array(feature.input_ids),
np.array(feature.segment_ids),
np.array(feature.input_mask, dtype=np.long),
np.array(feature.input_mask, dtype=np.longlong),
np.array(feature.label_id),
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def construct_input(self, token_ids1, max_seq_length, token_ids2=None):
num_nonpad_tokens = len(bert_input)

input_ids[:num_nonpad_tokens] = bert_input
input_ids = np.array(input_ids, dtype=np.long)
input_ids = np.array(input_ids, dtype=np.longlong)
input_mask = input_ids != self.tokenizer.pad_id
input_type_ids = np.ones_like(input_ids)
input_type_ids[:sentence1_length] = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_loss_mask[idx]),
np.array(self.all_subtokens_mask[idx]),
self.all_intents[idx],
Expand Down Expand Up @@ -291,7 +291,7 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_loss_mask[idx]),
np.array(self.all_subtokens_mask[idx]),
)
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,10 @@ def truncate_seq_pair(a, b, max_num_tokens):

input_ids, output_mask = self.mask_ids(output_ids)

input_mask = np.zeros(self.max_seq_length, dtype=np.long)
input_mask = np.zeros(self.max_seq_length, dtype=np.longlong)
input_mask[: len(input_ids)] = 1

input_type_ids = np.zeros(self.max_seq_length, dtype=np.int)
input_type_ids = np.zeros(self.max_seq_length, dtype=np.int64)
input_type_ids[len(a_document) + 2 : len(output_ids) + 1] = 1

padding_length = max(0, self.max_seq_length - len(input_ids))
Expand All @@ -257,7 +257,7 @@ def truncate_seq_pair(a, b, max_num_tokens):
return (
np.array(input_ids),
input_type_ids,
np.array(input_mask, dtype=np.long),
np.array(input_mask, dtype=np.longlong),
np.array(output_ids),
np.array(output_mask, dtype=np.float32),
is_next,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def pad_batches(self, ids):

batches = []
for batch_elem_len, batch_sent_ids in zip(self.batch_elem_lengths, self.batch_sent_ids):
batch = self.tokenizer.pad_id * np.ones((len(batch_sent_ids), batch_elem_len), dtype=np.int)
batch = self.tokenizer.pad_id * np.ones((len(batch_sent_ids), batch_elem_len), dtype=np.int64)
for i, sentence_idx in enumerate(batch_sent_ids):
batch[i][: len(ids[sentence_idx])] = ids[sentence_idx]
batches.append(batch)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ def pad_batches(self, src_ids, tgt_ids, batch_indices):
for batch_idx, b in enumerate(batch_indices):
src_len = max([len(src_ids[i]) for i in b])
tgt_len = max([len(tgt_ids[i]) for i in b])
src_ids_ = self.src_pad_id * np.ones((len(b), src_len), dtype=np.int)
tgt_ids_ = self.tgt_pad_id * np.ones((len(b), tgt_len), dtype=np.int)
src_ids_ = self.src_pad_id * np.ones((len(b), src_len), dtype=np.int64)
tgt_ids_ = self.tgt_pad_id * np.ones((len(b), tgt_len), dtype=np.int64)
for i, sentence_idx in enumerate(b):
src_ids_[i][: len(src_ids[sentence_idx])] = src_ids[sentence_idx]
tgt_ids_[i][: len(tgt_ids[sentence_idx])] = tgt_ids[sentence_idx]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1647,7 +1647,7 @@ def _form_batches(
- ``'input_mask'``: a boolean numpy array;
- ``'loss_mask'``: a boolean numpy array.
If ``waveforms`` is not ``None``, then a batch also contains items
- ``features``: a ``np.float`` numpy array.
- ``features``: a ``np.float64`` numpy array.
- ``features_length`` a ``np.int32`` numpy array.
If ``audio_filepaths`` is not ``None``, then a batch also contains items
- ``audio_filepaths`` a list of strings.
Expand Down Expand Up @@ -1677,7 +1677,7 @@ def _form_batches(
"capit_labels": item[3].astype(np.int64),
}
if self.use_audio and self.preload_audios:
batch['features'] = item[4].astype(np.float)
batch['features'] = item[4].astype(np.float64)
batch['features_length'] = item[5]
elif self.use_audio and not self.preload_audios:
batch['audio_filepaths'] = item[6]
Expand Down Expand Up @@ -1730,7 +1730,7 @@ def _pack_into_batches(
- ``'input_mask'``: a boolean numpy array;
- ``'loss_mask'``: a boolean numpy array.
If ``waveforms`` is not ``None``, then a batch also contains items
- ``features``: a ``np.float`` numpy array.
- ``features``: a ``np.float64`` numpy array.
- ``features_length`` a ``np.int32`` numpy array.
If ``audio_filepaths`` is not ``None``, then a batch also contains items
- ``audio_filepaths`` a list of strings.
Expand Down Expand Up @@ -1785,7 +1785,7 @@ def _pack_into_batches(
if self.use_audio and self.preload_audios:
batch['features'] = pad(
waveforms[start : start + size], max(audio_lengths[start : start + size]), 0.0
).astype(np.float)
).astype(np.float64)
batch['features_length'] = audio_lengths[start : start + size]
elif self.use_audio and not self.preload_audios:
batch['audio_filepaths'] = audio_filepaths[start : start + size]
Expand Down Expand Up @@ -1993,8 +1993,8 @@ def __getitem__(self, idx: int) -> Dict[str, np.ndarray]:
computed for corresponding token. See more in description of constructor parameters
``ignore_start_end``, ``ignore_extra_tokens`` (if ``self.add_masks_and_segment_ids_to_batch`` is
``False``, then these items are missing).
- ``'features'`` (:obj:`numpy.ndarray`) :obj:`np.float` array of waveforms of audio if ``self.preload_audio`` is set to ``True`` else empty.
- ``'features_length'`` (:obj:`numpy.ndarray`) :obj:`np.long` array of number of samples per audio.
- ``'features'`` (:obj:`numpy.ndarray`) :obj:`np.float64` array of waveforms of audio if ``self.preload_audio`` is set to ``True`` else empty.
- ``'features_length'`` (:obj:`numpy.ndarray`) :obj:`np.longlong` array of number of samples per audio.
- ``'audio_filepaths'`` (:obj:`List`) :obj:`str` contains paths of audio files if ``self.preload_audio`` set to ``False``
"""
return self.batches[idx]
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,6 @@ def __getitem__(
self.all_query_ids[idx],
self.all_is_first[idx],
self.all_is_last[idx],
np.array(self.all_audio_queries[idx], dtype=np.float),
np.array(self.all_audio_queries[idx], dtype=np.float64),
self.all_audio_lengths[idx],
)
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_subtokens_mask[idx]),
np.array(self.all_loss_mask[idx]),
np.array(self.all_labels[idx]),
Expand Down Expand Up @@ -348,6 +348,6 @@ def __getitem__(self, idx):
return (
np.array(self.all_input_ids[idx]),
np.array(self.all_segment_ids[idx]),
np.array(self.all_input_mask[idx], dtype=np.long),
np.array(self.all_input_mask[idx], dtype=np.longlong),
np.array(self.all_subtokens_mask[idx]),
)
2 changes: 1 addition & 1 deletion nemo/collections/vision/data/megatron/autoaugment.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def __init__(
"translateY": np.linspace(0, 150 / 331, num_levels),
"rotate": np.linspace(0, 30, num_levels),
"color": np.linspace(0.0, 0.9, num_levels),
"posterize": np.round(np.linspace(8, 4, num_levels), 0).astype(np.int),
"posterize": np.round(np.linspace(8, 4, num_levels), 0).astype(np.int64),
"solarize": np.linspace(256, 0, num_levels), # range [0, 256]
"contrast": np.linspace(0.0, 0.9, num_levels),
"sharpness": np.linspace(0.0, 0.9, num_levels),
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
huggingface_hub
numba
numpy>=1.22,<1.24
numpy>=1.22
onnx>=1.7.0
python-dateutil
ruamel.yaml
Expand Down
1 change: 0 additions & 1 deletion requirements/requirements_common.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
datasets
inflect
pandas
pydantic<2 # remove after inflect supports Pydantic 2.0+
sacremoses>=0.0.43
sentencepiece<1.0.0
youtokentome>=1.0.5

0 comments on commit 973a7ec

Please sign in to comment.