import numpy as np


def random_zero_array(arr, p, mask_token):
    """
    Randomly replace elements of an array with ``mask_token``.

    Each position is masked independently with probability ``p``. Despite
    the function name, masked positions receive ``mask_token`` rather than
    zero (the two coincide only when ``mask_token`` is 0).

    Args:
        arr: Array to mask. Must expose ``.shape`` and support element-wise
            arithmetic with a NumPy integer array.
        p: Probability of masking any single element.
        mask_token: Value written into the masked positions.

    Returns:
        The result of blending ``arr`` and ``mask_token`` element-wise:
        unmasked positions keep their value from ``arr``, masked positions
        hold ``mask_token``.

    Note:
        Randomness is drawn from NumPy's global ``np.random`` state, so seed
        it with ``np.random.seed`` for reproducible masks.
    """
    # 0 is drawn with probability p (masked), 1 with probability 1 - p (kept).
    keep = np.random.choice([0, 1], size=arr.shape, p=[p, 1 - p])
    # Complementary indicator: 1 exactly where the element is masked.
    drop = 1 - keep
    # Arithmetic select: kept slots contribute arr, masked slots mask_token.
    return arr * keep + mask_token * drop