From 8380bbf04a92d47bfdf888df6b642c87b4fdcc0b Mon Sep 17 00:00:00 2001 From: Arkadiusz Nowaczynski Date: Tue, 16 Dec 2025 18:26:42 +0100 Subject: [PATCH 1/2] replace raise error with LOG.warning in AA LCR dataset prepare Signed-off-by: Arkadiusz Nowaczynski --- nemo_skills/dataset/aalcr/prepare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_skills/dataset/aalcr/prepare.py b/nemo_skills/dataset/aalcr/prepare.py index ec9fa75256..81f2b724a1 100644 --- a/nemo_skills/dataset/aalcr/prepare.py +++ b/nemo_skills/dataset/aalcr/prepare.py @@ -187,7 +187,7 @@ def write_data_to_file(output_file, data, txt_file_folder, max_context_window, t continue if n_tokens != entry["input_tokens"]: # check if the n_tokens exactly match the input_tokens in the entry - raise ValueError(f"n_tokens: {n_tokens} != input_tokens: {entry['input_tokens']}") + LOG.warning(f"n_tokens: {n_tokens} != input_tokens: {entry['input_tokens']}") entry[f"n_tokens_{tokenizer_name}"] = n_tokens entry["question"] = question From d1d894e8facd949cda3f258847d138d4321a939a Mon Sep 17 00:00:00 2001 From: Arkadiusz Nowaczynski Date: Tue, 16 Dec 2025 22:28:58 +0100 Subject: [PATCH 2/2] remove aalcr from EXCLUDED_DATASETS in test_eval Signed-off-by: Arkadiusz Nowaczynski --- tests/gpu-tests/test_eval.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/gpu-tests/test_eval.py b/tests/gpu-tests/test_eval.py index 31c8f2cccf..88e713bbec 100644 --- a/tests/gpu-tests/test_eval.py +++ b/tests/gpu-tests/test_eval.py @@ -44,7 +44,6 @@ "mbpp", "mmau-pro", "asr-leaderboard", - "aalcr", # Has tokenization mismatch issues "audiobench", "librispeech-pc", }