diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py index d2231e1703ee..9d9fd30b63c0 100644 --- a/examples/language-modeling/run_clm.py +++ b/examples/language-modeling/run_clm.py @@ -254,7 +254,7 @@ def tokenize_function(examples): tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, - remove_columns=[text_column_name], + remove_columns=column_names, load_from_cache_file=not data_args.overwrite_cache, ) diff --git a/examples/language-modeling/run_mlm.py b/examples/language-modeling/run_mlm.py index cd1cc3f26da7..bf15b00d8397 100644 --- a/examples/language-modeling/run_mlm.py +++ b/examples/language-modeling/run_mlm.py @@ -292,7 +292,7 @@ def tokenize_function(examples): tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, - remove_columns=[text_column_name], + remove_columns=column_names, load_from_cache_file=not data_args.overwrite_cache, ) diff --git a/examples/language-modeling/run_plm.py b/examples/language-modeling/run_plm.py index 337ebb3e7ef6..bc1c3fd28e98 100644 --- a/examples/language-modeling/run_plm.py +++ b/examples/language-modeling/run_plm.py @@ -279,7 +279,7 @@ def tokenize_function(examples): tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, - remove_columns=[text_column_name], + remove_columns=column_names, load_from_cache_file=not data_args.overwrite_cache, )