vllm-project · HDCharles · Feb 2, 2026 · Jan 4, 2026 · Jan 6, 2026 · Jan 10, 2026
diff --git a/experimental/attention/llama3_attention.py b/experimental/attention/llama3_attention.py
@@ -1,10 +1,10 @@
+from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import QuantizationModifier
 from llmcompressor.utils import dispatch_for_generation
-from compressed_tensors.quantization import QuantizationScheme, QuantizationArgs
 
 # Select model and load it.
 model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

diff --git a/experimental/attention/llama3_attention_r3_nvfp4.py b/experimental/attention/llama3_attention_r3_nvfp4.py
@@ -1,12 +1,12 @@
+from compressed_tensors.quantization import QuantizationScheme
+from compressed_tensors.quantization.quant_scheme import NVFP4
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import QuantizationModifier
 from llmcompressor.modifiers.transform import SpinQuantModifier
 from llmcompressor.utils import dispatch_for_generation
-from compressed_tensors.quantization import QuantizationScheme
-from compressed_tensors.quantization.quant_scheme import NVFP4
 
 # Select model and load it.
 model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

@@ -31,6 +31,9 @@
     from datasets import Dataset, DatasetDict
 
 
+TOKENIZERS_PARALLELISM_ENV = "TOKENIZERS_PARALLELISM"
+
+
 class Oneshot:
     """
     Class responsible for carrying out one-shot calibration on a pretrained model.
@@ -121,6 +124,19 @@ def __init__(
         :param log_dir: Path to save logs during oneshot run.
             Nothing is logged to file if None.
         """
+        # Disable tokenizer parallelism to prevent warning when using
+        # multiprocessing for dataset preprocessing. The warning occurs because
+        # FastTokenizer's internal threading conflicts with dataset.map's num_proc.
+        # See: https://github.com/vllm-project/llm-compressor/issues/2007
+        if TOKENIZERS_PARALLELISM_ENV not in os.environ:
+            os.environ[TOKENIZERS_PARALLELISM_ENV] = "false"
+            logger.warning(
+                "Disabling tokenizer parallelism due to threading conflict between "
+                "FastTokenizer and Datasets. Set "
+                f"{TOKENIZERS_PARALLELISM_ENV}=false to "
+                "suppress this warning."
+            )
+
         # Set up file logging (no default files):
         # 1) If LLM_COMPRESSOR_LOG_FILE is set, log to that file.
         # 2) Else, if an explicit log_dir is provided, create a timestamped file there.

diff --git a/tests/llmcompressor/transformers/oneshot/test_tokenizer_parallelism.py b/tests/llmcompressor/transformers/oneshot/test_tokenizer_parallelism.py
@@ -0,0 +1,47 @@
+import os
+
+import pytest
+
+from llmcompressor.entrypoints.oneshot import (
+    TOKENIZERS_PARALLELISM_ENV as _TOKENIZERS_PARALLELISM_ENV,
+)
+
+
+class TestTokenizerParallelism:
+    """Tests for tokenizer parallelism warning suppression (issue #2007)."""
+
+    def test_oneshot_sets_tokenizers_parallelism_when_not_set(self, monkeypatch):
+        """
+        Test that Oneshot sets TOKENIZERS_PARALLELISM=false when not already set.
+
+        This prevents the warning:
+        "huggingface/tokenizers: The current process just got forked, after
+        parallelism has already been used. Disabling parallelism to avoid deadlocks..."
+
+        See: https://github.com/vllm-project/llm-compressor/issues/2007
+        """
+        # Set-then-delete so monkeypatch always records the variable and rolls back
+        # the value Oneshot writes; delenv alone records nothing when it is unset.
+        monkeypatch.setenv(_TOKENIZERS_PARALLELISM_ENV, "placeholder")
+        monkeypatch.delenv(_TOKENIZERS_PARALLELISM_ENV)
+
+        from llmcompressor.entrypoints.oneshot import Oneshot
+
+        # Expected to fail on the missing model, but the env var should be set.
+        with pytest.raises(Exception):
+            Oneshot(model="nonexistent-model")
+        assert os.environ[_TOKENIZERS_PARALLELISM_ENV] == "false"
+
+    def test_oneshot_respects_existing_tokenizers_parallelism(self, monkeypatch):
+        """
+        Test that Oneshot respects user's existing TOKENIZERS_PARALLELISM setting.
+
+        If a user has explicitly set TOKENIZERS_PARALLELISM, we should not override it.
+        """
+        monkeypatch.setenv(_TOKENIZERS_PARALLELISM_ENV, "true")
+
+        from llmcompressor.entrypoints.oneshot import Oneshot
+
+        with pytest.raises(Exception):
+            Oneshot(model="nonexistent-model")
+        assert os.environ[_TOKENIZERS_PARALLELISM_ENV] == "true"
diff --git a/tools/collect_env.py b/tools/collect_env.py
@@ -3,9 +3,9 @@
 creating bug reports. See `.github/ISSUE_TEMPLATE/bug_report.md`
 """
 
+import importlib
 import platform
 import sys
-import importlib
 
 
 def get_version(pkg_name):