diff --git a/python/sglang/srt/environ.py b/python/sglang/srt/environ.py
index f6ca9026ef0a..f8dbcca85713 100644
--- a/python/sglang/srt/environ.py
+++ b/python/sglang/srt/environ.py
@@ -458,7 +458,7 @@ class Envs:
     SGLANG_ROPE_CACHE_ALIGN = EnvInt(128)
 
     # Overlap Spec V2
-    SGLANG_ENABLE_SPEC_V2 = EnvBool(False)
+    SGLANG_ENABLE_SPEC_V2 = EnvBool(True)
     SGLANG_ENABLE_OVERLAP_PLAN_STREAM = EnvBool(False)
 
     # Spec Config
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index 817ae43fbed2..9cbe98987135 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -1962,11 +1962,6 @@ def _handle_model_specific_adjustments(self):
                 logger.info(
                     "Enable multi-layer EAGLE speculative decoding for MiMoV2 model."
                 )
-                if not envs.SGLANG_ENABLE_SPEC_V2.get():
-                    envs.SGLANG_ENABLE_SPEC_V2.set(True)
-                    logger.warning(
-                        "Spec v2 is enabled for multi-layer EAGLE speculative decoding."
-                    )
 
             if self.enable_hierarchical_cache:
                 self.swa_full_tokens_ratio = 1.0
@@ -1983,11 +1978,6 @@ def _handle_model_specific_adjustments(self):
                 logger.info(
                     "Enable multi-layer EAGLE speculative decoding for Step3p5ForCausalLM model."
                 )
-                if not envs.SGLANG_ENABLE_SPEC_V2.get():
-                    envs.SGLANG_ENABLE_SPEC_V2.set(True)
-                    logger.warning(
-                        "Spec v2 is enabled for multi-layer EAGLE speculative decoding."
-                    )
             if self.enable_hierarchical_cache:
                 self.swa_full_tokens_ratio = 1.0
                 logger.warning(
@@ -3386,26 +3376,29 @@ def _handle_speculative_decoding(self):
                     "Max running requests is reset to 48 for speculative decoding. You can override this by explicitly setting --max-running-requests."
                 )
 
+            spec_v1_reason = None
             if (
-                self.speculative_algorithm in ["EAGLE", "EAGLE3", "STANDALONE"]
-                and envs.SGLANG_ENABLE_SPEC_V2.get()
+                self.speculative_eagle_topk is not None
+                and self.speculative_eagle_topk > 1
+                and not self.disable_overlap_schedule
+            ):
+                self.disable_overlap_schedule = True
+                spec_v1_reason = "spec v2 currently only supports topk = 1"
+            elif (
+                not envs.SGLANG_ENABLE_SPEC_V2.get()
+                and not self.disable_overlap_schedule
             ):
-                self.disable_overlap_schedule = False
+                self.disable_overlap_schedule = True
+                spec_v1_reason = "SGLANG_ENABLE_SPEC_V2=False"
+
+            if self.disable_overlap_schedule:
                 logger.warning(
-                    "Spec v2 is enabled for eagle/eagle3 speculative decoding and overlap schedule is turned on."
+                    "Spec v1 is used for eagle/eagle3/standalone speculative decoding because %s.",
+                    spec_v1_reason or "overlap schedule is disabled",
                 )
-                if (
-                    self.speculative_eagle_topk is not None
-                    and self.speculative_eagle_topk > 1
-                ):
-                    raise ValueError(
-                        "Spec v2 currently only supports topk = 1 for speculative decoding."
-                    )
             else:
-                self.disable_overlap_schedule = True
                 logger.warning(
-                    "Overlap scheduler is disabled when spec v2 is off or using unsupported speculative algorithm. "
-                    "You can set env SGLANG_ENABLE_SPEC_V2=True to enable the experimental overlap scheduler. "
+                    "Spec v2 is enabled by default for eagle/eagle3/standalone speculative decoding."
                 )
 
             if self.enable_mixed_chunk:
diff --git a/test/manual/ascend/test_ascend_deepseek_mtp.py b/test/manual/ascend/test_ascend_deepseek_mtp.py
index cbe01a07add1..acc78fa5b44b 100644
--- a/test/manual/ascend/test_ascend_deepseek_mtp.py
+++ b/test/manual/ascend/test_ascend_deepseek_mtp.py
@@ -53,7 +53,6 @@ def setUpClass(cls):
         ]
 
         envs.SGLANG_NPU_USE_MLAPO.set(True)
-        envs.SGLANG_ENABLE_SPEC_V2.set(True)
         envs.SGLANG_ENABLE_OVERLAP_PLAN_STREAM.set(True)
 
     def test_a_gsm8k(self):
diff --git a/test/manual/test_deepseek_v31.py b/test/manual/test_deepseek_v31.py
index 8025e47c4e2b..543879b17de2 100644
--- a/test/manual/test_deepseek_v31.py
+++ b/test/manual/test_deepseek_v31.py
@@ -50,7 +50,6 @@ def test_deepseek_v31_all_variants(self):
                 DEEPSEEK_V31_MODEL_PATH,
                 tp_size=8,
                 extra_args=base_args + mtp_args,
-                env={"SGLANG_ENABLE_SPEC_V2": "1"},
                 variant="TP8+MTP",
             ),
         ]
diff --git a/test/manual/test_glm_46_fp8.py b/test/manual/test_glm_46_fp8.py
index 94fb724b75b8..815ad33f4d53 100644
--- a/test/manual/test_glm_46_fp8.py
+++ b/test/manual/test_glm_46_fp8.py
@@ -41,7 +41,6 @@ def test_glm_46_fp8_all_variants(self):
                 GLM_4_6_FP8_MODEL_PATH,
                 tp_size=8,
                 extra_args=base_args + mtp_args,
-                env={"SGLANG_ENABLE_SPEC_V2": "1"},
                 variant="TP8+MTP",
             ),
         ]
diff --git a/test/manual/test_qwen3_235b.py b/test/manual/test_qwen3_235b.py
index f0e4f03996ce..acae0bd1e182 100644
--- a/test/manual/test_qwen3_235b.py
+++ b/test/manual/test_qwen3_235b.py
@@ -52,7 +52,6 @@ def test_qwen3_235b_fp8_all_variants(self):
                 QWEN3_235B_FP8_MODEL_PATH,
                 tp_size=8,
                 extra_args=base_args + eagle3_args,
-                env={"SGLANG_ENABLE_SPEC_V2": "1"},
                 variant="TP8+EP2+EAGLE3",
             ),
         ]
diff --git a/test/registered/4-gpu-models/test_qwen35_models.py b/test/registered/4-gpu-models/test_qwen35_models.py
new file mode 100644
index 000000000000..be125c32175f
--- /dev/null
+++ b/test/registered/4-gpu-models/test_qwen35_models.py
@@ -0,0 +1,208 @@
+"""GSM8K accuracy and NEXTN (MTP) tests for Qwen3.5-397B-A17B-NVFP4 on 4 GPUs."""
+
+import unittest
+from types import SimpleNamespace
+
+import requests
+
+from sglang.srt.utils import kill_process_tree
+from sglang.test.accuracy_test_runner import AccuracyTestParams
+from sglang.test.ci.ci_register import register_cuda_ci
+from sglang.test.kits.reasoning_kit import ReasoningTokenUsageMixin
+
+# This eval harness applies the chat_template, which is critical for qwen3.5
+# to get good accuracy on gsm8k
+from sglang.test.run_combined_tests import run_combined_tests
+from sglang.test.run_eval import run_eval
+from sglang.test.test_utils import (
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
+    ModelLaunchSettings,
+    popen_launch_server,
+)
+
+register_cuda_ci(est_time=768, suite="stage-c-test-4-gpu-b200")
+
+QWEN35_FP4_MODEL = "nvidia/Qwen3.5-397B-A17B-NVFP4"
+ACC_THRESHOLDS = {QWEN35_FP4_MODEL: {"gsm8k": 0.95}}
+
+# Seconds to wait for /server_info; requests has no default timeout and would
+# otherwise block the CI job indefinitely if the server stops responding.
+SERVER_INFO_TIMEOUT = 60
+
+# Launch flags shared by every server started in this file.
+COMMON_SERVER_ARGS = [
+    "--tp-size",
+    "4",
+    "--chunked-prefill-size",
+    "2048",
+    "--mamba-scheduler-strategy",
+    "extra_buffer",
+    "--mamba-track-interval",
+    "128",
+    "--mamba-ssm-dtype",
+    "bfloat16",
+    "--max-running-requests",
+    "128",
+    "--reasoning-parser",
+    "qwen3",
+    "--attention-backend",
+    "trtllm_mha",
+    "--quantization",
+    "modelopt_fp4",
+]
+
+# NEXTN speculative decoding flags used by the MTP test classes.
+MTP_SERVER_ARGS = [
+    "--speculative-algorithm",
+    "NEXTN",
+    "--speculative-num-steps",
+    "3",
+    "--speculative-eagle-topk",
+    "1",
+    "--speculative-num-draft-tokens",
+    "4",
+    "--mem-fraction-static",
+    "0.8",
+]
+
+# Multithreaded weight loading; appended last on every launch command line.
+LOADER_ARGS = [
+    "--model-loader-extra-config",
+    '{"enable_multithread_load": true,"num_threads": 64}',
+]
+
+
+def _launch_mtp_server(model, base_url):
+    """Launch ``model`` at ``base_url`` with NEXTN speculative decoding enabled."""
+    return popen_launch_server(
+        model,
+        base_url,
+        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+        other_args=COMMON_SERVER_ARGS + MTP_SERVER_ARGS + LOADER_ARGS,
+    )
+
+
+def _check_gsm8k_and_accept_length(case):
+    """Run GSM8K against the server owned by ``case`` and assert on the results.
+
+    ``case`` is the running test instance; it must expose ``model`` and
+    ``base_url``. Asserts that the GSM8K score reaches the model's threshold and
+    that the average speculative accept length reported by ``/server_info``
+    exceeds 3.3.
+    """
+    args = SimpleNamespace(
+        model=case.model,
+        eval_name="gsm8k",
+        num_shots=5,
+        num_examples=200,
+        max_tokens=16000,
+        num_threads=128,
+        repeat=1,
+        temperature=0.6,
+        top_p=0.95,
+        top_k=20,
+        base_url=case.base_url,
+        host="http://127.0.0.1",
+        port=int(case.base_url.split(":")[-1]),
+    )
+    metrics = run_eval(args)
+    print(f"{metrics=}")
+    case.assertGreaterEqual(metrics["score"], ACC_THRESHOLDS[case.model]["gsm8k"])
+
+    server_info = requests.get(
+        case.base_url + "/server_info", timeout=SERVER_INFO_TIMEOUT
+    )
+    # Fail with the HTTP status instead of an opaque JSON/KeyError below.
+    server_info.raise_for_status()
+    avg_spec_accept_length = server_info.json()["internal_states"][0][
+        "avg_spec_accept_length"
+    ]
+    print(f"{avg_spec_accept_length=}")
+    case.assertGreater(avg_spec_accept_length, 3.3)
+
+
+class TestQwen35FP4(CustomTestCase):
+    """GSM8K accuracy for the NVFP4 checkpoint without speculative decoding."""
+
+    def test_gsm8k(self):
+        base_args = COMMON_SERVER_ARGS + LOADER_ARGS
+
+        variants = [
+            ModelLaunchSettings(
+                QWEN35_FP4_MODEL,
+                extra_args=base_args,
+                variant="Triton",
+            ),
+            # TODO: Fix the FlashInfer variant and re-enable it
+            # ModelLaunchSettings(
+            #     QWEN35_FP4_MODEL,
+            #     extra_args=base_args + ["--linear-attn-decode-backend", "flashinfer"],
+            #     variant="FlashInfer",
+            # ),
+        ]
+
+        run_combined_tests(
+            models=variants,
+            test_name="Qwen3.5-397B-A17B-NVFP4",
+            accuracy_params=AccuracyTestParams(
+                dataset="gsm8k",
+                baseline_accuracy=ACC_THRESHOLDS[QWEN35_FP4_MODEL]["gsm8k"],
+                num_examples=200,
+                num_threads=128,
+                max_tokens=16000,
+                thinking_mode="qwen3",
+                temperature=0.6,
+                top_p=0.95,
+                top_k=20,
+            ),
+        )
+
+
+class TestQwen35FP4MTP(ReasoningTokenUsageMixin, CustomTestCase):
+    """GSM8K accuracy and accept length with NEXTN speculative decoding."""
+
+    reasoning_parser_name = "qwen3"
+
+    @classmethod
+    def setUpClass(cls):
+        cls.model = QWEN35_FP4_MODEL
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.init_reasoning_token_verifier()
+        cls.process = _launch_mtp_server(cls.model, cls.base_url)
+
+    @classmethod
+    def tearDownClass(cls):
+        kill_process_tree(cls.process.pid)
+
+    def test_gsm8k(self):
+        _check_gsm8k_and_accept_length(self)
+
+
+class TestQwen35FP4MTPV2(ReasoningTokenUsageMixin, CustomTestCase):
+    """Same launch configuration as ``TestQwen35FP4MTP``.
+
+    NOTE(review): this class is currently identical to ``TestQwen35FP4MTP``;
+    confirm whether the non-V2 class should pin a different spec version.
+    """
+
+    reasoning_parser_name = "qwen3"
+
+    @classmethod
+    def setUpClass(cls):
+        cls.model = QWEN35_FP4_MODEL
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.init_reasoning_token_verifier()
+        cls.process = _launch_mtp_server(cls.model, cls.base_url)
+
+    @classmethod
+    def tearDownClass(cls):
+        kill_process_tree(cls.process.pid)
+
+    def test_gsm8k(self):
+        _check_gsm8k_and_accept_length(self)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/registered/4-gpu-models/test_qwen3_next_models_mtp.py b/test/registered/4-gpu-models/test_qwen3_next_models_mtp.py
index e7edfbee9e03..13ad4d0c3140 100644
--- a/test/registered/4-gpu-models/test_qwen3_next_models_mtp.py
+++ b/test/registered/4-gpu-models/test_qwen3_next_models_mtp.py
@@ -1,6 +1,5 @@
 import unittest
 
-from sglang.srt.environ import envs
 from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.kits.eval_accuracy_kit import GSM8KMixin
 from sglang.test.kits.kl_divergence_kit import KLDivergenceMixin
@@ -94,16 +93,6 @@ class TestQwen3NextMTPV2(GSM8KMixin, KLDivergenceMixin, DefaultServerBase):
         "128",
     ]
 
-    @classmethod
-    def setUpClass(cls):
-        envs.SGLANG_ENABLE_SPEC_V2.set(True)
-        super().setUpClass()
-
-    @classmethod
-    def tearDownClass(cls):
-        envs.SGLANG_ENABLE_SPEC_V2.set(False)
-        super().tearDownClass()
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/registered/8-gpu-models/test_deepseek_v32.py b/test/registered/8-gpu-models/test_deepseek_v32.py
index 5a6525d204f5..b792931979df 100644
--- a/test/registered/8-gpu-models/test_deepseek_v32.py
+++ b/test/registered/8-gpu-models/test_deepseek_v32.py
@@ -68,7 +68,6 @@ def test_deepseek_v32_all_variants(self):
                 DEEPSEEK_V32_MODEL_PATH,
                 tp_size=8,
                 extra_args=BASE_ARGS + DP_ARGS + TOOL_CALL_ARGS + MTP_ARGS,
-                env={"SGLANG_ENABLE_SPEC_V2": "1"},
                 variant="DP8+MTP",
             ),
             # Variant: "tp" - Pure TP=8 only
@@ -83,7 +82,6 @@ def test_deepseek_v32_all_variants(self):
                 DEEPSEEK_V32_MODEL_PATH,
                 tp_size=8,
                 extra_args=BASE_ARGS + TP_ARGS + TOOL_CALL_ARGS + MTP_ARGS,
-                env={"SGLANG_ENABLE_SPEC_V2": "1"},
                 variant="TP8+MTP",
             ),
         ]
diff --git a/test/registered/8-gpu-models/test_dsa_models_mtp.py b/test/registered/8-gpu-models/test_dsa_models_mtp.py
index fe5ffe1c7a17..05cd47632e00 100644
--- a/test/registered/8-gpu-models/test_dsa_models_mtp.py
+++ b/test/registered/8-gpu-models/test_dsa_models_mtp.py
@@ -3,7 +3,6 @@
 
 import requests
 
-from sglang.srt.environ import envs
 from sglang.srt.utils import kill_process_tree
 from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.run_eval import run_eval
@@ -48,13 +47,12 @@ def setUpClass(cls):
             "--model-loader-extra-config",
             '{"enable_multithread_load": true, "num_threads": 64}',
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(True):
-            cls.process = popen_launch_server(
-                cls.model,
-                cls.base_url,
-                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-                other_args=other_args,
-            )
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=other_args,
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -132,13 +130,12 @@ def setUpClass(cls):
             "--model-loader-extra-config",
             '{"enable_multithread_load": true, "num_threads": 64}',
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(True):
-            cls.process = popen_launch_server(
-                cls.model,
-                cls.base_url,
-                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-                other_args=other_args,
-            )
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=other_args,
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -219,13 +216,12 @@ def setUpClass(cls):
             "--model-loader-extra-config",
             '{"enable_multithread_load": true, "num_threads": 64}',
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(True):
-            cls.process = popen_launch_server(
-                cls.model,
-                cls.base_url,
-                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-                other_args=other_args,
-            )
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=other_args,
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -303,13 +299,12 @@ def setUpClass(cls):
             "--model-loader-extra-config",
             '{"enable_multithread_load": true, "num_threads": 64}',
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(True):
-            cls.process = popen_launch_server(
-                cls.model,
-                cls.base_url,
-                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-                other_args=other_args,
-            )
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=other_args,
+        )
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/registered/8-gpu-models/test_gpt_oss_120b.py b/test/registered/8-gpu-models/test_gpt_oss_120b.py
index dae7bac4cbf5..114d93781886 100644
--- a/test/registered/8-gpu-models/test_gpt_oss_120b.py
+++ b/test/registered/8-gpu-models/test_gpt_oss_120b.py
@@ -48,7 +48,6 @@ def test_gpt_oss_120b_all_variants(self):
             "--speculative-num-draft-tokens=4",
         ]
         eagle3_env = {
-            "SGLANG_ENABLE_SPEC_V2": "1",
             "SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN": "1",
         }
 
diff --git a/test/registered/8-gpu-models/test_mistral_large3.py b/test/registered/8-gpu-models/test_mistral_large3.py
index 68b7ca7d20af..58587d45e3e2 100644
--- a/test/registered/8-gpu-models/test_mistral_large3.py
+++ b/test/registered/8-gpu-models/test_mistral_large3.py
@@ -73,7 +73,6 @@ def test_mistral_large3_all_variants(self):
                 MISTRAL_LARGE3_FP8_MODEL_PATH,
                 tp_size=8,
                 extra_args=base_args + eagle_args,
-                env={"SGLANG_ENABLE_SPEC_V2": "1"},
                 variant="TP8+MTP",
             ),
             # Variant: "nvfp4" - NVFP4 model + TP=8 + trtllm_mla backend
diff --git a/test/registered/8-gpu-models/test_qwen35.py b/test/registered/8-gpu-models/test_qwen35.py
index 813552b83421..bf7fb2d01e12 100644
--- a/test/registered/8-gpu-models/test_qwen35.py
+++ b/test/registered/8-gpu-models/test_qwen35.py
@@ -57,7 +57,6 @@ def test_qwen35(self):
                 tp_size=8,
                 extra_args=base_args + dp_args + mtp_args,
                 variant="TP8+DP8+MTP",
-                env={"SGLANG_ENABLE_SPEC_V2": "1"},
             ),
         ]
 
diff --git a/test/registered/amd/test_deepseek_r1_mxfp4_8gpu.py b/test/registered/amd/test_deepseek_r1_mxfp4_8gpu.py
index 04d4f6efb7a7..a58a998090af 100644
--- a/test/registered/amd/test_deepseek_r1_mxfp4_8gpu.py
+++ b/test/registered/amd/test_deepseek_r1_mxfp4_8gpu.py
@@ -88,7 +88,6 @@ def setUpClass(cls):
         cls.model = DEEPSEEK_R1_MODEL_PATH
         cls.base_url = DEFAULT_URL_FOR_TEST
 
-        envs.SGLANG_ENABLE_SPEC_V2.set(True)
         envs.SGLANG_ENABLE_OVERLAP_PLAN_STREAM.set(True)
 
         other_args = [
diff --git a/test/registered/ascend/basic_function/speculative_inference/test_npu_eagle3.py b/test/registered/ascend/basic_function/speculative_inference/test_npu_eagle3.py
index efbb5f738e02..f6f1e37f517a 100644
--- a/test/registered/ascend/basic_function/speculative_inference/test_npu_eagle3.py
+++ b/test/registered/ascend/basic_function/speculative_inference/test_npu_eagle3.py
@@ -63,7 +63,6 @@ def setUpClass(cls):
 
         cls.extra_envs = {
             "SGLANG_ENABLE_OVERLAP_PLAN_STREAM": "1",
-            "SGLANG_ENABLE_SPEC_V2": "1",
         }
         os.environ.update(cls.extra_envs)
 
diff --git a/test/registered/cp/test_deepseek_v32_cp_single_node.py b/test/registered/cp/test_deepseek_v32_cp_single_node.py
index 5adefd3d1ff0..55fa2190717e 100644
--- a/test/registered/cp/test_deepseek_v32_cp_single_node.py
+++ b/test/registered/cp/test_deepseek_v32_cp_single_node.py
@@ -1,7 +1,6 @@
 import unittest
 from types import SimpleNamespace
 
-from sglang.srt.environ import envs
 from sglang.srt.utils import kill_process_tree
 from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.run_eval import run_eval
@@ -52,13 +51,12 @@ def setUpClass(cls):
             "--model-loader-extra-config",
             '{"enable_multithread_load": true, "num_threads": 64}',
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(True):
-            cls.process = popen_launch_server(
-                cls.model,
-                cls.base_url,
-                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-                other_args=other_args,
-            )
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=other_args,
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -119,13 +117,12 @@ def setUpClass(cls):
             "--model-loader-extra-config",
             '{"enable_multithread_load": true, "num_threads": 64}',
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(True):
-            cls.process = popen_launch_server(
-                cls.model,
-                cls.base_url,
-                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-                other_args=other_args,
-            )
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=other_args,
+        )
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/registered/ep/test_deepep_large.py b/test/registered/ep/test_deepep_large.py
index 5967d489eb59..a400ae73d105 100644
--- a/test/registered/ep/test_deepep_large.py
+++ b/test/registered/ep/test_deepep_large.py
@@ -3,6 +3,7 @@
 
 import requests
 
+from sglang.srt.environ import envs
 from sglang.srt.utils import kill_process_tree
 from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.run_eval import run_eval
@@ -86,48 +87,49 @@ class TestDeepseekMTP(CustomTestCase):
     def setUpClass(cls):
         cls.model = DEFAULT_DEEPEP_MODEL_NAME_FOR_TEST
         cls.base_url = DEFAULT_URL_FOR_TEST
-        cls.process = popen_launch_server(
-            cls.model,
-            cls.base_url,
-            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-            other_args=[
-                "--trust-remote-code",
-                "--tp",
-                "8",
-                "--enable-dp-attention",
-                "--dp",
-                "8",
-                "--moe-dense-tp-size",
-                "1",
-                "--enable-dp-lm-head",
-                "--moe-a2a-backend",
-                "deepep",
-                "--moe-runner-backend",
-                "deep_gemm",
-                "--enable-two-batch-overlap",
-                "--ep-num-redundant-experts",
-                "32",
-                "--ep-dispatch-algorithm",
-                "dynamic",
-                "--eplb-algorithm",
-                "deepseek",
-                "--cuda-graph-bs",
-                "64",  # TODO: increase it to 128 when TBO is supported in draft_extend
-                "--max-running-requests",
-                "512",
-                "--speculative-algorithm",
-                "EAGLE",
-                "--speculative-num-steps",
-                "1",
-                "--speculative-eagle-topk",
-                "1",
-                "--speculative-num-draft-tokens",
-                "2",
-                "--disable-radix-cache",
-                "--model-loader-extra-config",
-                '{"enable_multithread_load": true,"num_threads": 64}',
-            ],
-        )
+        with envs.SGLANG_ENABLE_SPEC_V2.override(False):
+            cls.process = popen_launch_server(
+                cls.model,
+                cls.base_url,
+                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+                other_args=[
+                    "--trust-remote-code",
+                    "--tp",
+                    "8",
+                    "--enable-dp-attention",
+                    "--dp",
+                    "8",
+                    "--moe-dense-tp-size",
+                    "1",
+                    "--enable-dp-lm-head",
+                    "--moe-a2a-backend",
+                    "deepep",
+                    "--moe-runner-backend",
+                    "deep_gemm",
+                    "--enable-two-batch-overlap",
+                    "--ep-num-redundant-experts",
+                    "32",
+                    "--ep-dispatch-algorithm",
+                    "dynamic",
+                    "--eplb-algorithm",
+                    "deepseek",
+                    "--cuda-graph-bs",
+                    "64",  # TODO: increase it to 128 when TBO is supported in draft_extend
+                    "--max-running-requests",
+                    "512",
+                    "--speculative-algorithm",
+                    "EAGLE",
+                    "--speculative-num-steps",
+                    "1",
+                    "--speculative-eagle-topk",
+                    "1",
+                    "--speculative-num-draft-tokens",
+                    "2",
+                    "--disable-radix-cache",
+                    "--model-loader-extra-config",
+                    '{"enable_multithread_load": true,"num_threads": 64}',
+                ],
+            )
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/registered/mla/test_flashmla.py b/test/registered/mla/test_flashmla.py
index abcad2a391a7..97fd2e2eaf8b 100644
--- a/test/registered/mla/test_flashmla.py
+++ b/test/registered/mla/test_flashmla.py
@@ -9,6 +9,7 @@
 import requests
 import torch
 
+from sglang.srt.environ import envs
 from sglang.srt.utils import kill_process_tree
 from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.run_eval import run_eval
@@ -97,12 +98,13 @@ def setUpClass(cls):
                 ]
             )
         # Use longer timeout for DeepGEMM JIT compilation which can take 10-20 minutes
-        cls.process = popen_launch_server(
-            cls.model,
-            cls.base_url,
-            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH * 2,
-            other_args=other_args,
-        )
+        with envs.SGLANG_ENABLE_SPEC_V2.override(False):
+            cls.process = popen_launch_server(
+                cls.model,
+                cls.base_url,
+                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH * 2,
+                other_args=other_args,
+            )
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py b/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py
index 59dbe8741258..1cf38cdf7115 100644
--- a/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py
+++ b/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py
@@ -3,7 +3,6 @@
 
 import requests
 
-from sglang.srt.environ import envs
 from sglang.srt.utils import kill_process_tree
 from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.run_eval import run_eval
@@ -54,13 +53,12 @@ def setUpClass(cls):
             "--model-loader-extra-config",
             '{"enable_multithread_load": true,"num_threads": 64}',
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(True):
-            cls.process = popen_launch_server(
-                cls.model,
-                cls.base_url,
-                timeout=SERVER_LAUNCH_TIMEOUT,
-                other_args=other_args,
-            )
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=SERVER_LAUNCH_TIMEOUT,
+            other_args=other_args,
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -145,13 +143,12 @@ def setUpClass(cls):
             "--model-loader-extra-config",
             '{"enable_multithread_load": true,"num_threads": 64}',
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(True):
-            cls.process = popen_launch_server(
-                cls.model,
-                cls.base_url,
-                timeout=SERVER_LAUNCH_TIMEOUT,
-                other_args=other_args,
-            )
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=SERVER_LAUNCH_TIMEOUT,
+            other_args=other_args,
+        )
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/registered/spec/eagle/test_adaptive_speculative.py b/test/registered/spec/eagle/test_adaptive_speculative.py
index 6863eacb4934..4f8a0ff4754c 100644
--- a/test/registered/spec/eagle/test_adaptive_speculative.py
+++ b/test/registered/spec/eagle/test_adaptive_speculative.py
@@ -6,6 +6,7 @@
 
 import requests
 
+from sglang.srt.environ import envs
 from sglang.srt.utils import kill_process_tree
 from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.run_eval import run_eval
@@ -58,32 +59,33 @@ def setUpClass(cls):
             cls.adaptive_config_path = f.name
 
         try:
-            cls.process = popen_launch_server(
-                cls.model,
-                cls.base_url,
-                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-                other_args=[
-                    "--trust-remote-code",
-                    "--attention-backend",
-                    "triton",
-                    "--speculative-algorithm",
-                    "EAGLE",
-                    "--speculative-draft-model-path",
-                    cls.draft_model,
-                    "--speculative-num-steps",
-                    "1",
-                    "--speculative-eagle-topk",
-                    "1",
-                    "--speculative-num-draft-tokens",
-                    "2",
-                    "--speculative-adaptive",
-                    "--speculative-adaptive-config",
-                    cls.adaptive_config_path,
-                    "--skip-server-warmup",
-                    "--mem-fraction-static",
-                    "0.7",
-                ],
-            )
+            with envs.SGLANG_ENABLE_SPEC_V2.override(False):
+                cls.process = popen_launch_server(
+                    cls.model,
+                    cls.base_url,
+                    timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+                    other_args=[
+                        "--trust-remote-code",
+                        "--attention-backend",
+                        "triton",
+                        "--speculative-algorithm",
+                        "EAGLE",
+                        "--speculative-draft-model-path",
+                        cls.draft_model,
+                        "--speculative-num-steps",
+                        "1",
+                        "--speculative-eagle-topk",
+                        "1",
+                        "--speculative-num-draft-tokens",
+                        "2",
+                        "--speculative-adaptive",
+                        "--speculative-adaptive-config",
+                        cls.adaptive_config_path,
+                        "--skip-server-warmup",
+                        "--mem-fraction-static",
+                        "0.7",
+                    ],
+                )
         except Exception:
             os.unlink(cls.adaptive_config_path)
             raise
diff --git a/test/registered/spec/eagle/test_deepseek_v3_fp4_mtp_small.py b/test/registered/spec/eagle/test_deepseek_v3_fp4_mtp_small.py
index a8372c84e180..46a7cf5e55b9 100644
--- a/test/registered/spec/eagle/test_deepseek_v3_fp4_mtp_small.py
+++ b/test/registered/spec/eagle/test_deepseek_v3_fp4_mtp_small.py
@@ -49,13 +49,9 @@ def setUpClass(cls):
             "--model-loader-extra-config",
             '{"enable_multithread_load": true,"num_threads": 64}',
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(
+        with envs.SGLANG_SPEC_NAN_DETECTION.override(
             True
-        ), envs.SGLANG_SPEC_NAN_DETECTION.override(
-            True
-        ), envs.SGLANG_SPEC_OOB_DETECTION.override(
-            True
-        ):
+        ), envs.SGLANG_SPEC_OOB_DETECTION.override(True):
             cls.process = popen_launch_server(
                 cls.model,
                 cls.base_url,
diff --git a/test/registered/spec/eagle/test_eagle_infer_a.py b/test/registered/spec/eagle/test_eagle_infer_a.py
index eca84327cfb8..077e4f46bfbd 100644
--- a/test/registered/spec/eagle/test_eagle_infer_a.py
+++ b/test/registered/spec/eagle/test_eagle_infer_a.py
@@ -2,6 +2,7 @@
 import unittest
 
 import sglang as sgl
+from sglang.srt.environ import envs
 from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.test_utils import (
@@ -34,6 +35,14 @@ class TestEAGLEEngine(CustomTestCase):
         "accept_len": 3.6,
     }
 
+    @classmethod
+    def setUpClass(cls):
+        envs.SGLANG_ENABLE_SPEC_V2.set(False)  # default is now True; opt out here
+
+    @classmethod
+    def tearDownClass(cls):
+        envs.SGLANG_ENABLE_SPEC_V2.clear()  # drop the setUpClass override
+
     def setUp(self):
         self.prompt = "Today is a sunny day and I like"
         self.sampling_params = {"temperature": 0, "max_new_tokens": 8}
diff --git a/test/registered/spec/eagle/test_eagle_infer_b.py b/test/registered/spec/eagle/test_eagle_infer_b.py
index 7e941b38e693..3d4449271e9b 100644
--- a/test/registered/spec/eagle/test_eagle_infer_b.py
+++ b/test/registered/spec/eagle/test_eagle_infer_b.py
@@ -30,6 +30,11 @@ class TestEAGLEServerBasic(EagleServerBase):
 
     extra_args = ["--chunked-prefill-size", 128, "--max-running-requests", 8]
 
+    @classmethod
+    def setUpClass(cls):
+        with envs.SGLANG_ENABLE_SPEC_V2.override(False):  # default is now True
+            super().setUpClass()
+
     # FIXME(lsyin): move the test methods to kits
     def test_request_abort(self):
         concurrency = 4
diff --git a/test/registered/spec/eagle/test_eagle_infer_beta.py b/test/registered/spec/eagle/test_eagle_infer_beta.py
index 96a2096a706c..436ac2001e5e 100644
--- a/test/registered/spec/eagle/test_eagle_infer_beta.py
+++ b/test/registered/spec/eagle/test_eagle_infer_beta.py
@@ -63,9 +63,7 @@ def setUpClass(cls):
             *[str(i) for i in range(1, cls.max_running_requests + 1)],
         ]
         launch_args.extend(cls.other_launch_args)
-        with envs.SGLANG_ENABLE_SPEC_V2.override(
-            True
-        ), envs.SGLANG_ENABLE_STRICT_MEM_CHECK_DURING_BUSY.override(
+        with envs.SGLANG_ENABLE_STRICT_MEM_CHECK_DURING_BUSY.override(
             1
         ), envs.SGLANG_SPEC_NAN_DETECTION.override(
             True
diff --git a/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py b/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py
index bb31b88aec70..0dcb7c5a2992 100644
--- a/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py
+++ b/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py
@@ -65,13 +65,9 @@ def setUpClass(cls):
             "--speculative-num-draft-tokens",
             "4",
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(
+        with envs.SGLANG_SPEC_NAN_DETECTION.override(
             True
-        ), envs.SGLANG_SPEC_NAN_DETECTION.override(
-            True
-        ), envs.SGLANG_SPEC_OOB_DETECTION.override(
-            True
-        ):
+        ), envs.SGLANG_SPEC_OOB_DETECTION.override(True):
             cls.process = popen_launch_server(
                 cls.model,
                 cls.base_url,
diff --git a/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention_large.py b/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention_large.py
index c875e995c167..c64acb19cddd 100644
--- a/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention_large.py
+++ b/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention_large.py
@@ -73,13 +73,9 @@ def setUpClass(cls):
             "--model-loader-extra-config",
             '{"enable_multithread_load": true,"num_threads": 64}',
         ]
-        with envs.SGLANG_ENABLE_SPEC_V2.override(
+        with envs.SGLANG_SPEC_NAN_DETECTION.override(
             True
-        ), envs.SGLANG_SPEC_NAN_DETECTION.override(
-            True
-        ), envs.SGLANG_SPEC_OOB_DETECTION.override(
-            True
-        ):
+        ), envs.SGLANG_SPEC_OOB_DETECTION.override(True):
             cls.process = popen_launch_server(
                 cls.model,
                 cls.base_url,
diff --git a/test/registered/spec/test_standalone_speculative_decoding.py b/test/registered/spec/test_standalone_speculative_decoding.py
index 6510210dfde2..240d374f64a8 100644
--- a/test/registered/spec/test_standalone_speculative_decoding.py
+++ b/test/registered/spec/test_standalone_speculative_decoding.py
@@ -1,4 +1,3 @@
-import os
 import unittest
 from types import SimpleNamespace
 
@@ -80,6 +79,7 @@ def setUpClass(cls):
         # please don't do this if you want to make your inference workload faster
         envs.SGLANG_JIT_DEEPGEMM_PRECOMPILE.set(False)
         envs.SGLANG_ENABLE_JIT_DEEPGEMM.set(False)
+        envs.SGLANG_ENABLE_SPEC_V2.set(False)
         model = cls.model
         cls.process = popen_launch_server(
             model,
@@ -91,6 +91,7 @@ def setUpClass(cls):
     @classmethod
     def tearDownClass(cls):
         kill_process_tree(cls.process.pid)
+        envs.SGLANG_ENABLE_SPEC_V2.clear()
 
     def test_gsm8k(self):
         requests.get(self.base_url + "/flush_cache")
@@ -140,7 +141,6 @@ def setUpClass(cls):
         # please don't do this if you want to make your inference workload faster
         envs.SGLANG_JIT_DEEPGEMM_PRECOMPILE.set(False)
         envs.SGLANG_ENABLE_JIT_DEEPGEMM.set(False)
-        envs.SGLANG_ENABLE_SPEC_V2.set(True)  # Enable Speculative Decoding V2
         model = cls.model
         cls.process = popen_launch_server(
             model,
@@ -152,8 +152,6 @@ def setUpClass(cls):
     @classmethod
     def tearDownClass(cls):
         kill_process_tree(cls.process.pid)
-        if "SGLANG_ENABLE_SPEC_V2" in os.environ:
-            envs.SGLANG_ENABLE_SPEC_V2.set(False)
 
     def test_gsm8k(self):
         requests.get(self.base_url + "/flush_cache")