File tree (Expand / Collapse) — 5 files changed: +39 −10 lines changed
lines changed Original file line number Diff line number Diff line change @@ -45,6 +45,10 @@ microsoft/Phi-3.5-mini-instruct:
4545 - accuracy : 31.354
4646microsoft/Phi-4-mini-instruct :
4747 - accuracy : 32.921
48+ bigcode/starcoder2-7b :
49+ - accuracy : 26.611
50+ - quant_algo : FP8
51+ accuracy : 26.611
4852mistralai/Codestral-22B-v0.1 :
4953 - accuracy : 30.316
5054 - quant_algo : FP8
Original file line number Diff line number Diff line change @@ -229,6 +229,10 @@ nvidia/Nemotron-H-56B-Base-8K:
229229 accuracy : 83.82
230230microsoft/Phi-4-mini-instruct :
231231 - accuracy : 68.98
232+ bigcode/starcoder2-7b :
233+ - accuracy : 41.35
234+ - quant_algo : FP8
235+ accuracy : 41.35
232236mistralai/Codestral-22B-v0.1 :
233237 - accuracy : 61.72
234238 - quant_algo : FP8
Original file line number Diff line number Diff line change @@ -435,6 +435,32 @@ def test_auto_dtype(self):
435435 task .evaluate (llm )
436436
437437
class TestStarCoder2_7B(LlmapiAccuracyTestHarness):
    """Accuracy regression tests for bigcode/starcoder2-7b.

    Covers the unquantized (auto-dtype) checkpoint and an FP8-quantized
    variant, each evaluated on CNN/DailyMail and MMLU.
    """
    MODEL_NAME = "bigcode/starcoder2-7b"
    MODEL_PATH = f"{llm_models_root()}/starcoder2-7b"
    # Limit KV-cache growth so the model fits in GPU memory during eval.
    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)

    def _evaluate_tasks(self, llm):
        # Both test variants run the same two benchmarks in the same order.
        for task in (CnnDailymail(self.MODEL_NAME), MMLU(self.MODEL_NAME)):
            task.evaluate(llm)

    @pytest.mark.skip_less_device_memory(70000)
    def test_auto_dtype(self):
        """Evaluate the unquantized checkpoint."""
        with LLM(self.MODEL_PATH, kv_cache_config=self.kv_cache_config) as llm:
            self._evaluate_tasks(llm)

    @skip_pre_ada
    @pytest.mark.skip_less_device_memory(70000)
    def test_fp8(self):
        """Evaluate the FP8-quantized model (requires Ada or newer)."""
        quant_config = QuantConfig(QuantAlgo.FP8)
        with LLM(self.MODEL_PATH,
                 quant_config=quant_config,
                 kv_cache_config=self.kv_cache_config) as llm:
            self._evaluate_tasks(llm)
438464class TestCodestral_22B_V01 (LlmapiAccuracyTestHarness ):
439465 MODEL_NAME = "mistralai/Codestral-22B-v0.1"
440466 MODEL_PATH = f"{ llm_models_root ()} /Codestral-22B-v0.1"
Original file line number Diff line number Diff line change @@ -2426,16 +2426,6 @@ def test_auto_dtype(self):
24262426 task .evaluate (llm )
24272427
24282428
2429- class TestStarCoder2_7B (LlmapiAccuracyTestHarness ):
2430- MODEL_NAME = "bigcode/starcoder2-7b"
2431- MODEL_PATH = f"{ llm_models_root ()} /starcoder2-7b"
2432-
2433- def test_auto_dtype (self ):
2434- with LLM (self .MODEL_PATH ) as llm :
2435- task = MMLU (self .MODEL_NAME )
2436- task .evaluate (llm )
2437-
2438-
24392429class TestCodestral_22B_V01 (LlmapiAccuracyTestHarness ):
24402430 MODEL_NAME = "mistralai/Codestral-22B-v0.1"
24412431 MODEL_PATH = f"{ llm_models_root ()} /Codestral-22B-v0.1"
Original file line number Diff line number Diff line change @@ -21,3 +21,8 @@ accuracy/test_llm_api_pytorch.py::TestNemotronH_56B_Base::test_auto_dtype[tp8-cu
2121accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True]
2222accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8ep4-cuda_graph=True]
2323accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8-cuda_graph=True]
24+ accuracy/test_llm_api.py::TestStarCoder2_7B::test_auto_dtype
25+ accuracy/test_llm_api.py::TestStarCoder2_7B::test_fp8
26+ accuracy/test_llm_api.py::TestCodestral_22B_V01::test_auto_dtype
27+ accuracy/test_llm_api.py::TestCodestral_22B_V01::test_fp8
28+ accuracy/test_llm_api_pytorch.py::TestCodestral_22B_V01::test_auto_dtype
You can’t perform that action at this time.
0 commit comments