Skip to content

Commit 12d200d

Browse files
Merge pull request #217 from ooooo-create/fix_topk
[Accuracy diff No.70] Fix accuracy diff for topk API
2 parents c86f028 + ca21f30 commit 12d200d

File tree

3 files changed

+50
-4
lines changed

3 files changed

+50
-4
lines changed

tester/api_config/2_paddle_only_random/random_calculation.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25604,3 +25604,4 @@ paddle.put_along_axis(Tensor([6, 8000],"float32"), Tensor([6, 799],"int64"), Ten
2560425604
paddle.put_along_axis(Tensor([7, 8000],"float32"), Tensor([7, 799],"int64"), Tensor([7, 799],"float32"), 1, )
2560525605
paddle.put_along_axis(Tensor([8, 8000],"float32"), Tensor([8, 799],"int64"), Tensor([8, 799],"float32"), 1, )
2560625606
paddle.put_along_axis(Tensor([9, 8000],"float32"), Tensor([9, 799],"int64"), Tensor([9, 799],"float32"), 1, )
25607+
paddle.topk(Tensor([128, 1000],"float16"), k=5, )

tester/api_config/5_accuracy/accuracy_gpu_error.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20095,7 +20095,6 @@ paddle.topk(Tensor([1, 93, 37044],"float32"), 13, axis=-1, largest=True, )
2009520095
paddle.topk(Tensor([1, 96, 52500],"float32"), 13, axis=-1, largest=True, )
2009620096
paddle.topk(Tensor([1, 99, 27216],"float32"), 13, axis=-1, largest=True, )
2009720097
paddle.topk(Tensor([1022, 14],"float32"), 10, axis=0, )
20098-
paddle.topk(Tensor([128, 1000],"float16"), k=5, )
2009920098
paddle.topk(Tensor([12906, 215],"float32"), 10, axis=0, )
2010020099
paddle.topk(Tensor([1302, 7],"float32"), 10, axis=0, )
2010120100
paddle.topk(Tensor([14, 400],"float16"), k=5, )

tester/api_config/config_analyzer.py

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,39 @@
3232
"paddle.mod",
3333
]
3434

35+
def generate_unique_array(num_items, float_dtype):
    """Return a 1-D numpy array of ``num_items`` unique, finite floats of ``float_dtype``.

    Values are produced deterministically by reinterpreting consecutive
    unsigned-integer bit patterns as floats of the same width, which
    guarantees every element is distinct (used to build tie-free inputs,
    e.g. for ``paddle.topk`` accuracy checks where equal values make the
    result order ambiguous).

    Args:
        num_items: number of unique values required.
        float_dtype: target float dtype (``numpy.float16``/``float32``/
            ``float64`` or their string names).

    Raises:
        ValueError: if ``float_dtype`` is not a supported float type, or if
            not enough unique finite values can be generated in range.
    """
    def get_integer_dtype(float_dtype):
        # Map the float dtype to the same-width unsigned integer dtype so a
        # zero-copy .view() between the two is valid.
        float_dtype = numpy.dtype(float_dtype)
        if float_dtype == numpy.float16:
            return numpy.uint16, 16
        elif float_dtype == numpy.float32:
            return numpy.uint32, 32
        elif float_dtype == numpy.float64:
            return numpy.uint64, 64
        else:
            raise ValueError(f"Unsupported float dtype: {float_dtype}")

    integer_dtype, bits = get_integer_dtype(float_dtype)
    max_int = (1 << bits) - 1
    current_start_value = 1
    # BUG FIX: the original initialized `return_list = []`, which made the
    # `if return_list is None:` branch dead code AND caused
    # numpy.concatenate([[], candidates]) to promote the result to float64
    # (the dtype numpy assigns an empty Python list), so the function always
    # returned float64 regardless of the requested dtype. Start with an
    # empty array of the requested dtype so the output dtype is preserved.
    return_list = numpy.empty(0, dtype=numpy.dtype(float_dtype))
    attempt_count = 0
    while len(return_list) < num_items and attempt_count < 3:
        # Over-generate to compensate for candidates dropped as NaN/inf.
        nums_to_generate = int(num_items * 1.5)
        if current_start_value >= max_int:
            raise ValueError(f"Cannot generate {num_items} unique items of type {float_dtype} within the range.")
        end_value = min(current_start_value + nums_to_generate, max_int)
        # Consecutive bit patterns reinterpreted as floats -> distinct values.
        # (Not random despite the original name `random_arr`.)
        bit_patterns = numpy.arange(current_start_value, end_value, dtype=integer_dtype)
        candidates = bit_patterns.view(float_dtype)
        return_list = numpy.unique(
            numpy.concatenate([return_list, candidates[numpy.isfinite(candidates)]])
        )
        current_start_value = end_value
        attempt_count += 1
    if len(return_list) < num_items:
        raise ValueError(f"Could not generate {num_items} unique items of type {float_dtype}")
    return return_list[:num_items]
67+
3568
class TensorConfig:
3669
def __init__(self, shape, dtype, place=None):
3770
self.shape = shape
@@ -1638,8 +1671,21 @@ def get_padding_offset(bsz, max_seq_len, seq_lens_this_time):
16381671
if self.check_arg(api_config, 1, "repeat_times"):
16391672
self.numpy_tensor = numpy.random.randint(1, 128, size=self.shape).astype(self.dtype)
16401673

1641-
elif api_config.api_name == "paddle.topk":
1642-
if self.check_arg(api_config, 1, "k"):
1674+
elif api_config.api_name in {"paddle.topk", "paddle.Tensor.topk"}:
1675+
if self.check_arg(api_config, 0, "x"):
1676+
x_numel = self.numel()
1677+
if self.dtype in {"bfloat16", "float32", "float64"}:
1678+
dtype = "float32" if self.dtype == "bfloat16" else self.dtype
1679+
self.numpy_tensor = numpy.linspace(-x_numel, x_numel, x_numel, dtype=dtype).reshape(self.shape)
1680+
if numpy.unique(self.numpy_tensor).size < x_numel:
1681+
self.numpy_tensor = generate_unique_array(x_numel, dtype).reshape(self.shape)
1682+
elif self.dtype == "float16":
1683+
self.numpy_tensor = generate_unique_array(x_numel, self.dtype).reshape(self.shape)
1684+
elif self.dtype in {"int32", "int64"}:
1685+
self.numpy_tensor = numpy.random.choice(numpy.arange(-x_numel, x_numel), size=self.shape, replace=False).astype(self.dtype)
1686+
else:
1687+
raise ValueError(f"Unsupported dtype {self.dtype} for paddle.topk / paddle.Tensor.topk")
1688+
elif self.check_arg(api_config, 1, "k"):
16431689
x_config = self.get_arg(api_config, 0, "x")
16441690
max_k_value = 1
16451691
if isinstance(x_config, TensorConfig) and x_config.shape:
@@ -1762,7 +1808,7 @@ def get_padding_offset(bsz, max_seq_len, seq_lens_this_time):
17621808
else:
17631809
# self.check_arg(api_config, 1, "other"):
17641810
self.numpy_tensor = self.get_random_numpy_tensor(self.shape, self.dtype, min=-10, max=10)
1765-
1811+
17661812
if self.numpy_tensor is None:
17671813
if USE_CACHED_NUMPY and self.dtype not in ["int64", "float64"]:
17681814
dtype = "float32" if self.dtype == "bfloat16" else self.dtype

0 commit comments

Comments
 (0)