Skip to content
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions tester/api_config/config_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,39 @@
"paddle.mod",
]

def generate_unique_array(num_items, float_dtype):
    """Return a sorted 1-D array of `num_items` unique, finite floats of `float_dtype`.

    Values are produced deterministically by reinterpreting consecutive
    unsigned-integer bit patterns as floats (``arange(...).view(float_dtype)``),
    which guarantees uniqueness at the bit level; non-finite patterns
    (NaN/inf) are filtered out afterwards.

    Args:
        num_items: Number of unique values required.
        float_dtype: Target float dtype; one of float16/float32/float64
            (anything numpy.dtype-compatible).

    Raises:
        ValueError: If the dtype is unsupported, or `num_items` unique
            finite values cannot be produced within the bit-pattern range.
    """
    def get_integer_dtype(float_dtype):
        # Map the float dtype to the unsigned-integer dtype of equal width,
        # so a .view() reinterprets bit patterns without copying.
        float_dtype = numpy.dtype(float_dtype)
        if float_dtype == numpy.float16:
            return numpy.uint16, 16
        elif float_dtype == numpy.float32:
            return numpy.uint32, 32
        elif float_dtype == numpy.float64:
            return numpy.uint64, 64
        else:
            raise ValueError(f"Unsupported float dtype: {float_dtype}")

    integer_dtype, bits = get_integer_dtype(float_dtype)
    max_int = (1 << bits) - 1
    np_float_dtype = numpy.dtype(float_dtype)
    current_start_value = 1  # skip bit pattern 0 (it maps to 0.0)
    # Start from a correctly-typed empty array so that numpy.concatenate
    # never promotes the result to float64 (the original code seeded with a
    # Python list, which silently widened the dtype on the first pass).
    return_list = numpy.empty(0, dtype=np_float_dtype)
    attempt_count = 0
    while len(return_list) < num_items and attempt_count < 3:
        # Over-generate by 1.5x to compensate for filtered NaN/inf patterns.
        nums_to_generate = int(num_items * 1.5)
        if current_start_value >= max_int:
            raise ValueError(f"Cannot generate {num_items} unique items of type {float_dtype} within the range.")
        end_value = min(current_start_value + nums_to_generate, max_int)
        candidate_bits = numpy.arange(current_start_value, end_value, dtype=integer_dtype)
        float_arr = candidate_bits.view(np_float_dtype)
        # numpy.unique also sorts, so the returned values are in ascending order.
        return_list = numpy.unique(
            numpy.concatenate([return_list, float_arr[numpy.isfinite(float_arr)]])
        )
        current_start_value = end_value
        attempt_count += 1
    if len(return_list) < num_items:
        raise ValueError(f"Could not generate {num_items} unique items of type {float_dtype}")
    return return_list[:num_items]

class TensorConfig:
def __init__(self, shape, dtype, place=None):
self.shape = shape
Expand Down Expand Up @@ -1636,8 +1669,21 @@ def get_padding_offset(bsz, max_seq_len, seq_lens_this_time):
if self.check_arg(api_config, 1, "repeat_times"):
self.numpy_tensor = numpy.random.randint(1, 128, size=self.shape).astype(self.dtype)

elif api_config.api_name == "paddle.topk":
if self.check_arg(api_config, 1, "k"):
elif api_config.api_name in {"paddle.topk", "paddle.Tensor.topk"}:
if self.check_arg(api_config, 0, "x"):
x_numel = self.numel()
if self.dtype in {"bfloat16", "float32", "float64"}:
dtype = "float32" if self.dtype == "bfloat16" else self.dtype
self.numpy_tensor = numpy.linspace(-x_numel, x_numel, x_numel, dtype=dtype).reshape(self.shape)
if numpy.unique(self.numpy_tensor).size < x_numel:
self.numpy_tensor = generate_unique_array(x_numel, dtype).reshape(self.shape)
elif self.dtype == "float16":
self.numpy_tensor = generate_unique_array(x_numel, self.dtype).reshape(self.shape)
Comment on lines +1677 to +1683
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

非 float 16 经过 numpy.linspace 也会产生相同的元素吗(舍入错误),考虑将范围放在 dtype 的最大范围内再生成呢

numpy.linspace(numpy.finfo(dtype).min, numpy.finfo(dtype).max, x_numel, dtype=dtype).reshape(self.shape)

float16 也试试。看了下有一些 float16 配置 numel 很大,但是 float16 表示范围很有限。

Copy link
Collaborator

@Cutelemon6 Cutelemon6 Jun 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

upd:float16 最多表示 63488 个有限值,超出这个元素数量的 tensor 具有随机输出。

同学可以了解一下 float16 的表示范围,元素数量1亿以内的的 float16 tensor 理论上都能通过,没有随机性。

print(f"float16 max: {numpy.finfo(numpy.float16).max}, float16 min: {numpy.finfo(numpy.float16).min}, float16 eps: {numpy.finfo(numpy.float16).eps}")
print(f"max tensor numel est: {(numpy.finfo(numpy.float16).max.astype(numpy.float64) - numpy.finfo(numpy.float16).min.astype(numpy.float64)) / numpy.finfo(numpy.float16).eps}")

output

float16 max: 65504.0, float16 min: -65504.0, float16 eps: 0.0009765625
max tensor numel est: 134152192.0

elif self.dtype in {"int32", "int64"}:
self.numpy_tensor = numpy.random.choice(numpy.arange(-x_numel, x_numel), size=self.shape, replace=False).astype(self.dtype)
else:
raise ValueError(f"Unsupported dtype {self.dtype} for paddle.topk")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

f"Unsupported dtype {self.dtype} for paddle.topk / paddle.Tensor.topk"

elif self.check_arg(api_config, 1, "k"):
x_config = self.get_arg(api_config, 0, "x")
max_k_value = 1
if isinstance(x_config, TensorConfig) and x_config.shape:
Expand Down Expand Up @@ -1760,7 +1806,7 @@ def get_padding_offset(bsz, max_seq_len, seq_lens_this_time):
else:
# self.check_arg(api_config, 1, "other"):
self.numpy_tensor = self.get_random_numpy_tensor(self.shape, self.dtype, min=-10, max=10)

if self.numpy_tensor is None:
if USE_CACHED_NUMPY:
dtype = "float32" if self.dtype == "bfloat16" else self.dtype
Expand Down