-
Notifications
You must be signed in to change notification settings - Fork 39
[Accuracy diff No.70] Fix accuracy diff for topk API #217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
3940ca9
e5dd429
eadc457
5ade436
ca21f30
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,39 @@ | |
| "paddle.mod", | ||
| ] | ||
|
|
||
def generate_unique_array(num_items, float_dtype):
    """Return ``num_items`` unique, finite floats of ``float_dtype``.

    Values are produced by reinterpreting consecutive unsigned-integer bit
    patterns (starting at 1) as floats of the requested width, discarding the
    non-finite results (NaN/Inf bit patterns), and deduplicating.  Because
    ``numpy.unique`` is used to deduplicate, the returned array is sorted in
    ascending order.

    Args:
        num_items: Number of unique values required.
        float_dtype: float16/float32/float64, or anything ``numpy.dtype``
            accepts that resolves to one of those.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``float_dtype`` and size
        ``num_items``.

    Raises:
        ValueError: If ``float_dtype`` is unsupported, or not enough unique
            finite values can be generated within the dtype's bit range.
    """

    def _integer_dtype_for(dtype):
        # Map the float dtype to the same-width unsigned integer dtype so a
        # bit-pattern reinterpretation via ``.view`` is valid.
        dtype = numpy.dtype(dtype)
        if dtype == numpy.float16:
            return numpy.uint16, 16
        if dtype == numpy.float32:
            return numpy.uint32, 32
        if dtype == numpy.float64:
            return numpy.uint64, 64
        raise ValueError(f"Unsupported float dtype: {dtype}")

    integer_dtype, bits = _integer_dtype_for(float_dtype)
    float_dtype = numpy.dtype(float_dtype)
    max_int = (1 << bits) - 1
    current_start_value = 1
    # Accumulate into an empty array of the TARGET dtype.  The previous
    # version seeded with a Python list, so the first numpy.concatenate
    # promoted the result to float64 regardless of the requested dtype (and
    # its `if return_list is None` branch was unreachable dead code).
    collected = numpy.empty(0, dtype=float_dtype)
    attempt_count = 0
    # Oversample by 1.5x per attempt: some bit patterns decode to NaN/Inf
    # and are filtered out, so a single pass may come up short.
    while collected.size < num_items and attempt_count < 3:
        nums_to_generate = int(num_items * 1.5)
        if current_start_value >= max_int:
            raise ValueError(f"Cannot generate {num_items} unique items of type {float_dtype} within the range.")
        end_value = min(current_start_value + nums_to_generate, max_int)
        candidate_bits = numpy.arange(current_start_value, end_value, dtype=integer_dtype)
        float_arr = candidate_bits.view(float_dtype)
        finite_values = float_arr[numpy.isfinite(float_arr)]
        # numpy.unique both deduplicates and sorts the accumulated values.
        collected = numpy.unique(numpy.concatenate([collected, finite_values]))
        current_start_value = end_value
        attempt_count += 1
    if collected.size < num_items:
        raise ValueError(f"Could not generate {num_items} unique items of type {float_dtype}")
    return collected[:num_items]
|
|
||
| class TensorConfig: | ||
| def __init__(self, shape, dtype, place=None): | ||
| self.shape = shape | ||
|
|
@@ -1636,8 +1669,21 @@ def get_padding_offset(bsz, max_seq_len, seq_lens_this_time): | |
| if self.check_arg(api_config, 1, "repeat_times"): | ||
| self.numpy_tensor = numpy.random.randint(1, 128, size=self.shape).astype(self.dtype) | ||
|
|
||
| elif api_config.api_name == "paddle.topk": | ||
| if self.check_arg(api_config, 1, "k"): | ||
| elif api_config.api_name in {"paddle.topk", "paddle.Tensor.topk"}: | ||
| if self.check_arg(api_config, 0, "x"): | ||
| x_numel = self.numel() | ||
| if self.dtype in {"bfloat16", "float32", "float64"}: | ||
| dtype = "float32" if self.dtype == "bfloat16" else self.dtype | ||
| self.numpy_tensor = numpy.linspace(-x_numel, x_numel, x_numel, dtype=dtype).reshape(self.shape) | ||
| if numpy.unique(self.numpy_tensor).size < x_numel: | ||
| self.numpy_tensor = generate_unique_array(x_numel, dtype).reshape(self.shape) | ||
| elif self.dtype == "float16": | ||
| self.numpy_tensor = generate_unique_array(x_numel, self.dtype).reshape(self.shape) | ||
|
Comment on lines
+1677
to
+1683
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 非 float 16 经过 numpy.linspace 也会产生相同的元素吗(舍入错误),考虑将范围放在 dtype 的最大范围内再生成呢 numpy.linspace(numpy.finfo(dtype).min, numpy.finfo(dtype).max, x_numel, dtype=dtype).reshape(self.shape)float16 也试试。看了下有一些 float16 配置 numel 很大,但是 float16 表示范围很有限。
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. upd:float16 最多表示 63488 个有限值,超出这个元素数量的 tensor 具有随机输出。
print(f"float16 max: {numpy.finfo(numpy.float16).max}, float16 min: {numpy.finfo(numpy.float16).min}, float16 eps: {numpy.finfo(numpy.float16).eps}")
print(f"max tensor numel est: {(numpy.finfo(numpy.float16).max.astype(numpy.float64) - numpy.finfo(numpy.float16).min.astype(numpy.float64)) / numpy.finfo(numpy.float16).eps}")output float16 max: 65504.0, float16 min: -65504.0, float16 eps: 0.0009765625
max tensor numel est: 134152192.0 |
||
| elif self.dtype in {"int32", "int64"}: | ||
| self.numpy_tensor = numpy.random.choice(numpy.arange(-x_numel, x_numel), size=self.shape, replace=False).astype(self.dtype) | ||
| else: | ||
| raise ValueError(f"Unsupported dtype {self.dtype} for paddle.topk") | ||
|
||
| elif self.check_arg(api_config, 1, "k"): | ||
| x_config = self.get_arg(api_config, 0, "x") | ||
| max_k_value = 1 | ||
| if isinstance(x_config, TensorConfig) and x_config.shape: | ||
|
|
@@ -1760,7 +1806,7 @@ def get_padding_offset(bsz, max_seq_len, seq_lens_this_time): | |
| else: | ||
| # self.check_arg(api_config, 1, "other"): | ||
| self.numpy_tensor = self.get_random_numpy_tensor(self.shape, self.dtype, min=-10, max=10) | ||
|
|
||
| if self.numpy_tensor is None: | ||
| if USE_CACHED_NUMPY: | ||
| dtype = "float32" if self.dtype == "bfloat16" else self.dtype | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.