Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions numba_cuda/numba/cuda/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import Iterable, Union
from io import StringIO
import unittest
import numpy as np

if PYVERSION >= (3, 10):
from filecheck.matcher import Matcher
Expand Down Expand Up @@ -44,6 +45,8 @@ class CUDATestCase(TestCase):
matches FileCheck checks, and is not specific to CUDADispatcher.
"""

FLOAT16_RTOL = np.finfo(np.float16).eps

def setUp(self):
self._low_occupancy_warnings = config.CUDA_LOW_OCCUPANCY_WARNINGS
self._warn_on_implicit_copy = config.CUDA_WARN_ON_IMPLICIT_COPY
Expand Down
44 changes: 23 additions & 21 deletions numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ def test_hadd(self):
arg1 = np.array([3.0], dtype=np.float16)
arg2 = np.array([4.0], dtype=np.float16)
compiled[1, 1](ary, arg1, arg2)
np.testing.assert_allclose(ary[0], arg1 + arg2)
np.testing.assert_allclose(ary[0], arg1 + arg2, rtol=self.FLOAT16_RTOL)

@skip_unless_cc_53
def test_hadd_scalar(self):
Expand All @@ -639,7 +639,7 @@ def test_hadd_scalar(self):
arg2 = np.float16(3.0)
compiled[1, 1](ary, arg1, arg2)
ref = arg1 + arg2
np.testing.assert_allclose(ary[0], ref)
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

@skip_on_cudasim("Compilation unsupported in the simulator")
@skip_if_nvjitlink_missing("Numbast generated bindings")
Expand All @@ -657,7 +657,9 @@ def test_hfma(self):
arg2 = np.array([3.0], dtype=np.float16)
arg3 = np.array([4.0], dtype=np.float16)
compiled[1, 1](ary, arg1, arg2, arg3)
np.testing.assert_allclose(ary[0], arg1 * arg2 + arg3)
np.testing.assert_allclose(
ary[0], arg1 * arg2 + arg3, rtol=self.FLOAT16_RTOL
)

@skip_unless_cc_53
def test_hfma_scalar(self):
Expand All @@ -668,7 +670,7 @@ def test_hfma_scalar(self):
arg3 = np.float16(4.0)
compiled[1, 1](ary, arg1, arg2, arg3)
ref = arg1 * arg2 + arg3
np.testing.assert_allclose(ary[0], ref)
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

@skip_on_cudasim("Compilation unsupported in the simulator")
@skip_if_nvjitlink_missing("Numbast generated bindings")
Expand All @@ -687,7 +689,7 @@ def test_hsub(self):
arg1 = np.array([3.0], dtype=np.float16)
arg2 = np.array([4.0], dtype=np.float16)
compiled[1, 1](ary, arg1, arg2)
np.testing.assert_allclose(ary[0], arg1 - arg2)
np.testing.assert_allclose(ary[0], arg1 - arg2, rtol=self.FLOAT16_RTOL)

@skip_unless_cc_53
def test_hsub_scalar(self):
Expand All @@ -697,7 +699,7 @@ def test_hsub_scalar(self):
arg2 = np.float16(1.57)
compiled[1, 1](ary, arg1, arg2)
ref = arg1 - arg2
np.testing.assert_allclose(ary[0], ref)
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

@skip_on_cudasim("Compilation unsupported in the simulator")
@skip_if_nvjitlink_missing("Numbast generated bindings")
Expand All @@ -714,7 +716,7 @@ def test_hmul(self):
arg1 = np.array([3.0], dtype=np.float16)
arg2 = np.array([4.0], dtype=np.float16)
compiled[1, 1](ary, arg1, arg2)
np.testing.assert_allclose(ary[0], arg1 * arg2)
np.testing.assert_allclose(ary[0], arg1 * arg2, rtol=self.FLOAT16_RTOL)

@skip_unless_cc_53
def test_hmul_scalar(self):
Expand All @@ -724,7 +726,7 @@ def test_hmul_scalar(self):
arg2 = np.float16(1.57)
compiled[1, 1](ary, arg1, arg2)
ref = arg1 * arg2
np.testing.assert_allclose(ary[0], ref)
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

@skip_on_cudasim("Compilation unsupported in the simulator")
@skip_if_nvjitlink_missing("Numbast generated bindings")
Expand All @@ -743,7 +745,7 @@ def test_hdiv_scalar(self):

compiled[1, 1](ary, arg1, arg2)
ref = arg1 / arg2
np.testing.assert_allclose(ary[0], ref)
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

@skip_unless_cc_53
def test_hdiv(self):
Expand All @@ -754,15 +756,15 @@ def test_hdiv(self):

compiled.forall(ary.size)(ary, arry1, arry2)
ref = arry1 / arry2
np.testing.assert_allclose(ary, ref)
np.testing.assert_allclose(ary, ref, rtol=self.FLOAT16_RTOL)

@skip_unless_cc_53
def test_hneg(self):
compiled = cuda.jit("void(f2[:], f2[:])")(simple_hneg)
ary = np.zeros(1, dtype=np.float16)
arg1 = np.array([3.0], dtype=np.float16)
compiled[1, 1](ary, arg1)
np.testing.assert_allclose(ary[0], -arg1)
np.testing.assert_allclose(ary[0], -arg1, rtol=self.FLOAT16_RTOL)

@skip_unless_cc_53
def test_hneg_scalar(self):
Expand All @@ -771,7 +773,7 @@ def test_hneg_scalar(self):
arg1 = np.float16(3.1415926)
compiled[1, 1](ary, arg1)
ref = -arg1
np.testing.assert_allclose(ary[0], ref)
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

@skip_on_cudasim("Compilation unsupported in the simulator")
@skip_if_nvjitlink_missing("Numbast generated bindings")
Expand All @@ -787,7 +789,7 @@ def test_habs(self):
ary = np.zeros(1, dtype=np.float16)
arg1 = np.array([-3.0], dtype=np.float16)
compiled[1, 1](ary, arg1)
np.testing.assert_allclose(ary[0], abs(arg1))
np.testing.assert_allclose(ary[0], abs(arg1), rtol=self.FLOAT16_RTOL)

@skip_unless_cc_53
def test_habs_scalar(self):
Expand All @@ -796,7 +798,7 @@ def test_habs_scalar(self):
arg1 = np.float16(-3.1415926)
compiled[1, 1](ary, arg1)
ref = abs(arg1)
np.testing.assert_allclose(ary[0], ref)
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)

@skip_on_cudasim("Compilation unsupported in the simulator")
@skip_if_nvjitlink_missing("Numbast generated bindings")
Expand Down Expand Up @@ -849,15 +851,15 @@ def test_fp16_intrinsics_common(self):
kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
kernel[1, N](r, x)
expected = fn(x, dtype=np.float16)
np.testing.assert_allclose(r, expected)
np.testing.assert_allclose(r, expected, rtol=self.FLOAT16_RTOL)

x2 = np.random.randint(1, 10, size=N).astype(np.float16)
for kernel, fn in zip(exp_kernels, expected_exp_functions):
with self.subTest(fn=fn):
kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
kernel[1, N](r, x2)
expected = fn(x2, dtype=np.float16)
np.testing.assert_allclose(r, expected)
np.testing.assert_allclose(r, expected, rtol=self.FLOAT16_RTOL)

@skip_unless_cc_53
def test_hexp10(self):
Expand All @@ -876,7 +878,7 @@ def hexp10_vectors(r, x):

# Run the kernel
hexp10_vectors[1, N](r, x)
np.testing.assert_allclose(r, 10**x)
np.testing.assert_allclose(r, 10**x, rtol=self.FLOAT16_RTOL)

@skip_unless_cc_53
def test_fp16_comparison(self):
Expand Down Expand Up @@ -948,10 +950,10 @@ def test_hmax(self):
arg1 = np.float16(3.0)
arg2 = np.float16(4.0)
compiled[1, 1](ary, arg1, arg2)
np.testing.assert_allclose(ary[0], arg2)
np.testing.assert_allclose(ary[0], arg2, rtol=self.FLOAT16_RTOL)
arg1 = np.float16(5.0)
compiled[1, 1](ary, arg1, arg2)
np.testing.assert_allclose(ary[0], arg1)
np.testing.assert_allclose(ary[0], arg1, rtol=self.FLOAT16_RTOL)

@skip_unless_cc_53
def test_hmin(self):
Expand All @@ -960,10 +962,10 @@ def test_hmin(self):
arg1 = np.float16(3.0)
arg2 = np.float16(4.0)
compiled[1, 1](ary, arg1, arg2)
np.testing.assert_allclose(ary[0], arg1)
np.testing.assert_allclose(ary[0], arg1, rtol=self.FLOAT16_RTOL)
arg1 = np.float16(5.0)
compiled[1, 1](ary, arg1, arg2)
np.testing.assert_allclose(ary[0], arg2)
np.testing.assert_allclose(ary[0], arg2, rtol=self.FLOAT16_RTOL)

def test_cbrt_f32(self):
compiled = cuda.jit("void(float32[:], float32)")(simple_cbrt)
Expand Down