diff --git a/numba_cuda/numba/cuda/np/npyfuncs.py b/numba_cuda/numba/cuda/np/npyfuncs.py
new file mode 100644
index 000000000..78cf49f0f
--- /dev/null
+++ b/numba_cuda/numba/cuda/np/npyfuncs.py
@@ -0,0 +1,1809 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-2-Clause
+
+"""Codegen for functions used as kernels in NumPy functions
+
+Typically, the kernels of several ufuncs that can't map directly to
+Python builtins
+"""
+
+import math
+
+import llvmlite.ir
+import numpy as np
+
+from numba.core.extending import overload
+from numba.core.imputils import impl_ret_untracked
+from numba.core import typing, types, errors, cgutils, config
+from numba.core.extending import register_jitable
+from numba.np import npdatetime
+from numba.np.math import cmathimpl, mathimpl, numbers
+from numba.np.numpy_support import numpy_version
+
+# some NumPy constants. Note that we could generate some of them using
+# the math library, but having the values copied from npy_math seems to
+# yield more accurate results
+_NPY_LOG2E = 1.442695040888963407359924681001892137  # math.log(math.e, 2)
+_NPY_LOG10E = 0.434294481903251827651128918916605082  # math.log(math.e, 10)
+_NPY_LOGE2 = 0.693147180559945309417232121458176568  # math.log(2)
+
+
+def _check_arity_and_homogeneity(sig, args, arity, return_type=None):
+    """checks that the following are true:
+    - args and sig.args have arg_count elements
+    - all input types are homogeneous
+    - return type is 'return_type' if provided, otherwise it must be
+      homogeneous with the input types.
+    """
+    assert len(args) == arity
+    assert len(sig.args) == arity
+    ty = sig.args[0]
+    if return_type is None:
+        return_type = ty
+    # must have homogeneous args
+    if not (
+        all(arg == ty for arg in sig.args) and sig.return_type == return_type
+    ):
+        import inspect
+
+        fname = inspect.currentframe().f_back.f_code.co_name
+        msg = "{0} called with invalid types: {1}".format(fname, sig)
+        assert False, msg
+
+
+if getattr(config, "USE_LEGACY_TYPE_SYSTEM", True):
+    cast_arg_ty = types.float64
+else:
+    cast_arg_ty = types.np_float64
+
+
+def _call_func_by_name_with_cast(
+    context, builder, sig, args, func_name, ty=cast_arg_ty
+):
+    # it is quite common in NumPy to have loops implemented as a call
+    # to the double version of the function, wrapped in casts. This
+    # helper function facilitates that.
+    mod = builder.module
+    lty = context.get_argument_type(ty)
+    fnty = llvmlite.ir.FunctionType(lty, [lty] * len(sig.args))
+    fn = cgutils.insert_pure_function(mod, fnty, name=func_name)
+    cast_args = [
+        context.cast(builder, arg, argty, ty)
+        for arg, argty in zip(args, sig.args)
+    ]
+
+    result = builder.call(fn, cast_args)
+    return context.cast(builder, result, types.float64, sig.return_type)
+
+
+def _dispatch_func_by_name_type(context, builder, sig, args, table, user_name):
+    # for most cases the functions are homogeneous on all their types.
+    # this code dispatches on the first argument type as it is the most useful
+    # for our uses (all cases but ldexp are homogeneous in all types, and
+    # dispatching on the first argument type works of ldexp as well)
+    #
+    # assumes that the function pointed by func_name has the type
+    # signature sig (but needs translation to llvm types).
+
+    ty = sig.args[0]
+    try:
+        func_name = table[ty]
+    except KeyError as e:
+        msg = "No {0} function for real type {1}".format(user_name, str(e))
+        raise errors.LoweringError(msg)
+
+    mod = builder.module
+    if ty in types.complex_domain:
+        # In numba struct types are always passed by pointer. So the call has to
+        # be transformed from "result = func(ops...)" to "func(&result, ops...).
+        # note that the result value pointer as first argument is the convention
+        # used by numba.
+
+        # First, prepare the return value
+        out = context.make_complex(builder, ty)
+        ptrargs = [cgutils.alloca_once_value(builder, arg) for arg in args]
+        call_args = [out._getpointer()] + ptrargs
+        # get_value_as_argument for struct types like complex allocate stack space
+        # and initialize with the value, the return value is the pointer to that
+        # allocated space (ie: pointer to a copy of the value in the stack).
+        # get_argument_type returns a pointer to the struct type in consonance.
+        call_argtys = [ty] + list(sig.args)
+        call_argltys = [
+            context.get_value_type(ty).as_pointer() for ty in call_argtys
+        ]
+        fnty = llvmlite.ir.FunctionType(llvmlite.ir.VoidType(), call_argltys)
+        # Note: the function isn't pure here (it writes to its pointer args)
+        fn = cgutils.get_or_insert_function(mod, fnty, func_name)
+        builder.call(fn, call_args)
+        retval = builder.load(call_args[0])
+    else:
+        argtypes = [context.get_argument_type(aty) for aty in sig.args]
+        restype = context.get_argument_type(sig.return_type)
+        fnty = llvmlite.ir.FunctionType(restype, argtypes)
+        fn = cgutils.insert_pure_function(mod, fnty, name=func_name)
+        retval = context.call_external_function(builder, fn, sig.args, args)
+    return retval
+
+
+########################################################################
+# Division kernels inspired by NumPy loops.c.src code
+#
+# The builtins are not applicable as they rely on a test for zero in the
+# denominator. If it is zero the appropriate exception is raised.
+# In NumPy, a division by zero does not raise an exception, but instead
+# generated a known value. Note that a division by zero in any of the
+# operations of a vector may raise an exception or issue a warning
+# depending on the np.seterr configuration. This is not supported
+# right now (and in any case, it won't be handled by these functions
+# either)
+
+
+def np_int_sdiv_impl(context, builder, sig, args):
+    # based on the actual code in NumPy loops.c.src for signed integer types
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    num, den = args
+    ty = sig.args[0]  # any arg type will do, homogeneous
+
+    ZERO = context.get_constant(ty, 0)
+    MINUS_ONE = context.get_constant(ty, -1)
+    MIN_INT = context.get_constant(ty, 1 << (den.type.width - 1))
+    den_is_zero = builder.icmp_unsigned("==", ZERO, den)
+    den_is_minus_one = builder.icmp_unsigned("==", MINUS_ONE, den)
+    num_is_min_int = builder.icmp_unsigned("==", MIN_INT, num)
+    could_cause_sigfpe = builder.and_(den_is_minus_one, num_is_min_int)
+    force_zero = builder.or_(den_is_zero, could_cause_sigfpe)
+    with builder.if_else(force_zero, likely=False) as (then, otherwise):
+        with then:
+            bb_then = builder.basic_block
+        with otherwise:
+            bb_otherwise = builder.basic_block
+            div = builder.sdiv(num, den)
+            mod = builder.srem(num, den)
+            num_gt_zero = builder.icmp_signed(">", num, ZERO)
+            den_gt_zero = builder.icmp_signed(">", den, ZERO)
+            not_same_sign = builder.xor(num_gt_zero, den_gt_zero)
+            mod_not_zero = builder.icmp_unsigned("!=", mod, ZERO)
+            needs_fixing = builder.and_(not_same_sign, mod_not_zero)
+            fix_value = builder.select(needs_fixing, MINUS_ONE, ZERO)
+            result_otherwise = builder.add(div, fix_value)
+
+    result = builder.phi(ZERO.type)
+    result.add_incoming(ZERO, bb_then)
+    result.add_incoming(result_otherwise, bb_otherwise)
+
+    return result
+
+
+def np_int_srem_impl(context, builder, sig, args):
+    # based on the actual code in NumPy loops.c.src for signed integers
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    num, den = args
+    ty = sig.args[0]  # any arg type will do, homogeneous
+
+    ZERO = context.get_constant(ty, 0)
+    den_not_zero = builder.icmp_unsigned("!=", ZERO, den)
+    bb_no_if = builder.basic_block
+    with cgutils.if_unlikely(builder, den_not_zero):
+        bb_if = builder.basic_block
+        mod = builder.srem(num, den)
+        num_gt_zero = builder.icmp_signed(">", num, ZERO)
+        den_gt_zero = builder.icmp_signed(">", den, ZERO)
+        not_same_sign = builder.xor(num_gt_zero, den_gt_zero)
+        mod_not_zero = builder.icmp_unsigned("!=", mod, ZERO)
+        needs_fixing = builder.and_(not_same_sign, mod_not_zero)
+        fix_value = builder.select(needs_fixing, den, ZERO)
+        final_mod = builder.add(fix_value, mod)
+
+    result = builder.phi(ZERO.type)
+    result.add_incoming(ZERO, bb_no_if)
+    result.add_incoming(final_mod, bb_if)
+
+    return result
+
+
+def np_int_sdivrem_impl(context, builder, sig, args):
+    div = np_int_sdiv_impl(
+        context, builder, sig.return_type[0](*sig.args), args
+    )
+    rem = np_int_srem_impl(
+        context, builder, sig.return_type[1](*sig.args), args
+    )
+    return context.make_tuple(builder, sig.return_type, [div, rem])
+
+
+def np_int_udiv_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    num, den = args
+    ty = sig.args[0]  # any arg type will do, homogeneous
+
+    ZERO = context.get_constant(ty, 0)
+    div_by_zero = builder.icmp_unsigned("==", ZERO, den)
+    with builder.if_else(div_by_zero, likely=False) as (then, otherwise):
+        with then:
+            # division by zero
+            bb_then = builder.basic_block
+        with otherwise:
+            # divide!
+            div = builder.udiv(num, den)
+            bb_otherwise = builder.basic_block
+
+    result = builder.phi(ZERO.type)
+    result.add_incoming(ZERO, bb_then)
+    result.add_incoming(div, bb_otherwise)
+    return result
+
+
+def np_int_urem_impl(context, builder, sig, args):
+    # based on the actual code in NumPy loops.c.src for signed integers
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    num, den = args
+    ty = sig.args[0]  # any arg type will do, homogeneous
+
+    ZERO = context.get_constant(ty, 0)
+    den_not_zero = builder.icmp_unsigned("!=", ZERO, den)
+    bb_no_if = builder.basic_block
+    with cgutils.if_unlikely(builder, den_not_zero):
+        bb_if = builder.basic_block
+        mod = builder.urem(num, den)
+
+    result = builder.phi(ZERO.type)
+    result.add_incoming(ZERO, bb_no_if)
+    result.add_incoming(mod, bb_if)
+
+    return result
+
+
+def np_int_udivrem_impl(context, builder, sig, args):
+    div = np_int_udiv_impl(
+        context, builder, sig.return_type[0](*sig.args), args
+    )
+    rem = np_int_urem_impl(
+        context, builder, sig.return_type[1](*sig.args), args
+    )
+    return context.make_tuple(builder, sig.return_type, [div, rem])
+
+
+# implementation of int_fmod is in fact the same as the unsigned remainder,
+# that is: srem with a special case returning 0 when the denominator is 0.
+np_int_fmod_impl = np_int_urem_impl
+
+
+def np_real_div_impl(context, builder, sig, args):
+    # in NumPy real div has the same semantics as an fdiv for generating
+    # NANs, INF and NINF
+    _check_arity_and_homogeneity(sig, args, 2)
+    return builder.fdiv(*args)
+
+
+def np_real_mod_impl(context, builder, sig, args):
+    # note: this maps to NumPy remainder, which has the same semantics as Python
+    # based on code in loops.c.src
+    _check_arity_and_homogeneity(sig, args, 2)
+    in1, in2 = args
+    ty = sig.args[0]
+
+    ZERO = context.get_constant(ty, 0.0)
+    res = builder.frem(in1, in2)
+    res_ne_zero = builder.fcmp_ordered("!=", res, ZERO)
+    den_lt_zero = builder.fcmp_ordered("<", in2, ZERO)
+    res_lt_zero = builder.fcmp_ordered("<", res, ZERO)
+    needs_fixing = builder.and_(
+        res_ne_zero, builder.xor(den_lt_zero, res_lt_zero)
+    )
+    fix_value = builder.select(needs_fixing, in2, ZERO)
+
+    return builder.fadd(res, fix_value)
+
+
+def np_real_fmod_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+    return builder.frem(*args)
+
+
+def _fabs(context, builder, arg):
+    ZERO = llvmlite.ir.Constant(arg.type, 0.0)
+    arg_negated = builder.fsub(ZERO, arg)
+    arg_is_negative = builder.fcmp_ordered("<", arg, ZERO)
+    return builder.select(arg_is_negative, arg_negated, arg)
+
+
+def np_complex_div_impl(context, builder, sig, args):
+    # Extracted from numpy/core/src/umath/loops.c.src,
+    # inspired by complex_div_impl
+    # variables named coherent with loops.c.src
+    # This is implemented using the approach described in
+    #   R.L. Smith. Algorithm 116: Complex division.
+    #   Communications of the ACM, 5(8):435, 1962
+
+    in1, in2 = [
+        context.make_complex(builder, sig.args[0], value=arg) for arg in args
+    ]
+
+    in1r = in1.real  # numerator.real
+    in1i = in1.imag  # numerator.imag
+    in2r = in2.real  # denominator.real
+    in2i = in2.imag  # denominator.imag
+    ftype = in1r.type
+    assert all([i.type == ftype for i in [in1r, in1i, in2r, in2i]]), (
+        "mismatched types"
+    )
+    out = context.make_helper(builder, sig.return_type)
+
+    ZERO = llvmlite.ir.Constant(ftype, 0.0)
+    ONE = llvmlite.ir.Constant(ftype, 1.0)
+
+    # if abs(denominator.real) >= abs(denominator.imag)
+    in2r_abs = _fabs(context, builder, in2r)
+    in2i_abs = _fabs(context, builder, in2i)
+    in2r_abs_ge_in2i_abs = builder.fcmp_ordered(">=", in2r_abs, in2i_abs)
+    with builder.if_else(in2r_abs_ge_in2i_abs) as (then, otherwise):
+        with then:
+            # if abs(denominator.real) == 0 and abs(denominator.imag) == 0
+            in2r_is_zero = builder.fcmp_ordered("==", in2r_abs, ZERO)
+            in2i_is_zero = builder.fcmp_ordered("==", in2i_abs, ZERO)
+            in2_is_zero = builder.and_(in2r_is_zero, in2i_is_zero)
+            with builder.if_else(in2_is_zero) as (inn_then, inn_otherwise):
+                with inn_then:
+                    # division by 0.
+                    # fdiv generates the appropriate NAN/INF/NINF
+                    out.real = builder.fdiv(in1r, in2r_abs)
+                    out.imag = builder.fdiv(in1i, in2i_abs)
+                with inn_otherwise:
+                    # general case for:
+                    # abs(denominator.real) > abs(denominator.imag)
+                    rat = builder.fdiv(in2i, in2r)
+                    # scl = 1.0/(in2r + in2i*rat)
+                    tmp1 = builder.fmul(in2i, rat)
+                    tmp2 = builder.fadd(in2r, tmp1)
+                    scl = builder.fdiv(ONE, tmp2)
+                    # out.real = (in1r + in1i*rat)*scl
+                    # out.imag = (in1i - in1r*rat)*scl
+                    tmp3 = builder.fmul(in1i, rat)
+                    tmp4 = builder.fmul(in1r, rat)
+                    tmp5 = builder.fadd(in1r, tmp3)
+                    tmp6 = builder.fsub(in1i, tmp4)
+                    out.real = builder.fmul(tmp5, scl)
+                    out.imag = builder.fmul(tmp6, scl)
+        with otherwise:
+            # general case for:
+            # abs(denominator.imag) > abs(denominator.real)
+            rat = builder.fdiv(in2r, in2i)
+            # scl = 1.0/(in2i + in2r*rat)
+            tmp1 = builder.fmul(in2r, rat)
+            tmp2 = builder.fadd(in2i, tmp1)
+            scl = builder.fdiv(ONE, tmp2)
+            # out.real = (in1r*rat + in1i)*scl
+            # out.imag = (in1i*rat - in1r)*scl
+            tmp3 = builder.fmul(in1r, rat)
+            tmp4 = builder.fmul(in1i, rat)
+            tmp5 = builder.fadd(tmp3, in1i)
+            tmp6 = builder.fsub(tmp4, in1r)
+            out.real = builder.fmul(tmp5, scl)
+            out.imag = builder.fmul(tmp6, scl)
+
+    return out._getvalue()
+
+
+########################################################################
+# NumPy logaddexp
+
+
+def _npy_logaddexp(x1, x2):
+    pass
+
+
+def _generate_logaddexp(fnoverload, const, log1pfn, expfn):
+    # Code generation for logaddexp and logaddexp2 is based on:
+    # https://github.com/numpy/numpy/blob/12c2b7dd62fc0c14b81c8892ed5f4f59cc94d09c/numpy/core/src/npymath/npy_math_internal.h.src#L467-L507
+
+    @overload(fnoverload, target="cuda")
+    def ol_npy_logaddexp(x1, x2):
+        if x1 != x2:
+            return
+        shift = x1(const)
+
+        def impl(x1, x2):
+            x, y = x1, x2
+            if x == y:
+                # Handles infinities of the same sign without warnings
+                return x + shift
+            else:
+                tmp = x - y
+                if tmp > 0:
+                    return x + log1pfn(expfn(-tmp))
+                elif tmp <= 0:
+                    return y + log1pfn(expfn(tmp))
+                else:
+                    # NaN
+                    return tmp
+
+        return impl
+
+
+def _npy_logaddexp(x1, x2):
+    pass
+
+
+_generate_logaddexp(_npy_logaddexp, _NPY_LOGE2, np.log1p, np.exp)
+
+
+def np_real_logaddexp_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    fnty = context.typing_context.resolve_value_type(_npy_logaddexp)
+    sig = fnty.get_call_type(context.typing_context, (*sig.args,), {})
+    impl = context.get_function(fnty, sig)
+    return impl(builder, args)
+
+
+########################################################################
+# NumPy logaddexp2
+def _npy_logaddexp2(x1, x2):
+    pass
+
+
+def npy_log2_1p(x):
+    pass
+
+
+# The following npy_log2_1p function is a translation of:
+# https://github.com/numpy/numpy/blob/12c2b7dd62fc0c14b81c8892ed5f4f59cc94d09c/numpy/core/src/npymath/npy_math_internal.h.src#L457-L460
+
+
+@overload(npy_log2_1p, target="cuda")
+def ol_npy_log2_1p(x):
+    LOG2E = x(_NPY_LOG2E)
+
+    def impl(x):
+        return LOG2E * np.log1p(x)
+
+    return impl
+
+
+_generate_logaddexp(_npy_logaddexp2, 1.0, npy_log2_1p, np.exp2)
+
+
+def np_real_logaddexp2_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    fnty = context.typing_context.resolve_value_type(_npy_logaddexp2)
+    sig = fnty.get_call_type(context.typing_context, (*sig.args,), {})
+    impl = context.get_function(fnty, sig)
+    return impl(builder, args)
+
+
+########################################################################
+# true div kernels
+
+
+def np_int_truediv_impl(context, builder, sig, args):
+    # in NumPy we don't check for 0 denominator... fdiv handles div by
+    # 0 in the way NumPy expects..
+    # integer truediv always yields double
+    num, den = args
+    lltype = num.type
+    assert all(i.type == lltype for i in args), "must have homogeneous types"
+    numty, denty = sig.args
+
+    num = context.cast(builder, num, numty, types.float64)
+    den = context.cast(builder, den, denty, types.float64)
+
+    return builder.fdiv(num, den)
+
+
+########################################################################
+# floor div kernels
+
+
+def np_real_floor_div_impl(context, builder, sig, args):
+    res = np_real_div_impl(context, builder, sig, args)
+    s = typing.signature(sig.return_type, sig.return_type)
+    return np_real_floor_impl(context, builder, s, (res,))
+
+
+def np_real_divmod_impl(context, builder, sig, args):
+    div = np_real_floor_div_impl(
+        context, builder, sig.return_type[0](*sig.args), args
+    )
+    rem = np_real_mod_impl(
+        context, builder, sig.return_type[1](*sig.args), args
+    )
+    return context.make_tuple(builder, sig.return_type, [div, rem])
+
+
+def np_complex_floor_div_impl(context, builder, sig, args):
+    # this is based on the complex floor divide in Numpy's loops.c.src
+    # This is basically a full complex division with a complex floor
+    # applied.
+    # The complex floor seems to be defined as the real floor applied
+    # with the real part and zero in the imaginary part. Fully developed
+    # so it avoids computing anything related to the imaginary result.
+    float_kind = sig.args[0].underlying_float
+    floor_sig = typing.signature(float_kind, float_kind)
+
+    in1, in2 = [
+        context.make_complex(builder, sig.args[0], value=arg) for arg in args
+    ]
+
+    in1r = in1.real
+    in1i = in1.imag
+    in2r = in2.real
+    in2i = in2.imag
+    ftype = in1r.type
+    assert all([i.type == ftype for i in [in1r, in1i, in2r, in2i]]), (
+        "mismatched types"
+    )
+
+    ZERO = llvmlite.ir.Constant(ftype, 0.0)
+
+    out = context.make_helper(builder, sig.return_type)
+    out.imag = ZERO
+
+    in2r_abs = _fabs(context, builder, in2r)
+    in2i_abs = _fabs(context, builder, in2i)
+    in2r_abs_ge_in2i_abs = builder.fcmp_ordered(">=", in2r_abs, in2i_abs)
+
+    with builder.if_else(in2r_abs_ge_in2i_abs) as (then, otherwise):
+        with then:
+            rat = builder.fdiv(in2i, in2r)
+            # out.real = floor((in1r+in1i*rat)/(in2r + in2i*rat))
+            tmp1 = builder.fmul(in1i, rat)
+            tmp2 = builder.fmul(in2i, rat)
+            tmp3 = builder.fadd(in1r, tmp1)
+            tmp4 = builder.fadd(in2r, tmp2)
+            tmp5 = builder.fdiv(tmp3, tmp4)
+            out.real = np_real_floor_impl(context, builder, floor_sig, (tmp5,))
+        with otherwise:
+            rat = builder.fdiv(in2r, in2i)
+            # out.real = floor((in1i + in1r*rat)/(in2i + in2r*rat))
+            tmp1 = builder.fmul(in1r, rat)
+            tmp2 = builder.fmul(in2r, rat)
+            tmp3 = builder.fadd(in1i, tmp1)
+            tmp4 = builder.fadd(in2i, tmp2)
+            tmp5 = builder.fdiv(tmp3, tmp4)
+            out.real = np_real_floor_impl(context, builder, floor_sig, (tmp5,))
+    return out._getvalue()
+
+
+########################################################################
+# numpy power funcs
+
+
+def np_complex_power_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    return numbers.complex_power_impl(context, builder, sig, args)
+
+
+########################################################################
+# numpy float power funcs
+
+
+def real_float_power_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    return numbers.real_power_impl(context, builder, sig, args)
+
+
+def np_complex_float_power_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    return numbers.complex_power_impl(context, builder, sig, args)
+
+
+########################################################################
+# numpy greatest common denominator
+
+
+def np_gcd_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+    return mathimpl.gcd_impl(context, builder, sig, args)
+
+
+########################################################################
+# numpy lowest common multiple
+
+
+def np_lcm_impl(context, builder, sig, args):
+    xty, yty = sig.args
+    assert xty == yty == sig.return_type
+    x, y = args
+
+    def lcm(a, b):
+        """
+        Like gcd, heavily cribbed from Julia.
+        """
+        return 0 if a == 0 else abs(a * (b // np.gcd(b, a)))
+
+    res = context.compile_internal(builder, lcm, sig, args)
+    return impl_ret_untracked(context, builder, sig.return_type, res)
+
+
+########################################################################
+# Numpy style complex sign
+
+
+def np_complex_sign_impl(context, builder, sig, args):
+    # equivalent to complex sign in NumPy's sign
+    # but implemented via selects, balancing the 4 cases.
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    if numpy_version >= (2, 0):
+        # NumPy >= 2.0.0
+        def complex_sign(z):
+            abs = math.hypot(z.real, z.imag)
+            if abs == 0:
+                return 0 + 0j
+            else:
+                return z / abs
+
+        res = context.compile_internal(builder, complex_sign, sig, args)
+        return impl_ret_untracked(context, builder, sig.return_type, res)
+    else:
+        op = args[0]
+        ty = sig.args[0]
+        result = context.make_complex(builder, ty)
+        float_ty = ty.underlying_float
+
+        ZERO = context.get_constant(float_ty, 0.0)
+        ONE = context.get_constant(float_ty, 1.0)
+        MINUS_ONE = context.get_constant(float_ty, -1.0)
+        NAN = context.get_constant(float_ty, float("nan"))
+
+        result.real = ZERO
+        result.imag = ZERO
+        cmp_sig = typing.signature(types.boolean, *[ty] * 2)
+        cmp_args = [op, result._getvalue()]
+        arg1_ge_arg2 = np_complex_ge_impl(context, builder, cmp_sig, cmp_args)
+        arg1_eq_arg2 = np_complex_eq_impl(context, builder, cmp_sig, cmp_args)
+        arg1_lt_arg2 = np_complex_lt_impl(context, builder, cmp_sig, cmp_args)
+
+        real_when_ge = builder.select(arg1_eq_arg2, ZERO, ONE)
+        real_when_nge = builder.select(arg1_lt_arg2, MINUS_ONE, NAN)
+        result.real = builder.select(arg1_ge_arg2, real_when_ge, real_when_nge)
+
+        return result._getvalue()
+
+
+########################################################################
+# Numpy rint
+
+
+def np_real_rint_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    return mathimpl.call_fp_intrinsic(builder, "llvm.rint", args)
+
+
+def np_complex_rint_impl(context, builder, sig, args):
+    # based on code in NumPy's funcs.inc.src
+    # rint of a complex number defined as rint of its real and imag
+    # parts
+    _check_arity_and_homogeneity(sig, args, 1)
+    ty = sig.args[0]
+    float_ty = ty.underlying_float
+    in1 = context.make_complex(builder, ty, value=args[0])
+    out = context.make_complex(builder, ty)
+
+    inner_sig = typing.signature(*[float_ty] * 2)
+    out.real = np_real_rint_impl(context, builder, inner_sig, [in1.real])
+    out.imag = np_real_rint_impl(context, builder, inner_sig, [in1.imag])
+    return out._getvalue()
+
+
+########################################################################
+# NumPy exp
+
+
+def np_real_exp_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.exp_impl(context, builder, sig, args)
+
+
+def np_complex_exp_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return cmathimpl.exp_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy exp2
+
+
+def np_real_exp2_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ll_ty = args[0].type
+    fnty = llvmlite.ir.FunctionType(
+        ll_ty,
+        [
+            ll_ty,
+        ],
+    )
+    fn = cgutils.insert_pure_function(builder.module, fnty, name="llvm.exp2")
+    return builder.call(fn, [args[0]])
+
+
+def np_complex_exp2_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    ty = sig.args[0]
+    float_ty = ty.underlying_float
+    in1 = context.make_complex(builder, ty, value=args[0])
+    tmp = context.make_complex(builder, ty)
+    loge2 = context.get_constant(float_ty, _NPY_LOGE2)
+    tmp.real = builder.fmul(loge2, in1.real)
+    tmp.imag = builder.fmul(loge2, in1.imag)
+    return np_complex_exp_impl(context, builder, sig, [tmp._getvalue()])
+
+
+########################################################################
+# NumPy log
+
+
+def np_real_log_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.log_impl(context, builder, sig, args)
+
+
+def np_complex_log_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return cmathimpl.log_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy log2
+
+
+def np_real_log2_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ll_ty = args[0].type
+    fnty = llvmlite.ir.FunctionType(
+        ll_ty,
+        [
+            ll_ty,
+        ],
+    )
+    fn = cgutils.insert_pure_function(builder.module, fnty, name="llvm.log2")
+    return builder.call(fn, [args[0]])
+
+
+def np_complex_log2_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ty = sig.args[0]
+    float_ty = ty.underlying_float
+    tmp = np_complex_log_impl(context, builder, sig, args)
+    tmp = context.make_complex(builder, ty, value=tmp)
+    log2e = context.get_constant(float_ty, _NPY_LOG2E)
+    tmp.real = builder.fmul(log2e, tmp.real)
+    tmp.imag = builder.fmul(log2e, tmp.imag)
+    return tmp._getvalue()
+
+
+########################################################################
+# NumPy log10
+
+
+def np_real_log10_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.log10_impl(context, builder, sig, args)
+
+
+def np_complex_log10_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ty = sig.args[0]
+    float_ty = ty.underlying_float
+    tmp = np_complex_log_impl(context, builder, sig, args)
+    tmp = context.make_complex(builder, ty, value=tmp)
+    log10e = context.get_constant(float_ty, _NPY_LOG10E)
+    tmp.real = builder.fmul(log10e, tmp.real)
+    tmp.imag = builder.fmul(log10e, tmp.imag)
+    return tmp._getvalue()
+
+
+########################################################################
+# NumPy expm1
+
+
+def np_real_expm1_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.expm1_impl(context, builder, sig, args)
+
+
+def np_complex_expm1_impl(context, builder, sig, args):
+    # this is based on nc_expm1 in funcs.inc.src
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ty = sig.args[0]
+    float_ty = ty.underlying_float
+    float_unary_sig = typing.signature(*[float_ty] * 2)
+
+    MINUS_ONE = context.get_constant(float_ty, -1.0)
+    in1 = context.make_complex(builder, ty, value=args[0])
+    a = np_real_exp_impl(context, builder, float_unary_sig, [in1.real])
+    out = context.make_complex(builder, ty)
+    cos_imag = np_real_cos_impl(context, builder, float_unary_sig, [in1.imag])
+    sin_imag = np_real_sin_impl(context, builder, float_unary_sig, [in1.imag])
+    tmp = builder.fmul(a, cos_imag)
+    out.imag = builder.fmul(a, sin_imag)
+    out.real = builder.fadd(tmp, MINUS_ONE)
+
+    return out._getvalue()
+
+
+########################################################################
+# NumPy log1p
+
+
+def np_real_log1p_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.log1p_impl(context, builder, sig, args)
+
+
+def np_complex_log1p_impl(context, builder, sig, args):
+    # base on NumPy's nc_log1p in funcs.inc.src
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ty = sig.args[0]
+    float_ty = ty.underlying_float
+    float_unary_sig = typing.signature(*[float_ty] * 2)
+    float_binary_sig = typing.signature(*[float_ty] * 3)
+
+    ONE = context.get_constant(float_ty, 1.0)
+    in1 = context.make_complex(builder, ty, value=args[0])
+    out = context.make_complex(builder, ty)
+    real_plus_one = builder.fadd(in1.real, ONE)
+    l = np_real_hypot_impl(
+        context, builder, float_binary_sig, [real_plus_one, in1.imag]
+    )
+    out.imag = np_real_atan2_impl(
+        context, builder, float_binary_sig, [in1.imag, real_plus_one]
+    )
+    out.real = np_real_log_impl(context, builder, float_unary_sig, [l])
+
+    return out._getvalue()
+
+
+########################################################################
+# NumPy sqrt
+
+
+def np_real_sqrt_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.sqrt_impl(context, builder, sig, args)
+
+
+def np_complex_sqrt_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return cmathimpl.sqrt_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy square
+
+
+def np_int_square_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return builder.mul(args[0], args[0])
+
+
+def np_real_square_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return builder.fmul(args[0], args[0])
+
+
+def np_complex_square_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    binary_sig = typing.signature(*[sig.return_type] * 3)
+    return numbers.complex_mul_impl(
+        context, builder, binary_sig, [args[0], args[0]]
+    )
+
+
+########################################################################
+# NumPy cbrt
+
+
+def np_real_cbrt_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    # We enable fastmath here to force np.power(x, 1/3) to generate a
+    # call to libm cbrt function
+    @register_jitable(fastmath=True)
+    def cbrt(x):
+        if x < 0:
+            return -np.power(-x, 1.0 / 3.0)
+        else:
+            return np.power(x, 1.0 / 3.0)
+
+    def _cbrt(x):
+        if np.isnan(x):
+            return np.nan
+        return cbrt(x)
+
+    return context.compile_internal(builder, _cbrt, sig, args)
+
+
+########################################################################
+# NumPy reciprocal
+
+
+def np_int_reciprocal_impl(context, builder, sig, args):
+    # based on the implementation in loops.c.src
+    # integer versions for reciprocal are performed via promotion
+    # using double, and then converted back to the type
+    _check_arity_and_homogeneity(sig, args, 1)
+    ty = sig.return_type
+
+    in_as_float = context.cast(builder, args[0], ty, types.float64)
+    ONE = context.get_constant(types.float64, 1)
+    result_as_float = builder.fdiv(ONE, in_as_float)
+    return context.cast(builder, result_as_float, types.float64, ty)
+
+
+def np_real_reciprocal_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    ONE = context.get_constant(sig.return_type, 1.0)
+    return builder.fdiv(ONE, args[0])
+
+
+def np_complex_reciprocal_impl(context, builder, sig, args):
+    # based on the implementation in loops.c.src
+    # Basically the same Smith method used for division, but with
+    # the numerator substituted by 1.0
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ty = sig.args[0]
+    float_ty = ty.underlying_float
+
+    ZERO = context.get_constant(float_ty, 0.0)
+    ONE = context.get_constant(float_ty, 1.0)
+    in1 = context.make_complex(builder, ty, value=args[0])
+    out = context.make_complex(builder, ty)
+    in1r = in1.real
+    in1i = in1.imag
+    in1r_abs = _fabs(context, builder, in1r)
+    in1i_abs = _fabs(context, builder, in1i)
+    in1i_abs_le_in1r_abs = builder.fcmp_ordered("<=", in1i_abs, in1r_abs)
+
+    with builder.if_else(in1i_abs_le_in1r_abs) as (then, otherwise):
+        with then:
+            r = builder.fdiv(in1i, in1r)
+            tmp0 = builder.fmul(in1i, r)
+            d = builder.fadd(in1r, tmp0)
+            inv_d = builder.fdiv(ONE, d)
+            minus_r = builder.fsub(ZERO, r)
+            out.real = inv_d
+            out.imag = builder.fmul(minus_r, inv_d)
+        with otherwise:
+            r = builder.fdiv(in1r, in1i)
+            tmp0 = builder.fmul(in1r, r)
+            d = builder.fadd(tmp0, in1i)
+            inv_d = builder.fdiv(ONE, d)
+            out.real = builder.fmul(r, inv_d)
+            out.imag = builder.fsub(ZERO, inv_d)
+
+    return out._getvalue()
+
+
+########################################################################
+# NumPy sin
+
+
+def np_real_sin_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.sin_impl(context, builder, sig, args)
+
+
+def np_complex_sin_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return cmathimpl.sin_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy cos
+
+
+def np_real_cos_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.cos_impl(context, builder, sig, args)
+
+
+def np_complex_cos_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return cmathimpl.cos_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy tan
+
+
+def np_real_tan_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.tan_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy asin
+
+
+def np_real_asin_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.asin_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy acos
+
+
+def np_real_acos_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.acos_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy atan
+
+
+def np_real_atan_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.atan_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy atan2
+
+
+def np_real_atan2_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+    return mathimpl.atan2_float_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy hypot
+
+
+def np_real_hypot_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+    return mathimpl.hypot_float_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy sinh
+
+
+def np_real_sinh_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.sinh_impl(context, builder, sig, args)
+
+
+def np_complex_sinh_impl(context, builder, sig, args):
+    # npymath does not provide a complex sinh. The code in funcs.inc.src
+    # is translated here...
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ty = sig.args[0]
+    fty = ty.underlying_float
+    fsig1 = typing.signature(*[fty] * 2)
+    x = context.make_complex(builder, ty, args[0])
+    out = context.make_complex(builder, ty)
+    xr = x.real
+    xi = x.imag
+
+    sxi = np_real_sin_impl(context, builder, fsig1, [xi])
+    shxr = np_real_sinh_impl(context, builder, fsig1, [xr])
+    cxi = np_real_cos_impl(context, builder, fsig1, [xi])
+    chxr = np_real_cosh_impl(context, builder, fsig1, [xr])
+
+    out.real = builder.fmul(cxi, shxr)
+    out.imag = builder.fmul(sxi, chxr)
+
+    return out._getvalue()
+
+
+########################################################################
+# NumPy cosh
+
+
+def np_real_cosh_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.cosh_impl(context, builder, sig, args)
+
+
+def np_complex_cosh_impl(context, builder, sig, args):
+    # npymath does not provide a complex cosh. The code in funcs.inc.src
+    # is translated here...
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ty = sig.args[0]
+    fty = ty.underlying_float
+    fsig1 = typing.signature(*[fty] * 2)
+    x = context.make_complex(builder, ty, args[0])
+    out = context.make_complex(builder, ty)
+    xr = x.real
+    xi = x.imag
+
+    cxi = np_real_cos_impl(context, builder, fsig1, [xi])
+    chxr = np_real_cosh_impl(context, builder, fsig1, [xr])
+    sxi = np_real_sin_impl(context, builder, fsig1, [xi])
+    shxr = np_real_sinh_impl(context, builder, fsig1, [xr])
+
+    out.real = builder.fmul(cxi, chxr)
+    out.imag = builder.fmul(sxi, shxr)
+
+    return out._getvalue()
+
+
+########################################################################
+# NumPy tanh
+
+
+def np_real_tanh_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.tanh_impl(context, builder, sig, args)
+
+
+def np_complex_tanh_impl(context, builder, sig, args):
+    # npymath does not provide complex tan functions. The code
+    # in funcs.inc.src for tanh is translated here...
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ty = sig.args[0]
+    fty = ty.underlying_float
+    fsig1 = typing.signature(*[fty] * 2)
+    ONE = context.get_constant(fty, 1.0)
+    x = context.make_complex(builder, ty, args[0])
+    out = context.make_complex(builder, ty)
+
+    xr = x.real
+    xi = x.imag
+    si = np_real_sin_impl(context, builder, fsig1, [xi])
+    ci = np_real_cos_impl(context, builder, fsig1, [xi])
+    shr = np_real_sinh_impl(context, builder, fsig1, [xr])
+    chr_ = np_real_cosh_impl(context, builder, fsig1, [xr])
+    rs = builder.fmul(ci, shr)
+    is_ = builder.fmul(si, chr_)
+    rc = builder.fmul(ci, chr_)
+    ic = builder.fmul(si, shr)  # note: opposite sign from code in funcs.inc.src
+    sqr_rc = builder.fmul(rc, rc)
+    sqr_ic = builder.fmul(ic, ic)
+    d = builder.fadd(sqr_rc, sqr_ic)
+    inv_d = builder.fdiv(ONE, d)
+    rs_rc = builder.fmul(rs, rc)
+    is_ic = builder.fmul(is_, ic)
+    is_rc = builder.fmul(is_, rc)
+    rs_ic = builder.fmul(rs, ic)
+    numr = builder.fadd(rs_rc, is_ic)
+    numi = builder.fsub(is_rc, rs_ic)
+    out.real = builder.fmul(numr, inv_d)
+    out.imag = builder.fmul(numi, inv_d)
+
+    return out._getvalue()
+
+
+########################################################################
+# NumPy asinh
+
+
+def np_real_asinh_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.asinh_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy acosh
+
+
+def np_real_acosh_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.acosh_impl(context, builder, sig, args)
+
+
+def np_complex_acosh_impl(context, builder, sig, args):
+    # npymath does not provide a complex acosh. The code in funcs.inc.src
+    # is translated here...
+    # log(x + sqrt(x+1) * sqrt(x-1))
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    ty = sig.args[0]
+    csig2 = typing.signature(*[ty] * 3)
+
+    ONE = context.get_constant_generic(builder, ty, 1.0 + 0.0j)
+    x = args[0]
+
+    x_plus_one = numbers.complex_add_impl(context, builder, csig2, [x, ONE])
+    x_minus_one = numbers.complex_sub_impl(context, builder, csig2, [x, ONE])
+    sqrt_x_plus_one = np_complex_sqrt_impl(context, builder, sig, [x_plus_one])
+    sqrt_x_minus_one = np_complex_sqrt_impl(
+        context, builder, sig, [x_minus_one]
+    )
+    prod_sqrt = numbers.complex_mul_impl(
+        context, builder, csig2, [sqrt_x_plus_one, sqrt_x_minus_one]
+    )
+    log_arg = numbers.complex_add_impl(context, builder, csig2, [x, prod_sqrt])
+
+    return np_complex_log_impl(context, builder, sig, [log_arg])
+
+
+########################################################################
+# NumPy atanh
+
+
+def np_real_atanh_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+    return mathimpl.atanh_impl(context, builder, sig, args)
+
+
+########################################################################
+# NumPy floor
+
+
+def np_real_floor_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    return mathimpl.call_fp_intrinsic(builder, "llvm.floor", args)
+
+
+########################################################################
+# NumPy ceil
+
+
+def np_real_ceil_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    return mathimpl.call_fp_intrinsic(builder, "llvm.ceil", args)
+
+
+########################################################################
+# NumPy trunc
+
+
+def np_real_trunc_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    return mathimpl.call_fp_intrinsic(builder, "llvm.trunc", args)
+
+
+########################################################################
+# NumPy fabs
+
+
+def np_real_fabs_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    return mathimpl.call_fp_intrinsic(builder, "llvm.fabs", args)
+
+
+########################################################################
+# NumPy style predicates
+
+
+# For real and integer types rely on numbers... but complex ordering in
+# NumPy is lexicographic (while Python does not provide ordering).
+def np_complex_ge_impl(context, builder, sig, args):
+    # equivalent to macro CGE in NumPy's loops.c.src
+    # ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi >= yi))
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+
+    ty = sig.args[0]
+    in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args]
+    xr = in1.real
+    xi = in1.imag
+    yr = in2.real
+    yi = in2.imag
+
+    xr_gt_yr = builder.fcmp_ordered(">", xr, yr)
+    no_nan_xi_yi = builder.fcmp_ordered("ord", xi, yi)
+    xr_eq_yr = builder.fcmp_ordered("==", xr, yr)
+    xi_ge_yi = builder.fcmp_ordered(">=", xi, yi)
+    first_term = builder.and_(xr_gt_yr, no_nan_xi_yi)
+    second_term = builder.and_(xr_eq_yr, xi_ge_yi)
+    return builder.or_(first_term, second_term)
+
+
+def np_complex_le_impl(context, builder, sig, args):
+    # equivalent to macro CLE in NumPy's loops.c.src
+    # ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi <= yi))
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+
+    ty = sig.args[0]
+    in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args]
+    xr = in1.real
+    xi = in1.imag
+    yr = in2.real
+    yi = in2.imag
+
+    xr_lt_yr = builder.fcmp_ordered("<", xr, yr)
+    no_nan_xi_yi = builder.fcmp_ordered("ord", xi, yi)
+    xr_eq_yr = builder.fcmp_ordered("==", xr, yr)
+    xi_le_yi = builder.fcmp_ordered("<=", xi, yi)
+    first_term = builder.and_(xr_lt_yr, no_nan_xi_yi)
+    second_term = builder.and_(xr_eq_yr, xi_le_yi)
+    return builder.or_(first_term, second_term)
+
+
+def np_complex_gt_impl(context, builder, sig, args):
+    # equivalent to macro CGT in NumPy's loops.c.src
+    # ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi > yi))
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+
+    ty = sig.args[0]
+    in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args]
+    xr = in1.real
+    xi = in1.imag
+    yr = in2.real
+    yi = in2.imag
+
+    xr_gt_yr = builder.fcmp_ordered(">", xr, yr)
+    no_nan_xi_yi = builder.fcmp_ordered("ord", xi, yi)
+    xr_eq_yr = builder.fcmp_ordered("==", xr, yr)
+    xi_gt_yi = builder.fcmp_ordered(">", xi, yi)
+    first_term = builder.and_(xr_gt_yr, no_nan_xi_yi)
+    second_term = builder.and_(xr_eq_yr, xi_gt_yi)
+    return builder.or_(first_term, second_term)
+
+
+def np_complex_lt_impl(context, builder, sig, args):
+    # equivalent to macro CLT in NumPy's loops.c.src
+    # ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi < yi))
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+
+    ty = sig.args[0]
+    in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args]
+    xr = in1.real
+    xi = in1.imag
+    yr = in2.real
+    yi = in2.imag
+
+    xr_lt_yr = builder.fcmp_ordered("<", xr, yr)
+    no_nan_xi_yi = builder.fcmp_ordered("ord", xi, yi)
+    xr_eq_yr = builder.fcmp_ordered("==", xr, yr)
+    xi_lt_yi = builder.fcmp_ordered("<", xi, yi)
+    first_term = builder.and_(xr_lt_yr, no_nan_xi_yi)
+    second_term = builder.and_(xr_eq_yr, xi_lt_yi)
+    return builder.or_(first_term, second_term)
+
+
+def np_complex_eq_impl(context, builder, sig, args):
+    # equivalent to macro CEQ in NumPy's loops.c.src
+    # (xr == yr && xi == yi)
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+
+    ty = sig.args[0]
+    in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args]
+    xr = in1.real
+    xi = in1.imag
+    yr = in2.real
+    yi = in2.imag
+
+    xr_eq_yr = builder.fcmp_ordered("==", xr, yr)
+    xi_eq_yi = builder.fcmp_ordered("==", xi, yi)
+    return builder.and_(xr_eq_yr, xi_eq_yi)
+
+
+def np_complex_ne_impl(context, builder, sig, args):
+    # equivalent to macro CNE in NumPy's loops.c.src
+    # (xr != yr || xi != yi)
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+
+    ty = sig.args[0]
+    in1, in2 = [context.make_complex(builder, ty, value=arg) for arg in args]
+    xr = in1.real
+    xi = in1.imag
+    yr = in2.real
+    yi = in2.imag
+
+    xr_ne_yr = builder.fcmp_unordered("!=", xr, yr)
+    xi_ne_yi = builder.fcmp_unordered("!=", xi, yi)
+    return builder.or_(xr_ne_yr, xi_ne_yi)
+
+
+########################################################################
+# NumPy logical algebra
+
+# these are made generic for all types for now, assuming that
+# cgutils.is_true works in the underlying types.
+
+
+def _complex_is_true(context, builder, ty, val):
+    complex_val = context.make_complex(builder, ty, value=val)
+    re_true = cgutils.is_true(builder, complex_val.real)
+    im_true = cgutils.is_true(builder, complex_val.imag)
+    return builder.or_(re_true, im_true)
+
+
+def np_logical_and_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+    a = cgutils.is_true(builder, args[0])
+    b = cgutils.is_true(builder, args[1])
+    return builder.and_(a, b)
+
+
+def np_complex_logical_and_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+    a = _complex_is_true(context, builder, sig.args[0], args[0])
+    b = _complex_is_true(context, builder, sig.args[1], args[1])
+    return builder.and_(a, b)
+
+
+def np_logical_or_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+    a = cgutils.is_true(builder, args[0])
+    b = cgutils.is_true(builder, args[1])
+    return builder.or_(a, b)
+
+
+def np_complex_logical_or_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+    a = _complex_is_true(context, builder, sig.args[0], args[0])
+    b = _complex_is_true(context, builder, sig.args[1], args[1])
+    return builder.or_(a, b)
+
+
+def np_logical_xor_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+    a = cgutils.is_true(builder, args[0])
+    b = cgutils.is_true(builder, args[1])
+    return builder.xor(a, b)
+
+
+def np_complex_logical_xor_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
+    a = _complex_is_true(context, builder, sig.args[0], args[0])
+    b = _complex_is_true(context, builder, sig.args[1], args[1])
+    return builder.xor(a, b)
+
+
+def np_logical_not_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    return cgutils.is_false(builder, args[0])
+
+
+def np_complex_logical_not_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    a = _complex_is_true(context, builder, sig.args[0], args[0])
+    return builder.not_(a)
+
+
+########################################################################
+# NumPy style max/min
+#
+# There are 2 different sets of functions to perform max and min in
+# NumPy: maximum/minimum and fmax/fmin.
+# Both differ in the way NaNs are handled, so the actual differences
+# come in action only on float/complex numbers. The functions used for
+# integers is shared. For booleans maximum is equivalent to or, and
+# minimum is equivalent to and. Datetime support will go elsewhere.
+
+
+def np_int_smax_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+    arg1, arg2 = args
+    arg1_sge_arg2 = builder.icmp_signed(">=", arg1, arg2)
+    return builder.select(arg1_sge_arg2, arg1, arg2)
+
+
+def np_int_umax_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+    arg1, arg2 = args
+    arg1_uge_arg2 = builder.icmp_unsigned(">=", arg1, arg2)
+    return builder.select(arg1_uge_arg2, arg1, arg2)
+
+
+def np_real_maximum_impl(context, builder, sig, args):
+    # maximum prefers nan (tries to return a nan).
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    arg1, arg2 = args
+    arg1_nan = builder.fcmp_unordered("uno", arg1, arg1)
+    any_nan = builder.fcmp_unordered("uno", arg1, arg2)
+    nan_result = builder.select(arg1_nan, arg1, arg2)
+
+    arg1_ge_arg2 = builder.fcmp_ordered(">=", arg1, arg2)
+    non_nan_result = builder.select(arg1_ge_arg2, arg1, arg2)
+
+    return builder.select(any_nan, nan_result, non_nan_result)
+
+
+def np_real_fmax_impl(context, builder, sig, args):
+    # fmax prefers non-nan (tries to return a non-nan).
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    arg1, arg2 = args
+    arg2_nan = builder.fcmp_unordered("uno", arg2, arg2)
+    any_nan = builder.fcmp_unordered("uno", arg1, arg2)
+    nan_result = builder.select(arg2_nan, arg1, arg2)
+
+    arg1_ge_arg2 = builder.fcmp_ordered(">=", arg1, arg2)
+    non_nan_result = builder.select(arg1_ge_arg2, arg1, arg2)
+
+    return builder.select(any_nan, nan_result, non_nan_result)
+
+
+def np_complex_maximum_impl(context, builder, sig, args):
+    # maximum prefers nan (tries to return a nan).
+    # There is an extra caveat with complex numbers, as there is more
+    # than one type of nan. NumPy's docs state that the nan in the
+    # first argument is returned when both arguments are nans.
+    # If only one nan is found, that nan is returned.
+    _check_arity_and_homogeneity(sig, args, 2)
+    ty = sig.args[0]
+    bc_sig = typing.signature(types.boolean, ty)
+    bcc_sig = typing.signature(types.boolean, *[ty] * 2)
+    arg1, arg2 = args
+    arg1_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg1])
+    arg2_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg2])
+    any_nan = builder.or_(arg1_nan, arg2_nan)
+    nan_result = builder.select(arg1_nan, arg1, arg2)
+
+    arg1_ge_arg2 = np_complex_ge_impl(context, builder, bcc_sig, args)
+    non_nan_result = builder.select(arg1_ge_arg2, arg1, arg2)
+
+    return builder.select(any_nan, nan_result, non_nan_result)
+
+
+def np_complex_fmax_impl(context, builder, sig, args):
+    # fmax prefers non-nan (tries to return a non-nan).
+    # There is an extra caveat with complex numbers, as there is more
+    # than one type of nan. NumPy's docs state that the nan in the
+    # first argument is returned when both arguments are nans.
+    _check_arity_and_homogeneity(sig, args, 2)
+    ty = sig.args[0]
+    bc_sig = typing.signature(types.boolean, ty)
+    bcc_sig = typing.signature(types.boolean, *[ty] * 2)
+    arg1, arg2 = args
+    arg1_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg1])
+    arg2_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg2])
+    any_nan = builder.or_(arg1_nan, arg2_nan)
+    nan_result = builder.select(arg2_nan, arg1, arg2)
+
+    arg1_ge_arg2 = np_complex_ge_impl(context, builder, bcc_sig, args)
+    non_nan_result = builder.select(arg1_ge_arg2, arg1, arg2)
+
+    return builder.select(any_nan, nan_result, non_nan_result)
+
+
+def np_int_smin_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+    arg1, arg2 = args
+    arg1_sle_arg2 = builder.icmp_signed("<=", arg1, arg2)
+    return builder.select(arg1_sle_arg2, arg1, arg2)
+
+
+def np_int_umin_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+    arg1, arg2 = args
+    arg1_ule_arg2 = builder.icmp_unsigned("<=", arg1, arg2)
+    return builder.select(arg1_ule_arg2, arg1, arg2)
+
+
+def np_real_minimum_impl(context, builder, sig, args):
+    # minimum prefers nan (tries to return a nan).
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    arg1, arg2 = args
+    arg1_nan = builder.fcmp_unordered("uno", arg1, arg1)
+    any_nan = builder.fcmp_unordered("uno", arg1, arg2)
+    nan_result = builder.select(arg1_nan, arg1, arg2)
+
+    arg1_le_arg2 = builder.fcmp_ordered("<=", arg1, arg2)
+    non_nan_result = builder.select(arg1_le_arg2, arg1, arg2)
+
+    return builder.select(any_nan, nan_result, non_nan_result)
+
+
+def np_real_fmin_impl(context, builder, sig, args):
+    # fmin prefers non-nan (tries to return a non-nan).
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    arg1, arg2 = args
+    arg1_nan = builder.fcmp_unordered("uno", arg1, arg1)
+    any_nan = builder.fcmp_unordered("uno", arg1, arg2)
+    nan_result = builder.select(arg1_nan, arg2, arg1)
+
+    arg1_le_arg2 = builder.fcmp_ordered("<=", arg1, arg2)
+    non_nan_result = builder.select(arg1_le_arg2, arg1, arg2)
+
+    return builder.select(any_nan, nan_result, non_nan_result)
+
+
+def np_complex_minimum_impl(context, builder, sig, args):
+    # minimum prefers nan (tries to return a nan).
+    # There is an extra caveat with complex numbers, as there is more
+    # than one type of nan. NumPy's docs state that the nan in the
+    # first argument is returned when both arguments are nans.
+    # If only one nan is found, that nan is returned.
+    _check_arity_and_homogeneity(sig, args, 2)
+    ty = sig.args[0]
+    bc_sig = typing.signature(types.boolean, ty)
+    bcc_sig = typing.signature(types.boolean, *[ty] * 2)
+    arg1, arg2 = args
+    arg1_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg1])
+    arg2_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg2])
+    any_nan = builder.or_(arg1_nan, arg2_nan)
+    nan_result = builder.select(arg1_nan, arg1, arg2)
+
+    arg1_le_arg2 = np_complex_le_impl(context, builder, bcc_sig, args)
+    non_nan_result = builder.select(arg1_le_arg2, arg1, arg2)
+
+    return builder.select(any_nan, nan_result, non_nan_result)
+
+
+def np_complex_fmin_impl(context, builder, sig, args):
+    # fmin prefers non-nan (tries to return a non-nan).
+    # There is an extra caveat with complex numbers, as there is more
+    # than one type of nan. NumPy's docs state that the nan in the
+    # first argument is returned when both arguments are nans.
+    _check_arity_and_homogeneity(sig, args, 2)
+    ty = sig.args[0]
+    bc_sig = typing.signature(types.boolean, ty)
+    bcc_sig = typing.signature(types.boolean, *[ty] * 2)
+    arg1, arg2 = args
+    arg1_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg1])
+    arg2_nan = np_complex_isnan_impl(context, builder, bc_sig, [arg2])
+    any_nan = builder.or_(arg1_nan, arg2_nan)
+    nan_result = builder.select(arg2_nan, arg1, arg2)
+
+    arg1_le_arg2 = np_complex_le_impl(context, builder, bcc_sig, args)
+    non_nan_result = builder.select(arg1_le_arg2, arg1, arg2)
+
+    return builder.select(any_nan, nan_result, non_nan_result)
+
+
+########################################################################
+# NumPy floating point misc
+
+
+def np_int_isnan_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    return cgutils.false_bit
+
+
+def np_real_isnan_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    return mathimpl.is_nan(builder, args[0])
+
+
+def np_complex_isnan_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+
+    (x,) = args
+    (ty,) = sig.args
+    complex_val = context.make_complex(builder, ty, value=x)
+    return cmathimpl.is_nan(builder, complex_val)
+
+
+def np_int_isfinite_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    return cgutils.true_bit
+
+
+def np_datetime_isfinite_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    return builder.icmp_unsigned("!=", args[0], npdatetime.NAT)
+
+
+def np_datetime_isnat_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    return builder.icmp_signed("==", args[0], npdatetime.NAT)
+
+
+def np_real_isfinite_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    return mathimpl.is_finite(builder, args[0])
+
+
+def np_complex_isfinite_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    (x,) = args
+    (ty,) = sig.args
+    complex_val = context.make_complex(builder, ty, value=x)
+    return cmathimpl.is_finite(builder, complex_val)
+
+
+def np_int_isinf_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    return cgutils.false_bit
+
+
+def np_real_isinf_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    return mathimpl.is_inf(builder, args[0])
+
+
+def np_complex_isinf_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    (x,) = args
+    (ty,) = sig.args
+    complex_val = context.make_complex(builder, ty, value=x)
+    return cmathimpl.is_inf(builder, complex_val)
+
+
+def np_real_signbit_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
+    # there's no signbit intrinsic in LLVM, so just bitcast as int, mask the
+    # signbit and cmp against 0.
+    masks = {
+        types.float16: context.get_constant(types.uint16, 0x8000),
+        types.float32: context.get_constant(types.uint32, 0x80000000),
+        types.float64: context.get_constant(types.uint64, 0x8000000000000000),
+    }
+    arg_ty = sig.args[0]
+    arg_int_ty = getattr(types, f"uint{arg_ty.bitwidth}")
+    arg_ll_int_ty = context.get_value_type(arg_int_ty)
+    int_res = builder.and_(
+        builder.bitcast(args[0], arg_ll_int_ty), masks[arg_ty]
+    )
+    bool_res = builder.icmp_unsigned("!=", int_res, int_res.type(0))
+    return bool_res
+
+
+def np_real_copysign_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    return mathimpl.copysign_float_impl(context, builder, sig, args)
+
+
+def np_real_nextafter_impl(context, builder, sig, args):
+    _check_arity_and_homogeneity(sig, args, 2)
+
+    dispatch_table = {
+        types.float32: "numba_nextafterf",
+        types.float64: "numba_nextafter",
+    }
+
+    return _dispatch_func_by_name_type(
+        context, builder, sig, args, dispatch_table, "nextafter"
+    )
+
+
+def np_real_spacing_impl(context, builder, sig, args):
+    # This is different to how NumPy does it, NumPy has a specialisation of
+    # nextafter called _next, which is used. See:
+    # https://github.com/numpy/numpy/blob/12c2b7dd62fc0c14b81c8892ed5f4f59cc94d09c/numpy/core/src/npymath/ieee754.c.src#L32-L38
+    # Numba elects to use `nextafter` for a similar behaviour to save
+    # translating this very involved function. Further, the NumPy comments note
+    # that there is a lot of redundancy present between the two.
+    _check_arity_and_homogeneity(sig, args, 1)
+
+    dispatch_table = {
+        types.float32: "numba_nextafterf",
+        types.float64: "numba_nextafter",
+    }
+
+    [ty] = sig.args
+    inner_sig = typing.signature(sig.return_type, ty, ty)
+    ll_ty = args[0].type
+    ll_inf = ll_ty(np.inf)
+    fnty = llvmlite.ir.FunctionType(ll_ty, [ll_ty, ll_ty])
+    fn = cgutils.insert_pure_function(
+        builder.module, fnty, name="llvm.copysign"
+    )
+    ll_sinf = builder.call(fn, [ll_inf, args[0]])
+    inner_args = args + [
+        ll_sinf,
+    ]
+    nextafter = _dispatch_func_by_name_type(
+        context, builder, inner_sig, inner_args, dispatch_table, "nextafter"
+    )
+    return builder.fsub(nextafter, args[0])
+
+
+def np_real_ldexp_impl(context, builder, sig, args):
+    # this one is slightly different to other ufuncs.
+    # arguments are not homogeneous and second arg may come as
+    # an 'i' or an 'l'.
+
+    # the function expects the second argument to be have a C int type
+    x1, x2 = args
+    ty1, ty2 = sig.args
+    # note that types.intc should be equivalent to int_ that is
+    # 'NumPy's default int')
+    x2 = context.cast(builder, x2, ty2, types.intc)
+    f_fi_sig = typing.signature(ty1, ty1, types.intc)
+    return mathimpl.ldexp_impl(context, builder, f_fi_sig, (x1, x2))
diff --git a/numba_cuda/numba/cuda/ufuncs.py b/numba_cuda/numba/cuda/ufuncs.py
index 6b28a4a3c..89e0a08f0 100644
--- a/numba_cuda/numba/cuda/ufuncs.py
+++ b/numba_cuda/numba/cuda/ufuncs.py
@@ -27,7 +27,7 @@ def get_ufunc_info(ufunc_key):
 def ufunc_db():
     # Imports here are at function scope to avoid circular imports
     from numba.cpython import cmathimpl, mathimpl, numbers
-    from numba.np import npyfuncs
+    from numba.cuda.np import npyfuncs
     from numba.cuda.np.numpy_support import numpy_version
 
     def np_unary_impl(fn, context, builder, sig, args):