@@ -355,15 +355,15 @@ cdef class {{name}}HashTable(HashTable):
355355
356356 return np.asarray(locs)
357357
358- def unique(self, const {{dtype}}_t[:] values, bint return_inverse):
358+ def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False ):
359359 """
360360 Calculate unique values and labels (no sorting!)
361361
362362 Parameters
363363 ----------
364364 values : ndarray[{{dtype}}]
365365 Array of values of which unique will be calculated
366- return_inverse : boolean
366+ return_inverse : boolean, default False
367367 Whether the mapping of the original array values to their location
368368 in the vector of uniques should be returned.
369369
@@ -376,14 +376,12 @@ cdef class {{name}}HashTable(HashTable):
376376 """
377377 uniques = {{name}}Vector()
378378 # explicitly compile path without inverse for performance
379- # the last three arguments are not relevant for this method, but we
380- # don't use kwargs to avoid cython perf hit (just using default values)
381379 if return_inverse:
382- return self._unique_with_inverse(values, uniques, 0, -1, None )
383- return self._unique_no_inverse(values, uniques, 0, -1, None )
380+ return self._unique_with_inverse(values, uniques)
381+ return self._unique_no_inverse(values, uniques)
384382
385- def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel,
386- object na_value):
383+ def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel=-1 ,
384+ object na_value=None ):
387385 """
388386 Calculate unique values and labels (no sorting!)
389387
@@ -427,8 +425,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
427425 @cython.boundscheck(False)
428426 @cython.wraparound(False)
429427 def {{func_name}}(self, const {{dtype}}_t[:] values,
430- {{name}}Vector uniques, Py_ssize_t count_prior,
431- Py_ssize_t na_sentinel, object na_value):
428+ {{name}}Vector uniques, Py_ssize_t count_prior=0 ,
429+ Py_ssize_t na_sentinel=-1 , object na_value=None ):
432430 """
433431 Calculate unique values and labels (no sorting!)
434432{{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -443,11 +441,11 @@ unique_funcs = [('_unique_no_inverse', False, False),
443441 Array of values of which unique will be calculated
444442 uniques : {{name}}Vector
445443 Vector into which uniques will be written
446- count_prior : Py_ssize_t
444+ count_prior : Py_ssize_t, default 0
447445 Number of existing entries in uniques
448- na_sentinel : Py_ssize_t
446+ na_sentinel : Py_ssize_t, default -1
449447 Sentinel value used for all NA-values in inverse
450- na_value : object
448+ na_value : object, default None
451449 Value to identify as missing. If na_value is None, then
452450 any value "val" satisfying val != val is considered missing.
453451 If na_value is not None, then _additionally_, any value "val"
@@ -727,15 +725,15 @@ cdef class StringHashTable(HashTable):
727725 self.table.vals[k] = i
728726 free(vecs)
729727
730- def unique(self, ndarray[object] values, bint return_inverse):
728+ def unique(self, ndarray[object] values, bint return_inverse=False ):
731729 """
732730 Calculate unique values and labels (no sorting!)
733731
734732 Parameters
735733 ----------
736734 values : ndarray[object]
737735 Array of values of which unique will be calculated
738- return_inverse : boolean
736+ return_inverse : boolean, default False
739737 Whether the mapping of the original array values to their location
740738 in the vector of uniques should be returned.
741739
@@ -748,14 +746,12 @@ cdef class StringHashTable(HashTable):
748746 """
749747 uniques = ObjectVector()
750748 # explicitly compile path without inverse for performance
751- # the last three arguments are not relevant for this method, but we
752- # don't use kwargs to avoid cython perf hit (just using default values)
753749 if return_inverse:
754- return self._unique_with_inverse(values, uniques, 0, -1, None )
755- return self._unique_no_inverse(values, uniques, 0, -1, None )
750+ return self._unique_with_inverse(values, uniques)
751+ return self._unique_no_inverse(values, uniques)
756752
757- def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel,
758- object na_value):
753+ def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1 ,
754+ object na_value=None ):
759755 """
760756 Calculate unique values and labels (no sorting!)
761757
@@ -799,8 +795,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
799795 @cython.boundscheck(False)
800796 @cython.wraparound(False)
801797 def {{func_name}}(self, ndarray[object] values, ObjectVector uniques,
802- Py_ssize_t count_prior, Py_ssize_t na_sentinel,
803- object na_value):
798+ Py_ssize_t count_prior=0 , Py_ssize_t na_sentinel=-1 ,
799+ object na_value=None ):
804800 """
805801 Calculate unique values and labels (no sorting!)
806802{{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -815,11 +811,11 @@ unique_funcs = [('_unique_no_inverse', False, False),
815811 Array of values of which unique will be calculated
816812 uniques : ObjectVector
817813 Vector into which uniques will be written
818- count_prior : Py_ssize_t
814+ count_prior : Py_ssize_t, default 0
819815 Number of existing entries in uniques
820- na_sentinel : Py_ssize_t
816+ na_sentinel : Py_ssize_t, default -1
821817 Sentinel value used for all NA-values in inverse
822- na_value : object
818+ na_value : object, default None
823819 Value to identify as missing. If na_value is None, then any value
824820 that is not a string is considered missing. If na_value is
825821 not None, then _additionally_ any value "val" satisfying
@@ -1002,15 +998,15 @@ cdef class PyObjectHashTable(HashTable):
1002998
1003999 return np.asarray(locs)
10041000
1005- def unique(self, ndarray[object] values, bint return_inverse):
1001+ def unique(self, ndarray[object] values, bint return_inverse=False ):
10061002 """
10071003 Calculate unique values and labels (no sorting!)
10081004
10091005 Parameters
10101006 ----------
10111007 values : ndarray[object]
10121008 Array of values of which unique will be calculated
1013- return_inverse : boolean
1009+ return_inverse : boolean, default False
10141010 Whether the mapping of the original array values to their location
10151011 in the vector of uniques should be returned.
10161012
@@ -1023,14 +1019,12 @@ cdef class PyObjectHashTable(HashTable):
10231019 """
10241020 uniques = ObjectVector()
10251021 # explicitly compile path without inverse for performance
1026- # the last three arguments are not relevant for this method, but we
1027- # don't use kwargs to avoid cython perf hit (just using default values)
10281022 if return_inverse:
1029- return self._unique_with_inverse(values, uniques, 0, -1, None )
1030- return self._unique_no_inverse(values, uniques, 0, -1, None )
1023+ return self._unique_with_inverse(values, uniques)
1024+ return self._unique_no_inverse(values, uniques)
10311025
1032- def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel,
1033- object na_value):
1026+ def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1 ,
1027+ object na_value=None ):
10341028 """
10351029 Calculate unique values and labels (no sorting!)
10361030
@@ -1074,8 +1068,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
10741068 @cython.boundscheck(False)
10751069 @cython.wraparound(False)
10761070 def {{func_name}}(self, ndarray[object] values, ObjectVector uniques,
1077- Py_ssize_t count_prior, Py_ssize_t na_sentinel,
1078- object na_value):
1071+ Py_ssize_t count_prior=0 , Py_ssize_t na_sentinel=-1 ,
1072+ object na_value=None ):
10791073 """
10801074 Calculate unique values and labels (no sorting!)
10811075{{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -1090,9 +1084,9 @@ unique_funcs = [('_unique_no_inverse', False, False),
10901084 Array of values of which unique will be calculated
10911085 uniques : ObjectVector
10921086 Vector into which uniques will be written
1093- count_prior : Py_ssize_t
1087+ count_prior : Py_ssize_t, default 0
10941088 Number of existing entries in uniques
1095- na_sentinel : Py_ssize_t
1089+ na_sentinel : Py_ssize_t, default -1
10961090 Sentinel value used for all NA-values in inverse
10971091 na_value : object, default None
10981092 Value to identify as missing. If na_value is None, then None _plus_
0 commit comments