22import warnings
33import numpy as np
44
5+ from pandas .compat import u
56from pandas .core .dtypes .generic import ABCSeries , ABCIndexClass
7+ from pandas .util ._decorators import cache_readonly
68from pandas .compat import set_function_name
79from pandas .api .types import (is_integer , is_scalar , is_float ,
810 is_float_dtype , is_integer_dtype ,
1214from pandas .core .dtypes .base import ExtensionDtype
1315from pandas .core .dtypes .dtypes import registry
1416from pandas .core .dtypes .missing import isna , notna
15-
16- # available dtypes
17- _integer_dtypes = ['int8' , 'int16' , 'int32' , 'int64' ]
18- _integer_formatter = lambda x : x .capitalize ()
19- _unsigned_dtypes = ['uint8' , 'uint16' , 'uint32' , 'uint64' ]
20- _unsigned_formatter = lambda x : "{}{}" .format (x [0 ].upper (), x [1 :].capitalize ())
17+ from pandas .io .formats .printing import (
18+ format_object_summary , format_object_attrs , default_pprint )
2119
2220
2321class IntegerDtype (ExtensionDtype ):
2422 type = None
2523 na_value = np .nan
26- kind = 'i'
27- is_integer = True
28- is_signed_integer = True
29- is_unsigned_integer = False
24+
25+ @cache_readonly
26+ def is_signed_integer (self ):
27+ return self .kind == 'i'
28+
29+ @cache_readonly
30+ def is_unsigned_integer (self ):
31+ return self .kind == 'u'
32+
33+ @cache_readonly
34+ def numpy_dtype (self ):
35+ """ Return an instance of our numpy dtype """
36+ return np .dtype (self .type )
37+
38+ @cache_readonly
39+ def kind (self ):
40+ return self .numpy_dtype .kind
41+
42+ @classmethod
43+ def construct_array_type (cls , array ):
44+ """Return the array type associated with this dtype
45+
46+ Parameters
47+ ----------
48+ array : value array
49+
50+ Returns
51+ -------
52+ type
53+ """
54+ return IntegerArray
3055
3156 @classmethod
3257 def construct_from_string (cls , string ):
@@ -40,12 +65,6 @@ def construct_from_string(cls, string):
4065 "'{}'" .format (cls , string ))
4166
4267
43- class UnsignedIntegerDtype (IntegerDtype ):
44- kind = 'u'
45- is_signed_integer = False
46- is_unsigned_integer = True
47-
48-
4968def to_integer_array (values ):
5069 """
5170 Parameters
@@ -61,13 +80,14 @@ def to_integer_array(values):
6180 TypeError if incompatible types
6281 """
6382 values = np .array (values , copy = False )
64- kind = 'UInt' if values .dtype .kind == 'u' else 'Int'
65- array_type = "{}{}Array" .format (kind , values .dtype .itemsize * 8 )
6683 try :
67- array_type = getattr (module , array_type )
68- except AttributeError :
84+ dtype = _dtypes [str (values .dtype )]
85+ except KeyError :
86+ if is_float_dtype (values ):
87+ return IntegerArray (values )
88+
6989 raise TypeError ("Incompatible dtype for {}" .format (values .dtype ))
70- return array_type (values , copy = False )
90+ return IntegerArray (values , dtype = dtype , copy = False )
7191
7292
7393def coerce_to_array (values , dtype , mask = None , copy = False ):
@@ -86,6 +106,14 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
86106 -------
87107 tuple of (values, mask)
88108 """
109+
110+ if isinstance (values , IntegerArray ):
111+ values , mask = values .data , values .mask
112+ if copy :
113+ values = values .copy ()
114+ mask = mask .copy ()
115+ return values , mask
116+
89117 values = np .array (values , copy = copy )
90118 if is_object_dtype (values ):
91119 inferred_type = infer_dtype (values )
@@ -112,14 +140,23 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
112140 if is_object_dtype (values ):
113141 mask |= isna (values )
114142
143+ # infer dtype if needed
144+ if dtype is None :
145+ if is_integer_dtype (values ):
146+ dtype = values .dtype
147+ else :
148+ dtype = np .dtype ('int64' )
149+ else :
150+ dtype = dtype .type
151+
115152 # we copy as need to coerce here
116153 if mask .any ():
117154 values = values .copy ()
118155 values [mask ] = 1
119156
120- values = values .astype (dtype . type )
157+ values = values .astype (dtype )
121158 else :
122- values = values .astype (dtype . type , copy = False )
159+ values = values .astype (dtype , copy = False )
123160
124161 return values , mask
125162
@@ -131,26 +168,30 @@ class IntegerArray(ExtensionArray):
131168 - mask: a boolean array holding a mask on the data, True is missing
132169 """
133170
134- dtype = None
171+ @cache_readonly
172+ def dtype (self ):
173+ return _dtypes [str (self .data .dtype )]
135174
136- def __init__ (self , values , mask = None , copy = False ):
175+ def __init__ (self , values , mask = None , dtype = None , copy = False ):
137176 self .data , self .mask = coerce_to_array (
138- values , dtype = self . dtype , mask = mask , copy = copy )
177+ values , dtype = dtype , mask = mask , copy = copy )
139178
140179 @classmethod
141- def _from_sequence (cls , scalars , mask = None , copy = False ):
142- return cls (scalars , mask = mask , copy = copy )
180+ def _from_sequence (cls , scalars , mask = None , dtype = None , copy = False ):
181+ return cls (scalars , mask = mask , dtype = dtype , copy = copy )
143182
144183 @classmethod
145184 def _from_factorized (cls , values , original ):
146- return cls (values )
185+ return cls (values , dtype = original . dtype )
147186
148187 def __getitem__ (self , item ):
149188 if is_integer (item ):
150189 if self .mask [item ]:
151190 return self .dtype .na_value
152191 return self .data [item ]
153- return type (self )(self .data [item ], mask = self .mask [item ])
192+ return type (self )(self .data [item ],
193+ mask = self .mask [item ],
194+ dtype = self .dtype )
154195
155196 def _coerce_to_ndarray (self ):
156197 """ coerce to an ndarray, preserving my scalar types """
@@ -205,12 +246,12 @@ def take(self, indexer, allow_fill=False, fill_value=None):
205246 result [fill_mask ] = fill_value
206247 mask = mask ^ fill_mask
207248
208- return self . _from_sequence (result , mask = mask )
249+ return type ( self ) (result , mask = mask , dtype = self . dtype )
209250
210251 def copy (self , deep = False ):
211252 if deep :
212253 return type (self )(
213- self .data .copy (), mask = self .mask .copy ())
254+ self .data .copy (), mask = self .mask .copy (), dtype = self . dtype )
214255 return type (self )(self )
215256
216257 def __setitem__ (self , key , value ):
@@ -230,11 +271,23 @@ def __len__(self):
230271 return len (self .data )
231272
232273 def __repr__ (self ):
274+ """
275+ Return a string representation for this object.
276+
277+ Invoked by repr(obj). Yields a Unicode String in both
278+ py2/py3.
279+ """
280+ klass = self .__class__ .__name__
281+ data = format_object_summary (self , default_pprint , False )
282+ attrs = format_object_attrs (self )
283+ space = " "
233284
234- formatted = self ._formatting_values ()
235- return '{}({})' .format (
236- self .__class__ .__name__ ,
237- formatted .tolist ())
285+ prepr = (u (",%s" ) %
286+ space ).join (u ("%s=%s" ) % (k , v ) for k , v in attrs )
287+
288+ res = u ("%s(%s%s)" ) % (klass , data , prepr )
289+
290+ return res
238291
239292 @property
240293 def nbytes (self ):
@@ -251,7 +304,7 @@ def _na_value(self):
251304 def _concat_same_type (cls , to_concat ):
252305 data = np .concatenate ([x .data for x in to_concat ])
253306 mask = np .concatenate ([x .mask for x in to_concat ])
254- return cls (data , mask = mask )
307+ return cls (data , mask = mask , dtype = to_concat [ 0 ]. dtype )
255308
256309 def astype (self , dtype , copy = True ):
257310 """Cast to a NumPy array or IntegerArray with 'dtype'.
@@ -269,7 +322,22 @@ def astype(self, dtype, copy=True):
269322 -------
270323 array : ndarray or IntegerArray
271324 NumPy ndarray with 'dtype' for its dtype, or an IntegerArray if 'dtype' is an IntegerDtype.
325+
326+ Raises
327+ ------
328+ TypeError
329+ if incompatible type with an IntegerDtype, equivalent of same_kind
330+ casting
272331 """
332+
333+ # if we are astyping to an existing IntegerDtype we can fastpath
334+ if isinstance (dtype , IntegerDtype ):
335+ result = self .data .astype (dtype .numpy_dtype ,
336+ casting = 'same_kind' , copy = False )
337+ return type (self )(result , mask = self .mask ,
338+ dtype = dtype , copy = False )
339+
340+ # coerce
273341 data = self ._coerce_to_ndarray ()
274342 return data .astype (dtype = dtype , copy = False )
275343
@@ -412,56 +480,37 @@ def integer_arithmetic_method(self, other):
412480 if is_float_dtype (result ):
413481 mask |= (result == np .inf ) | (result == - np .inf )
414482
415- return cls (result , mask = mask )
483+ return cls (result , mask = mask , dtype = self . dtype , copy = False )
416484
417485 name = '__{name}__' .format (name = op .__name__ )
418486 return set_function_name (integer_arithmetic_method , name , cls )
419487
420488
421- class UnsignedIntegerArray ( IntegerArray ):
422- pass
489+ IntegerArray . _add_numeric_methods_binary ()
490+ IntegerArray . _add_comparison_methods_binary ()
423491
424492
425493module = sys .modules [__name__ ]
426494
427495
428496# create the Dtype
429- types = [( _integer_dtypes , IntegerDtype , _integer_formatter ),
430- ( _unsigned_dtypes , UnsignedIntegerDtype , _unsigned_formatter )]
431- for dtypes , superclass , formatter in types :
497+ _dtypes = {}
498+ for dtype in [ 'int8' , 'int16' , 'int32' , 'int64' ,
499+ 'uint8' , 'uint16' , 'uint32' , 'uint64' ] :
432500
433- for dtype in dtypes :
434-
435- name = formatter (dtype )
436- classname = "{}Dtype" .format (name )
437- attributes_dict = {'type' : getattr (np , dtype ),
438- 'name' : name }
439- dtype_type = type (classname , (superclass , ), attributes_dict )
440- setattr (module , classname , dtype_type )
441-
442- # register
443- registry .register (dtype_type )
444-
445-
446- # create the Array
447- types = [(_integer_dtypes , IntegerArray , _integer_formatter ),
448- (_unsigned_dtypes , UnsignedIntegerArray , _unsigned_formatter )]
449- for dtypes , superclass , formatter in types :
450-
451- for dtype in dtypes :
452-
453- dtype_type = getattr (module , "{}Dtype" .format (formatter (dtype )))
454- classname = "{}Array" .format (formatter (dtype ))
455- attributes_dict = {'dtype' : dtype_type ()}
456- array_type = type (classname , (superclass , ), attributes_dict )
457- setattr (module , classname , array_type )
458-
459- # add ops
460- array_type ._add_numeric_methods_binary ()
461- array_type ._add_comparison_methods_binary ()
462-
463- # set the Array type on the Dtype
464- dtype_type .array_type = array_type
501+ if dtype .startswith ('u' ):
502+ name = "U{}" .format (dtype [1 :].capitalize ())
503+ else :
504+ name = dtype .capitalize ()
505+ classname = "{}Dtype" .format (name )
506+ attributes_dict = {'type' : getattr (np , dtype ),
507+ 'name' : name }
508+ dtype_type = type (classname , (IntegerDtype , ), attributes_dict )
509+ setattr (module , classname , dtype_type )
510+
511+ # register
512+ registry .register (dtype_type )
513+ _dtypes [dtype ] = dtype_type ()
465514
466515
467516def make_data ():
0 commit comments