1+ from datetime import datetime , timedelta
2+ from importlib import reload
3+ import string
4+ import sys
5+
16import numpy as np
27import pytest
38
4- from pandas import NA , Interval , Series , Timestamp , date_range
9+ from pandas ._libs .tslibs import iNaT
10+
11+ from pandas import (
12+ NA ,
13+ Categorical ,
14+ CategoricalDtype ,
15+ Index ,
16+ Interval ,
17+ Series ,
18+ Timedelta ,
19+ Timestamp ,
20+ date_range ,
21+ )
522import pandas ._testing as tm
623
724
class TestAstypeAPI:
    """Checks on how ``Series.astype`` validates its arguments."""

    def test_arg_for_errors_in_astype(self):
        """A bad ``errors`` value raises ValueError; ``"raise"`` is accepted."""
        # see GH#14878
        ints = Series([1, 2, 3])

        expected_msg = (
            r"Expected value of kwarg 'errors' to be one of \['raise', "
            r"'ignore'\]\. Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=expected_msg):
            ints.astype(np.float64, errors=False)

        # a supported value must go through without raising
        ints.astype(np.int8, errors="raise")

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        """A dict-like dtype is accepted only when its sole key is the Series name."""
        # see GH#7271
        ser = Series(range(0, 10, 2), name="abc")

        # mapping keyed by the Series name -> str
        as_str = ser.astype(dtype_class({"abc": str}))
        tm.assert_series_equal(
            as_str, Series(["0", "2", "4", "6", "8"], name="abc")
        )

        # mapping keyed by the Series name -> float64
        as_float = ser.astype(dtype_class({"abc": "float64"}))
        tm.assert_series_equal(
            as_float,
            Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc"),
        )

        msg = (
            "Only the Series name can be used for the key in Series dtype "
            r"mappings\."
        )

        # GH#16717
        # if dtypes provided is empty, it should error
        if dtype_class is Series:
            empty_mapping = dtype_class({}, dtype=object)
        else:
            empty_mapping = dtype_class({})

        invalid_mappings = [
            dtype_class({"abc": str, "def": str}),  # extra key
            dtype_class({0: str}),  # key is not the Series name
            empty_mapping,
        ]
        for mapping in invalid_mappings:
            with pytest.raises(KeyError, match=msg):
                ser.astype(mapping)
75+
76+
877class TestAstype :
@pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
def test_astype_generic_timestamp_no_frequency(self, dtype, request):
    """Casting to a unit-less datetime64/timedelta64 dtype raises ValueError.

    The expected message names the dtype and suggests the ``[ns]`` variant.
    """
    # see GH#15524, GH#15987
    ser = Series([1])

    if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
        # np.dtype() did not report the bare unit-less name; mark the case
        # as an expected failure (see the reason string).
        mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
        request.node.add_marker(mark)

    type_name = dtype.__name__
    # No padding inside the quotes: the dtype name must sit directly between
    # them, and every regex metacharacter in the message is escaped.
    msg = (
        fr"The '{type_name}' dtype has no unit\. "
        fr"Please pass in '{type_name}\[ns\]' instead\."
    )
    with pytest.raises(ValueError, match=msg):
        ser.astype(dtype)
94+
995 def test_astype_dt64_to_str (self ):
1096 # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
1197 dti = date_range ("2012-01-01" , periods = 3 )
@@ -27,6 +113,87 @@ def test_astype_dt64tz_to_str(self):
27113 )
28114 tm .assert_series_equal (result , expected )
29115
116+ def test_astype_datetime (self ):
117+ s = Series (iNaT , dtype = "M8[ns]" , index = range (5 ))
118+
119+ s = s .astype ("O" )
120+ assert s .dtype == np .object_
121+
122+ s = Series ([datetime (2001 , 1 , 2 , 0 , 0 )])
123+
124+ s = s .astype ("O" )
125+ assert s .dtype == np .object_
126+
127+ s = Series ([datetime (2001 , 1 , 2 , 0 , 0 ) for i in range (3 )])
128+
129+ s [1 ] = np .nan
130+ assert s .dtype == "M8[ns]"
131+
132+ s = s .astype ("O" )
133+ assert s .dtype == np .object_
134+
def test_astype_datetime64tz(self):
    """Casts between tz-aware datetime64, object, and other tz-aware dtypes."""
    eastern = Series(date_range("20130101", periods=3, tz="US/Eastern"))
    raw = eastern.values

    # astype
    tm.assert_series_equal(
        eastern.astype(object), Series(eastern.astype(object), dtype=object)
    )

    # localizing the raw values as UTC and converting back reproduces the input
    localized = Series(raw).dt.tz_localize("UTC")
    tm.assert_series_equal(localized.dt.tz_convert(eastern.dt.tz), eastern)

    # astype - object, preserves on construction
    tm.assert_series_equal(Series(eastern.astype(object)), eastern.astype(object))

    # astype - datetime64[ns, tz]
    for target in ("datetime64[ns, US/Eastern]", eastern.dtype):
        tm.assert_series_equal(Series(raw).astype(target), eastern)

    # casting to another timezone
    in_cet = eastern.astype("datetime64[ns, CET]")
    expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
    tm.assert_series_equal(in_cet, expected)
161+
162+ def test_astype_str_cast_dt64 (self ):
163+ # see GH#9757
164+ ts = Series ([Timestamp ("2010-01-04 00:00:00" )])
165+ s = ts .astype (str )
166+
167+ expected = Series (["2010-01-04" ])
168+ tm .assert_series_equal (s , expected )
169+
170+ ts = Series ([Timestamp ("2010-01-04 00:00:00" , tz = "US/Eastern" )])
171+ s = ts .astype (str )
172+
173+ expected = Series (["2010-01-04 00:00:00-05:00" ])
174+ tm .assert_series_equal (s , expected )
175+
176+ def test_astype_str_cast_td64 (self ):
177+ # see GH#9757
178+
179+ td = Series ([Timedelta (1 , unit = "d" )])
180+ ser = td .astype (str )
181+
182+ expected = Series (["1 days" ])
183+ tm .assert_series_equal (ser , expected )
184+
185+ def test_dt64_series_astype_object (self ):
186+ dt64ser = Series (date_range ("20130101" , periods = 3 ))
187+ result = dt64ser .astype (object )
188+ assert isinstance (result .iloc [0 ], datetime )
189+ assert result .dtype == np .object_
190+
191+ def test_td64_series_astype_object (self ):
192+ tdser = Series (["59 Days" , "59 Days" , "NaT" ], dtype = "timedelta64[ns]" )
193+ result = tdser .astype (object )
194+ assert isinstance (result .iloc [0 ], timedelta )
195+ assert result .dtype == np .object_
196+
30197 @pytest .mark .parametrize (
31198 "values" ,
32199 [
@@ -70,3 +237,122 @@ def test_astype_to_str_preserves_na(self, value, string_value):
70237 result = s .astype (str )
71238 expected = Series (["a" , "b" , string_value ], dtype = object )
72239 tm .assert_series_equal (result , expected )
240+
@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
def test_astype(self, dtype):
    """Casting to a numeric dtype sets that dtype and keeps the Series name."""
    original = Series(np.random.randn(5), name="foo")
    converted = original.astype(dtype)

    assert converted.dtype == dtype
    assert converted.name == original.name
248+
@pytest.mark.parametrize("value", [np.nan, np.inf])
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
def test_astype_cast_nan_inf_int(self, dtype, value):
    """Casting NaN or inf to an integer dtype raises ValueError."""
    # gh-14265: check NaN and inf raise error when converting to int
    # The parentheses are literal text in the message, so they are escaped
    # and must sit directly against the surrounding words.
    msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
    ser = Series([value])

    with pytest.raises(ValueError, match=msg):
        ser.astype(dtype)
258+
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
def test_astype_cast_object_int_fail(self, dtype):
    """Non-numeric strings cannot be cast to an integer dtype."""
    words = Series(["car", "house", "tree", "1"])
    expected_msg = r"invalid literal for int\(\) with base 10: 'car'"
    with pytest.raises(ValueError, match=expected_msg):
        words.astype(dtype)
265+
266+ def test_astype_cast_object_int (self ):
267+ arr = Series (["1" , "2" , "3" , "4" ], dtype = object )
268+ result = arr .astype (int )
269+
270+ tm .assert_series_equal (result , Series (np .arange (1 , 5 )))
271+
def test_astype_unicode(self):
    """``astype("unicode")`` matches mapping ``str`` over the values."""
    # see GH#7758
    test_series = [
        Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
        Series(["データーサイエンス、お前はもう死んでいる"]),
    ]

    # The bytes case is only added when the default encoding is utf-8.
    if sys.getdefaultencoding() == "utf-8":
        test_series.append(Series(["野菜食べないとやばい".encode()]))

    for ser in test_series:
        result = ser.astype("unicode")
        expected = ser.map(str)
        tm.assert_series_equal(result, expected)

    # No encoding restoration is needed: this test never changes the
    # interpreter's default encoding.
295+
296+
class TestAstypeCategorical:
    """``Series.astype`` with categorical target dtypes."""

    def test_astype_categoricaldtype(self):
        """Casting strings to a CategoricalDtype honours categories and order."""
        ser = Series(["a", "b", "a"])

        # categories equal to the observed values, ordered then unordered
        for ordered in (True, False):
            result = ser.astype(CategoricalDtype(["a", "b"], ordered=ordered))
            expected = Series(Categorical(["a", "b", "a"], ordered=ordered))
            tm.assert_series_equal(result, expected)

        # a category that never occurs in the data is still kept
        wider = ["a", "b", "c"]
        result = ser.astype(CategoricalDtype(wider, ordered=False))
        expected = Series(Categorical(["a", "b", "a"], categories=wider, ordered=False))
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.cat.categories, Index(wider))

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        """Categorical-to-categorical casts across every ordered/unordered pairing."""
        # GH#10696, GH#18593
        values = list("abcaacbab")
        source_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        ser = Series(values, dtype=source_dtype, name=name)

        # unspecified categories
        target = CategoricalDtype(ordered=dtype_ordered)
        result = ser.astype(target)
        kept = CategoricalDtype(source_dtype.categories, dtype_ordered)
        tm.assert_series_equal(result, Series(values, name=name, dtype=kept))

        # different categories
        target = CategoricalDtype(list("adc"), dtype_ordered)
        result = ser.astype(target)
        tm.assert_series_equal(result, Series(values, name=name, dtype=target))

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            tm.assert_series_equal(ser.astype("category"), ser)

    def test_astype_bool_missing_to_categorical(self):
        """Object-dtype booleans with a missing value cast to categorical."""
        # GH-19182
        ser = Series([True, False, np.nan])
        assert ser.dtypes == np.object_

        categories = [True, False]
        result = ser.astype(CategoricalDtype(categories=categories))
        expected = Series(Categorical([True, False, np.nan], categories=categories))
        tm.assert_series_equal(result, expected)

    def test_astype_categories_raises(self):
        """``categories``/``ordered`` keywords to ``astype`` raise TypeError."""
        # deprecated GH#17636, removed in GH#27141
        ser = Series(["a", "b", "a"])
        with pytest.raises(TypeError, match="got an unexpected"):
            ser.astype("category", categories=["a", "b"], ordered=True)
0 commit comments