2424from pandas .core import common as com
2525
2626if TYPE_CHECKING :
27+ from collections .abc import Generator
28+
2729 from pandas ._typing import MutableMappingT
2830
2931 from pandas import DataFrame
3032
3133
def create_data_for_split(
    df: DataFrame, are_all_object_dtype_cols: bool, object_dtype_indices: list[int]
) -> Generator[list, None, None]:
    """
    Yield the rows of ``df`` as lists for ``to_dict(orient="split")``.

    Parameters
    ----------
    df : DataFrame
        Frame whose rows are iterated with
        ``itertuples(index=False, name=None)``.
    are_all_object_dtype_cols : bool
        When True, every value of every row is passed through
        ``maybe_box_native``.
    object_dtype_indices : list of int
        Positional column indices whose values are passed through
        ``maybe_box_native`` when ``are_all_object_dtype_cols`` is False.

    Yields
    ------
    list
        One list per row, in row order.
    """
    rows = df.itertuples(index=False, name=None)
    if are_all_object_dtype_cols:
        for tup in rows:
            yield list(map(maybe_box_native, tup))
    elif object_dtype_indices:
        # Only the flagged columns are boxed, after the row has been turned
        # into a list; the remaining values are left as-is for perf.
        for tup in rows:
            data = list(tup)
            for i in object_dtype_indices:
                data[i] = maybe_box_native(data[i])
            yield data
    else:
        # No column needs boxing: this branch is chosen once up front instead
        # of re-testing ``object_dtype_indices`` on every row.
        for tup in rows:
            yield list(tup)
53+
54+
3255@overload
3356def to_dict (
3457 df : DataFrame ,
@@ -152,35 +175,38 @@ def to_dict(
152175 # GH46470 Return quickly if orient series to avoid creating dtype objects
153176 return into_c ((k , v ) for k , v in df .items ())
154177
178+ if orient == "dict" :
179+ return into_c ((k , v .to_dict (into = into )) for k , v in df .items ())
180+
155181 box_native_indices = [
156182 i
157183 for i , col_dtype in enumerate (df .dtypes .values )
158184 if col_dtype == np .dtype (object ) or isinstance (col_dtype , ExtensionDtype )
159185 ]
160- box_na_values = [
161- lib .no_default if not isinstance (col_dtype , BaseMaskedDtype ) else libmissing .NA
162- for i , col_dtype in enumerate (df .dtypes .values )
163- ]
164- are_all_object_dtype_cols = len (box_native_indices ) == len (df .dtypes )
165186
166- if orient == "dict" :
167- return into_c ((k , v .to_dict (into = into )) for k , v in df .items ())
187+ are_all_object_dtype_cols = len (box_native_indices ) == len (df .dtypes )
168188
169- elif orient == "list" :
189+ if orient == "list" :
170190 object_dtype_indices_as_set : set [int ] = set (box_native_indices )
191+ box_na_values = (
192+ lib .no_default
193+ if not isinstance (col_dtype , BaseMaskedDtype )
194+ else libmissing .NA
195+ for col_dtype in df .dtypes .values
196+ )
171197 return into_c (
172198 (
173199 k ,
174- list (map (maybe_box_native , v .to_numpy (na_value = box_na_values [ i ] )))
200+ list (map (maybe_box_native , v .to_numpy (na_value = box_na_value )))
175201 if i in object_dtype_indices_as_set
176202 else list (map (maybe_box_native , v .to_numpy ())),
177203 )
178- for i , (k , v ) in enumerate (df .items ())
204+ for i , (box_na_value , ( k , v )) in enumerate (zip ( box_na_values , df .items () ))
179205 )
180206
181207 elif orient == "split" :
182- data = df . _create_data_for_split_and_tight_to_dict (
183- are_all_object_dtype_cols , box_native_indices
208+ data = list (
209+ create_data_for_split ( df , are_all_object_dtype_cols , box_native_indices )
184210 )
185211
186212 return into_c (
@@ -192,10 +218,6 @@ def to_dict(
192218 )
193219
194220 elif orient == "tight" :
195- data = df ._create_data_for_split_and_tight_to_dict (
196- are_all_object_dtype_cols , box_native_indices
197- )
198-
199221 return into_c (
200222 ((("index" , df .index .tolist ()),) if index else ())
201223 + (
@@ -215,11 +237,9 @@ def to_dict(
215237 elif orient == "records" :
216238 columns = df .columns .tolist ()
217239 if are_all_object_dtype_cols :
218- rows = (
219- dict (zip (columns , row )) for row in df .itertuples (index = False , name = None )
220- )
221240 return [
222- into_c ((k , maybe_box_native (v )) for k , v in row .items ()) for row in rows
241+ into_c (zip (columns , map (maybe_box_native , row )))
242+ for row in df .itertuples (index = False , name = None )
223243 ]
224244 else :
225245 data = [
@@ -235,7 +255,7 @@ def to_dict(
235255 for row in data :
236256 for col in object_dtype_cols :
237257 row [col ] = maybe_box_native (row [col ])
238- return data
258+ return data # type: ignore[return-value]
239259
240260 elif orient == "index" :
241261 if not df .index .is_unique :
@@ -248,24 +268,21 @@ def to_dict(
248268 )
249269 elif box_native_indices :
250270 object_dtype_indices_as_set = set (box_native_indices )
251- is_object_dtype_by_index = [
252- i in object_dtype_indices_as_set for i in range (len (df .columns ))
253- ]
254271 return into_c (
255272 (
256273 t [0 ],
257274 {
258- columns [ i ] : maybe_box_native (v )
259- if is_object_dtype_by_index [ i ]
275+ column : maybe_box_native (v )
276+ if i in object_dtype_indices_as_set
260277 else v
261- for i , v in enumerate (t [1 :])
278+ for i , ( column , v ) in enumerate (zip ( columns , t [1 :]) )
262279 },
263280 )
264281 for t in df .itertuples (name = None )
265282 )
266283 else :
267284 return into_c (
268- (t [0 ], dict (zip (df . columns , t [1 :]))) for t in df .itertuples (name = None )
285+ (t [0 ], dict (zip (columns , t [1 :]))) for t in df .itertuples (name = None )
269286 )
270287
271288 else :
0 commit comments