33constructors before passing them to a BlockManager.
44"""
55from collections import abc
6- from typing import Tuple
6+ from typing import Dict , List , Optional , Tuple , Union
77
88import numpy as np
99import numpy .ma as ma
1010
1111from pandas ._libs import lib
12+ from pandas ._typing import Axis , Dtype , Scalar
1213
1314from pandas .core .dtypes .cast import (
1415 construct_1d_arraylike_from_scalar ,
@@ -522,29 +523,38 @@ def to_arrays(data, columns, coerce_float=False, dtype=None):
522523 return _list_to_arrays (data , columns , coerce_float = coerce_float , dtype = dtype )
523524
524525
def _list_to_arrays(
    data: List[Scalar],
    columns: Union[Index, List],
    coerce_float: bool = False,
    dtype: Optional[Dtype] = None,
) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
    """
    Turn a list of records (tuples or lists) into per-column arrays.

    Parameters
    ----------
    data : list of tuples or list of lists
    columns : Index, list or None
        Passed to ``_validate_or_indexify_columns`` together with the
        extracted content.
    coerce_float : bool, default False
        Forwarded to ``_convert_object_array``.
    dtype : dtype, optional
        Forwarded to ``_convert_object_array``.

    Returns
    -------
    tuple
        ``(result, columns)``: the output of ``_convert_object_array`` and
        the output of ``_validate_or_indexify_columns``.

    Raises
    ------
    ValueError
        When either helper raises AssertionError (gh-26429); the original
        error is chained as the cause.
    """
    # Only the first record is inspected to pick the tuple-specific routine.
    records_are_tuples = len(data) > 0 and isinstance(data[0], tuple)
    if records_are_tuples:
        object_matrix = lib.to_object_array_tuples(data)
    else:
        # list of lists
        object_matrix = lib.to_object_array(data)
    content = list(object_matrix.T)

    # gh-26429 do not raise user-facing AssertionError
    try:
        columns = _validate_or_indexify_columns(content, columns)
        result = _convert_object_array(
            content, dtype=dtype, coerce_float=coerce_float
        )
    except AssertionError as err:
        raise ValueError(err) from err
    return result, columns
539544
540545
541- def _list_of_series_to_arrays (data , columns , coerce_float = False , dtype = None ):
546+ def _list_of_series_to_arrays (
547+ data : List ,
548+ columns : Union [Index , List ],
549+ coerce_float : bool = False ,
550+ dtype : Optional [Dtype ] = None ,
551+ ) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
542552 if columns is None :
543553 # We know pass_data is non-empty because data[0] is a Series
544554 pass_data = [x for x in data if isinstance (x , (ABCSeries , ABCDataFrame ))]
545555 columns = get_objs_combined_axis (pass_data , sort = False )
546556
547- indexer_cache = {}
557+ indexer_cache : Dict [ int , Scalar ] = {}
548558
549559 aligned_values = []
550560 for s in data :
@@ -564,14 +574,19 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None):
564574
565575 if values .dtype == np .object_ :
566576 content = list (values .T )
567- return _convert_object_array (
568- content , columns , dtype = dtype , coerce_float = coerce_float
569- )
577+ columns = _validate_or_indexify_columns ( content , columns )
578+ content = _convert_object_array ( content , dtype = dtype , coerce_float = coerce_float )
579+ return content , columns
570580 else :
571581 return values .T , columns
572582
573583
574- def _list_of_dict_to_arrays (data , columns , coerce_float = False , dtype = None ):
584+ def _list_of_dict_to_arrays (
585+ data : List ,
586+ columns : Union [Index , List ],
587+ coerce_float : bool = False ,
588+ dtype : Optional [Dtype ] = None ,
589+ ) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
575590 """
576591 Convert list of dicts to numpy arrays
577592
@@ -603,22 +618,85 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None):
603618 data = [(type (d ) is dict ) and d or dict (d ) for d in data ]
604619
605620 content = list (lib .dicts_to_array (data , list (columns )).T )
606- return _convert_object_array (
607- content , columns , dtype = dtype , coerce_float = coerce_float
608- )
621+ columns = _validate_or_indexify_columns ( content , columns )
622+ content = _convert_object_array ( content , dtype = dtype , coerce_float = coerce_float )
623+ return content , columns
609624
610625
611- def _convert_object_array (content , columns , coerce_float = False , dtype = None ):
626+ def _validate_or_indexify_columns (
627+ content : List , columns : Union [Index , List , None ]
628+ ) -> Union [Index , List [Axis ]]:
629+ """
630+ If columns is None, make numbers as column names; Otherwise, validate that
631+ columns have valid length.
632+
633+ Parameters
634+ ----------
635+ content: list of data
636+ columns: Iterable or None
637+
638+ Returns
639+ -------
640+ columns: If columns is Iterable, return as is; If columns is None, assign
641+ positional column index value as columns.
642+
643+ Raises
644+ ------
645+ 1. AssertionError when content is not composed of list of lists, and if
646+ length of columns is not equal to length of content.
647+ 2. ValueError when content is list of lists, but length of each sub-list
648+ is not equal
649+ 3. ValueError when content is list of lists, but length of sub-list is
650+ not equal to length of content
651+ """
612652 if columns is None :
613653 columns = ibase .default_index (len (content ))
614654 else :
615- if len (columns ) != len (content ): # pragma: no cover
655+
656+ # Add mask for data which is composed of list of lists
657+ is_mi_list = isinstance (columns , list ) and all (
658+ isinstance (col , list ) for col in columns
659+ )
660+
661+ if not is_mi_list and len (columns ) != len (content ): # pragma: no cover
616662 # caller's responsibility to check for this...
617663 raise AssertionError (
618664 f"{ len (columns )} columns passed, passed data had "
619665 f"{ len (content )} columns"
620666 )
667+ elif is_mi_list :
668+
669+ # check if nested list column, length of each sub-list should be equal
670+ if len ({len (col ) for col in columns }) > 1 :
671+ raise ValueError (
672+ "Length of columns passed for MultiIndex columns is different"
673+ )
674+
675+ # if columns is not empty and length of sublist is not equal to content
676+ elif columns and len (columns [0 ]) != len (content ):
677+ raise ValueError (
678+ f"{ len (columns [0 ])} columns passed, passed data had "
679+ f"{ len (content )} columns"
680+ )
681+ return columns
682+
683+
684+ def _convert_object_array (
685+ content : List [Scalar ], coerce_float : bool = False , dtype : Optional [Dtype ] = None
686+ ) -> List [Scalar ]:
687+ """
688+ Internal function to convert object array.
689+
690+ Parameters
691+ ----------
692+ content: list of processed data records
693+ coerce_float: bool, to coerce floats or not, default is False
694+ dtype: np.dtype, default is None
621695
696+ Returns
697+ -------
698+ arrays: casted content if not object dtype, otherwise return as is in list.
699+ """
622700 # provide soft conversion of object dtypes
623701 def convert (arr ):
624702 if dtype != object and dtype != np .object :
@@ -628,7 +706,7 @@ def convert(arr):
628706
629707 arrays = [convert (arr ) for arr in content ]
630708
631- return arrays , columns
709+ return arrays
632710
633711
634712# ---------------------------------------------------------------------
0 commit comments