|
30 | 30 | from pandas._config import ( |
31 | 31 | config, |
32 | 32 | get_option, |
| 33 | + using_pyarrow_string_dtype, |
33 | 34 | ) |
34 | 35 |
|
35 | 36 | from pandas._libs import ( |
36 | 37 | lib, |
37 | 38 | writers as libwriters, |
38 | 39 | ) |
| 40 | +from pandas._libs.lib import is_string_array |
39 | 41 | from pandas._libs.tslibs import timezones |
40 | 42 | from pandas.compat._optional import import_optional_dependency |
41 | 43 | from pandas.compat.pickle_compat import patch_pickle |
|
66 | 68 | ) |
67 | 69 | from pandas.core.dtypes.missing import array_equivalent |
68 | 70 |
|
| 71 | +import pandas as pd |
69 | 72 | from pandas import ( |
70 | 73 | DataFrame, |
71 | 74 | DatetimeIndex, |
@@ -3219,7 +3222,12 @@ def read( |
3219 | 3222 | self.validate_read(columns, where) |
3220 | 3223 | index = self.read_index("index", start=start, stop=stop) |
3221 | 3224 | values = self.read_array("values", start=start, stop=stop) |
3222 | | - return Series(values, index=index, name=self.name, copy=False) |
| 3225 | + result = Series(values, index=index, name=self.name, copy=False) |
| 3226 | + if using_pyarrow_string_dtype() and is_string_array(values, skipna=True): |
| 3227 | + import pyarrow as pa |
| 3228 | + |
| 3229 | + result = result.astype(pd.ArrowDtype(pa.string())) |
| 3230 | + return result |
3223 | 3231 |
|
3224 | 3232 | # error: Signature of "write" incompatible with supertype "Fixed" |
3225 | 3233 | def write(self, obj, **kwargs) -> None: # type: ignore[override] |
@@ -3287,6 +3295,10 @@ def read( |
3287 | 3295 |
|
3288 | 3296 | columns = items[items.get_indexer(blk_items)] |
3289 | 3297 | df = DataFrame(values.T, columns=columns, index=axes[1], copy=False) |
| 3298 | + if using_pyarrow_string_dtype() and is_string_array(values, skipna=True): |
| 3299 | + import pyarrow as pa |
| 3300 | + |
| 3301 | + df = df.astype(pd.ArrowDtype(pa.string())) |
3290 | 3302 | dfs.append(df) |
3291 | 3303 |
|
3292 | 3304 | if len(dfs) > 0: |
@@ -4668,7 +4680,15 @@ def read( |
4668 | 4680 | else: |
4669 | 4681 | # Categorical |
4670 | 4682 | df = DataFrame._from_arrays([values], columns=cols_, index=index_) |
4671 | | - assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype) |
| 4683 | + if not (using_pyarrow_string_dtype() and values.dtype.kind == "O"): |
| 4684 | + assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype) |
| 4685 | + if using_pyarrow_string_dtype() and is_string_array( |
| 4686 | + values, # type: ignore[arg-type] |
| 4687 | + skipna=True, |
| 4688 | + ): |
| 4689 | + import pyarrow as pa |
| 4690 | + |
| 4691 | + df = df.astype(pd.ArrowDtype(pa.string())) |
4672 | 4692 | frames.append(df) |
4673 | 4693 |
|
4674 | 4694 | if len(frames) == 1: |
|
0 commit comments