# Patch summary (commentary only; patch tools skip text before the first
# "diff --git" header).
#
# python/pyarrow/pandas_compat.py
#   * Adds _level_name(name): calls json.dumps(name) purely as a probe and
#     returns name unchanged when that succeeds; when json.dumps raises
#     TypeError it returns str(name) instead.
#   * _get_range_index_descriptor now stores _level_name(level.name) under
#     the 'name' key instead of level.name itself.
#   * NOTE(review): the hunk adds no import, so json is presumably already
#     imported at the top of pandas_compat.py -- confirm.
#
# python/pyarrow/tests/test_pandas.py
#   * test_metadata_index_name_not_json_serializable: a RangeIndex named
#     np.int64(6) must appear in schema.pandas_metadata as the string '6'.
#   * test_metadata_index_name_is_json_serializable: a RangeIndex named 6
#     must appear in schema.pandas_metadata as the int 6.
#
# NOTE(review): this patch was recovered from a copy collapsed onto one
# line. Line breaks follow the hunk headers (11 -> 20 and 6 -> 20 lines);
# the 4-space indentation is assumed -- verify against the target files
# before applying.
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index b94abe4e2e6..efd6c892e7c 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -466,11 +466,20 @@ def _get_index_level(df, name):
     return df.index.get_level_values(key)
 
 
+def _level_name(name):
+    # preserve type when default serializable, otherwise str it
+    try:
+        json.dumps(name)
+        return name
+    except TypeError:
+        return str(name)
+
+
 def _get_range_index_descriptor(level):
     # public start/stop/step attributes added in pandas 0.25.0
     return {
         'kind': 'range',
-        'name': level.name,
+        'name': _level_name(level.name),
         'start': _pandas_api.get_rangeindex_attribute(level, 'start'),
         'stop': _pandas_api.get_rangeindex_attribute(level, 'stop'),
         'step': _pandas_api.get_rangeindex_attribute(level, 'step')
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 5de84b30432..2165627ad08 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -4045,6 +4045,20 @@ def test_metadata_compat_missing_field_name():
     tm.assert_frame_equal(result, expected, check_like=True)
 
 
+def test_metadata_index_name_not_json_serializable():
+    name = np.int64(6)  # not json serializable by default
+    table = pa.table(pd.DataFrame(index=pd.RangeIndex(0, 4, name=name)))
+    metadata = table.schema.pandas_metadata
+    assert metadata['index_columns'][0]['name'] == '6'
+
+
+def test_metadata_index_name_is_json_serializable():
+    name = 6  # json serializable by default
+    table = pa.table(pd.DataFrame(index=pd.RangeIndex(0, 4, name=name)))
+    metadata = table.schema.pandas_metadata
+    assert metadata['index_columns'][0]['name'] == 6
+
+
 def make_df_with_timestamps():
     # Some of the milliseconds timestamps deliberately don't fit in the range
     # that is possible with nanosecond timestamps.