diff --git a/environment_unix.yml b/environment_unix.yml
index 22176c72e9..535f15046a 100644
--- a/environment_unix.yml
+++ b/environment_unix.yml
@@ -56,7 +56,7 @@ dependencies:
   - python =3.10 # Python 3.11 requires protobuf>=4
   - packaging
   - numpy
-  - pandas < 2
+  - pandas
   - pytest
   - boto3
   - werkzeug
diff --git a/python/arcticdb/version_store/_normalization.py b/python/arcticdb/version_store/_normalization.py
index dd0148260f..6de7c53b9f 100644
--- a/python/arcticdb/version_store/_normalization.py
+++ b/python/arcticdb/version_store/_normalization.py
@@ -726,10 +726,10 @@ def denormalize(self, item, norm_meta):
             if key in data:
                 category_info = list(norm_meta.common.categories[key].category)
                 codes = data[key]
-                # `pd.Categorical.from_codes` from `pandas~=0.25.x` (pandas' supported version for python 3.6)
-                # does not support `codes` of `dtype=object`: it has to have an integral dtype.
-                # See: https://github.com/pandas-dev/pandas/blob/0.25.x/pandas/core/arrays/categorical.py#L688-L704
                 if IS_PANDAS_ZERO:
+                    # `pd.Categorical.from_codes` from `pandas~=0.25.x` (pandas' supported version for python 3.6)
+                    # does not support `codes` of `dtype=object`: it has to have an integral dtype.
+                    # See: https://github.com/pandas-dev/pandas/blob/0.25.x/pandas/core/arrays/categorical.py#L688-L704
                     codes = np.asarray(codes, dtype=int)
                 df[key] = pd.Categorical.from_codes(codes=codes, categories=category_info)
         for key in norm_meta.common.int_categories:
diff --git a/python/tests/scripts/test_update_storage.py b/python/tests/scripts/test_update_storage.py
index f93b01345b..c9c2d3a6f2 100644
--- a/python/tests/scripts/test_update_storage.py
+++ b/python/tests/scripts/test_update_storage.py
@@ -2,13 +2,13 @@
 import pytest
 import pandas as pd
-from pandas.util.testing import assert_frame_equal
 
 from arcticdb import Arctic
 from arcticdb.scripts.update_storage import run
 from arcticdb.options import LibraryOptions
 from arcticc.pb2.s3_storage_pb2 import Config as S3Config
 from arcticc.pb2.azure_storage_pb2 import Config as AzureConfig
+from arcticdb.util.test import assert_frame_equal
 
 from arcticdb.adapters.s3_library_adapter import USE_AWS_CRED_PROVIDERS_TOKEN
 from arcticdb_ext.tools import AZURE_SUPPORT
 
diff --git a/python/tests/stress/arcticdb/version_store/test_stress_multicolumn.py b/python/tests/stress/arcticdb/version_store/test_stress_multicolumn.py
index f08c9c5d3d..086366f046 100644
--- a/python/tests/stress/arcticdb/version_store/test_stress_multicolumn.py
+++ b/python/tests/stress/arcticdb/version_store/test_stress_multicolumn.py
@@ -10,10 +10,8 @@
 import pandas as pd
 from pandas.tseries.offsets import MonthBegin
 import pytest
-import sys
 
 from arcticdb.util.test import assert_frame_equal
-from arcticdb.util._versions import IS_PANDAS_TWO
 from arcticdb_ext.tools import AZURE_SUPPORT
 
 
@@ -82,9 +80,7 @@ def test_stress_multicolumn(lib_type, request):
     output_df = lib.read(name).data
     print("reading from arctic native: {}".format(pd.Timestamp("now") - now))
 
-    if IS_PANDAS_TWO and test_data.empty:
-        # In Pandas 2.0, RangeIndex is used by default when an empty dataframe or series is created.
-        # The index has to be converted to a DatetimeIndex by ArcticDB to perform updates.
-        test_data.index = test_data.index.astype("datetime64[ns]")
-
-    assert_frame_equal(test_data, output_df)
+    # ArcticDB stores empty columns under a dedicated `EMPTYVAL` type, so the types are not going
+    # to match with pandas until the first append.
+    is_not_empty = not test_data.empty
+    assert_frame_equal(test_data, output_df, check_dtype=is_not_empty, check_index_type=is_not_empty)
diff --git a/setup.cfg b/setup.cfg
index c180c7ce4a..d010b51816 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,7 +35,7 @@ install_requires =
     #
     # See: https://conda-forge.org/feedstock-outputs/
     numpy
-    pandas <2
+    pandas
     attrs
     dataclasses ; python_version < '3.7'
     grpcio-tools
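Note on the `_normalization.py` hunk above: the comment being moved explains why the `IS_PANDAS_ZERO` branch casts the category codes before calling `pd.Categorical.from_codes`. The snippet below is a minimal, hypothetical sketch of that cast with invented values (not taken from the repository); pandas ~0.25 rejects `codes` without an integral dtype, while on newer pandas the cast is a harmless no-op for integer data.

```python
import numpy as np
import pandas as pd

# Hypothetical data for illustration: category codes that arrive with dtype=object.
categories = ["red", "green", "blue"]
codes = np.array([0, 2, 1, 0], dtype=object)

# The cast guarded by IS_PANDAS_ZERO in the hunk above: old pandas (~0.25) requires an
# integral dtype for `codes`; the cast does not change behaviour on newer pandas.
codes = np.asarray(codes, dtype=int)

cat = pd.Categorical.from_codes(codes=codes, categories=categories)
print(cat)  # ['red', 'blue', 'green', 'red'], Categories: ['red', 'green', 'blue']
```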