Merged
75 changes: 70 additions & 5 deletions pandas/tests/indexes/categorical/test_category.py
@@ -292,16 +292,81 @@ def test_is_monotonic(self, data, non_lexsorted_data):
assert c.is_monotonic_decreasing is False

def test_has_duplicates(self):

idx = CategoricalIndex([0, 0, 0], name="foo")
assert idx.is_unique is False
assert idx.has_duplicates is True

def test_drop_duplicates(self):
idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
assert idx.is_unique is False
assert idx.has_duplicates is True

idx = CategoricalIndex([0, 0, 0], name="foo")
expected = CategoricalIndex([0], name="foo")
tm.assert_index_equal(idx.drop_duplicates(), expected)
idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
assert idx.is_unique is True
assert idx.has_duplicates is False

@pytest.mark.parametrize(
"data, categories, expected",
[
(
[1, 1, 1],
[1, 2, 3],
{
"first": np.array([False, True, True]),
"last": np.array([True, True, False]),
False: np.array([True, True, True]),
},
),
(
[1, 1, 1],
list("abc"),
{
"first": np.array([False, True, True]),
"last": np.array([True, True, False]),
False: np.array([True, True, True]),
},
),
(
[2, "a", "b"],
list("abc"),
{
"first": np.zeros(shape=(3), dtype=np.bool),
"last": np.zeros(shape=(3), dtype=np.bool),
False: np.zeros(shape=(3), dtype=np.bool),
},
),
(
list("abb"),
list("abc"),
{
"first": np.array([False, False, True]),
"last": np.array([False, True, False]),
False: np.array([False, True, True]),
},
),
],
)
def test_drop_duplicates(self, data, categories, expected):

idx = CategoricalIndex(data, categories=categories, name="foo")
for keep, e in expected.items():
tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e)
e = idx[~e]
result = idx.drop_duplicates(keep=keep)
tm.assert_index_equal(result, e)
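
For context (an illustrative sketch, not part of the diff, assuming a reasonably recent pandas): the first parametrized case exercises how repeated values are flagged relative to the chosen `keep` side, and how `drop_duplicates` is just boolean masking with the inverted `duplicated` result, so unused categories stay on the result's dtype.

import numpy as np
import pandas as pd

idx = pd.CategoricalIndex([1, 1, 1], categories=[1, 2, 3], name="foo")

# duplicated() flags repeats relative to the occurrence that is kept
assert np.array_equal(idx.duplicated(keep="first"), [False, True, True])
assert np.array_equal(idx.duplicated(keep="last"), [True, True, False])
assert np.array_equal(idx.duplicated(keep=False), [True, True, True])

# drop_duplicates keeps only the surviving values but does not prune categories
assert list(idx.drop_duplicates()) == [1]
assert idx.drop_duplicates().categories.tolist() == [1, 2, 3]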

@pytest.mark.parametrize(
"data, categories, expected_data, expected_categories",
[
([1, 1, 1], [1, 2, 3], [1], [1]),
([1, 1, 1], list("abc"), [np.nan], []),
([1, 2, "a"], [1, 2, 3], [1, 2, np.nan], [1, 2]),
([2, "a", "b"], list("abc"), [np.nan, "a", "b"], ["a", "b"]),
],
)
def test_unique(self, data, categories, expected_data, expected_categories):

idx = CategoricalIndex(data, categories=categories)
expected = CategoricalIndex(expected_data, categories=expected_categories)
tm.assert_index_equal(idx.unique(), expected)
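
For context (an illustrative sketch, not part of the diff): values outside `categories` are coerced to NaN when the index is built, and `unique()` additionally prunes unused categories, which is the contrast with `drop_duplicates` above.

import pandas as pd

idx = pd.CategoricalIndex([1, 2, "a"], categories=[1, 2, 3])

print(idx.unique())           # CategoricalIndex([1, 2, NaN], categories=[1, 2], ...)
print(idx.drop_duplicates())  # same three values, but categories [1, 2, 3] are kept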

def test_repr_roundtrip(self):
9 changes: 9 additions & 0 deletions pandas/tests/indexes/conftest.py
@@ -16,3 +16,12 @@ def sort(request):
in the Index setops methods.
"""
return request.param


@pytest.fixture(params=["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"])
def freq_sample(request):
"""
Valid values for the 'freq' parameter used to create date_range and
timedelta_range.
"""
return request.param
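
For context (a hypothetical consumer, not part of this diff): any test that takes `freq_sample` as an argument is collected once per listed frequency, so a single test body covers the whole matrix, including the negative steps, which produce monotonically decreasing ranges. The period tests below keep their own positive-only list, presumably because PeriodIndex frequencies cannot be negative.

import pandas as pd

def test_example(freq_sample):
    # pytest re-runs this once for each value in the fixture's params list
    idx = pd.date_range("2011-01-01", freq=freq_sample, periods=5)
    assert len(idx) == 5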
71 changes: 26 additions & 45 deletions pandas/tests/indexes/datetimes/test_ops.py
@@ -264,9 +264,9 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture)
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
assert ordered.freq is None

def test_drop_duplicates_metadata(self):
def test_drop_duplicates_metadata(self, freq_sample):
# GH 10115
idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
result = idx.drop_duplicates()
tm.assert_index_equal(idx, result)
assert idx.freq == result.freq
@@ -277,57 +277,38 @@ def test_drop_duplicates_metadata(self):
tm.assert_index_equal(idx, result)
assert result.freq is None

def test_drop_duplicates(self):
@pytest.mark.parametrize(
"keep, expected, index",
[
("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
(
False,
np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
np.arange(5, 10),
),
],
)
def test_drop_duplicates(self, freq_sample, keep, expected, index):
# to check Index/Series compat
base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx = base.append(base[:5])
idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
idx = idx.append(idx[:5])

res = idx.drop_duplicates()
tm.assert_index_equal(res, base)
res = Series(idx).drop_duplicates()
tm.assert_series_equal(res, Series(base))
tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
expected = idx[~expected]

res = idx.drop_duplicates(keep="last")
exp = base[5:].append(base[:5])
tm.assert_index_equal(res, exp)
res = Series(idx).drop_duplicates(keep="last")
tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
result = idx.drop_duplicates(keep=keep)
tm.assert_index_equal(result, expected)

res = idx.drop_duplicates(keep=False)
tm.assert_index_equal(res, base[5:])
res = Series(idx).drop_duplicates(keep=False)
tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
result = Series(idx).drop_duplicates(keep=keep)
tm.assert_series_equal(result, Series(expected, index=index))
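
For context (an illustrative walk-through, not part of the diff): appending the first five elements again gives a 15-element index whose last five entries repeat the first five, which is where both the expected masks and the `index` arrays in the parametrization come from; `Series.drop_duplicates` keeps the surviving rows' original positional labels.

import numpy as np
import pandas as pd

idx = pd.date_range("2011-01-01", freq="D", periods=10, name="idx")
idx = idx.append(idx[:5])  # positions 10-14 repeat positions 0-4

# keep="last": the first occurrences (positions 0-4) are the duplicates, so the
# surviving rows keep their original labels 5..14, i.e. np.arange(5, 15) above
ser = pd.Series(idx).drop_duplicates(keep="last")
assert list(ser.index) == list(np.arange(5, 15))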

@pytest.mark.parametrize(
"freq",
[
"A",
"2A",
"-2A",
"Q",
"-1Q",
"M",
"-1M",
"D",
"3D",
"-3D",
"W",
"-1W",
"H",
"2H",
"-2H",
"T",
"2T",
"S",
"-3S",
],
)
def test_infer_freq(self, freq):
def test_infer_freq(self, freq_sample):
# GH 11018
idx = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10)
idx = pd.date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
result = pd.DatetimeIndex(idx.asi8, freq="infer")
tm.assert_index_equal(idx, result)
assert result.freq == freq
assert result.freq == freq_sample
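
For context (an illustrative sketch, not part of the diff): `asi8` exposes the underlying int64 nanosecond values, so constructing with freq="infer" has to re-derive the step purely from the spacing of those integers.

import pandas as pd

idx = pd.date_range("2011-01-01 09:00:00", freq="2H", periods=10)
result = pd.DatetimeIndex(idx.asi8, freq="infer")

assert result.equals(idx)
assert result.freq == idx.freq  # the 2-hour step is re-inferred from the raw integers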

def test_nat(self, tz_naive_fixture):
tz = tz_naive_fixture
49 changes: 28 additions & 21 deletions pandas/tests/indexes/period/test_ops.py
@@ -81,9 +81,10 @@ def test_value_counts_unique(self):

tm.assert_index_equal(idx.unique(), exp_idx)

def test_drop_duplicates_metadata(self):
@pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
def test_drop_duplicates_metadata(self, freq):
# GH 10115
idx = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx")
result = idx.drop_duplicates()
tm.assert_index_equal(idx, result)
assert idx.freq == result.freq
@@ -93,26 +94,32 @@ def test_drop_duplicates_metadata(self):
tm.assert_index_equal(idx, result)
assert idx.freq == result.freq
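
Aside (my reading of why this assertion differs from the DatetimeIndex and TimedeltaIndex versions, not part of the diff): a PeriodIndex carries its frequency in the dtype itself, so it survives drop_duplicates even after duplicates are appended, whereas a DatetimeIndex's freq is plain metadata that is dropped once the index is no longer evenly spaced, hence the `assert result.freq is None` in the other two files.

import pandas as pd

pidx = pd.period_range("2011-01-01", periods=3, freq="D")
pidx = pidx.append(pidx[:2])
assert pidx.drop_duplicates().freq == "D"  # dtype-level freq survives

didx = pd.date_range("2011-01-01", periods=3, freq="D")
assert didx.append(didx[:2]).drop_duplicates().freq is None  # metadata freq is lost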

def test_drop_duplicates(self):
@pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
@pytest.mark.parametrize(
"keep, expected, index",
[
("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
(
False,
np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
np.arange(5, 10),
),
],
)
def test_drop_duplicates(self, freq, keep, expected, index):
# to check Index/Series compat
base = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx = base.append(base[:5])

res = idx.drop_duplicates()
tm.assert_index_equal(res, base)
res = Series(idx).drop_duplicates()
tm.assert_series_equal(res, Series(base))

res = idx.drop_duplicates(keep="last")
exp = base[5:].append(base[:5])
tm.assert_index_equal(res, exp)
res = Series(idx).drop_duplicates(keep="last")
tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

res = idx.drop_duplicates(keep=False)
tm.assert_index_equal(res, base[5:])
res = Series(idx).drop_duplicates(keep=False)
tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx")
idx = idx.append(idx[:5])

tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
expected = idx[~expected]

result = idx.drop_duplicates(keep=keep)
tm.assert_index_equal(result, expected)

result = Series(idx).drop_duplicates(keep=keep)
tm.assert_series_equal(result, Series(expected, index=index))

def test_order_compat(self):
def _check_freq(index, expected_index):
50 changes: 26 additions & 24 deletions pandas/tests/indexes/timedeltas/test_ops.py
@@ -134,9 +134,9 @@ def test_order(self):
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
assert ordered.freq is None

def test_drop_duplicates_metadata(self):
def test_drop_duplicates_metadata(self, freq_sample):
# GH 10115
idx = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
idx = pd.timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
result = idx.drop_duplicates()
tm.assert_index_equal(idx, result)
assert idx.freq == result.freq
@@ -147,36 +147,38 @@ def test_drop_duplicates_metadata(self):
tm.assert_index_equal(idx, result)
assert result.freq is None

def test_drop_duplicates(self):
@pytest.mark.parametrize(
"keep, expected, index",
[
("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
(
False,
np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
np.arange(5, 10),
),
],
)
def test_drop_duplicates(self, freq_sample, keep, expected, index):
# to check Index/Series compat
base = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
idx = base.append(base[:5])
idx = pd.timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
idx = idx.append(idx[:5])

res = idx.drop_duplicates()
tm.assert_index_equal(res, base)
res = Series(idx).drop_duplicates()
tm.assert_series_equal(res, Series(base))
tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
expected = idx[~expected]

res = idx.drop_duplicates(keep="last")
exp = base[5:].append(base[:5])
tm.assert_index_equal(res, exp)
res = Series(idx).drop_duplicates(keep="last")
tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
result = idx.drop_duplicates(keep=keep)
tm.assert_index_equal(result, expected)

res = idx.drop_duplicates(keep=False)
tm.assert_index_equal(res, base[5:])
res = Series(idx).drop_duplicates(keep=False)
tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
result = Series(idx).drop_duplicates(keep=keep)
tm.assert_series_equal(result, Series(expected, index=index))

@pytest.mark.parametrize(
"freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]
)
def test_infer_freq(self, freq):
def test_infer_freq(self, freq_sample):
# GH#11018
idx = pd.timedelta_range("1", freq=freq, periods=10)
idx = pd.timedelta_range("1", freq=freq_sample, periods=10)
result = pd.TimedeltaIndex(idx.asi8, freq="infer")
tm.assert_index_equal(idx, result)
assert result.freq == freq
assert result.freq == freq_sample

def test_repeat(self):
index = pd.timedelta_range("1 days", periods=2, freq="D")