44import numpy as np
55import pytest
66
7+ from pandas ._config import using_string_dtype
8+
9+ from pandas .compat import HAS_PYARROW
710from pandas .errors import PerformanceWarning
811import pandas .util ._test_decorators as td
912
@@ -167,7 +170,16 @@ def test_contains_na_kwarg_for_nullable_string_dtype(
167170 # https://github.com/pandas-dev/pandas/pull/41025#issuecomment-824062416
168171
169172 values = Series (["a" , "b" , "c" , "a" , np .nan ], dtype = nullable_string_dtype )
170- result = values .str .contains ("a" , na = na , regex = regex )
173+
174+ msg = (
175+ "Allowing a non-bool 'na' in obj.str.contains is deprecated and "
176+ "will raise in a future version"
177+ )
178+ warn = None
179+ if not pd .isna (na ) and not isinstance (na , bool ):
180+ warn = FutureWarning
181+ with tm .assert_produces_warning (warn , match = msg ):
182+ result = values .str .contains ("a" , na = na , regex = regex )
171183 expected = Series ([True , False , False , True , expected ], dtype = "boolean" )
172184 tm .assert_series_equal (result , expected )
173185
@@ -233,6 +245,7 @@ def test_contains_nan(any_string_dtype):
233245 expected = Series ([True , True , True ], dtype = expected_dtype )
234246 tm .assert_series_equal (result , expected )
235247
248+ # TODO(infer_string)
236249 # this particular combination of events is broken on 2.3
237250 # would require cherry picking #58483, which in turn requires #57481
238251 # which introduce many behavioral changes
@@ -241,14 +254,19 @@ def test_contains_nan(any_string_dtype):
241254 and any_string_dtype .storage == "python"
242255 and any_string_dtype .na_value is np .nan
243256 ):
244- result = s .str .contains ("foo" , na = "foo" )
257+ msg = (
258+ "Allowing a non-bool 'na' in obj.str.contains is deprecated and "
259+ "will raise in a future version"
260+ )
261+ with tm .assert_produces_warning (FutureWarning , match = msg ):
262+ result = s .str .contains ("foo" , na = "foo" )
245263 if any_string_dtype == "object" :
246264 expected = Series (["foo" , "foo" , "foo" ], dtype = np .object_ )
247265 elif any_string_dtype .na_value is np .nan :
248266 expected = Series ([True , True , True ], dtype = np .bool_ )
249267 else :
250268 expected = Series ([True , True , True ], dtype = "boolean" )
251- tm .assert_series_equal (result , expected )
269+ tm .assert_series_equal (result , expected )
252270
253271 result = s .str .contains ("foo" )
254272 expected_dtype = (
@@ -263,6 +281,37 @@ def test_contains_nan(any_string_dtype):
263281# --------------------------------------------------------------------------------------
264282
265283
@pytest.mark.xfail(
    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
)
def test_startswith_endswith_validate_na(any_string_dtype):
    """Exercise ``str.startswith``/``str.endswith`` with a non-bool ``na``.

    For object dtype and python-storage ``StringDtype`` the calls are expected
    to emit a ``FutureWarning`` about the deprecated non-bool ``na``.  For any
    other dtype the calls are expected to raise ``pyarrow.lib.ArrowInvalid``.
    """
    # GH#59615
    data = ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"]
    ser = Series(data, dtype=any_string_dtype)
    dtype = ser.dtype

    # Which expectation applies is decided from the resulting dtype alone.
    warns_on_non_bool_na = (
        isinstance(dtype, pd.StringDtype) and dtype.storage == "python"
    ) or dtype == np.dtype("object")

    if not warns_on_non_bool_na:
        # TODO(infer_string): don't surface pyarrow errors
        import pyarrow as pa

        msg = "Could not convert 'baz' with type str: tried to convert to boolean"
        with pytest.raises(pa.lib.ArrowInvalid, match=msg):
            ser.str.startswith("kapow", na="baz")
        with pytest.raises(pa.lib.ArrowInvalid, match=msg):
            ser.str.endswith("kapow", na="baz")
        return

    msg = "Allowing a non-bool 'na' in obj.str.startswith is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        ser.str.startswith("kapow", na="baz")

    msg = "Allowing a non-bool 'na' in obj.str.endswith is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        ser.str.endswith("bar", na="baz")
313+
314+
266315@pytest .mark .parametrize ("pat" , ["foo" , ("foo" , "baz" )])
267316@pytest .mark .parametrize ("dtype" , ["object" , "category" ])
268317@pytest .mark .parametrize ("null_value" , [None , np .nan , pd .NA ])
0 commit comments