Skip to content

Commit 1f7a17a

Browse files
committed
BUG: ser.str.match with mismatched case/pat/flags
1 parent bc458ec commit 1f7a17a

File tree

2 files changed

+55
-14
lines changed

2 files changed

+55
-14
lines changed

pandas/core/strings/accessor.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1351,7 +1351,13 @@ def contains(
13511351
return self._wrap_result(result, fill_value=na, returns_string=False)
13521352

13531353
@forbid_nonstring_types(["bytes"])
1354-
def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):
1354+
def match(
1355+
self,
1356+
pat: str,
1357+
case: bool | lib.NoDefault = lib.no_default,
1358+
flags: int | lib.NoDefault = lib.no_default,
1359+
na=lib.no_default,
1360+
):
13551361
"""
13561362
Determine if each string starts with a match of a regular expression.
13571363
@@ -1397,6 +1403,40 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):
13971403
2 False
13981404
dtype: bool
13991405
"""
1406+
if flags is not lib.no_default:
1407+
# pat.flags of a compiled str pattern always includes re.U, so add it here
1408+
# before checking for a match
1409+
flags = flags | re.U
1410+
if is_re(pat):
1411+
if pat.flags != flags:
1412+
raise ValueError(
1413+
"Cannot both specify 'flags' and pass a compiled regexp "
1414+
"object with conflicting flags"
1415+
)
1416+
else:
1417+
pat = re.compile(pat, flags=flags)
1418+
# pat is now compiled with the flags; set flags=0 to ensure that when we call
1419+
# re.compile(pat, flags=flags) the constructor does not raise.
1420+
flags = 0
1421+
else:
1422+
flags = 0
1423+
1424+
if case is lib.no_default:
1425+
if is_re(pat):
1426+
implicit_case = not bool(pat.flags & re.IGNORECASE)
1427+
case = True
1428+
else:
1429+
# Case-sensitive default
1430+
case = True
1431+
elif is_re(pat):
1432+
implicit_case = not bool(pat.flags & re.IGNORECASE)
1433+
if implicit_case != case:
1434+
# GH#62240
1435+
raise ValueError(
1436+
"Cannot both specify 'case' and pass a compiled regexp "
1437+
"object with conflicting case-sensitivity"
1438+
)
1439+
14001440
result = self._data.array._str_match(pat, case=case, flags=flags, na=na)
14011441
return self._wrap_result(result, fill_value=na, returns_string=False)
14021442

pandas/tests/strings/test_find_replace.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,26 +1004,27 @@ def test_match_compiled_regex(any_string_dtype):
10041004
expected = Series([True, False, True, False], dtype=expected_dtype)
10051005
tm.assert_series_equal(result, expected)
10061006

1007-
# TODO this currently works for pyarrow-backed dtypes but raises for python
1008-
if any_string_dtype == "string" and any_string_dtype.storage == "pyarrow":
1009-
result = values.str.match(re.compile("ab"), case=False)
1010-
expected = Series([True, True, True, True], dtype=expected_dtype)
1011-
tm.assert_series_equal(result, expected)
1012-
else:
1013-
with pytest.raises(
1014-
ValueError, match="cannot process flags argument with a compiled pattern"
1015-
):
1016-
values.str.match(re.compile("ab"), case=False)
1007+
msg = (
1008+
"Cannot both specify 'case' and pass a compiled "
1009+
"regexp object with conflicting case-sensitivity"
1010+
)
1011+
with pytest.raises(ValueError, match=msg):
1012+
values.str.match(re.compile("ab"), case=False)
10171013

10181014
result = values.str.match(re.compile("ab", flags=re.IGNORECASE))
10191015
expected = Series([True, True, True, True], dtype=expected_dtype)
10201016
tm.assert_series_equal(result, expected)
10211017

1022-
with pytest.raises(
1023-
ValueError, match="cannot process flags argument with a compiled pattern"
1024-
):
1018+
msg = (
1019+
"Cannot both specify 'flags' and pass a compiled "
1020+
"regexp object with conflicting flags"
1021+
)
1022+
with pytest.raises(ValueError, match=msg):
10251023
values.str.match(re.compile("ab"), flags=re.IGNORECASE)
10261024

1025+
# But if the explicit flags match the compiled pattern's flags, no error is raised
1026+
values.str.match(re.compile("ab", flags=re.IGNORECASE), flags=re.IGNORECASE)
1027+
10271028

10281029
@pytest.mark.parametrize(
10291030
"pat, case, exp",

0 commit comments

Comments
 (0)