Skip to content

Commit f8d2cd2

Browse files
fix: Support str.replace re.compile with flags (#1736)
1 parent 108f4d2 commit f8d2cd2

File tree

2 files changed

+20
-9
lines changed

2 files changed

+20
-9
lines changed

bigframes/operations/strings.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from __future__ import annotations
1616

1717
import re
18-
from typing import cast, Literal, Optional, Union
18+
from typing import Literal, Optional, Union
1919

2020
import bigframes_vendored.constants as constants
2121
import bigframes_vendored.pandas.core.strings.accessor as vendorstr
@@ -230,21 +230,26 @@ def replace(
230230
flags: int = 0,
231231
regex: bool = False,
232232
) -> series.Series:
233-
is_compiled = isinstance(pat, re.Pattern)
234-
patstr = cast(str, pat.pattern if is_compiled else pat) # type: ignore
233+
if isinstance(pat, re.Pattern):
234+
assert isinstance(pat.pattern, str)
235+
pat_str = pat.pattern
236+
flags = pat.flags | flags
237+
else:
238+
pat_str = pat
239+
235240
if case is False:
236-
return self.replace(pat, repl, flags=flags | re.IGNORECASE, regex=True)
241+
return self.replace(pat_str, repl, flags=flags | re.IGNORECASE, regex=True)
237242
if regex:
238243
re2flags = _parse_flags(flags)
239244
if re2flags:
240-
patstr = re2flags + patstr
241-
return self._apply_unary_op(ops.RegexReplaceStrOp(pat=patstr, repl=repl))
245+
pat_str = re2flags + pat_str
246+
return self._apply_unary_op(ops.RegexReplaceStrOp(pat=pat_str, repl=repl))
242247
else:
243-
if is_compiled:
248+
if isinstance(pat, re.Pattern):
244249
raise ValueError(
245250
"Must set 'regex'=True if using compiled regex pattern."
246251
)
247-
return self._apply_unary_op(ops.ReplaceStrOp(pat=patstr, repl=repl))
252+
return self._apply_unary_op(ops.ReplaceStrOp(pat=pat_str, repl=repl))
248253

249254
def startswith(
250255
self,
@@ -318,10 +323,15 @@ def to_blob(self, connection: Optional[str] = None) -> series.Series:
318323
def _parse_flags(flags: int) -> Optional[str]:
319324
re2flags = []
320325
for reflag, re2flag in REGEXP_FLAGS.items():
321-
if flags & flags:
326+
if flags & reflag:
322327
re2flags.append(re2flag)
323328
flags = flags ^ reflag
324329

330+
# RE2 handles Unicode by default, so re.UNICODE needs no equivalent RE2 flag.
331+
# Most compiled regular expressions in Python will have the re.UNICODE flag set.
332+
if re.U and flags:
333+
flags = flags ^ re.U
334+
325335
# Remaining flags couldn't be mapped to re2 engine
326336
if flags:
327337
raise NotImplementedError(

tests/system/small/operations/test_strings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def test_str_extract(scalars_dfs, pat):
9898
(re.compile("(?i).e.."), "blah", None, 0, True),
9999
("H", "h", True, 0, False),
100100
(", ", "__", True, 0, False),
101+
(re.compile(r"hEllo", flags=re.I), "blah", None, 0, True),
101102
],
102103
)
103104
def test_str_replace(scalars_dfs, pat, repl, case, flags, regex):

0 commit comments

Comments
 (0)