|
15 | 15 | from __future__ import annotations
|
16 | 16 |
|
17 | 17 | import re
|
18 |
| -from typing import cast, Literal, Optional, Union |
| 18 | +from typing import Literal, Optional, Union |
19 | 19 |
|
20 | 20 | import bigframes_vendored.constants as constants
|
21 | 21 | import bigframes_vendored.pandas.core.strings.accessor as vendorstr
|
@@ -230,21 +230,26 @@ def replace(
|
230 | 230 | flags: int = 0,
|
231 | 231 | regex: bool = False,
|
232 | 232 | ) -> series.Series:
|
233 |
| - is_compiled = isinstance(pat, re.Pattern) |
234 |
| - patstr = cast(str, pat.pattern if is_compiled else pat) # type: ignore |
| 233 | + if isinstance(pat, re.Pattern): |
| 234 | + assert isinstance(pat.pattern, str) |
| 235 | + pat_str = pat.pattern |
| 236 | + flags = pat.flags | flags |
| 237 | + else: |
| 238 | + pat_str = pat |
| 239 | + |
235 | 240 | if case is False:
|
236 |
| - return self.replace(pat, repl, flags=flags | re.IGNORECASE, regex=True) |
| 241 | + return self.replace(pat_str, repl, flags=flags | re.IGNORECASE, regex=True) |
237 | 242 | if regex:
|
238 | 243 | re2flags = _parse_flags(flags)
|
239 | 244 | if re2flags:
|
240 |
| - patstr = re2flags + patstr |
241 |
| - return self._apply_unary_op(ops.RegexReplaceStrOp(pat=patstr, repl=repl)) |
| 245 | + pat_str = re2flags + pat_str |
| 246 | + return self._apply_unary_op(ops.RegexReplaceStrOp(pat=pat_str, repl=repl)) |
242 | 247 | else:
|
243 |
| - if is_compiled: |
| 248 | + if isinstance(pat, re.Pattern): |
244 | 249 | raise ValueError(
|
245 | 250 | "Must set 'regex'=True if using compiled regex pattern."
|
246 | 251 | )
|
247 |
| - return self._apply_unary_op(ops.ReplaceStrOp(pat=patstr, repl=repl)) |
| 252 | + return self._apply_unary_op(ops.ReplaceStrOp(pat=pat_str, repl=repl)) |
248 | 253 |
|
249 | 254 | def startswith(
|
250 | 255 | self,
|
@@ -318,10 +323,15 @@ def to_blob(self, connection: Optional[str] = None) -> series.Series:
|
318 | 323 | def _parse_flags(flags: int) -> Optional[str]:
|
319 | 324 | re2flags = []
|
320 | 325 | for reflag, re2flag in REGEXP_FLAGS.items():
|
321 |
| - if flags & flags: |
| 326 | + if flags & reflag: |
322 | 327 | re2flags.append(re2flag)
|
323 | 328 | flags = flags ^ reflag
|
324 | 329 |
|
| 330 | + # re2 handles unicode fine by default |
| 331 | + # most compiled re in python will have unicode set |
| 332 | + if re.U and flags: |
| 333 | + flags = flags ^ re.U |
| 334 | + |
325 | 335 | # Remaining flags couldn't be mapped to re2 engine
|
326 | 336 | if flags:
|
327 | 337 | raise NotImplementedError(
|
|
0 commit comments