googleapis · chelsea-lin · May 8, 2025 · May 7, 2025
@@ -456,9 +456,19 @@ def upper_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.StringValue, x).upper()
 
 
-@scalar_op_compiler.register_unary_op(ops.strip_op)
-def strip_op_impl(x: ibis_types.Value):
-    return typing.cast(ibis_types.StringValue, x).strip()
+@scalar_op_compiler.register_unary_op(ops.StrLstripOp, pass_op=True)
+def str_lstrip_op_impl(x: ibis_types.Value, op: ops.StrLstripOp):
+    return str_lstrip_op(x, to_strip=op.to_strip)  # builtin `ltrim` UDF
+
+
+@scalar_op_compiler.register_unary_op(ops.StrRstripOp, pass_op=True)
+def str_rstrip_op_impl(x: ibis_types.Value, op: ops.StrRstripOp):
+    return str_rstrip_op(x, to_strip=op.to_strip)  # builtin `rtrim` UDF
+
+
+@scalar_op_compiler.register_unary_op(ops.StrStripOp, pass_op=True)
+def str_strip_op_impl(x: ibis_types.Value, op: ops.StrStripOp):
+    return str_strip_op(x, to_strip=op.to_strip)  # builtin `trim` UDF
 
 
 @scalar_op_compiler.register_unary_op(ops.isnumeric_op)
@@ -519,16 +529,6 @@ def isupper_op_impl(x: ibis_types.Value):
     ).re_search(r"\p{Ll}|\p{Lt}")
 
 
-@scalar_op_compiler.register_unary_op(ops.rstrip_op)
-def rstrip_op_impl(x: ibis_types.Value):
-    return typing.cast(ibis_types.StringValue, x).rstrip()
-
-
-@scalar_op_compiler.register_unary_op(ops.lstrip_op)
-def lstrip_op_impl(x: ibis_types.Value):
-    return typing.cast(ibis_types.StringValue, x).lstrip()
-
-
 @scalar_op_compiler.register_unary_op(ops.capitalize_op)
 def capitalize_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.StringValue, x).capitalize()
@@ -2077,3 +2077,24 @@ def obj_make_ref(uri: str, authorizer: str) -> _OBJ_REF_IBIS_DTYPE:  # type: ign
 @ibis_udf.scalar.builtin(name="OBJ.GET_ACCESS_URL")
 def obj_get_access_url(obj_ref: _OBJ_REF_IBIS_DTYPE, mode: ibis_dtypes.String) -> ibis_dtypes.JSON:  # type: ignore
-    """Get access url (as ObjectRefRumtime JSON) from ObjectRef."""
+    """Get access url (as ObjectRefRuntime JSON) from ObjectRef."""
+
+
+@ibis_udf.scalar.builtin(name="ltrim")
+def str_lstrip_op(  # type: ignore[empty-body]
+    x: ibis_dtypes.String, to_strip: ibis_dtypes.String
+) -> ibis_dtypes.String:
+    """Remove leading characters contained in `to_strip`."""
+
+
+@ibis_udf.scalar.builtin(name="rtrim")
+def str_rstrip_op(  # type: ignore[empty-body]
+    x: ibis_dtypes.String, to_strip: ibis_dtypes.String
+) -> ibis_dtypes.String:
+    """Remove trailing characters contained in `to_strip`."""
+
+
+@ibis_udf.scalar.builtin(name="trim")
+def str_strip_op(  # type: ignore[empty-body]
+    x: ibis_dtypes.String, to_strip: ibis_dtypes.String
+) -> ibis_dtypes.String:
+    """Remove leading and trailing characters contained in `to_strip`."""
@@ -167,11 +167,9 @@
     isupper_op,
     len_op,
     lower_op,
-    lstrip_op,
     RegexReplaceStrOp,
     ReplaceStrOp,
     reverse_op,
-    rstrip_op,
     StartsWithOp,
     strconcat_op,
     StrContainsOp,
@@ -180,10 +178,12 @@
     StrFindOp,
     StrGetOp,
     StringSplitOp,
-    strip_op,
+    StrLstripOp,
     StrPadOp,
     StrRepeatOp,
+    StrRstripOp,
     StrSliceOp,
+    StrStripOp,
     upper_op,
     ZfillOp,
 )
@@ -237,23 +237,23 @@
     "isupper_op",
     "len_op",
     "lower_op",
-    "lstrip_op",
     "RegexReplaceStrOp",
     "ReplaceStrOp",
     "reverse_op",
-    "rstrip_op",
     "StartsWithOp",
     "strconcat_op",
     "StrContainsOp",
     "StrContainsRegexOp",
     "StrExtractOp",
     "StrFindOp",
     "StrGetOp",
+    "StrLstripOp",
     "StringSplitOp",
-    "strip_op",
     "StrPadOp",
     "StrRepeatOp",
+    "StrRstripOp",
     "StrSliceOp",
+    "StrStripOp",
     "upper_op",
     "ZfillOp",
     # Date ops

@@ -41,10 +41,6 @@
     name="upper", type_signature=op_typing.STRING_TRANSFORM
 )
 
-strip_op = base_ops.create_unary_op(
-    name="strip", type_signature=op_typing.STRING_TRANSFORM
-)
-
 isalnum_op = base_ops.create_unary_op(
     name="isalnum", type_signature=op_typing.STRING_PREDICATE
 )
@@ -77,14 +73,6 @@
     name="isupper", type_signature=op_typing.STRING_PREDICATE
 )
 
-rstrip_op = base_ops.create_unary_op(
-    name="rstrip", type_signature=op_typing.STRING_TRANSFORM
-)
-
-lstrip_op = base_ops.create_unary_op(
-    name="lstrip", type_signature=op_typing.STRING_TRANSFORM
-)
-
 capitalize_op = base_ops.create_unary_op(
     name="capitalize", type_signature=op_typing.STRING_TRANSFORM
 )
@@ -128,6 +116,33 @@ def output_type(self, *input_types):
         return op_typing.STRING_TRANSFORM.output_type(input_types[0])
 
 
+@dataclasses.dataclass(frozen=True)
+class StrStripOp(base_ops.UnaryOp):
+    name: typing.ClassVar[str] = "str_strip"
+    to_strip: str  # set of characters to remove from both ends of the string
+
+    def output_type(self, *input_types):
+        return op_typing.STRING_TRANSFORM.output_type(input_types[0])
+
+
+@dataclasses.dataclass(frozen=True)
+class StrLstripOp(base_ops.UnaryOp):
+    name: typing.ClassVar[str] = "str_lstrip"
+    to_strip: str  # set of characters to remove from the start of the string
+
+    def output_type(self, *input_types):
+        return op_typing.STRING_TRANSFORM.output_type(input_types[0])
+
+
+@dataclasses.dataclass(frozen=True)
+class StrRstripOp(base_ops.UnaryOp):
+    name: typing.ClassVar[str] = "str_rstrip"
+    to_strip: str  # set of characters to remove from the end of the string
+
+    def output_type(self, *input_types):
+        return op_typing.STRING_TRANSFORM.output_type(input_types[0])
+
+
 @dataclasses.dataclass(frozen=True)
 class ReplaceStrOp(base_ops.UnaryOp):
     name: typing.ClassVar[str] = "str_replace"

@@ -91,8 +91,10 @@ def slice(
     ) -> series.Series:
         return self._apply_unary_op(ops.StrSliceOp(start=start, end=stop))
 
-    def strip(self) -> series.Series:
-        return self._apply_unary_op(ops.strip_op)
+    def strip(self, to_strip: Optional[str] = None) -> series.Series:
+        # None: strip every ASCII whitespace char (string.whitespace), not only " \n\t".
+        chars = " \t\n\r\x0b\x0c" if to_strip is None else to_strip
+        return self._apply_unary_op(ops.StrStripOp(to_strip=chars))
 
     def upper(self) -> series.Series:
         return self._apply_unary_op(ops.upper_op)
@@ -135,11 +137,15 @@ def isupper(
     ) -> series.Series:
         return self._apply_unary_op(ops.isupper_op)
 
-    def rstrip(self) -> series.Series:
-        return self._apply_unary_op(ops.rstrip_op)
+    def rstrip(self, to_strip: Optional[str] = None) -> series.Series:
+        # None: strip every ASCII whitespace char (string.whitespace), not only " \n\t".
+        chars = " \t\n\r\x0b\x0c" if to_strip is None else to_strip
+        return self._apply_unary_op(ops.StrRstripOp(to_strip=chars))
 
-    def lstrip(self) -> series.Series:
-        return self._apply_unary_op(ops.lstrip_op)
+    def lstrip(self, to_strip: Optional[str] = None) -> series.Series:
+        # None: strip every ASCII whitespace char (string.whitespace), not only " \n\t".
+        chars = " \t\n\r\x0b\x0c" if to_strip is None else to_strip
+        return self._apply_unary_op(ops.StrLstripOp(to_strip=chars))
 
     def repeat(self, repeats: int) -> series.Series:
         return self._apply_unary_op(ops.StrRepeatOp(repeats=repeats))

@@ -265,6 +265,28 @@ def test_strip(scalars_dfs):
     )
 
 
+@pytest.mark.parametrize(
+    ("to_strip"),
+    [
+        pytest.param(None, id="none"),
+        pytest.param(" ", id="space"),
+        pytest.param(" \n", id="space_newline"),
+        pytest.param("123.!? \n\t", id="multiple_chars"),
+    ],
+)
+def test_strip_w_to_strip(to_strip):
+    """`Series.str.strip(to_strip=...)` must match pandas for each charset."""
+    # Each value is padded on both ends so that leading- and trailing-side
+    # stripping are both exercised.
+    s = bpd.Series(["  1. Ant.  ", "\n2. Bee!\n", "\t3. Cat?\t", bpd.NA])
+    pd_s = s.to_pandas()
+
+    bf_result = s.str.strip(to_strip=to_strip).to_pandas()
+    pd_result = pd_s.str.strip(to_strip=to_strip)
+
+    assert_series_equal(pd_result, bf_result)
+
+
 def test_upper(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
@@ -387,6 +409,28 @@ def test_rstrip(scalars_dfs):
     )
 
 
+@pytest.mark.parametrize(
+    ("to_strip"),
+    [
+        pytest.param(None, id="none"),
+        pytest.param(" ", id="space"),
+        pytest.param(" \n", id="space_newline"),
+        pytest.param("123.!? \n\t", id="multiple_chars"),
+    ],
+)
+def test_rstrip_w_to_strip(to_strip):
+    """`Series.str.rstrip(to_strip=...)` must match pandas for each charset."""
+    # Each value is padded on both ends so the comparison also checks that
+    # the leading side is left exactly as pandas leaves it.
+    s = bpd.Series(["  1. Ant.  ", "\n2. Bee!\n", "\t3. Cat?\t", bpd.NA])
+    pd_s = s.to_pandas()
+
+    bf_result = s.str.rstrip(to_strip=to_strip).to_pandas()
+    pd_result = pd_s.str.rstrip(to_strip=to_strip)
+
+    assert_series_equal(pd_result, bf_result)
+
+
 def test_lstrip(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
@@ -400,6 +444,28 @@ def test_lstrip(scalars_dfs):
     )
 
 
+@pytest.mark.parametrize(
+    ("to_strip"),
+    [
+        pytest.param(None, id="none"),
+        pytest.param(" ", id="space"),
+        pytest.param(" \n", id="space_newline"),
+        pytest.param("123.!? \n\t", id="multiple_chars"),
+    ],
+)
+def test_lstrip_w_to_strip(to_strip):
+    """`Series.str.lstrip(to_strip=...)` must match pandas for each charset."""
+    # Each value is padded on both ends; without leading padding the
+    # whitespace-only cases would leave every value unchanged.
+    s = bpd.Series(["  1. Ant.  ", "\n2. Bee!\n", "\t3. Cat?\t", bpd.NA])
+    pd_s = s.to_pandas()
+
+    bf_result = s.str.lstrip(to_strip=to_strip).to_pandas()
+    pd_result = pd_s.str.lstrip(to_strip=to_strip)
+
+    assert_series_equal(pd_result, bf_result)
+
+
 @pytest.mark.parametrize(["repeats"], [(5,), (0,), (1,)])
 def test_repeat(scalars_dfs, repeats):
     scalars_df, scalars_pandas_df = scalars_dfs

@@ -239,7 +239,7 @@ def slice(self, start=None, stop=None):
 
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def strip(self):
+    def strip(self, to_strip: typing.Optional[str] = None):
         """Remove leading and trailing characters.
 
         Strip whitespaces (including newlines) or a set of specified characters
@@ -252,22 +252,35 @@ def strip(self):
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
 
-            >>> s = bpd.Series(['Ant', '  Bee ', '\\tCat\\n', bpd.NA])
+            >>> s = bpd.Series(['1. Ant.', '  2. Bee? ', '\\t3. Cat!\\n', bpd.NA])
             >>> s
-            0       Ant
-            1      Bee
-            2       Cat
+            0        1. Ant.
+            1       2. Bee?
+            2       3. Cat!
             <BLANKLINE>
             3      <NA>
             dtype: string
 
             >>> s.str.strip()
+            0    1. Ant.
+            1    2. Bee?
+            2    3. Cat!
+            3       <NA>
+            dtype: string
+
+            >>> s.str.strip('123.!? \\n\\t')
             0     Ant
             1     Bee
             2     Cat
             3    <NA>
             dtype: string
 
+        Args:
+            to_strip (str, default None):
+                Specifying the set of characters to be removed. All combinations
+                of this set of characters will be stripped. If None then
+                whitespaces are removed.
+
         Returns:
             bigframes.series.Series: Series or Index without leading
                 and trailing characters.
@@ -529,7 +542,7 @@ def isdecimal(self):
 
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def rstrip(self):
+    def rstrip(self, to_strip: typing.Optional[str] = None):
         """Remove trailing characters.
 
         Strip whitespaces (including newlines) or a set of specified characters
@@ -558,13 +571,19 @@ def rstrip(self):
             3     <NA>
             dtype: string
 
+        Args:
+            to_strip (str, default None):
+                Specifying the set of characters to be removed. All combinations
+                of this set of characters will be stripped. If None then
+                whitespaces are removed.
+
         Returns:
             bigframes.series.Series: Series without trailing characters.
         """
 
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def lstrip(self):
+    def lstrip(self, to_strip: typing.Optional[str] = None):
         """Remove leading characters.
 
         Strip whitespaces (including newlines) or a set of specified characters
@@ -594,6 +613,12 @@ def lstrip(self):
             3    <NA>
             dtype: string
 
+        Args:
+            to_strip (str, default None):
+                Specifying the set of characters to be removed. All combinations
+                of this set of characters will be stripped. If None then
+                whitespaces are removed.
+
         Returns:
             bigframes.series.Series: Series without leading characters.
         """