From 264becbd49eb9d6854768be492454f72fd7fb064 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Wed, 24 Jan 2024 02:38:11 +0000
Subject: [PATCH 01/10] feat: limited support of lambdas in `Series.apply`

---
 bigframes/core/compile/scalar_op_compiler.py |  4 ---
 tests/system/small/test_series.py            | 29 ++++++++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index bf0755acc7..a30cb676d3 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -658,10 +658,6 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp):
 
 @scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True)
 def remote_function_op_impl(x: ibis_types.Value, op: ops.RemoteFunctionOp):
-    if not hasattr(op.func, "bigframes_remote_function"):
-        raise TypeError(
-            f"only a bigframes remote function is supported as a callable. {constants.FEEDBACK_LINK}"
-        )
     x_transformed = op.func(x)
     if not op.apply_on_null:
         x_transformed = ibis.case().when(x.isnull(), x).else_(x_transformed).end()
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 69b35d102c..e2a97c03d8 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -2997,3 +2997,32 @@ def test_series_iter(
         scalars_df_index["int64_too"], scalars_pandas_df_index["int64_too"]
     ):
         assert bf_i == pd_i
+
+
+@pytest.mark.parametrize(
+    ("lambda_",),
+    [
+        pytest.param(lambda x: x * x + x + 1),
+        pytest.param(
+            lambda x: f"I got {x}",
+            marks=pytest.mark.xfail(
+                raises=AttributeError,
+            ),
+        ),
+    ],
+    ids=[
+        "lamda_arithmatic",
+        "lambda_arbitrary",
+    ],
+)
+def test_apply_lambda(scalars_dfs, lambda_):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_col = scalars_df["int64_col"]
+    bf_result = bf_col.apply(lambda_).to_pandas()
+
+    pd_col = scalars_pandas_df["int64_col"]
+    pd_result = pd_col.apply(lambda_)
+
+    # ignore dtype check, which are Int64 and object respectively
+    assert_series_equal(bf_result, pd_result, check_dtype=False)

From 266a3ea2e4f6e25e04032e896c773d0860666e98 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Thu, 25 Jan 2024 02:37:45 +0000
Subject: [PATCH 02/10] add code sample for non-remote-function `Series.apply`

---
 .../bigframes_vendored/pandas/core/series.py  | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 1aa4ffffbb..203c53c334 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -1152,6 +1152,42 @@ def apply(
             >>> names = bpd.Series(["Alice", "Bob"])
             >>> hashes = names.apply(get_hash)
 
+        There is a limited support of simple functions and lambdas which can be
+        operated directly (without converting into a `remote_function`) on the
+        BigQuery DataFrames objects.
+
+        .. note::
+            Bigframes does not yet support ``dict`` subclasses that define
+            ``__missing__`` (i.e. provide a method for default values). These
+            are treated the same as ``dict``.
+
+        This approach takes advantage of a nuance in the way BigQuery DataFrames
+        objects are modelled internally and works only if the function body
+        contains only arithmatic or logical operators.
+
+            >>> nums = bpd.Series([1, 2, 3, 4])
+            >>> nums
+            0    1
+            1    2
+            2    3
+            3    4
+            dtype: Int64
+            >>> nums.apply(lambda x: x*x + 2*x + 1)
+            0     4
+            1     9
+            2    16
+            3    25
+            dtype: Int64
+
+            >>> def is_odd(num):
+            ...     return num % 2 == 1
+            >>> nums.apply(is_odd)
+            0     True
+            1    False
+            2     True
+            3    False
+            dtype: boolean
+
         Args:
             func (function):
                 BigFrames DataFrames ``remote_function`` to apply. The function

From d3f9878076fccc901e550e8a5e69c335f79de42a Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Thu, 25 Jan 2024 08:42:55 +0000
Subject: [PATCH 03/10] remove ..note in the middle of code samples due to
 rendering issue

---
 .../bigframes_vendored/pandas/core/series.py        | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 203c53c334..a6de38236d 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -1154,16 +1154,9 @@ def apply(
 
         There is a limited support of simple functions and lambdas which can be
         operated directly (without converting into a `remote_function`) on the
-        BigQuery DataFrames objects.
-
-        .. note::
-            Bigframes does not yet support ``dict`` subclasses that define
-            ``__missing__`` (i.e. provide a method for default values). These
-            are treated the same as ``dict``.
-
-        This approach takes advantage of a nuance in the way BigQuery DataFrames
-        objects are modelled internally and works only if the function body
-        contains only arithmatic or logical operators.
+        BigQuery DataFrames objects. This approach takes advantage of a nuance
+        in the way BigQuery DataFrames objects are modeled internally and works
+        only if the function body contains only arithmatic or logical operators.
 
             >>> nums = bpd.Series([1, 2, 3, 4])
             >>> nums

From 89b8dd1cbc704b545bbc8d1b7b52e996f2c5f3d8 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Thu, 25 Jan 2024 22:54:14 +0000
Subject: [PATCH 04/10] fix typo

---
 tests/system/small/test_series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index e2a97c03d8..55e85528ae 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -3011,7 +3011,7 @@ def test_series_iter(
         ),
     ],
     ids=[
-        "lamda_arithmatic",
+        "lambda_arithmatic",
         "lambda_arbitrary",
     ],
 )

From 4ed8f7b068466bf9c7621b2ede937d889aacf165 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Fri, 26 Jan 2024 01:56:45 +0000
Subject: [PATCH 05/10] add lambda test coverage and code samples for
 `Series.mask`

---
 tests/system/small/test_series.py             | 61 +++++++++++++++++++
 .../bigframes_vendored/pandas/core/series.py  | 32 +++++++++-
 2 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 55e85528ae..f12a00881e 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -2515,6 +2515,51 @@ def test_mask_custom_value(scalars_dfs):
     assert_pandas_df_equal(bf_result, pd_result)
 
 
+@pytest.mark.parametrize(
+    ("lambda_",),
+    [
+        pytest.param(lambda x: x > 0),
+        pytest.param(
+            lambda x: True if x > 0 else False,
+            marks=pytest.mark.xfail(
+                raises=ValueError,
+            ),
+        ),
+    ],
+    ids=[
+        "lambda_arithmatic",
+        "lambda_arbitrary",
+    ],
+)
+def test_mask_lambda(scalars_dfs, lambda_):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_col = scalars_df["int64_col"]
+    bf_result = bf_col.apply(lambda_).to_pandas()
+
+    pd_col = scalars_pandas_df["int64_col"]
+    pd_result = pd_col.apply(lambda_)
+
+    # ignore dtype check, which are Int64 and object respectively
+    assert_series_equal(bf_result, pd_result, check_dtype=False)
+
+
+def test_mask_simple_udf(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    def foo(x):
+        return x < 1000000
+
+    bf_col = scalars_df["int64_col"]
+    bf_result = bf_col.apply(foo).to_pandas()
+
+    pd_col = scalars_pandas_df["int64_col"]
+    pd_result = pd_col.apply(foo)
+
+    # ignore dtype check, which are Int64 and object respectively
+    assert_series_equal(bf_result, pd_result, check_dtype=False)
+
+
 @pytest.mark.parametrize(
     ("column", "to_type"),
     [
@@ -3026,3 +3071,19 @@ def test_apply_lambda(scalars_dfs, lambda_):
 
     # ignore dtype check, which are Int64 and object respectively
     assert_series_equal(bf_result, pd_result, check_dtype=False)
+
+
+def test_apply_simple_udf(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    def foo(x):
+        return x * x + 2 * x + 3
+
+    bf_col = scalars_df["int64_col"]
+    bf_result = bf_col.apply(foo).to_pandas()
+
+    pd_col = scalars_pandas_df["int64_col"]
+    pd_result = pd_col.apply(foo)
+
+    # ignore dtype check, which are Int64 and object respectively
+    assert_series_equal(bf_result, pd_result, check_dtype=False)
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index a6de38236d..6ece262812 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -2625,7 +2625,8 @@ def mask(self, cond, other):
             dtype: Int64
 
         You can mask the values in the Series based on a condition. The values
-        matching the condition would be masked.
+        matching the condition would be masked. The condition can be provided in
+        the form of a Series.
 
             >>> s.mask(s % 2 == 0)
             0    <NA>
@@ -2681,6 +2682,35 @@ def mask(self, cond, other):
             2    Caroline
             dtype: string
 
+        There is a limited support of simple functions and lambdas which can be
+        operated directly (without converting into a `remote_function`) on the
+        BigQuery DataFrames objects. This approach takes advantage of a nuance
+        in the way BigQuery DataFrames objects are modeled internally and works
+        only if the function body contains only arithmatic or logical operators.
+
+            >>> nums = bpd.Series([1, 2, 3, 4], name="nums")
+            >>> nums
+            0    1
+            1    2
+            2    3
+            3    4
+            Name: nums, dtype: Int64
+            >>> nums.mask(lambda x: (x+1) % 2 == 1)
+            0        1
+            1     <NA>
+            2        3
+            3     <NA>
+            Name: nums, dtype: Int64
+
+            >>> def is_odd(num):
+            ...     return num % 2 == 1
+            >>> nums.mask(is_odd)
+            0     <NA>
+            1        2
+            2     <NA>
+            3        4
+            Name: nums, dtype: Int64
+
         Args:
             cond (bool Series/DataFrame, array-like, or callable):
                 Where cond is False, keep the original value. Where True, replace

From 2603ba138d41fdc89f3a3bfe4f243a4a71fa55ab Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Wed, 31 Jan 2024 23:47:17 +0000
Subject: [PATCH 06/10] apply the non-remote function on series level

---
 bigframes/core/compile/scalar_op_compiler.py  |  4 +++
 bigframes/series.py                           |  8 +++++
 tests/system/small/test_series.py             | 32 +++++++++++++++++
 .../bigframes_vendored/pandas/core/series.py  | 35 +++++++++++--------
 4 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index a30cb676d3..bf0755acc7 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -658,6 +658,10 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp):
 
 @scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True)
 def remote_function_op_impl(x: ibis_types.Value, op: ops.RemoteFunctionOp):
+    if not hasattr(op.func, "bigframes_remote_function"):
+        raise TypeError(
+            f"only a bigframes remote function is supported as a callable. {constants.FEEDBACK_LINK}"
+        )
     x_transformed = op.func(x)
     if not op.apply_on_null:
         x_transformed = ibis.case().when(x.isnull(), x).else_(x_transformed).end()
diff --git a/bigframes/series.py b/bigframes/series.py
index c802fd2467..a208336528 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -1216,6 +1216,14 @@ def apply(self, func) -> Series:
         # Reproject as workaround to applying filter too late. This forces the filter
         # to be applied before passing data to remote function, protecting from bad
         # inputs causing errors.
+        if not callable(func):
+            raise ValueError(
+                "Only a ufunc (a NumPy function that applies to the entire Series) or a remote function that only works on single values are supported."
+            )
+
+        if not hasattr(func, "bigframes_remote_function"):
+            return func(self)
+
         reprojected_series = Series(self._block._force_reproject())
         return reprojected_series._apply_unary_op(
             ops.RemoteFunctionOp(func=func, apply_on_null=True)
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 65e7b40877..93183f2b96 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -3088,10 +3088,17 @@ def test_series_iter(
                 raises=AttributeError,
             ),
         ),
+        pytest.param(
+            {1: 2, 3: 4},
+            marks=pytest.mark.xfail(
+                raises=ValueError,
+            ),
+        ),
     ],
     ids=[
         "lambda_arithmatic",
         "lambda_arbitrary",
+        "not_lambda",
     ],
 )
 def test_apply_lambda(scalars_dfs, lambda_):
@@ -3107,6 +3114,31 @@ def test_apply_lambda(scalars_dfs, lambda_):
     assert_series_equal(bf_result, pd_result, check_dtype=False)
 
 
+@pytest.mark.parametrize(
+    ("ufunc",),
+    [
+        pytest.param(numpy.log),
+        pytest.param(numpy.sqrt),
+        pytest.param(numpy.sin),
+    ],
+    ids=[
+        "log",
+        "sqrt",
+        "sin",
+    ],
+)
+def test_apply_numpy_ufunc(scalars_dfs, ufunc):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_col = scalars_df["int64_col"]
+    bf_result = bf_col.apply(ufunc).to_pandas()
+
+    pd_col = scalars_pandas_df["int64_col"]
+    pd_result = pd_col.apply(ufunc)
+
+    assert_series_equal(bf_result, pd_result)
+
+
 def test_apply_simple_udf(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
 
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 79adb966cd..3713cefb53 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -1099,14 +1099,19 @@ def apply(
         """
         Invoke function on values of a Series.
 
+        Can be ufunc (a NumPy function that applies to the entire Series) or a
+        Python function that only works on single values. If it is an arbitrary
+        python function then converting it into a `remote_function` is recommended.
+
         **Examples:**
 
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
 
-        Let's use ``reuse=False`` flag to make sure a new ``remote_function``
+        For applying an arbitrary Python function, a `remote_function` is recommended.
+        Let's use ``reuse=False`` flag to make sure a new `remote_function`
         is created every time we run the following code, but you can skip it
-        to potentially reuse a previously deployed ``remote_function`` from
+        to potentially reuse a previously deployed `remote_function` from
         the same user defined function.
 
             >>> @bpd.remote_function([int], float, reuse=False)
@@ -1131,9 +1136,9 @@ def apply(
             4    2.0
             dtype: Float64
 
-        You could turn a user defined function with external package
-        dependencies into a BigQuery DataFrames remote function. You would
-        provide the names of the packages via ``packages`` param.
+        To turn a user defined function with external package dependencies into
+        a `remote_function`, you would provide the names of the packages via
+        `packages` param.
 
             >>> @bpd.remote_function(
             ...     [str],
@@ -1155,11 +1160,7 @@ def apply(
             >>> names = bpd.Series(["Alice", "Bob"])
             >>> hashes = names.apply(get_hash)
 
-        There is a limited support of simple functions and lambdas which can be
-        operated directly (without converting into a `remote_function`) on the
-        BigQuery DataFrames objects. This approach takes advantage of a nuance
-        in the way BigQuery DataFrames objects are modeled internally and works
-        only if the function body contains only arithmatic or logical operators.
+        Simple functions, lambdas or ufuncs can be applied directly.
 
             >>> nums = bpd.Series([1, 2, 3, 4])
             >>> nums
@@ -1184,6 +1185,13 @@ def apply(
             3    False
             dtype: boolean
 
+            >>> nums.apply(np.log)
+            0         0.0
+            1    0.693147
+            2    1.098612
+            3    1.386294
+            dtype: Float64
+
         Args:
             func (function):
                 BigFrames DataFrames ``remote_function`` to apply. The function
@@ -2745,11 +2753,8 @@ def mask(self, cond, other):
             2    Caroline
             dtype: string
 
-        There is a limited support of simple functions and lambdas which can be
-        operated directly (without converting into a `remote_function`) on the
-        BigQuery DataFrames objects. This approach takes advantage of a nuance
-        in the way BigQuery DataFrames objects are modeled internally and works
-        only if the function body contains only arithmatic or logical operators.
+        Simple lambdas or python functions can be used as long as they only
+        perform operations supported on a Series.
 
             >>> nums = bpd.Series([1, 2, 3, 4], name="nums")
             >>> nums

From e782877110048cc73cc20325ae2c930220968d6f Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Fri, 9 Feb 2024 21:45:43 +0000
Subject: [PATCH 07/10] add suggestion to use remote function if direct func
 errors out

---
 bigframes/series.py               | 16 ++++++++-
 tests/system/small/test_series.py | 54 +++++++++++++++++++------------
 2 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/bigframes/series.py b/bigframes/series.py
index 5898a243a4..4aa0f5e8b2 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -58,6 +58,12 @@
 LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]]
 
 
+_remote_function_recommendation_message = (
+    "Your functions could not be applied directly to the Series."
+    " Try converting it to a remote function."
+)
+
+
 @log_adapter.class_logger
 class Series(bigframes.operations.base.SeriesMethods, vendored_pandas_series.Series):
     def __init__(self, *args, **kwargs):
@@ -1222,7 +1228,15 @@ def apply(self, func) -> Series:
             )
 
         if not hasattr(func, "bigframes_remote_function"):
-            return func(self)
+            try:
+                return func(self)
+            except Exception as ex:
+                # This could happen if any of the operators in func is not
+                # supported on a Series. Let's guide the customer to use a
+                # remote function instead
+                if hasattr(ex, "message"):
+                    ex.message += "\n{_remote_function_recommendation_message}"
+                raise
 
         reprojected_series = Series(self._block._force_reproject())
         return reprojected_series._apply_unary_op(
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index c0376e069b..b0b176c4e8 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -3090,35 +3090,28 @@ def test_series_iter(
 
 
 @pytest.mark.parametrize(
-    ("lambda_",),
+    (
+        "col",
+        "lambda_",
+    ),
     [
-        pytest.param(lambda x: x * x + x + 1),
-        pytest.param(
-            lambda x: f"I got {x}",
-            marks=pytest.mark.xfail(
-                raises=AttributeError,
-            ),
-        ),
-        pytest.param(
-            {1: 2, 3: 4},
-            marks=pytest.mark.xfail(
-                raises=ValueError,
-            ),
-        ),
+        pytest.param("int64_col", lambda x: x * x + x + 1),
+        pytest.param("int64_col", lambda x: x % 2 == 1),
+        pytest.param("string_col", lambda x: x + "_suffix"),
     ],
     ids=[
-        "lambda_arithmatic",
-        "lambda_arbitrary",
-        "not_lambda",
+        "lambda_int_int",
+        "lambda_int_bool",
+        "lambda_str_str",
     ],
 )
-def test_apply_lambda(scalars_dfs, lambda_):
+def test_apply_lambda(scalars_dfs, col, lambda_):
     scalars_df, scalars_pandas_df = scalars_dfs
 
-    bf_col = scalars_df["int64_col"]
+    bf_col = scalars_df[col]
     bf_result = bf_col.apply(lambda_).to_pandas()
 
-    pd_col = scalars_pandas_df["int64_col"]
+    pd_col = scalars_pandas_df[col]
     pd_result = pd_col.apply(lambda_)
 
     # ignore dtype check, which are Int64 and object respectively
@@ -3164,3 +3157,24 @@ def foo(x):
 
     # ignore dtype check, which are Int64 and object respectively
     assert_series_equal(bf_result, pd_result, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    ("col", "lambda_", "exception"),
+    [
+        pytest.param("int64_col", {1: 2, 3: 4}, ValueError),
+        pytest.param("int64_col", numpy.square, TypeError),
+        pytest.param("string_col", lambda x: x.capitalize(), AttributeError),
+    ],
+    ids=[
+        "not_callable",
+        "numpy_ufunc",
+        "custom_lambda",
+    ],
+)
+def test_apply_not_supported(scalars_dfs, col, lambda_, exception):
+    scalars_df, _ = scalars_dfs
+
+    bf_col = scalars_df[col]
+    with pytest.raises(exception):
+        bf_col.apply(lambda_)

From 4357d4f086f96eee2f862335ef9c9f31e342f70d Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Fri, 9 Feb 2024 22:50:36 +0000
Subject: [PATCH 08/10] support by_row param in Series.apply

---
 bigframes/series.py                           | 19 ++++++++++++---
 tests/system/small/test_series.py             | 23 +++++++++++++++----
 .../bigframes_vendored/pandas/core/series.py  | 14 +++++++----
 3 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/bigframes/series.py b/bigframes/series.py
index 4aa0f5e8b2..012fd3fd5f 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -1216,18 +1216,31 @@ def _groupby_values(
             dropna=dropna,
         )
 
-    def apply(self, func) -> Series:
+    def apply(
+        self, func, by_row: typing.Union[typing.Literal["compat"], bool] = "compat"
+    ) -> Series:
         # TODO(shobs, b/274645634): Support convert_dtype, args, **kwargs
         # is actually a ternary op
         # Reproject as workaround to applying filter too late. This forces the filter
         # to be applied before passing data to remote function, protecting from bad
         # inputs causing errors.
+
+        if by_row not in ["compat", False]:
+            raise ValueError("Param by_row must be one of 'compat' or False")
+
         if not callable(func):
             raise ValueError(
-                "Only a ufunc (a NumPy function that applies to the entire Series) or a remote function that only works on single values are supported."
+                "Only a ufunc (a function that applies to the entire Series) or a remote function that only works on single values are supported."
             )
 
         if not hasattr(func, "bigframes_remote_function"):
+            # It is not a remote function
+            # Then it must be a vectorized function that applies to the Series
+            # as a whole
+            assert (
+                not by_row
+            ), "A vectorized non-remote function can be provided only with by_row=False"
+
             try:
                 return func(self)
             except Exception as ex:
@@ -1235,7 +1248,7 @@ def apply(self, func) -> Series:
                 # supported on a Series. Let's guide the customer to use a
                 # remote function instead
                 if hasattr(ex, "message"):
-                    ex.message += "\n{_remote_function_recommendation_message}"
+                    ex.message += f"\n{_remote_function_recommendation_message}"
                 raise
 
         reprojected_series = Series(self._block._force_reproject())
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index b0b176c4e8..25cbc33e31 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -3109,7 +3109,12 @@ def test_apply_lambda(scalars_dfs, col, lambda_):
     scalars_df, scalars_pandas_df = scalars_dfs
 
     bf_col = scalars_df[col]
-    bf_result = bf_col.apply(lambda_).to_pandas()
+
+    # Can't be applied to BigFrames Series without by_row=False
+    with pytest.raises(AssertionError, match="by_row=False"):
+        bf_col.apply(lambda_)
+
+    bf_result = bf_col.apply(lambda_, by_row=False).to_pandas()
 
     pd_col = scalars_pandas_df[col]
     pd_result = pd_col.apply(lambda_)
@@ -3135,7 +3140,12 @@ def test_apply_numpy_ufunc(scalars_dfs, ufunc):
     scalars_df, scalars_pandas_df = scalars_dfs
 
     bf_col = scalars_df["int64_col"]
-    bf_result = bf_col.apply(ufunc).to_pandas()
+
+    # Can't be applied to BigFrames Series without by_row=False
+    with pytest.raises(AssertionError, match="by_row=False"):
+        bf_col.apply(ufunc)
+
+    bf_result = bf_col.apply(ufunc, by_row=False).to_pandas()
 
     pd_col = scalars_pandas_df["int64_col"]
     pd_result = pd_col.apply(ufunc)
@@ -3150,7 +3160,12 @@ def foo(x):
         return x * x + 2 * x + 3
 
     bf_col = scalars_df["int64_col"]
-    bf_result = bf_col.apply(foo).to_pandas()
+
+    # Can't be applied to BigFrames Series without by_row=False
+    with pytest.raises(AssertionError, match="by_row=False"):
+        bf_col.apply(foo)
+
+    bf_result = bf_col.apply(foo, by_row=False).to_pandas()
 
     pd_col = scalars_pandas_df["int64_col"]
     pd_result = pd_col.apply(foo)
@@ -3177,4 +3192,4 @@ def test_apply_not_supported(scalars_dfs, col, lambda_, exception):
 
     bf_col = scalars_df[col]
     with pytest.raises(exception):
-        bf_col.apply(lambda_)
+        bf_col.apply(lambda_, by_row=False)
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 2099d2be50..dc1b8014e9 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -1116,6 +1116,7 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
     def apply(
         self,
         func,
+        by_row="compat",
     ) -> DataFrame | Series:
         """
         Invoke function on values of a Series.
@@ -1181,7 +1182,8 @@ def apply(
             >>> names = bpd.Series(["Alice", "Bob"])
             >>> hashes = names.apply(get_hash)
 
-        Simple functions, lambdas or ufuncs can be applied directly.
+        Simple vectorized functions, lambdas or ufuncs can be applied directly
+        with `by_row=False`.
 
             >>> nums = bpd.Series([1, 2, 3, 4])
             >>> nums
@@ -1190,7 +1192,7 @@ def apply(
             2    3
             3    4
             dtype: Int64
-            >>> nums.apply(lambda x: x*x + 2*x + 1)
+            >>> nums.apply(lambda x: x*x + 2*x + 1, by_row=False)
             0     4
             1     9
             2    16
@@ -1199,14 +1201,14 @@ def apply(
 
             >>> def is_odd(num):
             ...     return num % 2 == 1
-            >>> nums.apply(is_odd)
+            >>> nums.apply(is_odd, by_row=False)
             0     True
             1    False
             2     True
             3    False
             dtype: boolean
 
-            >>> nums.apply(np.log)
+            >>> nums.apply(np.log, by_row=False)
             0         0.0
             1    0.693147
             2    1.098612
@@ -1218,6 +1220,10 @@ def apply(
                 BigFrames DataFrames ``remote_function`` to apply. The function
                 should take a scalar and return a scalar. It will be applied to
                 every element in the ``Series``.
+            by_row (False or "compat", default "compat"):
+                If `"compat"`, func must be a remote function which will be
+                passed each element of the Series, like `Series.map`. If False,
+                the func will be passed the whole Series at once.
 
         Returns:
             bigframes.series.Series: A new Series with values representing the

From 3b51709fc0ee4b3c8bd5a14721bfb3155b067e87 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Fri, 9 Feb 2024 23:08:20 +0000
Subject: [PATCH 09/10] raise ValueError instead of AssertionError

---
 bigframes/series.py               | 7 ++++---
 tests/system/small/test_series.py | 6 +++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/bigframes/series.py b/bigframes/series.py
index 012fd3fd5f..14dc1fc504 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -1237,9 +1237,10 @@ def apply(
             # It is not a remote function
             # Then it must be a vectorized function that applies to the Series
             # as a whole
-            assert (
-                not by_row
-            ), "A vectorized non-remote function can be provided only with by_row=False"
+            if by_row:
+                raise ValueError(
+                    "A vectorized non-remote function can be provided only with by_row=False"
+                )
 
             try:
                 return func(self)
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 25cbc33e31..ed44ea35a5 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -3111,7 +3111,7 @@ def test_apply_lambda(scalars_dfs, col, lambda_):
     bf_col = scalars_df[col]
 
     # Can't be applied to BigFrames Series without by_row=False
-    with pytest.raises(AssertionError, match="by_row=False"):
+    with pytest.raises(ValueError, match="by_row=False"):
         bf_col.apply(lambda_)
 
     bf_result = bf_col.apply(lambda_, by_row=False).to_pandas()
@@ -3142,7 +3142,7 @@ def test_apply_numpy_ufunc(scalars_dfs, ufunc):
     bf_col = scalars_df["int64_col"]
 
     # Can't be applied to BigFrames Series without by_row=False
-    with pytest.raises(AssertionError, match="by_row=False"):
+    with pytest.raises(ValueError, match="by_row=False"):
         bf_col.apply(ufunc)
 
     bf_result = bf_col.apply(ufunc, by_row=False).to_pandas()
@@ -3162,7 +3162,7 @@ def foo(x):
     bf_col = scalars_df["int64_col"]
 
     # Can't be applied to BigFrames Series without by_row=False
-    with pytest.raises(AssertionError, match="by_row=False"):
+    with pytest.raises(ValueError, match="by_row=False"):
         bf_col.apply(foo)
 
     bf_result = bf_col.apply(foo, by_row=False).to_pandas()

From 33d8b8b7832f033ee15df70fae85f4c99f937a22 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Sat, 10 Feb 2024 00:22:08 +0000
Subject: [PATCH 10/10] fix Series.mask tests

---
 bigframes/series.py                                  | 9 +++++++--
 tests/system/small/test_series.py                    | 8 ++++----
 third_party/bigframes_vendored/pandas/core/series.py | 4 ++--
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/bigframes/series.py b/bigframes/series.py
index 14dc1fc504..4aef959a76 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -1239,7 +1239,8 @@ def apply(
             # as a whole
             if by_row:
                 raise ValueError(
-                    "A vectorized non-remote function can be provided only with by_row=False"
+                    "A vectorized non-remote function can be provided only with by_row=False."
+                    " For element-wise operation it must be a remote function."
                 )
 
             try:
@@ -1361,7 +1362,11 @@ def duplicated(self, keep: str = "first") -> Series:
 
     def mask(self, cond, other=None) -> Series:
         if callable(cond):
-            cond = self.apply(cond)
+            if hasattr(cond, "bigframes_remote_function"):
+                cond = self.apply(cond)
+            else:
+                # For non-remote function assume that it is applicable on Series
+                cond = self.apply(cond, by_row=False)
 
         if not isinstance(cond, Series):
             raise TypeError(
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index ed44ea35a5..42651ed96f 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -2580,10 +2580,10 @@ def test_mask_lambda(scalars_dfs, lambda_):
     scalars_df, scalars_pandas_df = scalars_dfs
 
     bf_col = scalars_df["int64_col"]
-    bf_result = bf_col.apply(lambda_).to_pandas()
+    bf_result = bf_col.mask(lambda_).to_pandas()
 
     pd_col = scalars_pandas_df["int64_col"]
-    pd_result = pd_col.apply(lambda_)
+    pd_result = pd_col.mask(lambda_)
 
     # ignore dtype check, which are Int64 and object respectively
     assert_series_equal(bf_result, pd_result, check_dtype=False)
@@ -2596,10 +2596,10 @@ def foo(x):
         return x < 1000000
 
     bf_col = scalars_df["int64_col"]
-    bf_result = bf_col.apply(foo).to_pandas()
+    bf_result = bf_col.mask(foo).to_pandas()
 
     pd_col = scalars_pandas_df["int64_col"]
-    pd_result = pd_col.apply(foo)
+    pd_result = pd_col.mask(foo)
 
     # ignore dtype check, which are Int64 and object respectively
     assert_series_equal(bf_result, pd_result, check_dtype=False)
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index dc1b8014e9..b203471606 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -2780,8 +2780,8 @@ def mask(self, cond, other):
             2    Caroline
             dtype: string
 
-        Simple lambdas or python functions can be used as long as they only
-        perform operations supported on a Series.
+        Simple vectorized (i.e. they only perform operations supported on a
+        Series) lambdas or python functions can be used directly.
 
             >>> nums = bpd.Series([1, 2, 3, 4], name="nums")
             >>> nums