From bc6f172e544a06423b868bb16f1d5d6b806bbe2b Mon Sep 17 00:00:00 2001
From: Lily Zhang
Date: Thu, 14 Mar 2024 18:15:46 +0000
Subject: [PATCH 1/4] feat: support datetime related casting in (Series|DataFrame|Index).astype
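A sketch of what this enables at the API surface (column names follow the
scalars test fixtures used in the tests below; outputs are elided, so the
calls are marked as skipped doctests):

    >>> import pandas as pd
    >>> import pyarrow as pa
    >>> # int64 -> timestamp casts, and datetime/time -> int64 casts,
    >>> # now go through .astype() directly:
    >>> scalars_df["int64_col"].astype(pd.ArrowDtype(pa.timestamp("us")))  # doctest: +SKIP
    >>> scalars_df["timestamp_col"].astype("int64[pyarrow]")  # doctest: +SKIP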
---
 bigframes/core/compile/scalar_op_compiler.py | 58 +++++++++---
 bigframes/dtypes.py                          | 48 ++++++++--
 tests/system/small/test_series.py            | 96 ++++++++++++++++++++
 3 files changed, 180 insertions(+), 22 deletions(-)

diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index 3bcdd70581..6e5185dd5d 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -634,11 +634,55 @@ def struct_field_op_impl(x: ibis_types.Value, op: ops.StructFieldOp):
     return struct_value[name].name(name)


+def numeric_to_datetime(x: ibis_types.Value, unit: str) -> ibis_types.TimestampValue:
+    if not isinstance(x, ibis_types.IntegerValue) and not isinstance(
+        x, ibis_types.FloatingValue
+    ):
+        raise TypeError("Non-numerical types are not supposed to reach this function.")
+
+    if unit not in UNIT_TO_US_CONVERSION_FACTORS:
+        raise ValueError(f"Cannot convert input with unit '{unit}'.")
+    x_converted = x * UNIT_TO_US_CONVERSION_FACTORS[unit]
+    x_converted = x_converted.cast(ibis_dtypes.int64)
+
+    # Note: Due to an issue where casting directly to a timestamp
+    # without a timezone does not work, we first cast to UTC. This
+    # approach appears to bypass a potential bug in Ibis's cast function,
+    # allowing for subsequent casting to a timestamp type without timezone
+    # information. Further investigation is needed to confirm this behavior.
+    return x_converted.to_timestamp(unit="us").cast(
+        ibis_dtypes.Timestamp(timezone="UTC")
+    )
+
+
 @scalar_op_compiler.register_unary_op(ops.AsTypeOp, pass_op=True)
 def astype_op_impl(x: ibis_types.Value, op: ops.AsTypeOp):
     to_type = bigframes.dtypes.bigframes_dtype_to_ibis_dtype(op.to_type)
     if isinstance(x, ibis_types.NullScalar):
         return ibis_types.null().cast(to_type)
+
+    # When casting a DATETIME column into an INT column, we need to convert the column into TIMESTAMP first.
+    if to_type == ibis_dtypes.int64 and x.type() == ibis_dtypes.timestamp:
+        x_converted = x.cast(ibis_dtypes.Timestamp(timezone="UTC"))
+        return bigframes.dtypes.cast_ibis_value(x_converted, to_type)
+
+    if to_type == ibis_dtypes.int64 and x.type() == ibis_dtypes.time:
+        # The conversion unit is set to "us" (microseconds) for consistency
+        # with pandas converting time64[us][pyarrow] to int64[pyarrow].
+        return x.delta(ibis.time("00:00:00"), part="microsecond")
+
+    if x.type() == ibis_dtypes.int64:
+        # The conversion unit is set to "us" (microseconds) for consistency
+        # with pandas converting timestamp[us][pyarrow] to int64[pyarrow].
+        unit = "us"
+        x_converted = numeric_to_datetime(x, unit)
+        if to_type == ibis_dtypes.timestamp:
+            return x_converted.cast(ibis_dtypes.Timestamp())
+        elif to_type == ibis_dtypes.Timestamp(timezone="UTC"):
+            return x_converted
+        elif to_type == ibis_dtypes.time:
+            return x_converted.time()
+
     return bigframes.dtypes.cast_ibis_value(x, to_type)


@@ -677,19 +721,7 @@ def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp):
     # The default unit is set to "ns" (nanoseconds) for consistency
     # with pandas, where "ns" is the default unit for datetime operations.
     unit = op.unit or "ns"
-    if unit not in UNIT_TO_US_CONVERSION_FACTORS:
-        raise ValueError(f"Cannot convert input with unit '{unit}'.")
-    x_converted = x * UNIT_TO_US_CONVERSION_FACTORS[unit]
-    x_converted = x_converted.cast(ibis_dtypes.int64)
-
-    # Note: Due to an issue where casting directly to a timestamp
-    # without a timezone does not work, we first cast to UTC. This
-    # approach appears to bypass a potential bug in Ibis's cast function,
-    # allowing for subsequent casting to a timestamp type without timezone
-    # information. Further investigation is needed to confirm this behavior.
-    x = x_converted.to_timestamp(unit="us").cast(
-        ibis_dtypes.Timestamp(timezone="UTC")
-    )
+    x = numeric_to_datetime(x, unit)

     return x.cast(ibis_dtypes.Timestamp(timezone="UTC" if op.utc else None))
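Note on the conversion above: `numeric_to_datetime` is plain unit scaling
into microseconds before the value is reinterpreted as a timestamp. A minimal
standalone sketch of the same arithmetic, assuming a factor table shaped like
`UNIT_TO_US_CONVERSION_FACTORS` (the table and names below are illustrative,
not the module's):

    # Illustrative only: the real code scales ibis expressions, not Python numbers.
    UNIT_TO_US = {"s": 1_000_000, "ms": 1_000, "us": 1, "ns": 1e-3}

    def to_microseconds(value, unit):
        # Scale a numeric value expressed in `unit` to integer microseconds.
        if unit not in UNIT_TO_US:
            raise ValueError(f"Cannot convert input with unit '{unit}'.")
        return int(value * UNIT_TO_US[unit])

    assert to_microseconds(1.5, "s") == 1_500_000
    assert to_microseconds(2_000, "ns") == 2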
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 366820f9f6..d78a88dfeb 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -60,6 +60,7 @@
     "boolean",
     "Float64",
     "Int64",
+    "int64[pyarrow]",
     "string",
     "string[pyarrow]",
     "timestamp[us, tz=UTC][pyarrow]",
@@ -173,6 +174,9 @@
 # "string" and "string[pyarrow]" are accepted
 BIGFRAMES_STRING_TO_BIGFRAMES["string[pyarrow]"] = pd.StringDtype(storage="pyarrow")

+# special case - both "Int64" and "int64[pyarrow]" are accepted
+BIGFRAMES_STRING_TO_BIGFRAMES["int64[pyarrow]"] = pd.Int64Dtype()
+
 # For the purposes of dataframe.memory_usage
 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data_type_sizes
 DTYPE_BYTE_SIZES = {
@@ -310,11 +314,12 @@ def bigframes_dtype_to_ibis_dtype(
         textwrap.dedent(
             f"""
             Unexpected data type {bigframes_dtype}. The following
-            str dtypes are supppted: 'boolean','Float64','Int64', 'string',
-            'string[pyarrow]','timestamp[us, tz=UTC][pyarrow]',
-            'timestamp[us][pyarrow]','date32[day][pyarrow]',
-            'time64[us][pyarrow]'. The following pandas.ExtensionDtype are
-            supported: pandas.BooleanDtype(), pandas.Float64Dtype(),
+            str dtypes are supported: 'boolean','Float64','Int64',
+            'int64[pyarrow]','string','string[pyarrow]',
+            'timestamp[us, tz=UTC][pyarrow]','timestamp[us][pyarrow]',
+            'date32[day][pyarrow]','time64[us][pyarrow]'.
+            The following pandas.ExtensionDtype are supported:
+            pandas.BooleanDtype(), pandas.Float64Dtype(),
             pandas.Int64Dtype(), pandas.StringDtype(storage="pyarrow"),
             pd.ArrowDtype(pa.date32()), pd.ArrowDtype(pa.time64("us")),
             pd.ArrowDtype(pa.timestamp("us")),
@@ -434,6 +439,9 @@ def cast_ibis_value(
             ibis_dtypes.string,
             ibis_dtypes.Decimal(precision=38, scale=9),
             ibis_dtypes.Decimal(precision=76, scale=38),
+            ibis_dtypes.time,
+            ibis_dtypes.timestamp,
+            ibis_dtypes.Timestamp(timezone="UTC"),
         ),
         ibis_dtypes.float64: (
             ibis_dtypes.string,
@@ -447,8 +455,15 @@ def cast_ibis_value(
             ibis_dtypes.Decimal(precision=38, scale=9),
             ibis_dtypes.Decimal(precision=76, scale=38),
             ibis_dtypes.binary,
+            ibis_dtypes.date,
+            ibis_dtypes.timestamp,
+            ibis_dtypes.Timestamp(timezone="UTC"),
+        ),
+        ibis_dtypes.date: (
+            ibis_dtypes.string,
+            ibis_dtypes.timestamp,
+            ibis_dtypes.Timestamp(timezone="UTC"),
         ),
-        ibis_dtypes.date: (ibis_dtypes.string,),
         ibis_dtypes.Decimal(precision=38, scale=9): (
             ibis_dtypes.float64,
             ibis_dtypes.Decimal(precision=76, scale=38),
@@ -457,9 +472,24 @@ def cast_ibis_value(
             ibis_dtypes.float64,
             ibis_dtypes.Decimal(precision=38, scale=9),
         ),
-        ibis_dtypes.time: (),
-        ibis_dtypes.timestamp: (ibis_dtypes.Timestamp(timezone="UTC"),),
-        ibis_dtypes.Timestamp(timezone="UTC"): (ibis_dtypes.timestamp,),
+        ibis_dtypes.time: (
+            ibis_dtypes.int64,
+            ibis_dtypes.string,
+        ),
+        ibis_dtypes.timestamp: (
+            ibis_dtypes.date,
+            ibis_dtypes.int64,
+            ibis_dtypes.string,
+            ibis_dtypes.time,
+            ibis_dtypes.Timestamp(timezone="UTC"),
+        ),
+        ibis_dtypes.Timestamp(timezone="UTC"): (
+            ibis_dtypes.date,
+            ibis_dtypes.int64,
+            ibis_dtypes.string,
+            ibis_dtypes.time,
+            ibis_dtypes.timestamp,
+        ),
         ibis_dtypes.binary: (ibis_dtypes.string,),
     }
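Note on `cast_ibis_value` above: casts are gated by an allow-list keyed on
the source dtype, so a cast is rejected unless the target appears in the
source's tuple. A minimal sketch of that dispatch pattern (string keys stand
in for the ibis dtype objects; all names here are hypothetical):

    ALLOWED_CASTS = {
        "time": ("int64", "string"),
        "date": ("string", "timestamp"),
    }

    def checked_cast(expr, from_type, to_type):
        # Refuse any cast that is not explicitly allow-listed.
        if to_type not in ALLOWED_CASTS.get(from_type, ()):
            raise TypeError(f"Unsupported cast {from_type} -> {to_type}")
        return f"CAST({expr} AS {to_type})"  # placeholder for the real cast

    assert checked_cast("x", "time", "int64") == "CAST(x AS int64)"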
+ ("timestamp_col", "date32[day][pyarrow]"), + ("timestamp_col", "time64[us][pyarrow]"), ("timestamp_col", pd.ArrowDtype(pa.timestamp("us"))), + ("datetime_col", "date32[day][pyarrow]"), + ("datetime_col", "string[pyarrow]"), + ("datetime_col", "time64[us][pyarrow]"), ("datetime_col", pd.ArrowDtype(pa.timestamp("us", tz="UTC"))), ("date_col", "string[pyarrow]"), + ("date_col", pd.ArrowDtype(pa.timestamp("us"))), + ("date_col", pd.ArrowDtype(pa.timestamp("us", tz="UTC"))), + ("time_col", "string[pyarrow]"), # TODO(bmil): fix Ibis bug: BigQuery backend rounds to nearest int # ("float64_col", "Int64"), # TODO(bmil): decide whether to fix Ibis bug: BigQuery backend @@ -2653,6 +2664,23 @@ def test_astype(scalars_df_index, scalars_pandas_df_index, column, to_type): pd.testing.assert_series_equal(bf_result, pd_result) +@pytest.mark.parametrize( + ("column", "to_type"), + [ + ("timestamp_col", "int64[pyarrow]"), + ("datetime_col", "int64[pyarrow]"), + ("time_col", "int64[pyarrow]"), + ], +) +@skip_legacy_pandas +def test_date_time_astype_int( + scalars_df_index, scalars_pandas_df_index, column, to_type +): + bf_result = scalars_df_index[column].astype(to_type).to_pandas() + pd_result = scalars_pandas_df_index[column].astype(to_type) + pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + + def test_string_astype_int(): pd_series = pd.Series(["4", "-7", "0", " -03"]) bf_series = series.Series(pd_series) @@ -2676,6 +2704,74 @@ def test_string_astype_float(): pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) +def test_string_astype_date(): + pd_series = pd.Series(["2014-08-15", "2215-08-15", "2016-02-29"]).astype( + pd.ArrowDtype(pa.string()) + ) + + bf_series = series.Series(pd_series) + + pd_result = pd_series.astype("date32[day][pyarrow]") + bf_result = bf_series.astype("date32[day][pyarrow]").to_pandas() + + pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + + +def test_string_astype_datetime(): + pd_series = pd.Series( + ["2014-08-15 08:15:12", "2015-08-15 08:15:12.654754", "2016-02-29 00:00:00"] + ).astype(pd.ArrowDtype(pa.string())) + + bf_series = series.Series(pd_series) + + pd_result = pd_series.astype(pd.ArrowDtype(pa.timestamp("us"))) + bf_result = bf_series.astype(pd.ArrowDtype(pa.timestamp("us"))).to_pandas() + + pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + + +def test_string_astype_timestamp(): + pd_series = pd.Series( + [ + "2014-08-15 08:15:12+00:00", + "2015-08-15 08:15:12.654754+05:00", + "2016-02-29 00:00:00+08:00", + ] + ).astype(pd.ArrowDtype(pa.string())) + + bf_series = series.Series(pd_series) + + pd_result = pd_series.astype(pd.ArrowDtype(pa.timestamp("us", tz="UTC"))) + bf_result = bf_series.astype( + pd.ArrowDtype(pa.timestamp("us", tz="UTC")) + ).to_pandas() + + pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + + +def test_timestamp_astype_string(): + bf_series = series.Series( + [ + "2014-08-15 08:15:12+00:00", + "2015-08-15 08:15:12.654754+05:00", + "2016-02-29 00:00:00+08:00", + ] + ).astype(pd.ArrowDtype(pa.timestamp("us", tz="UTC"))) + + expected_result = pd.Series( + [ + "2014-08-15 08:15:12+00", + "2015-08-15 03:15:12.654754+00", + "2016-02-28 16:00:00+00", + ] + ) + bf_result = bf_series.astype(pa.string()).to_pandas() + + pd.testing.assert_series_equal( + bf_result, expected_result, check_index_type=False, check_dtype=False + ) + + @pytest.mark.parametrize( "index", [0, 5, -2], From 11b4a0cf8014330813b0341096dd3cf5ac958086 Mon 
From 11b4a0cf8014330813b0341096dd3cf5ac958086 Mon Sep 17 00:00:00 2001
From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com>
Date: Thu, 14 Mar 2024 13:22:52 -0700
Subject: [PATCH 2/4] chore: add deferred exec code samples (#439)

* chore: add deferred exec code samples

* fix tests

* fix tests
---
 bigframes/_config/compute_options.py | 11 ++++++++++
 .../pandas/core/config_init.py       | 20 +++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/bigframes/_config/compute_options.py b/bigframes/_config/compute_options.py
index 20c31d3906..fb708b844c 100644
--- a/bigframes/_config/compute_options.py
+++ b/bigframes/_config/compute_options.py
@@ -23,6 +23,17 @@ class ComputeOptions:
     """
     Encapsulates configuration for compute options.

+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
+
+        >>> bpd.options.compute.maximum_bytes_billed = 500
+        >>> # df.to_pandas()  # this query exceeds the limit and fails with:
+        google.api_core.exceptions.InternalServerError: 500 Query exceeded limit for bytes billed: 500. 10485760 or higher required.
+
+        >>> bpd.options.compute.maximum_bytes_billed = None  # reset option
+
     Attributes:
         maximum_bytes_billed (int, Options):
             Limits the bytes billed for query jobs. Queries that will have
diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py
index dfb91dfeb8..33c6b3e093 100644
--- a/third_party/bigframes_vendored/pandas/core/config_init.py
+++ b/third_party/bigframes_vendored/pandas/core/config_init.py
@@ -15,6 +15,26 @@
 display_options_doc = """
 Encapsulates configuration for displaying objects.

+**Examples:**
+
+Setting the repr mode to "deferred" prevents job execution during repr.
+    >>> import bigframes.pandas as bpd
+    >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
+
+    >>> bpd.options.display.repr_mode = "deferred"
+    >>> df.head(20)  # will no longer run the job
+    Computation deferred. Computation will process 28.9 kB
+
+Users can also get a dry run of the job by accessing the query_job property before running the job; this returns a dry run instance they can inspect.
+    >>> df.query_job.total_bytes_processed
+    28947
+
+Users can execute the job by calling .to_pandas().
+    >>> # df.to_pandas()
+
+Reset the option.
+    >>> bpd.options.display.repr_mode = "head"
+
 Attributes:
     max_columns (int, default 20):
         If `max_columns` is exceeded, switch to truncate view.
From 79cb05ae44e875d2e2694a93a10580ef742d41df Mon Sep 17 00:00:00 2001
From: TrevorBergeron
Date: Thu, 14 Mar 2024 15:14:29 -0700
Subject: [PATCH 3/4] feat: add DataFrame.pipe() method (#421)

---
 tests/system/small/test_dataframe.py          |  25 +++++
 tests/system/small/test_series.py             |  25 +++++
 .../bigframes_vendored/pandas/core/common.py  |  42 +++++++
 .../bigframes_vendored/pandas/core/generic.py | 105 +++++++++++++++++-
 4 files changed, 196 insertions(+), 1 deletion(-)
 create mode 100644 third_party/bigframes_vendored/pandas/core/common.py

diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 61dcd778ef..be4211a2fc 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -1000,6 +1000,31 @@ def test_apply_series_scalar_callable(
     pandas.testing.assert_series_equal(bf_result, pd_result)


+def test_df_pipe(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    columns = ["int64_too", "int64_col"]
+
+    def foo(x: int, y: int, df):
+        return (df + x) % y
+
+    bf_result = (
+        scalars_df_index[columns]
+        .pipe((foo, "df"), x=7, y=9)
+        .pipe(lambda x: x**2)
+        .to_pandas()
+    )
+
+    pd_result = (
+        scalars_pandas_df_index[columns]
+        .pipe((foo, "df"), x=7, y=9)
+        .pipe(lambda x: x**2)
+    )
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
 def test_df_keys(
     scalars_df_index,
     scalars_pandas_df_index,
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 055a41a7da..3627e8249c 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -3299,3 +3299,28 @@ def test_apply_not_supported(scalars_dfs, col, lambda_, exception):
     bf_col = scalars_df[col]
     with pytest.raises(exception):
         bf_col.apply(lambda_, by_row=False)
+
+
+def test_series_pipe(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    column = "int64_too"
+
+    def foo(x: int, y: int, df):
+        return (df + x) % y
+
+    bf_result = (
+        scalars_df_index[column]
+        .pipe((foo, "df"), x=7, y=9)
+        .pipe(lambda x: x**2)
+        .to_pandas()
+    )
+
+    pd_result = (
+        scalars_pandas_df_index[column]
+        .pipe((foo, "df"), x=7, y=9)
+        .pipe(lambda x: x**2)
+    )
+
+    assert_series_equal(bf_result, pd_result)
diff --git a/third_party/bigframes_vendored/pandas/core/common.py b/third_party/bigframes_vendored/pandas/core/common.py
new file mode 100644
index 0000000000..ded5a22b8f
--- /dev/null
+++ b/third_party/bigframes_vendored/pandas/core/common.py
@@ -0,0 +1,42 @@
+# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/common.py
+from __future__ import annotations
+
+from typing import Callable, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from bigframes_vendored.pandas.pandas._typing import T
+
+
+def pipe(
+    obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
+) -> T:
+    """
+    Apply a function ``func`` to object ``obj``, either by passing ``obj``
+    as the first argument to the function or, when ``func`` is a
+    ``(callable, data_keyword)`` tuple, by passing ``obj`` to the callable
+    as the keyword argument named by ``data_keyword``.
+
+    Args:
+        func (callable or tuple of (callable, str)):
+            Function to apply to this object or, alternatively, a
+            ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
+            string indicating the keyword of ``callable`` that expects the
+            object.
+        args (iterable, optional):
+            Positional arguments passed into ``func``.
+        kwargs (dict, optional):
+            A dictionary of keyword arguments passed into ``func``.
+
+    Returns:
+        object: the return type of ``func``.
+    """
+    if isinstance(func, tuple):
+        func, target = func
+        if target in kwargs:
+            msg = f"{target} is both the pipe target and a keyword argument"
+            raise ValueError(msg)
+        kwargs[target] = obj
+        return func(*args, **kwargs)
+    else:
+        return func(obj, *args, **kwargs)
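The tuple branch above is the essential behavior: a ``(callable,
data_keyword)`` pair routes the piped object into the named keyword instead
of the first positional slot. A self-contained sketch of the same dispatch,
independent of the vendored module (function names are illustrative):

    def pipe(obj, func, *args, **kwargs):
        # Same shape as the vendored helper above, minus annotations.
        if isinstance(func, tuple):
            func, target = func
            if target in kwargs:
                raise ValueError(f"{target} is both the pipe target and a keyword argument")
            kwargs[target] = obj
            return func(*args, **kwargs)
        return func(obj, *args, **kwargs)

    def scale(factor, data):
        return [v * factor for v in data]

    # The piped object lands in the "data" keyword, not the first position:
    assert pipe([1, 2], (scale, "data"), 3) == [3, 6]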
diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py
index 01d8f7a174..7f8e1f7b53 100644
--- a/third_party/bigframes_vendored/pandas/core/generic.py
+++ b/third_party/bigframes_vendored/pandas/core/generic.py
@@ -1,12 +1,16 @@
 # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/generic.py
 from __future__ import annotations

-from typing import Iterator, Literal, Optional
+from typing import Callable, Iterator, Literal, Optional, TYPE_CHECKING

 from bigframes_vendored.pandas.core import indexing
+import bigframes_vendored.pandas.core.common as common

 from bigframes import constants

+if TYPE_CHECKING:
+    from bigframes_vendored.pandas.pandas._typing import T
+

 class NDFrame(indexing.IndexingMixin):
     """
@@ -963,6 +967,105 @@ def expanding(self, min_periods=1):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

+    def pipe(
+        self,
+        func: Callable[..., T] | tuple[Callable[..., T], str],
+        *args,
+        **kwargs,
+    ) -> T:
+        """
+        Apply chainable functions that expect Series or DataFrames.
+
+        **Examples:**
+
+        Constructing an income DataFrame from a list of lists.
+
+        >>> import bigframes.pandas as bpd
+        >>> import numpy as np
+        >>> bpd.options.display.progress_bar = None
+
+        >>> data = [[8000, 1000], [9500, np.nan], [5000, 2000]]
+        >>> df = bpd.DataFrame(data, columns=['Salary', 'Others'])
+        >>> df
+           Salary  Others
+        0    8000  1000.0
+        1    9500
+        2    5000  2000.0
+
+        [3 rows x 2 columns]
+
+        Functions that perform tax reductions on an income DataFrame.
+
+        >>> def subtract_federal_tax(df):
+        ...     return df * 0.9
+        >>> def subtract_state_tax(df, rate):
+        ...     return df * (1 - rate)
+        >>> def subtract_national_insurance(df, rate, rate_increase):
+        ...     new_rate = rate + rate_increase
+        ...     return df * (1 - new_rate)
+
+        Instead of writing
+
+        >>> subtract_national_insurance(
+        ...     subtract_state_tax(subtract_federal_tax(df), rate=0.12),
+        ...     rate=0.05,
+        ...     rate_increase=0.02)  # doctest: +SKIP
+
+        You can write
+
+        >>> (
+        ...     df.pipe(subtract_federal_tax)
+        ...     .pipe(subtract_state_tax, rate=0.12)
+        ...     .pipe(subtract_national_insurance, rate=0.05, rate_increase=0.02)
+        ... )
+            Salary   Others
+        0  5892.48   736.56
+        1  6997.32
+        2   3682.8  1473.12
+
+        [3 rows x 2 columns]
+
+        If you have a function that takes the data as (say) the second
+        argument, pass a tuple indicating which keyword expects the
+        data. For example, suppose ``national_insurance`` takes its data
+        as ``df`` in the second argument:
+
+        >>> def subtract_national_insurance(rate, df, rate_increase):
+        ...     new_rate = rate + rate_increase
+        ...     return df * (1 - new_rate)
+        >>> (
+        ...     df.pipe(subtract_federal_tax)
+        ...     .pipe(subtract_state_tax, rate=0.12)
+        ...     .pipe(
+        ...         (subtract_national_insurance, 'df'),
+        ...         rate=0.05,
+        ...         rate_increase=0.02
+        ...     )
+        ... )
+            Salary   Others
+        0  5892.48   736.56
+        1  6997.32
+        2   3682.8  1473.12
+
+        [3 rows x 2 columns]
+
+        Args:
+            func (function):
+                Function to apply to this object. ``args`` and ``kwargs``
+                are passed into ``func``. Alternatively a
+                ``(callable, data_keyword)`` tuple where ``data_keyword``
+                is a string indicating the keyword of ``callable`` that
+                expects this object.
+            args (iterable, optional):
+                Positional arguments passed into ``func``.
+            kwargs (mapping, optional):
+                A dictionary of keyword arguments passed into ``func``.
+
+        Returns:
+            same type as caller
+        """
+        return common.pipe(self, func, *args, **kwargs)
+
     def __nonzero__(self):
         raise ValueError(
             f"The truth value of a {type(self).__name__} is ambiguous. "
From efbf1f28c39ee2f2e327938c8a2d3095231eee31 Mon Sep 17 00:00:00 2001
From: Lily Zhang
Date: Fri, 15 Mar 2024 00:32:23 +0000
Subject: [PATCH 4/4] addressed comments

---
 bigframes/core/compile/scalar_op_compiler.py | 3 ++-
 tests/system/small/test_series.py            | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index 6e5185dd5d..67761c0330 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -673,7 +673,8 @@ def astype_op_impl(x: ibis_types.Value, op: ops.AsTypeOp):

     if x.type() == ibis_dtypes.int64:
         # The conversion unit is set to "us" (microseconds) for consistency
-        # with pandas converting timestamp[us][pyarrow] to int64[pyarrow].
+        # with pandas converting int64[pyarrow] to timestamp[us][pyarrow],
+        # timestamp[us, tz=UTC][pyarrow], and time64[us][pyarrow].
         unit = "us"
         x_converted = numeric_to_datetime(x, unit)
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 3627e8249c..e22037a1ce 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -2679,6 +2679,7 @@ def test_date_time_astype_int(
     bf_result = scalars_df_index[column].astype(to_type).to_pandas()
     pd_result = scalars_pandas_df_index[column].astype(to_type)
     pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+    assert bf_result.dtype == "Int64"


 def test_string_astype_int():
@@ -2770,6 +2771,7 @@ def test_timestamp_astype_string():
     pd.testing.assert_series_equal(
         bf_result, expected_result, check_index_type=False, check_dtype=False
     )
+    assert bf_result.dtype == "string[pyarrow]"


 @pytest.mark.parametrize(