diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index ce9c22132b..9358dab1b1 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -401,6 +401,15 @@ def join( return ArrayValue(bigframes.core.rewrite.maybe_rewrite_join(join_node)) return ArrayValue(join_node) + def explode(self, column_ids: typing.Sequence[str]) -> ArrayValue: + assert len(column_ids) > 0 + for column_id in column_ids: + assert bigframes.dtypes.is_array_like(self.get_column_type(column_id)) + + return ArrayValue( + nodes.ExplodeNode(child=self.node, column_ids=tuple(column_ids)) + ) + def _uniform_sampling(self, fraction: float) -> ArrayValue: """Sampling the table on given fraction. diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 04a98ac9a4..0b6e50cfa3 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -1162,6 +1162,36 @@ def calculate_pairwise_metric(self, op=agg_ops.CorrOp()): index_labels=self.column_labels.names, ) + def explode( + self, + column_ids: typing.Sequence[str], + ignore_index: Optional[bool], + ) -> Block: + column_ids = [ + column_id + for column_id in column_ids + if bigframes.dtypes.is_array_like(self.expr.get_column_type(column_id)) + ] + if len(column_ids) == 0: + expr = self.expr + else: + expr = self.expr.explode(column_ids) + + if ignore_index: + return Block( + expr.drop_columns(self.index_columns), + column_labels=self.column_labels, + # Initiates default index creation using the block constructor. 
+ index_columns=[], + ) + else: + return Block( + expr, + column_labels=self.column_labels, + index_columns=self.index_columns, + index_labels=self.column_labels.names, + ) + def _standard_stats(self, column_id) -> typing.Sequence[agg_ops.UnaryAggregateOp]: """ Gets a standard set of stats to preemptively fetch for a column if diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index af2d69275a..f1c5d62010 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -20,6 +20,7 @@ import typing from typing import Collection, Iterable, Literal, Optional, Sequence +import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops import ibis import ibis.backends.bigquery as ibis_bigquery import ibis.common.deferred # type: ignore @@ -502,6 +503,51 @@ def _uniform_sampling(self, fraction: float) -> UnorderedIR: columns=columns, ) + def explode(self, column_ids: typing.Sequence[str]) -> UnorderedIR: + table = self._to_ibis_expr() + + # The offset array ensures null represents empty arrays after unnesting. 
+ offset_array_id = bigframes.core.guid.generate_guid("offset_array_") + offset_array = ( + vendored_ibis_ops.GenerateArray( + ibis.greatest( + 0, + ibis.least( + *[table[column_id].length() - 1 for column_id in column_ids] + ), + ) + ) + .to_expr() + .name(offset_array_id), + ) + table_w_offset_array = table.select( + offset_array, + *self._column_names, + ) + + unnest_offset_id = bigframes.core.guid.generate_guid("unnest_offset_") + unnest_offset = ( + table_w_offset_array[offset_array_id].unnest().name(unnest_offset_id) + ) + table_w_offset = table_w_offset_array.select( + unnest_offset, + *self._column_names, + ) + + unnested_columns = [ + table_w_offset[column_id][table_w_offset[unnest_offset_id]].name(column_id) + if column_id in column_ids + else table_w_offset[column_id] + for column_id in self._column_names + ] + table_w_unnest = table_w_offset.select(*unnested_columns) + + columns = [table_w_unnest[column_name] for column_name in self._column_names] + return UnorderedIR( + table_w_unnest, + columns=columns, + ) + ## Helpers def _set_or_replace_by_id( self, id: str, new_value: ibis_types.Value @@ -719,6 +765,78 @@ def _uniform_sampling(self, fraction: float) -> OrderedIR: ordering=self._ordering, ) + def explode(self, column_ids: typing.Sequence[str]) -> OrderedIR: + table = self._to_ibis_expr(ordering_mode="unordered", expose_hidden_cols=True) + + offset_array_id = bigframes.core.guid.generate_guid("offset_array_") + offset_array = ( + vendored_ibis_ops.GenerateArray( + ibis.greatest( + 0, + ibis.least( + *[table[column_id].length() - 1 for column_id in column_ids] + ), + ) + ) + .to_expr() + .name(offset_array_id), + ) + table_w_offset_array = table.select( + offset_array, + *self._column_names, + *self._hidden_ordering_column_names, + ) + + unnest_offset_id = bigframes.core.guid.generate_guid("unnest_offset_") + unnest_offset = ( + table_w_offset_array[offset_array_id].unnest().name(unnest_offset_id) + ) + table_w_offset = table_w_offset_array.select( + 
unnest_offset, + *self._column_names, + *self._hidden_ordering_column_names, + ) + + unnested_columns = [ + table_w_offset[column_id][table_w_offset[unnest_offset_id]].name(column_id) + if column_id in column_ids + else table_w_offset[column_id] + for column_id in self._column_names + ] + + table_w_unnest = table_w_offset.select( + table_w_offset[unnest_offset_id], + *unnested_columns, + *self._hidden_ordering_column_names, + ) + + columns = [table_w_unnest[column_name] for column_name in self._column_names] + hidden_ordering_columns = [ + *[ + table_w_unnest[column_name] + for column_name in self._hidden_ordering_column_names + ], + table_w_unnest[unnest_offset_id], + ] + ordering = ExpressionOrdering( + ordering_value_columns=tuple( + [ + *self._ordering.ordering_value_columns, + ascending_over(unnest_offset_id), + ] + ), + total_ordering_columns=frozenset( + [*self._ordering.total_ordering_columns, unnest_offset_id] + ), + ) + + return OrderedIR( + table_w_unnest, + columns=columns, + hidden_ordering_columns=hidden_ordering_columns, + ordering=ordering, + ) + def promote_offsets(self, col_id: str) -> OrderedIR: """ Convenience function to promote copy of column offsets to a value column. Can be used to reset index. 
diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index 6f10d85f31..638e3eacdd 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -191,6 +191,11 @@ def compile_unpivot(node: nodes.UnpivotNode, ordered: bool = True): ) +@_compile_node.register +def compiler_explode(node: nodes.ExplodeNode, ordered: bool = True): + return compile_node(node.child, ordered).explode(node.column_ids) + + @_compile_node.register def compiler_random_sample(node: nodes.RandomSampleNode, ordered: bool = True): return compile_node(node.child, ordered)._uniform_sampling(node.fraction) diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 8f646ac4bb..d740605a56 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -484,3 +484,30 @@ def row_preserving(self) -> bool: def __hash__(self): return self._node_hash + + +@dataclass(frozen=True) +class ExplodeNode(UnaryNode): + column_ids: typing.Tuple[str, ...] + + @property + def row_preserving(self) -> bool: + return False + + def __hash__(self): + return self._node_hash + + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + items = tuple( + schemata.SchemaItem( + name, + bigframes.dtypes.arrow_dtype_to_bigframes_dtype( + self.child.schema.get_type(name).pyarrow_dtype.value_type + ), + ) + if name in self.column_ids + else schemata.SchemaItem(name, self.child.schema.get_type(name)) + for name in self.child.schema.names + ) + return schemata.ArraySchema(items) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 066b082490..e1483b74dd 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2532,6 +2532,36 @@ def sample( )[0] ) + def explode( + self, + column: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + *, + ignore_index: Optional[bool] = False, + ) -> DataFrame: + if not utils.is_list_like(column): + column_labels = typing.cast(typing.Sequence[blocks.Label], (column,)) + else: + 
column_labels = typing.cast(typing.Sequence[blocks.Label], tuple(column)) + + if not column_labels: + raise ValueError("column must be nonempty") + if len(column_labels) > len(set(column_labels)): + raise ValueError("column must be unique") + + column_ids = [self._resolve_label_exact(label) for label in column_labels] + missing = [ + column_labels[i] for i in range(len(column_ids)) if column_ids[i] is None + ] + if len(missing) > 0: + raise KeyError(f"None of {missing} are in the columns") + + return DataFrame( + self._block.explode( + column_ids=typing.cast(typing.Sequence[str], tuple(column_ids)), + ignore_index=ignore_index, + ) + ) + def _split( self, ns: Iterable[int] = (), diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 3d8c06d188..c5bf5db2fe 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -129,16 +129,19 @@ def is_string_like(type: ExpressionType) -> bool: def is_array_like(type: ExpressionType) -> bool: - if isinstance(type, pd.ArrowDtype) and isinstance(type.pyarrow_dtype, pa.ListType): - return True - else: - return type in (STRING_DTYPE, BYTES_DTYPE) + return isinstance(type, pd.ArrowDtype) and isinstance( + type.pyarrow_dtype, pa.ListType + ) def is_numeric(type: ExpressionType) -> bool: return type in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE +def is_iterable(type: ExpressionType) -> bool: + return type in (STRING_DTYPE, BYTES_DTYPE) or is_array_like(type) + + def is_comparable(type: ExpressionType) -> bool: return (type is not None) and (type not in UNORDERED_DTYPES) @@ -348,6 +351,10 @@ def arrow_dtype_to_ibis_dtype(arrow_dtype: pa.DataType) -> ibis_dtypes.DataType: ) +def arrow_dtype_to_bigframes_dtype(arrow_dtype: pa.DataType) -> Dtype: + return ibis_dtype_to_bigframes_dtype(arrow_dtype_to_ibis_dtype(arrow_dtype)) + + def bigframes_dtype_to_ibis_dtype( bigframes_dtype: Union[DtypeString, Dtype, np.dtype[Any]] ) -> ibis_dtypes.DataType: diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 
0dcc643238..d631ba8508 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -212,7 +212,7 @@ def create_binary_op( len_op = create_unary_op( name="len", type_signature=op_typing.FixedOutputType( - dtypes.is_array_like, dtypes.INT_DTYPE, description="array-like" + dtypes.is_iterable, dtypes.INT_DTYPE, description="iterable" ), ) reverse_op = create_unary_op(name="reverse", type_signature=op_typing.STRING_TRANSFORM) diff --git a/bigframes/series.py b/bigframes/series.py index e7b358c2fe..e7b77b13ce 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1547,6 +1547,13 @@ def sample( )[0] ) + def explode(self, *, ignore_index: Optional[bool] = False) -> Series: + return Series( + self._block.explode( + column_ids=[self._value_column], ignore_index=ignore_index + ) + ) + def __array_ufunc__( self, ufunc: numpy.ufunc, method: str, *inputs, **kwargs ) -> Series: diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index cf907b02d6..bb3f920a6d 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -20,6 +20,7 @@ from typing import Tuple import geopandas as gpd # type: ignore +import numpy as np import pandas as pd import pandas.testing import pyarrow as pa # type: ignore @@ -28,6 +29,7 @@ import bigframes import bigframes._config.display_options as display_options import bigframes.dataframe as dataframe +import bigframes.pandas as bpd import bigframes.series as series from tests.system.utils import ( assert_pandas_df_equal, @@ -4128,3 +4130,72 @@ def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_cre loaded_scalars_df_index = session.read_gbq(result_table) assert not loaded_scalars_df_index.empty + + +@pytest.mark.parametrize( + ("col_names", "ignore_index"), + [ + pytest.param(["A"], False, id="one_array_false"), + pytest.param(["A"], True, id="one_array_true"), + pytest.param(["B"], False, id="one_float_false"), + 
pytest.param(["B"], True, id="one_float_true"), + pytest.param(["A", "C"], False, id="two_arrays_false"), + pytest.param(["A", "C"], True, id="two_arrays_true"), + ], +) +def test_dataframe_explode(col_names, ignore_index): + data = { + "A": [[0, 1, 2], [], [3, 4]], + "B": 3, + "C": [["a", "b", "c"], np.nan, ["d", "e"]], + } + df = bpd.DataFrame(data) + pd_df = df.to_pandas() + pd.testing.assert_frame_equal( + df.explode(col_names, ignore_index=ignore_index).to_pandas(), + pd_df.explode(col_names, ignore_index=ignore_index), + check_index_type=False, + check_dtype=False, + ) + + +@pytest.mark.parametrize( + ("ignore_index", "ordered"), + [ + pytest.param(True, True, id="include_index_ordered"), + pytest.param(True, False, id="include_index_unordered"), + pytest.param(False, True, id="ignore_index_ordered"), + ], +) +def test_dataframe_explode_reserve_order(ignore_index, ordered): + data = { + "a": [np.random.randint(0, 10, 10) for _ in range(10)], + "b": [np.random.randint(0, 10, 10) for _ in range(10)], + } + df = bpd.DataFrame(data) + pd_df = pd.DataFrame(data) + + res = df.explode(["a", "b"], ignore_index=ignore_index).to_pandas(ordered=ordered) + pd_res = pd_df.explode(["a", "b"], ignore_index=ignore_index).astype( + pd.Int64Dtype() + ) + pd.testing.assert_frame_equal( + res if ordered else res.sort_index(), + pd_res, + check_index_type=False, + ) + + +@pytest.mark.parametrize( + ("col_names"), + [ + pytest.param([], id="empty", marks=pytest.mark.xfail(raises=ValueError)), + pytest.param( + ["A", "A"], id="duplicate", marks=pytest.mark.xfail(raises=ValueError) + ), + pytest.param("unknown", id="unknown", marks=pytest.mark.xfail(raises=KeyError)), + ], +) +def test_dataframe_explode_xfail(col_names): + df = bpd.DataFrame({"A": [[0, 1, 2], [], [3, 4]]}) + df.explode(col_names) diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py index d585d4f73e..6aca7628cf 100644 --- a/tests/system/small/test_multiindex.py +++ 
b/tests/system/small/test_multiindex.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import pandas import pytest @@ -1168,3 +1169,19 @@ def test_column_multi_index_dot_not_supported(): NotImplementedError, match="Multi-level column input is not supported" ): bf1 @ bf2 + + +def test_explode_w_multi_index(): + data = [[[1, 1], np.nan, [3, 3]], [[2], [5], []]] + multi_level_columns = pandas.MultiIndex.from_arrays( + [["col0", "col0", "col1"], ["col00", "col01", "col11"]] + ) + + df = bpd.DataFrame(data, columns=multi_level_columns) + pd_df = df.to_pandas() + pandas.testing.assert_frame_equal( + df["col0"].explode("col00").to_pandas(), + pd_df["col0"].explode("col00"), + check_dtype=False, + check_index_type=False, + ) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 794ab6b7a2..e15dbc6a3f 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -3416,3 +3416,104 @@ def foo(x: int, y: int, df): ) assert_series_equal(bf_result, pd_result) + + +@pytest.mark.parametrize( + ("data"), + [ + pytest.param([1, 2, 3], id="int"), + pytest.param([[1, 2, 3], [], numpy.nan, [3, 4]], id="int_array"), + pytest.param( + [["A", "AA", "AAA"], ["BB", "B"], numpy.nan, [], ["C"]], id="string_array" + ), + pytest.param( + [ + {"A": {"x": 1.0}, "B": "b"}, + {"A": {"y": 2.0}, "B": "bb"}, + {"A": {"z": 4.0}}, + {}, + numpy.nan, + ], + id="struct_array", + ), + ], +) +def test_series_explode(data): + data = [[1, 2, 3], [], numpy.nan, [3, 4]] + s = bigframes.pandas.Series(data) + pd_s = pd.Series(data) + pd.testing.assert_series_equal( + s.explode().to_pandas(), + pd_s.explode(), + check_index_type=False, + check_dtype=False, + ) + + +@pytest.mark.parametrize( + ("index", "ignore_index"), + [ + pytest.param(None, True, id="default_index"), + pytest.param(None, False, id="ignore_default_index"), + pytest.param([5, 1, 3, 2], True, 
id="unordered_index"), + pytest.param([5, 1, 3, 2], False, id="ignore_unordered_index"), + pytest.param(["z", "x", "a", "b"], True, id="str_index"), + pytest.param(["z", "x", "a", "b"], False, id="ignore_str_index"), + ], +) +def test_series_explode_w_index(index, ignore_index): + data = [[], [200.0, 23.12], [4.5, -9.0], [1.0]] + s = bigframes.pandas.Series(data, index=index) + pd_s = pd.Series(data, index=index) + pd.testing.assert_series_equal( + s.explode(ignore_index=ignore_index).to_pandas(), + pd_s.explode(ignore_index=ignore_index).astype(pd.Float64Dtype()), + check_index_type=False, + ) + + +@pytest.mark.parametrize( + ("ignore_index", "ordered"), + [ + pytest.param(True, True, id="include_index_ordered"), + pytest.param(True, False, id="include_index_unordered"), + pytest.param(False, True, id="ignore_index_ordered"), + ], +) +def test_series_explode_reserve_order(ignore_index, ordered): + data = [numpy.random.randint(0, 10, 10) for _ in range(10)] + s = bigframes.pandas.Series(data) + pd_s = pd.Series(data) + + res = s.explode(ignore_index=ignore_index).to_pandas(ordered=ordered) + pd_res = pd_s.explode(ignore_index=ignore_index).astype(pd.Int64Dtype()) + pd.testing.assert_series_equal( + res if ordered else res.sort_index(), + pd_res, + check_index_type=False, + ) + + +def test_series_explode_w_aggregate(): + data = [[1, 2, 3], [], numpy.nan, [3, 4]] + s = bigframes.pandas.Series(data) + pd_s = pd.Series(data) + assert s.explode().sum() == pd_s.explode().sum() + + +@pytest.mark.parametrize( + ("data"), + [ + pytest.param(numpy.nan, id="null"), + pytest.param([numpy.nan], id="null_array"), + pytest.param([[]], id="empty_array"), + pytest.param([numpy.nan, []], id="null_and_empty_array"), + ], +) +def test_series_explode_null(data): + s = bigframes.pandas.Series(data) + pd.testing.assert_series_equal( + s.explode().to_pandas(), + s.to_pandas().explode(), + check_dtype=False, + ) diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py 
b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py index 3f89feaa34..88826b31ce 100644 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py +++ b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py @@ -26,11 +26,17 @@ def _to_json_string(translator, op: vendored_ibis_ops.ToJsonString): return f"TO_JSON_STRING({arg})" +def _generate_array(translator, op: vendored_ibis_ops.GenerateArray): + arg = translator.translate(op.arg) + return f"GENERATE_ARRAY(0, {arg})" + + patched_ops = { vendored_ibis_ops.ApproximateMultiQuantile: _approx_quantiles, # type:ignore vendored_ibis_ops.FirstNonNullValue: _first_non_null_value, # type:ignore vendored_ibis_ops.LastNonNullValue: _last_non_null_value, # type:ignore vendored_ibis_ops.ToJsonString: _to_json_string, # type:ignore + vendored_ibis_ops.GenerateArray: _generate_array, # type:ignore } OPERATION_REGISTRY.update(patched_ops) diff --git a/third_party/bigframes_vendored/ibis/expr/operations/__init__.py b/third_party/bigframes_vendored/ibis/expr/operations/__init__.py index 2c2efe528d..3d5a5a7fa0 100644 --- a/third_party/bigframes_vendored/ibis/expr/operations/__init__.py +++ b/third_party/bigframes_vendored/ibis/expr/operations/__init__.py @@ -2,5 +2,6 @@ from __future__ import annotations from bigframes_vendored.ibis.expr.operations.analytic import * # noqa: F401 F403 +from bigframes_vendored.ibis.expr.operations.generic import * # noqa: F401 F403 from bigframes_vendored.ibis.expr.operations.json import * # noqa: F401 F403 from bigframes_vendored.ibis.expr.operations.reductions import * # noqa: F401 F403 diff --git a/third_party/bigframes_vendored/ibis/expr/operations/generic.py b/third_party/bigframes_vendored/ibis/expr/operations/generic.py new file mode 100644 index 0000000000..82d0a13371 --- /dev/null +++ b/third_party/bigframes_vendored/ibis/expr/operations/generic.py @@ -0,0 +1,9 @@ +# Contains code from 
https://github.com/ibis-project/ibis/blob/master/ibis/expr/operations/generic.py +from __future__ import annotations + +import ibis.expr.datatypes as dt +from ibis.expr.operations.core import Unary + + +class GenerateArray(Unary): + dtype = dt.Array(dt.int64) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 3ae5b0db2a..e5aa47ad3e 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2804,6 +2804,57 @@ def combine_first(self, other) -> DataFrame: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def explode( + self, column: Union[str, Sequence[str]], *, ignore_index: Optional[bool] = False + ) -> DataFrame: + """ + Transform each element of an array to a row, replicating index values. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'A': [[0, 1, 2], [], [], [3, 4]], + ... 'B': 1, + ... 'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]}) + >>> df.explode('A') + A B C + 0 0 1 ['a' 'b' 'c'] + 0 1 1 ['a' 'b' 'c'] + 0 2 1 ['a' 'b' 'c'] + 1 1 [] + 2 1 [] + 3 3 1 ['d' 'e'] + 3 4 1 ['d' 'e'] + + [7 rows x 3 columns] + >>> df.explode(list('AC')) + A B C + 0 0 1 a + 0 1 1 b + 0 2 1 c + 1 1 + 2 1 + 3 3 1 d + 3 4 1 e + + [7 rows x 3 columns] + + Args: + column (str, Sequence[str]): + Column(s) to explode. For multiple columns, specify a non-empty list + in which each element is a str or tuple; the list-like data of all + specified columns on the same row of the frame must have matching length. + ignore_index (bool, default False): + If True, the resulting index will be labeled 0, 1, …, n - 1. + + Returns: + bigframes.dataframe.DataFrame: Exploded lists to rows of the subset columns; + index will be duplicated for these rows. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def corr(self, method, min_periods, numeric_only) -> DataFrame: """ Compute pairwise correlation of columns, excluding NA/null values. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 89b39cf8a0..785755a562 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -3,7 +3,7 @@ """ from __future__ import annotations -from typing import Hashable, IO, Literal, Mapping, Sequence, TYPE_CHECKING +from typing import Hashable, IO, Literal, Mapping, Optional, Sequence, TYPE_CHECKING from bigframes_vendored.pandas.core.generic import NDFrame import numpy as np @@ -751,6 +751,34 @@ def round(self, decimals: int = 0) -> Series: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def explode(self, *, ignore_index: Optional[bool] = False) -> Series: + """ + Transform each element of a list-like to a row. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([[1, 2, 3], [], [3, 4]]) + >>> s.explode() + 0 1 + 0 2 + 0 3 + 1 + 2 3 + 2 4 + dtype: Int64 + + Args: + ignore_index (bool, default False): + If True, the resulting index will be labeled 0, 1, …, n - 1. + + Returns: + bigframes.series.Series: Exploded lists to rows; index will be duplicated for these rows. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def corr(self, other, method="pearson", min_periods=None) -> float: """ Compute the correlation with the other Series. Non-number values are ignored in the