Skip to content

Commit 9a56bb5

Browse files
committed
add docs and fix tests
1 parent 61ae92b commit 9a56bb5

File tree

3 files changed

+110
-36
lines changed

3 files changed

+110
-36
lines changed

tests/system/small/test_series.py

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3418,55 +3418,50 @@ def foo(x: int, y: int, df):
34183418
assert_series_equal(bf_result, pd_result)
34193419

34203420

3421-
def test_series_explode_int():
3421+
@pytest.mark.parametrize(
3422+
("data"),
3423+
[
3424+
pytest.param([[1, 2, 3], [], numpy.nan, [3, 4]], id="int"),
3425+
pytest.param(
3426+
[["A", "AA", "AAA"], ["BB", "B"], numpy.nan, [], ["C"]], id="string"
3427+
),
3428+
pytest.param(
3429+
[
3430+
{"A": {"x": 1.0}, "B": "b"},
3431+
{"A": {"y": 2.0}, "B": "bb"},
3432+
{"A": {"z": 4.0}},
3433+
{},
3434+
numpy.nan,
3435+
],
3436+
id="struct",
3437+
),
3438+
],
3439+
)
3440+
def test_series_explode(data):
34223441
data = [[1, 2, 3], [], numpy.nan, [3, 4]]
34233442
s = bigframes.pandas.Series(data)
34243443
pd_s = pd.Series(data)
34253444
pd.testing.assert_series_equal(
34263445
s.explode().to_pandas(),
3427-
pd_s.explode().astype(pd.Int64Dtype()),
3428-
check_index_type=False,
3429-
)
3430-
3431-
3432-
def test_series_explode_string():
3433-
array = [["A", "AA", "AAA"], ["BB", "B"], numpy.nan, [], ["C"]]
3434-
s = bigframes.pandas.Series(array)
3435-
pd_s = pd.Series(array, dtype=pd.ArrowDtype(pa.list_(pa.string())))
3436-
pd.testing.assert_series_equal(
3437-
s.explode().to_pandas(),
3438-
pd_s.explode().astype(pd.StringDtype(storage="pyarrow")),
3439-
check_index_type=False,
3440-
)
3441-
3442-
3443-
def test_series_explode_struct():
3444-
array = [
3445-
{"A": {"x": 1.0}, "B": "b"},
3446-
{"A": {"y": 2.0}, "B": "bb"},
3447-
{"A": {"z": 4.0}},
3448-
{},
3449-
numpy.nan,
3450-
]
3451-
s = bigframes.pandas.Series(array)
3452-
pd_s = s.to_pandas()
3453-
pd.testing.assert_series_equal(
3454-
s.explode().to_pandas(),
3455-
pd_s.explode(),
3446+
pd_s.explode().astype(),
34563447
check_index_type=False,
3448+
check_dtype=False,
34573449
)
34583450

34593451

34603452
@pytest.mark.parametrize(
3461-
("ignore_index"),
3453+
("index", "ignore_index"),
34623454
[
3463-
pytest.param(True, id="include_index"),
3464-
pytest.param(False, id="ignore_index"),
3455+
pytest.param(None, True, id="default_index"),
3456+
pytest.param(None, False, id="ignore_default_index"),
3457+
pytest.param([5, 1, 3, 2], True, id="unordered_index"),
3458+
pytest.param([5, 1, 3, 2], False, id="ignore_unordered_index"),
3459+
pytest.param(["z", "x", "a", "b"], True, id="str_index"),
3460+
pytest.param(["z", "x", "a", "b"], False, id="ignore_str_index"),
34653461
],
34663462
)
3467-
def test_series_explode_w_unordered_index(ignore_index):
3463+
def test_series_explode_w_index(index, ignore_index):
34683464
data = [[], [200.0, 23.12], [4.5, -9.0], [1.0]]
3469-
index = [5, 1, 3, 2]
34703465
s = bigframes.pandas.Series(data, index=index)
34713466
pd_s = pd.Series(data, index=index)
34723467
pd.testing.assert_series_equal(

third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2804,6 +2804,57 @@ def combine_first(self, other) -> DataFrame:
28042804
"""
28052805
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
28062806

2807+
def explode(
2808+
self, column: Union[str, Sequence[str]], *, ignore_index: Optional[bool] = False
2809+
) -> DataFrame:
2810+
"""
2811+
Transform each element of a list-like to a row, replicating index values.
2812+
2813+
**Examples:**
2814+
2815+
>>> import bigframes.pandas as bpd
2816+
>>> bpd.options.display.progress_bar = None
2817+
2818+
>>> df = bpd.DataFrame({'A': [[0, 1, 2], [], [], [3, 4]],
2819+
... 'B': 1,
2820+
... 'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]})
2821+
>>> df.explode('A')
2822+
A B C
2823+
0 0 1 ['a' 'b' 'c']
2824+
0 1 1 ['a' 'b' 'c']
2825+
0 2 1 ['a' 'b' 'c']
2826+
1 <NA> 1 []
2827+
2 <NA> 1 []
2828+
3 3 1 ['d' 'e']
2829+
3 4 1 ['d' 'e']
2830+
<BLANKLINE>
2831+
[7 rows x 3 columns]
2832+
>>> df.explode(list('AC'))
2833+
A B C
2834+
0 0 1 a
2835+
0 1 1 b
2836+
0 2 1 c
2837+
1 <NA> 1 <NA>
2838+
2 <NA> 1 <NA>
2839+
3 3 1 d
2840+
3 4 1 e
2841+
<BLANKLINE>
2842+
[7 rows x 3 columns]
2843+
2844+
Args:
2845+
column (str, Sequence[str]):
2846+
Column(s) to explode. For multiple columns, specify a non-empty list
2847+
with each element being a str or tuple; for all specified columns, the
2848+
list-like data on the same row of the frame must have matching length.
2849+
ignore_index (bool, default False):
2850+
If True, the resulting index will be labeled 0, 1, …, n - 1.
2851+
2852+
Returns:
2853+
bigframes.dataframe.DataFrame: Exploded lists to rows of the subset columns;
2854+
index will be duplicated for these rows.
2855+
"""
2856+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
2857+
28072858
def corr(self, method, min_periods, numeric_only) -> DataFrame:
28082859
"""
28092860
Compute pairwise correlation of columns, excluding NA/null values.

third_party/bigframes_vendored/pandas/core/series.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
from __future__ import annotations
55

6-
from typing import Hashable, IO, Literal, Mapping, Sequence, TYPE_CHECKING
6+
from typing import Hashable, IO, Literal, Mapping, Optional, Sequence, TYPE_CHECKING
77

88
from bigframes_vendored.pandas.core.generic import NDFrame
99
import numpy as np
@@ -751,6 +751,34 @@ def round(self, decimals: int = 0) -> Series:
751751
"""
752752
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
753753

754+
def explode(self, *, ignore_index: Optional[bool] = False) -> Series:
755+
"""
756+
Transform each element of a list-like to a row.
757+
758+
**Examples:**
759+
760+
>>> import bigframes.pandas as bpd
761+
>>> bpd.options.display.progress_bar = None
762+
763+
>>> s = bpd.Series([[1, 2, 3], [], [3, 4]])
764+
>>> s.explode()
765+
0 1
766+
0 2
767+
0 3
768+
1 <NA>
769+
2 3
770+
2 4
771+
dtype: Int64
772+
773+
Args:
774+
ignore_index (bool, default False):
775+
If True, the resulting index will be labeled 0, 1, …, n - 1.
776+
777+
Returns:
778+
bigframes.series.Series: Exploded lists to rows; index will be duplicated for these rows.
779+
"""
780+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
781+
754782
def corr(self, other, method="pearson", min_periods=None) -> float:
755783
"""
756784
Compute the correlation with the other Series. Non-number values are ignored in the

0 commit comments

Comments
 (0)