Skip to content

Commit 4af7b09

Browse files
committed
Scatter plot `c` argument: do not support a sequence of values; support a column position instead
1 parent 3340ccc commit 4af7b09

File tree

3 files changed: 12 additions and 56 deletions.

bigframes/operations/_matplotlib/core.py

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import pandas as pd
2020

21+
import bigframes.constants as constants
2122
import bigframes.dtypes as dtypes
2223

2324
DEFAULT_SAMPLING_N = 1000
@@ -92,32 +93,25 @@ def __init__(self, data, **kwargs) -> None:
9293
super().__init__(data, **kwargs)
9394

9495
c = self.kwargs.get("c", None)
95-
if self._is_sequence_arg(c) and len(c) != self.data.shape[0]:
96-
raise ValueError(
97-
f"'c' argument has {len(c)} elements, which is "
98-
+ f"inconsistent with 'x' and 'y' with size {self.data.shape[0]}"
96+
if self._is_sequence_arg(c):
97+
raise NotImplementedError(
98+
f"Only support a single color string or a column name/posision. {constants.FEEDBACK_LINK}"
9999
)
100100

101101
def _compute_plot_data(self):
102-
data = self.data.copy()
103-
104-
c = self.kwargs.get("c", None)
105-
c_id = None
106-
if self._is_sequence_arg(c):
107-
c_id = self._generate_new_column_name(data)
108-
data[c_id] = c
109-
110-
sample = self._compute_sample_data(data)
102+
sample = self._compute_sample_data(self.data)
111103

112104
# Works around a pandas bug:
113105
# https://github.com/pandas-dev/pandas/commit/45b937d64f6b7b6971856a47e379c7c87af7e00a
106+
c = self.kwargs.get("c", None)
107+
if (
108+
pd.core.dtypes.common.is_integer(c)
109+
and not self.data.columns._holds_integer()
110+
):
111+
c = self.data.columns[c]
114112
if self._is_column_name(c, sample) and sample[c].dtype == dtypes.STRING_DTYPE:
115113
sample[c] = sample[c].astype("object")
116114

117-
if c_id is not None:
118-
self.kwargs["c"] = sample[c_id]
119-
sample = sample.drop(columns=[c_id])
120-
121115
return sample
122116

123117
def _is_sequence_arg(self, arg):

tests/system/small/operations/test_plotting.py

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -215,15 +215,7 @@ def test_scatter(scalars_dfs):
215215
pytest.param("red", id="red"),
216216
pytest.param("c", id="int_column"),
217217
pytest.param("species", id="color_column"),
218-
pytest.param(["red", "green", "blue"], id="color_sequence"),
219-
pytest.param([3.4, 5.3, 2.0], id="number_sequence"),
220-
pytest.param(
221-
[3.4, 5.3],
222-
id="length_mismatches_sequence",
223-
marks=pytest.mark.xfail(
224-
raises=ValueError,
225-
),
226-
),
218+
pytest.param(3, id="column_index"),
227219
],
228220
)
229221
def test_scatter_args_c(c):
@@ -248,32 +240,6 @@ def test_scatter_args_c(c):
248240
)
249241

250242

251-
def test_scatter_args_c_sampling():
252-
data = {
253-
"plot_temp_0": [1, 2, 3, 4, 5],
254-
"plot_temp_1": [5, 4, 3, 2, 1],
255-
}
256-
c = ["red", "green", "blue", "orange", "black"]
257-
258-
df = bpd.DataFrame(data)
259-
pd_df = pd.DataFrame(data)
260-
261-
ax = df.plot.scatter(x="plot_temp_0", y="plot_temp_1", c=c, sampling_n=3)
262-
263-
sampling_index = [0, 1, 2]
264-
pd_ax = pd_df.iloc[sampling_index].plot.scatter(
265-
x="plot_temp_0", y="plot_temp_1", c=[c[i] for i in sampling_index]
266-
)
267-
assert len(ax.collections[0].get_facecolor()) == len(
268-
pd_ax.collections[0].get_facecolor()
269-
)
270-
for idx in range(len(ax.collections[0].get_facecolor())):
271-
tm.assert_numpy_array_equal(
272-
ax.collections[0].get_facecolor()[idx],
273-
pd_ax.collections[0].get_facecolor()[idx],
274-
)
275-
276-
277243
def test_sampling_plot_args_n():
278244
df = bpd.DataFrame(np.arange(bf_mpl.DEFAULT_SAMPLING_N * 10), columns=["one"])
279245
ax = df.plot.line()

third_party/bigframes_vendored/pandas/plotting/_core.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -266,10 +266,6 @@ def scatter(
266266
267267
- A single color string referred to by name, RGB or RGBA code,
268268
for instance 'red' or '#a98d19'.
269-
- A sequence of color strings referred to by name, RGB or RGBA
270-
code, which will be used for each point's color recursively. For
271-
instance ['green','yellow'] all points will be filled in green or
272-
yellow, alternatively.
273269
- A column name or position whose values will be used to color the
274270
marker points according to a colormap.
275271

0 commit comments

Comments
 (0)