Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,26 @@ def label_to_col_id(self) -> typing.Mapping[Label, typing.Sequence[str]]:
mapping[label] = (*mapping.get(label, ()), id)
return mapping

def resolve_label_exact(self, label: Label) -> Optional[str]:
    """Look up the single column id that carries ``label``.

    Args:
        label: Column label to look up in ``self.label_to_col_id``.

    Returns:
        The matching column id when exactly one column has this label,
        or ``None`` when no column has it.

    Raises:
        ValueError: If more than one column has this label.
    """
    candidates = self.label_to_col_id.get(label, [])
    match_count = len(candidates)
    if match_count == 0:
        return None
    if match_count == 1:
        return candidates[0]
    # Ambiguous label: refuse to pick one of several same-named columns.
    raise ValueError(
        f"Multiple columns matching id {label} were found. {constants.FEEDBACK_LINK}"
    )

def resolve_label_exact_or_error(self, label: Label) -> str:
    """Look up the single column id that carries ``label``, or fail.

    Strict variant of ``resolve_label_exact``: a missing label is treated
    as an error instead of yielding ``None``.

    Args:
        label: Column label to look up.

    Returns:
        The column id of the one column that has this label.

    Raises:
        ValueError: If no column has this label. Any error raised by
            ``resolve_label_exact`` propagates unchanged.
    """
    resolved = self.resolve_label_exact(label)
    if resolved is not None:
        return resolved
    raise ValueError(f"Label {label} not found. {constants.FEEDBACK_LINK}")

@functools.cached_property
def col_id_to_index_name(self) -> typing.Mapping[str, Label]:
"""Get column label for value columns, or index name for index columns"""
Expand Down
15 changes: 5 additions & 10 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,10 @@ def __init__(
)
block = block.set_index([r_mapping[idx_col] for idx_col in idx_cols])
if columns:
block = block.select_columns(list(columns)) # type:ignore
column_ids = [
block.resolve_label_exact_or_error(label) for label in list(columns)
]
block = block.select_columns(column_ids) # type:ignore
if dtype:
bf_dtype = bigframes.dtypes.bigframes_type(dtype)
block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
Expand Down Expand Up @@ -238,15 +241,7 @@ def _find_indices(
return [self._block.value_columns.index(col_id) for col_id in col_ids]

def _resolve_label_exact(self, label) -> Optional[str]:
    """Return the column id matching ``label``, if there is exactly one.

    Thin wrapper that delegates to ``self._block.resolve_label_exact`` so
    the lookup logic lives in one place instead of being duplicated here.

    Args:
        label: Column label to look up.

    Returns:
        Whatever ``self._block.resolve_label_exact(label)`` returns: the
        column id for a unique match, or ``None`` when there is no match.

    Raises:
        ValueError: Propagated from the block helper when several columns
            share the label.
    """
    return self._block.resolve_label_exact(label)

def _sql_names(
self,
Expand Down
11 changes: 9 additions & 2 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,15 @@
def test_df_construct_copy(scalars_dfs):
    """DataFrame(df, columns=...) selects columns by label, matching pandas."""
    columns = ["int64_col", "string_col", "float64_col"]
    scalars_df, scalars_pandas_df = scalars_dfs

    # Make the mapping from label to col_id non-trivial
    # NOTE(review): presumably reassigning a column gives it a new internal
    # id under the same label -- confirm against the Block implementation.
    bf_df = scalars_df.copy()
    bf_df["int64_col"] = bf_df["int64_col"] / 2
    pd_df = scalars_pandas_df.copy()
    pd_df["int64_col"] = pd_df["int64_col"] / 2

    # Build each result exactly once, from the modified frames only.
    bf_result = dataframe.DataFrame(bf_df, columns=columns).to_pandas()
    pd_result = pd.DataFrame(pd_df, columns=columns)

    pandas.testing.assert_frame_equal(bf_result, pd_result)


Expand Down