NOTE(review): line breaks and indentation below were rebuilt from a
whitespace-collapsed copy of this patch, and the blob ids on the "index"
lines predate the edits made to the added lines. Verify the context lines
against the target tree, or apply with `git apply --ignore-whitespace`.

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index b1f4ed35cc..c6e3096e51 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -276,6 +276,26 @@ def label_to_col_id(self) -> typing.Mapping[Label, typing.Sequence[str]]:
             mapping[label] = (*mapping.get(label, ()), id)
         return mapping
 
+    def resolve_label_exact(self, label: Label) -> Optional[str]:
+        """Returns the column id matching the label if there is exactly
+        one such column. If there are multiple columns with the same name,
+        raises an error. If there is no such column, returns None."""
+        matches = self.label_to_col_id.get(label, [])
+        if len(matches) > 1:
+            raise ValueError(
+                f"Multiple columns matching id {label} were found. {constants.FEEDBACK_LINK}"
+            )
+        return matches[0] if len(matches) != 0 else None
+
+    def resolve_label_exact_or_error(self, label: Label) -> str:
+        """Returns the column id matching the label if there is exactly
+        one such column. If there are multiple columns with the same name,
+        raises an error. If there is no such column, raises an error too."""
+        col_id = self.resolve_label_exact(label)
+        if col_id is None:
+            raise ValueError(f"Label {label} not found. {constants.FEEDBACK_LINK}")
+        return col_id
+
     @functools.cached_property
     def col_id_to_index_name(self) -> typing.Mapping[str, Label]:
         """Get column label for value columns, or index name for index columns"""
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 20f636b681..4ffa56c2e5 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -180,7 +180,10 @@ def __init__(
                 )
                 block = block.set_index([r_mapping[idx_col] for idx_col in idx_cols])
             if columns:
-                block = block.select_columns(list(columns))  # type:ignore
+                column_ids = [
+                    block.resolve_label_exact_or_error(label) for label in columns
+                ]
+                block = block.select_columns(column_ids)  # type:ignore
             if dtype:
                 bf_dtype = bigframes.dtypes.bigframes_type(dtype)
                 block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
@@ -238,15 +241,8 @@ def _find_indices(
         return [self._block.value_columns.index(col_id) for col_id in col_ids]
 
     def _resolve_label_exact(self, label) -> Optional[str]:
-        """Returns the column id matching the label if there is exactly
-        one such column. If there are multiple columns with the same name,
-        raises an error. If there is no such column, returns None."""
-        matches = self._block.label_to_col_id.get(label, [])
-        if len(matches) > 1:
-            raise ValueError(
-                f"Multiple columns matching id {label} were found. {constants.FEEDBACK_LINK}"
-            )
-        return matches[0] if len(matches) != 0 else None
+        """Resolve ``label`` to a column id via the underlying block."""
+        return self._block.resolve_label_exact(label)
 
     def _sql_names(
         self,
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index e7556043af..1db89a074a 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -44,8 +44,15 @@ def test_df_construct_copy(scalars_dfs):
     columns = ["int64_col", "string_col", "float64_col"]
     scalars_df, scalars_pandas_df = scalars_dfs
 
-    bf_result = dataframe.DataFrame(scalars_df, columns=columns).to_pandas()
-    pd_result = pd.DataFrame(scalars_pandas_df, columns=columns)
+    # Make the mapping from label to col_id non-trivial
+    bf_df = scalars_df.copy()
+    bf_df["int64_col"] = bf_df["int64_col"] / 2
+    pd_df = scalars_pandas_df.copy()
+    pd_df["int64_col"] = pd_df["int64_col"] / 2
+
+    bf_result = dataframe.DataFrame(bf_df, columns=columns).to_pandas()
+
+    pd_result = pd.DataFrame(pd_df, columns=columns)
     pandas.testing.assert_frame_equal(bf_result, pd_result)
 
 