From 196cad6ca8425a61ccabf18cc75e371af0814c63 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Fri, 22 Mar 2024 22:49:02 +0000 Subject: [PATCH 1/2] fix: use actual bigframes types rather than ibis types in to_pandas --- bigframes/dtypes.py | 7 +++++++ bigframes/session/__init__.py | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 954dd270ee..861c2f4f62 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -682,6 +682,13 @@ def ibis_type_from_python_type(t: type) -> ibis_dtypes.DataType: def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.DataType: + """Convert bq type to ibis. Only to be used for remote functions, does not handle all types.""" if tk not in SUPPORTED_IO_BIGQUERY_TYPEKINDS: raise UnsupportedTypeError(tk, SUPPORTED_IO_BIGQUERY_TYPEKINDS) return third_party_ibis_bqtypes.BigQueryType.to_ibis(tk) + + +def bf_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> Dtype: + """Converts bigquery sql type to the default bigframes dtype.""" + ibis_type = third_party_ibis_bqtypes.BigQueryType.to_ibis(tk) + return ibis_dtype_to_bigframes_dtype(ibis_type) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 74a8325dac..a23fd4426a 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1889,8 +1889,13 @@ def _get_table_size(self, destination_table): def _rows_to_dataframe( self, row_iterator: bigquery.table.RowIterator, dtypes: Dict ) -> pandas.DataFrame: + # Can ignore inferred datatype until dtype emulation breaks 1:1 mapping between BQ types and bigframes types + dtypes_from_bq = { + field.name: bigframes.dtypes.bf_type_from_type_kind(field.field_type) + for field in row_iterator.schema + } arrow_table = row_iterator.to_arrow() - return bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes) + return bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes_from_bq) def _start_generic_job(self, job: formatting_helpers.GenericJob): if bigframes.options.display.progress_bar is not None: From 8e825a9141c0d567b320caec46be112672b65c18 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Sat, 23 Mar 2024 00:56:58 +0000 Subject: [PATCH 2/2] Use ibis function that properly converts nested bq datatypes to ibis type --- bigframes/dtypes.py | 10 +++++++--- bigframes/session/__init__.py | 5 +---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 861c2f4f62..63adc059f3 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -688,7 +688,11 @@ def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.D return third_party_ibis_bqtypes.BigQueryType.to_ibis(tk) -def bf_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> Dtype: +def bf_type_from_type_kind(bf_schema) -> Dict[str, Dtype]: """Converts bigquery sql type to the default bigframes dtype.""" - ibis_type = third_party_ibis_bqtypes.BigQueryType.to_ibis(tk) - return ibis_dtype_to_bigframes_dtype(ibis_type) + ibis_schema: ibis.Schema = third_party_ibis_bqtypes.BigQuerySchema.to_ibis( + bf_schema + ) + return { + name: ibis_dtype_to_bigframes_dtype(type) for name, type in ibis_schema.items() + } diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index a23fd4426a..479b3a7bac 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1890,10 +1890,7 @@ def _rows_to_dataframe( self, row_iterator: bigquery.table.RowIterator, dtypes: Dict ) -> pandas.DataFrame: # Can ignore inferred datatype until dtype emulation breaks 1:1 mapping between BQ types and bigframes types - dtypes_from_bq = { - field.name: bigframes.dtypes.bf_type_from_type_kind(field.field_type) - for field in row_iterator.schema - } + dtypes_from_bq = bigframes.dtypes.bf_type_from_type_kind(row_iterator.schema) arrow_table = row_iterator.to_arrow() return bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes_from_bq)