diff --git a/bigframes/session/_io/bigquery/read_gbq_table.py b/bigframes/session/_io/bigquery/read_gbq_table.py
index 8d8f247185..6b1cb99c65 100644
--- a/bigframes/session/_io/bigquery/read_gbq_table.py
+++ b/bigframes/session/_io/bigquery/read_gbq_table.py
@@ -101,7 +101,16 @@ def validate_table(
     time_travel_not_found = False
     # Anonymous dataset, does not support snapshot ever
     if table.dataset_id.startswith("_"):
         pass
+    # Only true tables support time travel
+    elif table.table_id.endswith("*"):
+        msg = bfe.format_message(
+            "Wildcard tables do not support FOR SYSTEM_TIME AS OF queries. "
+            "Attempting query without time travel. Be aware that "
+            "modifications to the underlying data may result in errors or "
+            "unexpected behavior."
+        )
+        warnings.warn(msg, category=bfe.TimeTravelDisabledWarning)
     elif table.table_type != "TABLE":
         if table.table_type == "MATERIALIZED_VIEW":
             msg = bfe.format_message(
@@ -137,7 +146,7 @@ def validate_table(
             sql_predicate=filter_str,
             time_travel_timestamp=None,
         )
-        # Any erorrs here should just be raised to user
+        # Any errors here should just be raised to user
         bqclient.query_and_wait(
             snapshot_sql, job_config=bigquery.QueryJobConfig(dry_run=True)
         )
diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py
index f748f0fd76..4924037f89 100644
--- a/bigframes/session/loader.py
+++ b/bigframes/session/loader.py
@@ -518,11 +518,7 @@ def read_gbq_table(
         # clustered tables, so fallback to a query. We do this here so that
         # the index is consistent with tables that have primary keys, even
         # when max_results is set.
-        # TODO(b/338419730): We don't need to fallback to a query for wildcard
-        # tables if we allow some non-determinism when time travel isn't supported.
-        if max_results is not None or bf_io_bigquery.is_table_with_wildcard_suffix(
-            table_id
-        ):
+        if max_results is not None:
             # TODO(b/338111344): If we are running a query anyway, we might as
             # well generate ROW_NUMBER() at the same time.
             all_columns: Iterable[str] = (
@@ -540,7 +536,7 @@
                 time_travel_timestamp=None,
             )
 
-            return self.read_gbq_query(  # type: ignore # for dry_run overload
+            df = self.read_gbq_query(  # type: ignore # for dry_run overload
                 query,
                 index_col=index_cols,
                 columns=columns,
@@ -548,6 +544,7 @@
                 use_cache=use_cache,
                 dry_run=dry_run,
             )
+            return df
 
         if dry_run:
             return dry_runs.get_table_stats(table)
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index ad01a95509..ab460d5bc9 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -449,11 +449,15 @@ def test_read_gbq_twice_with_same_timestamp(session, penguins_table_id):
 @pytest.mark.parametrize(
     "source_table",
     [
+        # Wildcard tables
+        "bigquery-public-data.noaa_gsod.gsod194*",
+        # Linked datasets
         "bigframes-dev.thelook_ecommerce.orders",
+        # Materialized views
         "bigframes-dev.bigframes_tests_sys.base_table_mat_view",
     ],
 )
-def test_read_gbq_on_linked_dataset_warns(session, source_table):
+def test_read_gbq_warns_time_travel_disabled(session, source_table):
     with warnings.catch_warnings(record=True) as warned:
         session.read_gbq(source_table, use_cache=False)
         assert len(warned) == 1
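
A minimal usage sketch of the behavior this patch enables, not part of the change itself. It assumes the standard bigframes.pandas entry point and reuses the public NOAA GSOD wildcard table from the test case above: reading a wildcard table now proceeds without time travel and emits TimeTravelDisabledWarning, instead of being routed through the query fallback that loader.py removes here.

# Sketch only: demonstrates the warning added in validate_table(). The
# table name is the public NOAA GSOD dataset used in the test above.
import warnings

import bigframes.exceptions as bfe
import bigframes.pandas as bpd

with warnings.catch_warnings(record=True) as warned:
    warnings.simplefilter("always")
    # The trailing "*" disables FOR SYSTEM_TIME AS OF; the read still
    # succeeds, but a TimeTravelDisabledWarning is raised.
    df = bpd.read_gbq("bigquery-public-data.noaa_gsod.gsod194*", use_cache=False)

assert any(isinstance(w.message, bfe.TimeTravelDisabledWarning) for w in warned)

Because wildcard tables no longer fall back to a query, the resulting DataFrame reads the shards directly; as the new warning text notes, modifications to the underlying data between operations may surface as errors or unexpected results.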