diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py
index 027b3a4236..1162217fc1 100644
--- a/bigframes/exceptions.py
+++ b/bigframes/exceptions.py
@@ -41,3 +41,7 @@ class PreviewWarning(Warning):
 
 class NullIndexError(ValueError):
     """Object has no index."""
+
+
+class TimeTravelDisabledWarning(Warning):
+    """A query was reattempted without time travel."""
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 226af9ec5c..45c1d15c5a 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -713,7 +713,8 @@ def _read_gbq_table(
         # Fetch table metadata and validate
         # ---------------------------------
 
-        (time_travel_timestamp, table,) = bf_read_gbq_table.get_table_metadata(
+        time_travel_timestamp: Optional[datetime.datetime] = None
+        time_travel_timestamp, table = bf_read_gbq_table.get_table_metadata(
             self.bqclient,
             table_ref=table_ref,
             api_name=api_name,
@@ -795,9 +796,34 @@ def _read_gbq_table(
 
         # Use a time travel to make sure the DataFrame is deterministic, even
         # if the underlying table changes.
-        # TODO(b/340540991): If a dry run query fails with time travel but
+
+        # If a dry run query fails with time travel but
         # succeeds without it, omit the time travel clause and raise a warning
         # about potential non-determinism if the underlying tables are modified.
+        sql = bigframes.session._io.bigquery.to_query(
+            f"{table_ref.project}.{table_ref.dataset_id}.{table_ref.table_id}",
+            index_cols=index_cols,
+            columns=columns,
+            filters=filters,
+            time_travel_timestamp=time_travel_timestamp,
+            max_results=None,
+        )
+        dry_run_config = bigquery.QueryJobConfig()
+        dry_run_config.dry_run = True
+        try:
+            self._start_query(sql, job_config=dry_run_config)
+        except google.api_core.exceptions.NotFound:
+            # note that a notfound caused by a simple typo will be
+            # caught above when the metadata is fetched, not here
+            time_travel_timestamp = None
+            warnings.warn(
+                "NotFound error when reading table with time travel."
+                " Attempting query without time travel. Warning: Without"
+                " time travel, modifications to the underlying table may"
+                " result in errors or unexpected behavior.",
+                category=bigframes.exceptions.TimeTravelDisabledWarning,
+            )
+
         table_expression = bf_read_gbq_table.get_ibis_time_travel_table(
             ibis_client=self.ibis_client,
             table_ref=table_ref,
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index 2b7c6178ff..c617eab7f5 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -20,6 +20,7 @@
 import time
 import typing
 from typing import List, Optional, Sequence
+import warnings
 
 import google
 import google.cloud.bigquery as bigquery
@@ -386,6 +387,13 @@ def test_read_gbq_twice_with_same_timestamp(session, penguins_table_id):
     assert df3 is not None
 
 
+def test_read_gbq_on_linked_dataset_warns(session):
+    with warnings.catch_warnings(record=True) as warned:
+        session.read_gbq("bigframes-dev.thelook_ecommerce.orders")
+    assert len(warned) == 1
+    assert warned[0].category == bigframes.exceptions.TimeTravelDisabledWarning
+
+
 def test_read_gbq_table_clustered_with_filter(session: bigframes.Session):
     df = session.read_gbq_table(
         "bigquery-public-data.cloud_storage_geo_index.landsat_index",