Skip to content

feat: support INFORMATION_SCHEMA views in read_gbq #1895

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
57 changes: 51 additions & 6 deletions bigframes/session/_io/bigquery/read_gbq_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,53 @@
import bigframes.session


def get_information_schema_metadata(
    bqclient: bigquery.Client,
    table_id: str,
    default_project: Optional[str],
) -> bigquery.Table:
    """Construct table metadata for an ``INFORMATION_SCHEMA`` view.

    ``INFORMATION_SCHEMA`` views cannot be fetched through the ``tables.get``
    REST API, so this issues a dry-run ``SELECT *`` query to discover the
    view's schema and location, then assembles a synthetic
    ``bigquery.Table`` from the dotted ``table_id``.

    Args:
        bqclient:
            Client used to issue the dry-run query.
        table_id:
            Dotted identifier such as
            ``"region-US.INFORMATION_SCHEMA.JOBS_BY_USER"`` or
            ``"project.dataset.INFORMATION_SCHEMA.TABLES"``.
        default_project:
            Project to fall back on when ``table_id`` has fewer than three
            dot-separated parts (i.e. no explicit project component).

    Returns:
        A ``bigquery.Table`` carrying the view's schema and location. Its
        table reference is synthetic and not suitable for ``tables.get``.

    Raises:
        ValueError: If ``table_id`` contains a backtick, which would break
            out of the quoted identifier in the generated query.
    """
    # The ID is interpolated into a backtick-quoted identifier below, so a
    # backtick in the input could escape the quoting. Reject it outright
    # rather than attempting to escape.
    if "`" in table_id:
        raise ValueError(f"Invalid table ID: {repr(table_id)}")

    # A dry-run query costs nothing and returns the result schema and the
    # location without reading any data.
    job_config = bigquery.QueryJobConfig(dry_run=True)
    job = bqclient.query(
        f"SELECT * FROM `{table_id}`",
        job_config=job_config,
    )

    parts = table_id.split(".")
    if len(parts) < 3:
        # No explicit project component, e.g. "dataset.INFORMATION_SCHEMA.X"
        # would NOT land here, but "INFORMATION_SCHEMA.X" does.
        project = default_project
        dataset = parts[0]
        table_id_short = ".".join(parts[1:])
    else:
        # NOTE(review): for 3-part IDs such as
        # "region-US.INFORMATION_SCHEMA.JOBS_BY_USER" or
        # "dataset.INFORMATION_SCHEMA.TABLES", parts[0] is treated as the
        # project even though it may actually be a region qualifier or a
        # dataset. The reference is only used as a synthetic label, never
        # for a tables.get call — confirm this is acceptable.
        project = parts[0]
        dataset = parts[1]
        table_id_short = ".".join(parts[2:])

    table = bigquery.Table.from_api_repr(
        {
            "tableReference": {
                "projectId": project,
                "datasetId": dataset,
                "tableId": table_id_short,
            },
            "location": job.location,
        }
    )
    # Dry-run jobs expose the result schema; attach it so callers can read
    # column types without actually running the query.
    table.schema = job.schema
    return table


def get_table_metadata(
bqclient: bigquery.Client,
table_ref: google.cloud.bigquery.table.TableReference,
bq_time: datetime.datetime,
*,
cache: Dict[bigquery.TableReference, Tuple[datetime.datetime, bigquery.Table]],
table_id: str,
default_project: Optional[str],
bq_time: datetime.datetime,
cache: Dict[str, Tuple[datetime.datetime, bigquery.Table]],
use_cache: bool = True,
) -> Tuple[datetime.datetime, google.cloud.bigquery.table.Table]:
"""Get the table metadata, either from cache or via REST API."""

cached_table = cache.get(table_ref)
cached_table = cache.get(table_id)
if use_cache and cached_table is not None:
snapshot_timestamp, _ = cached_table

Expand All @@ -76,15 +112,24 @@ def get_table_metadata(
warnings.warn(msg, stacklevel=7)
return cached_table

table = bqclient.get_table(table_ref)
if "INFORMATION_SCHEMA".casefold() in table_id.casefold():
table = get_information_schema_metadata(
bqclient=bqclient, table_id=table_id, default_project=default_project
)
else:
table_ref = google.cloud.bigquery.table.TableReference.from_string(
table_id, default_project=default_project
)
table = bqclient.get_table(table_ref)

# local time will lag a little bit do to network latency
# make sure it is at least table creation time.
# This is relevant if the table was created immediately before loading it here.
if (table.created is not None) and (table.created > bq_time):
bq_time = table.created

cached_table = (bq_time, table)
cache[table_ref] = cached_table
cache[table_id] = cached_table
return cached_table


Expand Down
11 changes: 3 additions & 8 deletions bigframes/session/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,7 @@ def __init__(
self._default_index_type = default_index_type
self._scan_index_uniqueness = scan_index_uniqueness
self._force_total_order = force_total_order
self._df_snapshot: Dict[
bigquery.TableReference, Tuple[datetime.datetime, bigquery.Table]
] = {}
self._df_snapshot: Dict[str, Tuple[datetime.datetime, bigquery.Table]] = {}
self._metrics = metrics
# Unfortunate circular reference, but need to pass reference when constructing objects
self._session = session
Expand Down Expand Up @@ -617,10 +615,6 @@ def read_gbq_table(

_check_duplicates("columns", columns)

table_ref = google.cloud.bigquery.table.TableReference.from_string(
table_id, default_project=self._bqclient.project
)

columns = list(columns)
include_all_columns = columns is None or len(columns) == 0
filters = typing.cast(list, list(filters))
Expand All @@ -631,7 +625,8 @@ def read_gbq_table(

time_travel_timestamp, table = bf_read_gbq_table.get_table_metadata(
self._bqclient,
table_ref=table_ref,
table_id=table_id,
default_project=self._bqclient.project,
bq_time=self._clock.get_time(),
cache=self._df_snapshot,
use_cache=use_cache,
Expand Down
18 changes: 18 additions & 0 deletions tests/system/small/pandas/test_read_gbq_information_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def test_read_gbq_jobs_by_user_returns_schema(session):
    """read_gbq on a regional INFORMATION_SCHEMA view surfaces a real schema.

    INFORMATION_SCHEMA views are not available via the tables.get REST API,
    so this exercises the dry-run-query metadata path.
    """
    df = session.read_gbq("region-US.INFORMATION_SCHEMA.JOBS_BY_USER")
    # ``dtypes`` is never None on a DataFrame, so asserting non-emptiness is
    # the meaningful check that a schema was actually discovered.
    assert len(df.dtypes) > 0
Loading