googleapis · milkshakeiii · Jun 7, 2024 · Jun 4, 2024 · Jun 5, 2024 · Jun 5, 2024
@@ -512,9 +512,17 @@ def to_pandas_batches(self):
         """Download results one message at a time."""
         dtypes = dict(zip(self.index_columns, self.index.dtypes))
         dtypes.update(zip(self.value_columns, self.dtypes))
-        results_iterator, _ = self.session._execute(self.expr, sorted=True)
+        _, query_job = self.session._query_to_destination(
+            self.session._to_sql(self.expr, sorted=True),
+            list(self.index_columns),
+            api_name="cached",
+            do_clustering=False,
+        )
+        results_iterator = query_job.result()
         for arrow_table in results_iterator.to_arrow_iterable(
-            bqstorage_client=self.session.bqstoragereadclient
+            # We can't pass bqstorage_client=self.session.bqstoragereadclient
+            # here: large results would take too long to download to storage
+            # and would not be streamed.
         ):
             df = bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes)
             self._copy_index_to_pandas(df)

@@ -75,22 +75,16 @@ def test_index_repr_large_table():
 
 
 def test_to_pandas_batches_large_table():
-    df = bpd.read_gbq("load_testing.scalars_10gb")
-    # df will be downloaded locally
-    expected_row_count, expected_column_count = df.shape
+    df = bpd.read_gbq("load_testing.scalars_100gb")
+    _, expected_column_count = df.shape
 
-    row_count = 0
-    # TODO(b/340890167): fix type error
-    for df in df.to_pandas_batches():  # type: ignore
-        batch_row_count, batch_column_count = df.shape
+    # download only a few batches, since the full 100gb table would be too much
+    iterator = iter(df.to_pandas_batches())
+    for _ in range(3):
+        pdf = next(iterator)
+        batch_row_count, batch_column_count = pdf.shape
         assert batch_column_count == expected_column_count
-        row_count += batch_row_count
-
-        # Attempt to save on memory by manually removing the batch df
-        # from local memory after finishing with processing.
-        del df
-
-    assert row_count == expected_row_count
+        assert batch_row_count > 0
 
 
 @pytest.mark.skip(reason="See if it caused kokoro build aborted.")