Skip to content

Commit 3c1be14

Browse files
authored
fix: no longer raise a warning in to_dataframe if max_results set (#815)
That warning should only be used when BQ Storage client is explicitly passed in to RowIterator methods when max_results value is also set.
1 parent 3b70891 commit 3c1be14

File tree

2 files changed

+179
-11
lines changed

2 files changed

+179
-11
lines changed

google/cloud/bigquery/table.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1552,11 +1552,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client):
15521552
return False
15531553

15541554
if self.max_results is not None:
1555-
warnings.warn(
1556-
"Cannot use bqstorage_client if max_results is set, "
1557-
"reverting to fetching data with the REST endpoint.",
1558-
stacklevel=2,
1559-
)
15601555
return False
15611556

15621557
try:
@@ -1604,6 +1599,25 @@ def total_rows(self):
16041599
"""int: The total number of rows in the table."""
16051600
return self._total_rows
16061601

1602+
def _maybe_warn_max_results(
1603+
self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"],
1604+
):
1605+
"""Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set.
1606+
1607+
This helper method should be used directly in the relevant top-level public
1608+
methods, so that the warning is issued for the correct line in user code.
1609+
1610+
Args:
1611+
bqstorage_client:
1612+
The BigQuery Storage client intended to use for downloading result rows.
1613+
"""
1614+
if bqstorage_client is not None and self.max_results is not None:
1615+
warnings.warn(
1616+
"Cannot use bqstorage_client if max_results is set, "
1617+
"reverting to fetching data with the REST endpoint.",
1618+
stacklevel=3,
1619+
)
1620+
16071621
def _to_page_iterable(
16081622
self, bqstorage_download, tabledata_list_download, bqstorage_client=None
16091623
):
@@ -1700,6 +1714,8 @@ def to_arrow(
17001714
if pyarrow is None:
17011715
raise ValueError(_NO_PYARROW_ERROR)
17021716

1717+
self._maybe_warn_max_results(bqstorage_client)
1718+
17031719
if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
17041720
create_bqstorage_client = False
17051721
bqstorage_client = None
@@ -1790,6 +1806,8 @@ def to_dataframe_iterable(
17901806
if dtypes is None:
17911807
dtypes = {}
17921808

1809+
self._maybe_warn_max_results(bqstorage_client)
1810+
17931811
column_names = [field.name for field in self._schema]
17941812
bqstorage_download = functools.partial(
17951813
_pandas_helpers.download_dataframe_bqstorage,
@@ -1896,6 +1914,8 @@ def to_dataframe(
18961914
if dtypes is None:
18971915
dtypes = {}
18981916

1917+
self._maybe_warn_max_results(bqstorage_client)
1918+
18991919
if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
19001920
create_bqstorage_client = False
19011921
bqstorage_client = None

tests/unit/test_table.py

Lines changed: 154 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import datetime
1616
import logging
1717
import time
18+
import types
1819
import unittest
1920
import warnings
2021

@@ -1862,6 +1863,15 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self):
18621863
)
18631864
)
18641865

1866+
def test__validate_bqstorage_returns_false_if_max_results_set(self):
1867+
iterator = self._make_one(
1868+
max_results=10, first_page_response=None # not cached
1869+
)
1870+
result = iterator._validate_bqstorage(
1871+
bqstorage_client=None, create_bqstorage_client=True
1872+
)
1873+
self.assertFalse(result)
1874+
18651875
def test__validate_bqstorage_returns_false_if_missing_dependency(self):
18661876
iterator = self._make_one(first_page_response=None) # not cached
18671877

@@ -2105,7 +2115,7 @@ def test_to_arrow_w_empty_table(self):
21052115
@unittest.skipIf(
21062116
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
21072117
)
2108-
def test_to_arrow_max_results_w_create_bqstorage_warning(self):
2118+
def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self):
21092119
from google.cloud.bigquery.schema import SchemaField
21102120

21112121
schema = [
@@ -2119,6 +2129,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self):
21192129
path = "/foo"
21202130
api_request = mock.Mock(return_value={"rows": rows})
21212131
mock_client = _mock_client()
2132+
mock_bqstorage_client = mock.sentinel.bq_storage_client
21222133

21232134
row_iterator = self._make_one(
21242135
client=mock_client,
@@ -2129,7 +2140,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self):
21292140
)
21302141

21312142
with warnings.catch_warnings(record=True) as warned:
2132-
row_iterator.to_arrow(create_bqstorage_client=True)
2143+
row_iterator.to_arrow(bqstorage_client=mock_bqstorage_client)
21332144

21342145
matches = [
21352146
warning
@@ -2139,6 +2150,49 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self):
21392150
and "REST" in str(warning)
21402151
]
21412152
self.assertEqual(len(matches), 1, msg="User warning was not emitted.")
2153+
self.assertIn(
2154+
__file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel"
2155+
)
2156+
mock_client._ensure_bqstorage_client.assert_not_called()
2157+
2158+
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
2159+
@unittest.skipIf(
2160+
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
2161+
)
2162+
def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self):
2163+
from google.cloud.bigquery.schema import SchemaField
2164+
2165+
schema = [
2166+
SchemaField("name", "STRING", mode="REQUIRED"),
2167+
SchemaField("age", "INTEGER", mode="REQUIRED"),
2168+
]
2169+
rows = [
2170+
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
2171+
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
2172+
]
2173+
path = "/foo"
2174+
api_request = mock.Mock(return_value={"rows": rows})
2175+
mock_client = _mock_client()
2176+
2177+
row_iterator = self._make_one(
2178+
client=mock_client,
2179+
api_request=api_request,
2180+
path=path,
2181+
schema=schema,
2182+
max_results=42,
2183+
)
2184+
2185+
with warnings.catch_warnings(record=True) as warned:
2186+
row_iterator.to_arrow(create_bqstorage_client=True)
2187+
2188+
matches = [
2189+
warning
2190+
for warning in warned
2191+
if warning.category is UserWarning
2192+
and "cannot use bqstorage_client" in str(warning).lower()
2193+
and "REST" in str(warning)
2194+
]
2195+
self.assertFalse(matches)
21422196
mock_client._ensure_bqstorage_client.assert_not_called()
21432197

21442198
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
@@ -2372,7 +2426,6 @@ def test_to_arrow_w_pyarrow_none(self):
23722426
@unittest.skipIf(pandas is None, "Requires `pandas`")
23732427
def test_to_dataframe_iterable(self):
23742428
from google.cloud.bigquery.schema import SchemaField
2375-
import types
23762429

23772430
schema = [
23782431
SchemaField("name", "STRING", mode="REQUIRED"),
@@ -2415,7 +2468,6 @@ def test_to_dataframe_iterable(self):
24152468
@unittest.skipIf(pandas is None, "Requires `pandas`")
24162469
def test_to_dataframe_iterable_with_dtypes(self):
24172470
from google.cloud.bigquery.schema import SchemaField
2418-
import types
24192471

24202472
schema = [
24212473
SchemaField("name", "STRING", mode="REQUIRED"),
@@ -2527,6 +2579,61 @@ def test_to_dataframe_iterable_w_bqstorage(self):
25272579
# Don't close the client if it was passed in.
25282580
bqstorage_client._transport.grpc_channel.close.assert_not_called()
25292581

2582+
@unittest.skipIf(pandas is None, "Requires `pandas`")
2583+
@unittest.skipIf(
2584+
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
2585+
)
2586+
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
2587+
def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self):
2588+
from google.cloud.bigquery import schema
2589+
from google.cloud.bigquery import table as mut
2590+
2591+
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
2592+
2593+
iterator_schema = [
2594+
schema.SchemaField("name", "STRING", mode="REQUIRED"),
2595+
schema.SchemaField("age", "INTEGER", mode="REQUIRED"),
2596+
]
2597+
path = "/foo"
2598+
api_request = mock.Mock(
2599+
side_effect=[
2600+
{
2601+
"rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}],
2602+
"pageToken": "NEXTPAGE",
2603+
},
2604+
{"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]},
2605+
]
2606+
)
2607+
row_iterator = mut.RowIterator(
2608+
_mock_client(),
2609+
api_request,
2610+
path,
2611+
iterator_schema,
2612+
table=mut.TableReference.from_string("proj.dset.tbl"),
2613+
selected_fields=iterator_schema,
2614+
max_results=25,
2615+
)
2616+
2617+
with warnings.catch_warnings(record=True) as warned:
2618+
dfs = row_iterator.to_dataframe_iterable(bqstorage_client=bqstorage_client)
2619+
2620+
# Was a warning emitted?
2621+
matches = [
2622+
warning
2623+
for warning in warned
2624+
if warning.category is UserWarning
2625+
and "cannot use bqstorage_client" in str(warning).lower()
2626+
and "REST" in str(warning)
2627+
]
2628+
assert len(matches) == 1, "User warning was not emitted."
2629+
assert __file__ in str(matches[0]), "Warning emitted with incorrect stacklevel"
2630+
2631+
# Basic check of what we got as a result.
2632+
dataframes = list(dfs)
2633+
assert len(dataframes) == 2
2634+
assert isinstance(dataframes[0], pandas.DataFrame)
2635+
assert isinstance(dataframes[1], pandas.DataFrame)
2636+
25302637
@mock.patch("google.cloud.bigquery.table.pandas", new=None)
25312638
def test_to_dataframe_iterable_error_if_pandas_is_none(self):
25322639
from google.cloud.bigquery.schema import SchemaField
@@ -2926,7 +3033,7 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self):
29263033
self.assertEqual(len(matches), 1, msg="User warning was not emitted.")
29273034

29283035
@unittest.skipIf(pandas is None, "Requires `pandas`")
2929-
def test_to_dataframe_max_results_w_create_bqstorage_warning(self):
3036+
def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self):
29303037
from google.cloud.bigquery.schema import SchemaField
29313038

29323039
schema = [
@@ -2940,6 +3047,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self):
29403047
path = "/foo"
29413048
api_request = mock.Mock(return_value={"rows": rows})
29423049
mock_client = _mock_client()
3050+
mock_bqstorage_client = mock.sentinel.bq_storage_client
29433051

29443052
row_iterator = self._make_one(
29453053
client=mock_client,
@@ -2950,7 +3058,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self):
29503058
)
29513059

29523060
with warnings.catch_warnings(record=True) as warned:
2953-
row_iterator.to_dataframe(create_bqstorage_client=True)
3061+
row_iterator.to_dataframe(bqstorage_client=mock_bqstorage_client)
29543062

29553063
matches = [
29563064
warning
@@ -2960,6 +3068,46 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self):
29603068
and "REST" in str(warning)
29613069
]
29623070
self.assertEqual(len(matches), 1, msg="User warning was not emitted.")
3071+
self.assertIn(
3072+
__file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel"
3073+
)
3074+
mock_client._ensure_bqstorage_client.assert_not_called()
3075+
3076+
@unittest.skipIf(pandas is None, "Requires `pandas`")
3077+
def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self):
3078+
from google.cloud.bigquery.schema import SchemaField
3079+
3080+
schema = [
3081+
SchemaField("name", "STRING", mode="REQUIRED"),
3082+
SchemaField("age", "INTEGER", mode="REQUIRED"),
3083+
]
3084+
rows = [
3085+
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
3086+
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
3087+
]
3088+
path = "/foo"
3089+
api_request = mock.Mock(return_value={"rows": rows})
3090+
mock_client = _mock_client()
3091+
3092+
row_iterator = self._make_one(
3093+
client=mock_client,
3094+
api_request=api_request,
3095+
path=path,
3096+
schema=schema,
3097+
max_results=42,
3098+
)
3099+
3100+
with warnings.catch_warnings(record=True) as warned:
3101+
row_iterator.to_dataframe(create_bqstorage_client=True)
3102+
3103+
matches = [
3104+
warning
3105+
for warning in warned
3106+
if warning.category is UserWarning
3107+
and "cannot use bqstorage_client" in str(warning).lower()
3108+
and "REST" in str(warning)
3109+
]
3110+
self.assertFalse(matches)
29633111
mock_client._ensure_bqstorage_client.assert_not_called()
29643112

29653113
@unittest.skipIf(pandas is None, "Requires `pandas`")

0 commit comments

Comments
 (0)