Skip to content

Commit 090c5d5

Browse files
authored
Merge pull request #2594 from dhermes/separate-pages-iter
Implement Iterator.pages and simplify items iteration
2 parents 43c017e + 9a504c6 commit 090c5d5

File tree

9 files changed, with 184 additions and 235 deletions.

bigquery/unit_tests/test_client.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def test_ctor(self):
3535
self.assertIs(client.connection.http, http)
3636

3737
def test_list_projects_defaults(self):
38+
import six
3839
from google.cloud.bigquery.client import Project
3940
PROJECT_1 = 'PROJECT_ONE'
4041
PROJECT_2 = 'PROJECT_TWO'
@@ -60,8 +61,8 @@ def test_list_projects_defaults(self):
6061
conn = client.connection = _Connection(DATA)
6162

6263
iterator = client.list_projects()
63-
iterator.update_page()
64-
projects = list(iterator.page)
64+
page = six.next(iterator.pages)
65+
projects = list(page)
6566
token = iterator.next_page_token
6667

6768
self.assertEqual(len(projects), len(DATA['projects']))
@@ -78,6 +79,8 @@ def test_list_projects_defaults(self):
7879
self.assertEqual(req['path'], '/%s' % PATH)
7980

8081
def test_list_projects_explicit_response_missing_projects_key(self):
82+
import six
83+
8184
PROJECT = 'PROJECT'
8285
PATH = 'projects'
8386
TOKEN = 'TOKEN'
@@ -87,8 +90,8 @@ def test_list_projects_explicit_response_missing_projects_key(self):
8790
conn = client.connection = _Connection(DATA)
8891

8992
iterator = client.list_projects(max_results=3, page_token=TOKEN)
90-
iterator.update_page()
91-
projects = list(iterator.page)
93+
page = six.next(iterator.pages)
94+
projects = list(page)
9295
token = iterator.next_page_token
9396

9497
self.assertEqual(len(projects), 0)
@@ -102,6 +105,7 @@ def test_list_projects_explicit_response_missing_projects_key(self):
102105
{'maxResults': 3, 'pageToken': TOKEN})
103106

104107
def test_list_datasets_defaults(self):
108+
import six
105109
from google.cloud.bigquery.dataset import Dataset
106110
PROJECT = 'PROJECT'
107111
DATASET_1 = 'dataset_one'
@@ -128,8 +132,8 @@ def test_list_datasets_defaults(self):
128132
conn = client.connection = _Connection(DATA)
129133

130134
iterator = client.list_datasets()
131-
iterator.update_page()
132-
datasets = list(iterator.page)
135+
page = six.next(iterator.pages)
136+
datasets = list(page)
133137
token = iterator.next_page_token
134138

135139
self.assertEqual(len(datasets), len(DATA['datasets']))
@@ -145,6 +149,8 @@ def test_list_datasets_defaults(self):
145149
self.assertEqual(req['path'], '/%s' % PATH)
146150

147151
def test_list_datasets_explicit_response_missing_datasets_key(self):
152+
import six
153+
148154
PROJECT = 'PROJECT'
149155
PATH = 'projects/%s/datasets' % PROJECT
150156
TOKEN = 'TOKEN'
@@ -155,8 +161,8 @@ def test_list_datasets_explicit_response_missing_datasets_key(self):
155161

156162
iterator = client.list_datasets(
157163
include_all=True, max_results=3, page_token=TOKEN)
158-
iterator.update_page()
159-
datasets = list(iterator.page)
164+
page = six.next(iterator.pages)
165+
datasets = list(page)
160166
token = iterator.next_page_token
161167

162168
self.assertEqual(len(datasets), 0)
@@ -189,6 +195,7 @@ def test_job_from_resource_unknown_type(self):
189195
client.job_from_resource({'configuration': {'nonesuch': {}}})
190196

191197
def test_list_jobs_defaults(self):
198+
import six
192199
from google.cloud.bigquery.job import LoadTableFromStorageJob
193200
from google.cloud.bigquery.job import CopyJob
194201
from google.cloud.bigquery.job import ExtractTableToStorageJob
@@ -301,8 +308,8 @@ def test_list_jobs_defaults(self):
301308
conn = client.connection = _Connection(DATA)
302309

303310
iterator = client.list_jobs()
304-
iterator.update_page()
305-
jobs = list(iterator.page)
311+
page = six.next(iterator.pages)
312+
jobs = list(page)
306313
token = iterator.next_page_token
307314

308315
self.assertEqual(len(jobs), len(DATA['jobs']))
@@ -319,6 +326,7 @@ def test_list_jobs_defaults(self):
319326
self.assertEqual(req['query_params'], {'projection': 'full'})
320327

321328
def test_list_jobs_load_job_wo_sourceUris(self):
329+
import six
322330
from google.cloud.bigquery.job import LoadTableFromStorageJob
323331
PROJECT = 'PROJECT'
324332
DATASET = 'test_dataset'
@@ -356,8 +364,8 @@ def test_list_jobs_load_job_wo_sourceUris(self):
356364
conn = client.connection = _Connection(DATA)
357365

358366
iterator = client.list_jobs()
359-
iterator.update_page()
360-
jobs = list(iterator.page)
367+
page = six.next(iterator.pages)
368+
jobs = list(page)
361369
token = iterator.next_page_token
362370

363371
self.assertEqual(len(jobs), len(DATA['jobs']))
@@ -374,6 +382,7 @@ def test_list_jobs_load_job_wo_sourceUris(self):
374382
self.assertEqual(req['query_params'], {'projection': 'full'})
375383

376384
def test_list_jobs_explicit_missing(self):
385+
import six
377386
PROJECT = 'PROJECT'
378387
PATH = 'projects/%s/jobs' % PROJECT
379388
DATA = {}
@@ -384,8 +393,8 @@ def test_list_jobs_explicit_missing(self):
384393

385394
iterator = client.list_jobs(max_results=1000, page_token=TOKEN,
386395
all_users=True, state_filter='done')
387-
iterator.update_page()
388-
jobs = list(iterator.page)
396+
page = six.next(iterator.pages)
397+
jobs = list(page)
389398
token = iterator.next_page_token
390399

391400
self.assertEqual(len(jobs), 0)

core/google/cloud/iterator.py

Lines changed: 60 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -43,78 +43,44 @@
4343
... break
4444
4545
When iterating, not every new item will send a request to the server.
46-
To monitor these requests, track the current page of the iterator::
46+
To iterate based on each page of items (where a page corresponds to
47+
a request)::
4748
4849
>>> iterator = Iterator(...)
49-
>>> iterator.page_number
50-
0
51-
>>> next(iterator)
52-
<MyItemClass at 0x7f1d3cccf690>
53-
>>> iterator.page_number
54-
1
55-
>>> iterator.page.remaining
56-
1
57-
>>> next(iterator)
58-
<MyItemClass at 0x7f1d3cccfe90>
59-
>>> iterator.page_number
60-
1
61-
>>> iterator.page.remaining
62-
0
63-
>>> next(iterator)
64-
<MyItemClass at 0x7f1d3cccffd0>
65-
>>> iterator.page_number
66-
2
67-
>>> iterator.page.remaining
68-
19
69-
70-
It's also possible to consume an entire page and handle the paging process
71-
manually::
72-
73-
>>> iterator = Iterator(...)
74-
>>> # Manually pull down the first page.
75-
>>> iterator.update_page()
76-
>>> items = list(iterator.page)
77-
>>> items
50+
>>> for page in iterator.pages:
51+
... print('=' * 20)
52+
... print(' Page number: %d' % (iterator.page_number,))
53+
... print(' Items in page: %d' % (page.num_items,))
54+
... print(' First item: %r' % (next(page),))
55+
... print('Items remaining: %d' % (page.remaining,))
56+
... print('Next page token: %s' % (iterator.next_page_token,))
57+
====================
58+
Page number: 1
59+
Items in page: 1
60+
First item: <MyItemClass at 0x7f1d3cccf690>
61+
Items remaining: 0
62+
Next page token: eav1OzQB0OM8rLdGXOEsyQWSG
63+
====================
64+
Page number: 2
65+
Items in page: 19
66+
First item: <MyItemClass at 0x7f1d3cccffd0>
67+
Items remaining: 18
68+
Next page token: None
69+
70+
To consume an entire page::
71+
72+
>>> list(page)
7873
[
7974
<MyItemClass at 0x7fd64a098ad0>,
8075
<MyItemClass at 0x7fd64a098ed0>,
8176
<MyItemClass at 0x7fd64a098e90>,
8277
]
83-
>>> iterator.page.remaining
84-
0
85-
>>> iterator.page.num_items
86-
3
87-
>>> iterator.next_page_token
88-
'eav1OzQB0OM8rLdGXOEsyQWSG'
89-
>>>
90-
>>> # Ask for the next page to be grabbed.
91-
>>> iterator.update_page()
92-
>>> list(iterator.page)
93-
[
94-
<MyItemClass at 0x7fea740abdd0>,
95-
<MyItemClass at 0x7fea740abe50>,
96-
]
97-
>>>
98-
>>> # When there are no more results
99-
>>> iterator.next_page_token is None
100-
True
101-
>>> iterator.update_page()
102-
>>> iterator.page is None
103-
True
10478
"""
10579

10680

10781
import six
10882

10983

110-
_UNSET = object()
111-
_NO_MORE_PAGES_ERR = 'Iterator has no more pages.'
112-
_UNSTARTED_ERR = (
113-
'Iterator has not been started. Either begin iterating, '
114-
'call next(my_iter) or call my_iter.update_page().')
115-
_PAGE_ERR_TEMPLATE = (
116-
'Tried to update the page while current page (%r) still has %d '
117-
'items remaining.')
11884
DEFAULT_ITEMS_KEY = 'items'
11985
"""The dictionary key used to retrieve items from each response."""
12086

@@ -261,7 +227,6 @@ def __init__(self, client, path, item_to_value,
261227
self.page_number = 0
262228
self.next_page_token = page_token
263229
self.num_results = 0
264-
self._page = _UNSET
265230

266231
def _verify_params(self):
267232
"""Verifies the parameters don't use any reserved parameter.
@@ -274,82 +239,53 @@ def _verify_params(self):
274239
raise ValueError('Using a reserved parameter',
275240
reserved_in_use)
276241

277-
@property
278-
def page(self):
279-
"""The current page of results that has been retrieved.
280-
281-
If there are no more results, will return :data:`None`.
242+
def _pages_iter(self):
243+
"""Generator of pages of API responses.
282244
283-
:rtype: :class:`Page`
284-
:returns: The page of items that has been retrieved.
285-
:raises AttributeError: If the page has not been set.
245+
Yields :class:`Page` instances.
286246
"""
287-
if self._page is _UNSET:
288-
raise AttributeError(_UNSTARTED_ERR)
289-
return self._page
247+
while self._has_next_page():
248+
response = self._get_next_page_response()
249+
page = Page(self, response, self._items_key,
250+
self._item_to_value)
251+
self._page_start(self, page, response)
252+
self.num_results += page.num_items
253+
yield page
290254

291-
def __iter__(self):
292-
"""The :class:`Iterator` is an iterator.
255+
@property
256+
def pages(self):
257+
"""Iterator of pages in the response.
293258
294-
:rtype: :class:`Iterator`
295-
:returns: Current instance.
259+
:rtype: :class:`~types.GeneratorType`
260+
:returns: A generator of :class:`Page` instances.
296261
:raises ValueError: If the iterator has already been started.
297262
"""
298263
if self._started:
299264
raise ValueError('Iterator has already started', self)
300265
self._started = True
301-
return self
302-
303-
def update_page(self, require_empty=True):
304-
"""Move to the next page in the result set.
305-
306-
If the current page is not empty and ``require_empty`` is :data:`True`
307-
then an exception will be raised. If the current page is not empty
308-
and ``require_empty`` is :data:`False`, then this will return
309-
without updating the current page.
310-
311-
If the current page **is** empty, but there are no more results,
312-
sets the current page to :data:`None`.
266+
return self._pages_iter()
267+
268+
def _items_iter(self):
269+
"""Iterator for each item returned."""
270+
for page in self._pages_iter():
271+
# Decrement the total results since the pages iterator adds
272+
# to it when each page is encountered.
273+
self.num_results -= page.num_items
274+
for item in page:
275+
self.num_results += 1
276+
yield item
313277

314-
If there are no more pages, throws an exception.
315-
316-
:type require_empty: bool
317-
:param require_empty: (Optional) Flag to indicate if the current page
318-
must be empty before updating.
278+
def __iter__(self):
279+
"""Iterator for each item returned.
319280
320-
:raises ValueError: If ``require_empty`` is :data:`True` but the
321-
current page is not empty.
322-
:raises ValueError: If there are no more pages.
281+
:rtype: :class:`~types.GeneratorType`
282+
:returns: A generator of items from the API.
283+
:raises ValueError: If the iterator has already been started.
323284
"""
324-
if self._page is None:
325-
raise ValueError(_NO_MORE_PAGES_ERR)
326-
327-
# NOTE: This assumes Page.remaining can never go below 0.
328-
page_empty = self._page is _UNSET or self._page.remaining == 0
329-
if page_empty:
330-
if self._has_next_page():
331-
response = self._get_next_page_response()
332-
self._page = Page(self, response, self._items_key,
333-
self._item_to_value)
334-
self._page_start(self, self._page, response)
335-
else:
336-
self._page = None
337-
else:
338-
if require_empty:
339-
msg = _PAGE_ERR_TEMPLATE % (self._page, self.page.remaining)
340-
raise ValueError(msg)
341-
342-
def next(self):
343-
"""Get the next item from the request."""
344-
self.update_page(require_empty=False)
345-
if self.page is None:
346-
raise StopIteration
347-
item = six.next(self.page)
348-
self.num_results += 1
349-
return item
350-
351-
# Alias needed for Python 2/3 support.
352-
__next__ = next
285+
if self._started:
286+
raise ValueError('Iterator has already started', self)
287+
self._started = True
288+
return self._items_iter()
353289

354290
def _has_next_page(self):
355291
"""Determines whether or not there are more pages with results.

0 commit comments

Comments
 (0)