Skip to content

'offset' (for queries) not implemented correctly  #20

@crwilcox

Description

@crwilcox

If you have a large collection of large entities and query it with a large offset, the query's limit and offset are not respected: the number of results returned differs from what was requested.

The below sample shows how this can happen.

b/146579743


# Imports the Google Cloud client library
from google.cloud import datastore
import six
import string


# Kind under which the sample entities are stored and queried.
KIND = "LargeKind"

# Number of entities the sample expects to exist under KIND.
TOTAL_OBJECTS = 1500

# Filler value for the bulky properties: the 26-letter alphabet repeated 58
# times (1508 characters), truncated to exactly 1500 characters.
# NOTE(review): 1500 presumably matches Datastore's size cap for indexed
# string properties -- confirm.
MAX_STRING = (58 * string.ascii_lowercase)[:1500]

# Instantiates a client
datastore_client = datastore.Client()
def put_objects(count):
    """Store ``count`` large entities of kind KIND, one put per entity.

    Entities are named ``sampletask00000``, ``sampletask00001``, ... and each
    carries small ``name``/``family``/``alive`` properties plus one
    ``space-<letter>`` property per lowercase ASCII letter holding MAX_STRING.

    Args:
        count: Number of entities to write.
    """
    # The bulky properties are identical for every entity, so build them once
    # instead of once per entity. A distinct loop name also avoids rebinding
    # the entity index, which the original inner loop did.
    filler = {
        f'space-{letter}': MAX_STRING for letter in string.ascii_lowercase
    }

    for index in range(count):
        # The Cloud Datastore key for the new entity, with an explicit name
        task_key = datastore_client.key(KIND, f'sampletask{index:05d}')

        # Prepares the new entity
        task = datastore.Entity(key=task_key)
        task['name'] = f"{index:05d}"
        task['family'] = 'Stark'
        task['alive'] = False
        task.update(filler)

        # Saves the entity
        datastore_client.put(task)


def query_objects(total_entities):
    """Run limit/offset queries against KIND and report count mismatches.

    Each check fetches the entities matching ``family == "Stark"`` and
    ``alive == False`` with a given limit and offset, and prints a line
    whenever the number of entities returned differs from the number expected.
    Nothing is printed for a check that passes.

    Args:
        total_entities: Number of matching entities assumed to be stored
            under KIND. Expected result counts are derived from this value
            rather than from a module-level constant.
    """
    page_query = datastore_client.query(kind=KIND)
    page_query.add_filter("family", "=", "Stark")
    page_query.add_filter("alive", "=", False)
    # page_query.order = "name"

    def expected_count(limit, offset):
        # Entities left after skipping `offset`, capped by `limit` if given.
        remaining = max(total_entities - (offset or 0), 0)
        return remaining if limit is None else min(limit, remaining)

    def verify(limit, offset):
        expected = expected_count(limit, offset)
        entities = list(page_query.fetch(limit=limit, offset=offset))
        if len(entities) != expected:
            print(f"{limit}, {offset}, {expected}. Returned: {len(entities)}")

    print("Verify that with no offset there are the correct # of results")
    verify(limit=None, offset=None)

    print("Verify that with no limit there are results (offset provided)")
    verify(limit=None, offset=900)

    print("offset beyond items larger Verify 200 items found")
    verify(limit=200, offset=1100)

    print("Offset within range, expect 50 despite larger limit")
    # Clamp so the offset never goes negative for very small collections.
    verify(limit=100, offset=max(total_entities - 50, 0))

    print("offset beyond items larger Verify no items found")
    verify(limit=200, offset=total_entities + 1000)

# One-time setup: uncomment to write TOTAL_OBJECTS entities under KIND.
# NOTE(review): presumably left disabled because the entities had already
# been written when the report was filed -- confirm before running.
# put_objects(TOTAL_OBJECTS)
# Run the limit/offset checks; a line is printed for each count mismatch.
query_objects(TOTAL_OBJECTS)

Metadata

Metadata

Assignees

Labels

api: datastore — Issues related to the googleapis/python-datastore API.
priority: p1 — Important issue which blocks shipping the next release. Will be fixed prior to next release.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions