diff --git a/tableauserverclient/server/endpoint/exceptions.py b/tableauserverclient/server/endpoint/exceptions.py
index 757ca5552..3c9226f0f 100644
--- a/tableauserverclient/server/endpoint/exceptions.py
+++ b/tableauserverclient/server/endpoint/exceptions.py
@@ -50,6 +50,10 @@ class NonXMLResponseError(Exception):
     pass
 
 
+class InvalidGraphQLQuery(Exception):
+    pass
+
+
 class GraphQLError(Exception):
     def __init__(self, error_payload):
         self.error = error_payload
diff --git a/tableauserverclient/server/endpoint/metadata_endpoint.py b/tableauserverclient/server/endpoint/metadata_endpoint.py
index 900b16fb2..9a265a019 100644
--- a/tableauserverclient/server/endpoint/metadata_endpoint.py
+++ b/tableauserverclient/server/endpoint/metadata_endpoint.py
@@ -1,27 +1,63 @@
 from .endpoint import Endpoint, api
-from .exceptions import GraphQLError
-
+from .exceptions import GraphQLError, InvalidGraphQLQuery
 import logging
 import json
 
 logger = logging.getLogger('tableau.endpoint.metadata')
 
 
+def is_valid_paged_query(parsed_query):
+    """Check that the required $first and $afterToken variables are present in the query.
+    Also check that we are asking for the pageInfo object, so we get the endCursor. There
+    is no way to do this reliably without writing a GraphQL parser, so simply check that
+    the string contains 'hasNextPage' and 'endCursor'."""
+    return all(k in parsed_query['variables'] for k in ('first', 'afterToken')) and \
+        'hasNextPage' in parsed_query['query'] and \
+        'endCursor' in parsed_query['query']
+
+
+def extract_values(obj, key):
+    """Pull all values of the specified key from nested JSON.
+    Taken from: https://hackersandslackers.com/extract-data-from-complex-json-python/"""
+    arr = []
+
+    def extract(obj, arr, key):
+        """Recursively search for values of key in JSON tree."""
+        if isinstance(obj, dict):
+            for k, v in obj.items():
+                if isinstance(v, (dict, list)):
+                    extract(v, arr, key)
+                elif k == key:
+                    arr.append(v)
+        elif isinstance(obj, list):
+            for item in obj:
+                extract(item, arr, key)
+        return arr
+
+    results = extract(obj, arr, key)
+    return results
+
+
+def get_page_info(result):
+    next_page = extract_values(result, 'hasNextPage').pop()
+    cursor = extract_values(result, 'endCursor').pop()
+    return next_page, cursor
+
+
 class Metadata(Endpoint):
     @property
     def baseurl(self):
         return "{0}/api/metadata/graphql".format(self.parent_srv.server_address)
 
-    @api("3.2")
+    @api("3.5")
     def query(self, query, variables=None, abort_on_error=False):
         logger.info('Querying Metadata API')
         url = self.baseurl
 
         try:
             graphql_query = json.dumps({'query': query, 'variables': variables})
-        except Exception:
-            # Place holder for now
-            raise Exception('Must provide a string')
+        except Exception:
+            raise InvalidGraphQLQuery('Must provide a string')
 
         # Setting content type because post_reuqest defaults to text/xml
         server_response = self.post_request(url, graphql_query, content_type='text/json')
@@ -31,3 +67,55 @@ def query(self, query, variables=None, abort_on_error=False):
             raise GraphQLError(results['errors'])
 
         return results
+
+    @api("3.5")
+    def paginated_query(self, query, variables=None, abort_on_error=False):
+        logger.info('Querying Metadata API using a Paged Query')
+        url = self.baseurl
+
+        if variables is None:
+            # default parameters
+            variables = {'first': 100, 'afterToken': None}
+        elif 'first' in variables and 'afterToken' not in variables:
+            # they passed a page size but no token, probably because they're starting at the `null` token
+            variables.update({'afterToken': None})
+
+        graphql_query = json.dumps({'query': query, 'variables': variables})
+        parsed_query = json.loads(graphql_query)
+
+        if not is_valid_paged_query(parsed_query):
+            raise InvalidGraphQLQuery('Paged queries must have `$first` and `$afterToken` variables as well as '
+                                      'a pageInfo object with `endCursor` and `hasNextPage`')
+
+        results_dict = {'pages': []}
+        paginated_results = results_dict['pages']
+
+        # get the first page
+        server_response = self.post_request(url, graphql_query, content_type='text/json')
+        results = server_response.json()
+
+        if abort_on_error and results.get('errors', None):
+            raise GraphQLError(results['errors'])
+
+        paginated_results.append(results)
+
+        # repeat
+        has_another_page, cursor = get_page_info(results)
+
+        while has_another_page:
+            # update the token to fetch the next page
+            variables.update({'afterToken': cursor})
+            # make the call
+            logger.debug("Calling with token: " + cursor)
+            graphql_query = json.dumps({'query': query, 'variables': variables})
+            server_response = self.post_request(url, graphql_query, content_type='text/json')
+            results = server_response.json()
+            # verify the response
+            if abort_on_error and results.get('errors', None):
+                raise GraphQLError(results['errors'])
+            # save the results and repeat
+            paginated_results.append(results)
+            has_another_page, cursor = get_page_info(results)
+
+        logger.info('Successfully got all results for paged query')
+        return results_dict
diff --git a/test/assets/metadata_paged_1.json b/test/assets/metadata_paged_1.json
new file mode 100644
index 000000000..c1cc0318e
--- /dev/null
+++ b/test/assets/metadata_paged_1.json
@@ -0,0 +1,15 @@
+{
+  "data": {
+    "publishedDatasourcesConnection": {
+      "pageInfo": {
+        "hasNextPage": true,
+        "endCursor": "eyJ0eXBlIjoiUHVibGlzaGVkRGF0YXNvdXJjZSIsInNjb3BlIjoic2l0ZXMvMSIsInNvcnRPcmRlclZhbHVlIjp7Imxhc3RJZCI6IjAwMzllNWQ1LTI1ZmEtMTk2Yi1jNjZlLWMwNjc1ODM5ZTBiMCJ9fQ=="
+      },
+      "nodes": [
+        {
+          "id": "0039e5d5-25fa-196b-c66e-c0675839e0b0"
+        }
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/test/assets/metadata_paged_2.json b/test/assets/metadata_paged_2.json
new file mode 100644
index 000000000..af9601d59
--- /dev/null
+++ b/test/assets/metadata_paged_2.json
@@ -0,0 +1,15 @@
+{
+  "data": {
+    "publishedDatasourcesConnection": {
+      "pageInfo": {
+        "hasNextPage": true,
+        "endCursor": "eyJ0eXBlIjoiUHVibGlzaGVkRGF0YXNvdXJjZSIsInNjb3BlIjoic2l0ZXMvMSIsInNvcnRPcmRlclZhbHVlIjp7Imxhc3RJZCI6IjAwYjE5MWNlLTYwNTUtYWZmNS1lMjc1LWMyNjYxMGM4YzRkNiJ9fQ=="
+      },
+      "nodes": [
+        {
+          "id": "00b191ce-6055-aff5-e275-c26610c8c4d6"
+        }
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/test/assets/metadata_paged_3.json b/test/assets/metadata_paged_3.json
new file mode 100644
index 000000000..958a408ea
--- /dev/null
+++ b/test/assets/metadata_paged_3.json
@@ -0,0 +1,15 @@
+{
+  "data": {
+    "publishedDatasourcesConnection": {
+      "pageInfo": {
+        "hasNextPage": false,
+        "endCursor": "eyJ0eXBlIjoiUHVibGlzaGVkRGF0YXNvdXJjZSIsInNjb3BlIjoic2l0ZXMvMSIsInNvcnRPcmRlclZhbHVlIjp7Imxhc3RJZCI6IjAyZjNlNGQ4LTg1NmEtZGEzNi1mNmM1LWM5MDA5NDVjNTdiOSJ9fQ=="
+      },
+      "nodes": [
+        {
+          "id": "02f3e4d8-856a-da36-f6c5-c900945c57b9"
+        }
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/test/assets/metadata_query_expected_dict.dict b/test/assets/metadata_query_expected_dict.dict
new file mode 100644
index 000000000..241b333d4
--- /dev/null
+++ b/test/assets/metadata_query_expected_dict.dict
@@ -0,0 +1,9 @@
+{'pages': [{'data': {'publishedDatasourcesConnection': {'nodes': [{'id': '0039e5d5-25fa-196b-c66e-c0675839e0b0'}],
+                                                        'pageInfo': {'endCursor': 'eyJ0eXBlIjoiUHVibGlzaGVkRGF0YXNvdXJjZSIsInNjb3BlIjoic2l0ZXMvMSIsInNvcnRPcmRlclZhbHVlIjp7Imxhc3RJZCI6IjAwMzllNWQ1LTI1ZmEtMTk2Yi1jNjZlLWMwNjc1ODM5ZTBiMCJ9fQ==',
+                                                                     'hasNextPage': True}}}},
+           {'data': {'publishedDatasourcesConnection': {'nodes': [{'id': '00b191ce-6055-aff5-e275-c26610c8c4d6'}],
+                                                        'pageInfo': {'endCursor': 'eyJ0eXBlIjoiUHVibGlzaGVkRGF0YXNvdXJjZSIsInNjb3BlIjoic2l0ZXMvMSIsInNvcnRPcmRlclZhbHVlIjp7Imxhc3RJZCI6IjAwYjE5MWNlLTYwNTUtYWZmNS1lMjc1LWMyNjYxMGM4YzRkNiJ9fQ==',
+                                                                     'hasNextPage': True}}}},
+           {'data': {'publishedDatasourcesConnection': {'nodes': [{'id': '02f3e4d8-856a-da36-f6c5-c900945c57b9'}],
+                                                        'pageInfo': {'endCursor': 'eyJ0eXBlIjoiUHVibGlzaGVkRGF0YXNvdXJjZSIsInNjb3BlIjoic2l0ZXMvMSIsInNvcnRPcmRlclZhbHVlIjp7Imxhc3RJZCI6IjAyZjNlNGQ4LTg1NmEtZGEzNi1mNmM1LWM5MDA5NDVjNTdiOSJ9fQ==',
+                                                                     'hasNextPage': False}}}}]}
\ No newline at end of file
diff --git a/test/test_metadata.py b/test/test_metadata.py
index e2a44734c..1c0846d73 100644
--- a/test/test_metadata.py
+++ b/test/test_metadata.py
@@ -10,6 +10,11 @@
 METADATA_QUERY_SUCCESS = os.path.join(TEST_ASSET_DIR, 'metadata_query_success.json')
 METADATA_QUERY_ERROR = os.path.join(TEST_ASSET_DIR, 'metadata_query_error.json')
+EXPECTED_PAGED_DICT = os.path.join(TEST_ASSET_DIR, 'metadata_query_expected_dict.dict')
+
+METADATA_PAGE_1 = os.path.join(TEST_ASSET_DIR, 'metadata_paged_1.json')
+METADATA_PAGE_2 = os.path.join(TEST_ASSET_DIR, 'metadata_paged_2.json')
+METADATA_PAGE_3 = os.path.join(TEST_ASSET_DIR, 'metadata_paged_3.json')
 
 EXPECTED_DICT = {'publishedDatasources': [{'id': '01cf92b2-2d17-b656-fc48-5c25ef6d5352',
                                            'name': 'Batters (TestV1)'},
@@ -30,7 +35,7 @@ class MetadataTests(unittest.TestCase):
     def setUp(self):
         self.server = TSC.Server('http://test')
         self.baseurl = self.server.metadata.baseurl
-        self.server.version = "3.2"
+        self.server.version = "3.5"
 
         self.server._site_id = 'dad65087-b08b-4603-af4e-2887b8aafc67'
         self.server._auth_token = 'j80k54ll2lfMZ0tv97mlPvvSCRyD0DOM'
@@ -46,6 +51,30 @@ def test_metadata_query(self):
 
         self.assertDictEqual(EXPECTED_DICT, datasources)
 
+    def test_paged_metadata_query(self):
+        with open(EXPECTED_PAGED_DICT, 'rb') as f:
+            expected = eval(f.read())
+
+        # prepare the 3 pages of results
+        with open(METADATA_PAGE_1, 'rb') as f:
+            result_1 = f.read().decode()
+        with open(METADATA_PAGE_2, 'rb') as f:
+            result_2 = f.read().decode()
+        with open(METADATA_PAGE_3, 'rb') as f:
+            result_3 = f.read().decode()
+
+        with requests_mock.mock() as m:
+            m.post(self.baseurl, [{'text': result_1, 'status_code': 200},
+                                  {'text': result_2, 'status_code': 200},
+                                  {'text': result_3, 'status_code': 200}])
+
+            # validation checks for endCursor and hasNextPage,
+            # but the query text doesn't matter for the test
+            actual = self.server.metadata.paginated_query('fake query endCursor hasNextPage',
+                                                          variables={'first': 1, 'afterToken': None})
+
+        self.assertDictEqual(expected, actual)
+
     def test_metadata_query_ignore_error(self):
         with open(METADATA_QUERY_ERROR, 'rb') as f:
             response_json = json.loads(f.read().decode())
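
For reference, a minimal usage sketch of the new paginated_query endpoint. The query below is illustrative only: publishedDatasourcesConnection with first/after arguments follows the Metadata API's pagination convention used by the test assets, but the exact connection fields depend on the server's schema, and the server URL and credentials are placeholders.

    import tableauserverclient as TSC

    # A paged query must declare the $first and $afterToken variables and request
    # a pageInfo object with hasNextPage and endCursor; is_valid_paged_query()
    # only checks that these strings appear in the query text and variables.
    query = """
    query paged($first: Int, $afterToken: String) {
        publishedDatasourcesConnection(first: $first, after: $afterToken) {
            nodes {
                id
                name
            }
            pageInfo {
                hasNextPage
                endCursor
            }
        }
    }
    """

    server = TSC.Server('https://example-server')                   # placeholder URL
    tableau_auth = TSC.TableauAuth('username', 'password')           # placeholder credentials

    with server.auth.sign_in(tableau_auth):
        # Returns {'pages': [...]}, one raw GraphQL response dict per page.
        results = server.metadata.paginated_query(query, variables={'first': 100, 'afterToken': None})
        for page in results['pages']:
            for node in page['data']['publishedDatasourcesConnection']['nodes']:
                print(node['id'], node['name'])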