74 changes: 74 additions & 0 deletions samples/update_datasource_data.py
@@ -0,0 +1,74 @@
####
# This script demonstrates how to update the data within a published
# live-to-Hyper datasource on the server.
#
# The sample is hardcoded against the `World Indicators` dataset and
# expects to receive the LUID of a published datasource containing
# that data. To create such a published datasource, you can use:
# ./publish_datasource.py --file ../test/assets/World\ Indicators.hyper
# which will print the LUID of the datasource.
#
# Before running this script, the datasource will contain a region `Europe`.
# After running this script, that region will be gone.
#
####

import argparse
import uuid
import logging

import tableauserverclient as TSC


def main():
    parser = argparse.ArgumentParser(description='Delete the `Europe` region from a published `World Indicators` datasource.')
    # Common options; please keep those in sync across all samples
    parser.add_argument('--server', '-s', required=True, help='server address')
    parser.add_argument('--site', '-S', help='site name')
    parser.add_argument('--token-name', '-p', required=True,
                        help='name of the personal access token used to sign into the server')
    parser.add_argument('--token-value', '-v', required=True,
                        help='value of the personal access token used to sign into the server')
    parser.add_argument('--logging-level', '-l', choices=['debug', 'info', 'error'], default='error',
                        help='desired logging level (set to error by default)')
    # Options specific to this sample
    parser.add_argument('datasource_id', help="The LUID of the `World Indicators` datasource")

    args = parser.parse_args()

    # Set logging level based on user input, or error by default
    logging_level = getattr(logging, args.logging_level.upper())
    logging.basicConfig(level=logging_level)

    tableau_auth = TSC.PersonalAccessTokenAuth(args.token_name, args.token_value, site_id=args.site)
    server = TSC.Server(args.server, use_server_version=True)
    with server.auth.sign_in(tableau_auth):
        # We use a unique `request_id` for every request.
        # In case the submission of the update job fails, we won't know whether the job was
        # submitted or not. It could be that the server received the request, changed the data,
        # but then the network connection broke down.
        # If you want a way to retry, e.g., inserts while making sure they aren't duplicated,
        # you need to use `request_id` for that purpose.
        # In our case, we don't care about retries, and the delete is idempotent anyway.
        # Hence, we simply use a randomly generated request id.
        request_id = str(uuid.uuid4())

        # This action will delete all rows with `Region=Europe` from the published data source.
        # Other actions (inserts, updates, ...) are also available. For more information, see
        # https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_how_to_update_data_to_hyper.htm
        actions = [
            {
                "action": "delete",
                "target-table": "Extract",
                "target-schema": "Extract",
                "condition": {"op": "eq", "target-col": "Region", "const": {"type": "string", "v": "Europe"}}
            }
        ]

        job = server.datasources.update_data(args.datasource_id, request_id=request_id, actions=actions)

        # TODO: Add a flag that will poll and wait for the returned job to be done
        print(job)


if __name__ == '__main__':
    main()
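
As an aside on the actions language used above: deletes are only one of the documented actions. Below is a hedged sketch of an upsert that pulls replacement rows from an uploaded Hyper file. The action and key names follow the REST documentation linked in the script, while new_rows.hyper and the single-column match on Region are hypothetical choices for illustration:

    # Sketch only: upsert rows from an uploaded payload into the published datasource.
    # `new_rows.hyper` is a hypothetical file; its `Extract` table is assumed to have
    # the same columns as the target. Matching on `Region` is purely illustrative.
    actions = [
        {
            "action": "upsert",
            "source-schema": "Extract",
            "source-table": "Extract",
            "target-schema": "Extract",
            "target-table": "Extract",
            "condition": {"op": "eq", "source-col": "Region", "target-col": "Region"},
        }
    ]
    job = server.datasources.update_data(
        args.datasource_id,
        request_id=str(uuid.uuid4()),
        actions=actions,
        payload="new_rows.hyper",  # uploaded in chunks before the PATCH request
    )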
29 changes: 29 additions & 0 deletions tableauserverclient/server/endpoint/datasources_endpoint.py
@@ -18,6 +18,7 @@
import copy
import cgi
from contextlib import closing
import json

# The maximum size of a file that can be published in a single request is 64MB
FILESIZE_LIMIT = 1024 * 1024 * 64 # 64MB
@@ -282,6 +283,34 @@ def publish(
        logger.info("Published {0} (ID: {1})".format(filename, new_datasource.id))
        return new_datasource

    @api(version="3.13")
    def update_data(self, datasource_or_connection_item, *, request_id, actions, payload=None):
Comment on lines +286 to +287

Reviewer: Can you add some documentation about the parameters and what this method does, plus links to the documentation of the API?

Collaborator (author): see #893

Reviewer: How does this work? Do we split the documentation from the code, or is this somehow merged?

Collaborator (author): For TSC, documentation and code are separated.
Not sure why, but that's how it is...
(Personally, I would prefer it if functions were documented in the source code itself and the docs were generated from it, but that's not how TSC currently handles its documentation.)

Reviewer: Should this use BackgroundJobItem instead of JobItem?

Collaborator (author): Not sure what the difference between JobItem and BackgroundJobItem is.
@t8y8, @bcantoni, which one should I use here?

Reviewer: I've been trying to work out whether the update_data method should wait for completion or return the JobItem, and it looks like returning the JobItem is more consistent, as this matches the other async method here (refresh).

To make this easier to use, would you be open to adding a convenience method to the Jobs endpoint that waits for job completion? E.g.:
@api(version="3.13")
def wait_for_async_job(self, async_job_id, timeout=None):
    ASYNC_JOB_POLL_INTERVAL = 30
    completed_at = None
    finish_code = None
    jobinfo = None

    wait_start_time = time.time()

    while completed_at is None:
        time.sleep(ASYNC_JOB_POLL_INTERVAL)
        jobinfo = self.get_by_id(async_job_id)
        completed_at = jobinfo.completed_at
        if completed_at is None:
            logger.debug("Job {} ... progress={}".format(async_job_id, jobinfo.progress))
            if timeout is not None:
                wait_elapsed_time = time.time() - wait_start_time
                if wait_elapsed_time > timeout:
                    raise TimeoutError(
                        "Timeout after {} seconds waiting for asynchronous job: {}".format(
                            wait_elapsed_time, async_job_id))
        else:
            finish_code = jobinfo.finish_code

    logger.info("Job {} completed: finish code: {} - notes: {}".format(async_job_id, finish_code, jobinfo.notes))
    return finish_code


Reviewer: Do you think that the need to write the JSON for the "actions" parameter makes this harder to use than it needs to be? I think we should keep this method so that you have the flexibility of writing your own "actions" JSON, but also create some convenience methods that pass the options as parameters - e.g.:

def update_datasource(
    self, datasource_or_connection_item, *,
    payload,
    matching_columns=(('ROWID', 'ROWID'),),
    source_schema="Extract",
    source_table="Extract",
    target_schema="Extract",
    target_table="Extract"
):
    match_conditions_args = []
    for match_pair in matching_columns:
        match_conditions_args.append(
            {
                "op": "eq",
                "source-col": match_pair[0],
                "target-col": match_pair[1],
            }
        )
    if len(match_conditions_args) > 1:
        match_conditions_json = {"op": "and", "args": match_conditions_args}
    else:
        match_conditions_json = match_conditions_args[0]

    json_request = [
        # UPDATE action
        {
            "action": "update",
            "source-schema": source_schema,
            "source-table": source_table,
            "target-schema": target_schema,
            "target-table": target_table,
            "condition": match_conditions_json,
        },
    ]

    request_id = str(uuid.uuid4())
    return self.update_data(datasource_or_connection_item, request_id=request_id,
                            actions=json_request, payload=payload)

i.e. hide some of the internals and allow an update by just calling:
server.datasources.update_datasource(datasource_id, payload=path_to_database, matching_columns=[[keycol1, keycol1], [keycol2, keycol2]])

Collaborator (author):

> adding a convenience method to the Jobs endpoint to wait for job completion

Implemented a proposal in #903. Please provide feedback on the proposed wait_for_job in that other PR.

> create some convenience methods to pass the options as parameters

I would prefer not to do so. We have a documented and powerful "actions" language, i.e. the JSON structures. We shouldn't invent yet another "wrapper mini-language" here in this Python library.

Common cases (such as a shorthand notation for matching_columns which doesn't need explicit "op": "eq" elements) should rather be added on the server side. That way, all clients (also JavaScript clients, etc.) can benefit from it.

(Actually, there is a proposal for a matching_columns shorthand syntax. It's just that we never implemented it... @jonas-eckhardt, do you think we should re-prioritize that convenience feature on the server side?)
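
For reference, here is the raw actions language for a two-column match - i.e. the JSON that the wrapper sketched above would generate, with hypothetical column names key1 and key2:

    # Sketch: a two-column `update` written directly in the documented actions
    # language. `key1`/`key2` are hypothetical column names for illustration.
    actions = [
        {
            "action": "update",
            "source-schema": "Extract",
            "source-table": "Extract",
            "target-schema": "Extract",
            "target-table": "Extract",
            "condition": {
                "op": "and",
                "args": [
                    {"op": "eq", "source-col": "key1", "target-col": "key1"},
                    {"op": "eq", "source-col": "key2", "target-col": "key2"},
                ],
            },
        },
    ]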

        if isinstance(datasource_or_connection_item, DatasourceItem):
            datasource_id = datasource_or_connection_item.id
            url = "{0}/{1}/data".format(self.baseurl, datasource_id)
        elif isinstance(datasource_or_connection_item, ConnectionItem):
            datasource_id = datasource_or_connection_item.datasource_id
            connection_id = datasource_or_connection_item.id
            url = "{0}/{1}/connections/{2}/data".format(self.baseurl, datasource_id, connection_id)
        else:
            assert isinstance(datasource_or_connection_item, str)
            url = "{0}/{1}/data".format(self.baseurl, datasource_or_connection_item)

        if payload is not None:
            if not os.path.isfile(payload):
                error = "File path does not lead to an existing file."
                raise IOError(error)

            logger.info("Uploading {0} to server with chunking method for Update job".format(payload))
            upload_session_id = self.parent_srv.fileuploads.upload(payload)
            url = "{0}?uploadSessionId={1}".format(url, upload_session_id)

        json_request = json.dumps({"actions": actions})
        parameters = {"headers": {"requestid": request_id}}
        server_response = self.patch_request(url, json_request, "application/json", parameters=parameters)
        new_job = JobItem.from_response(server_response.content, self.parent_srv.namespace)[0]
        return new_job

    @api(version="2.0")
    def populate_permissions(self, item):
        self._permissions.populate(item)
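Taken together, the endpoint accepts three ways of addressing the data to update. A hedged usage sketch (datasource_item and connection_item are hypothetical variables holding a DatasourceItem and a ConnectionItem; wait_for_job refers to the helper proposed in #903 and may not exist in a given TSC version):

    import uuid

    request_id = str(uuid.uuid4())
    actions = []  # see the actions-language examples above

    # 1. By DatasourceItem: PATCH /datasources/{id}/data
    job = server.datasources.update_data(datasource_item, request_id=request_id, actions=actions)

    # 2. By ConnectionItem: PATCH /datasources/{id}/connections/{connection-id}/data
    job = server.datasources.update_data(connection_item, request_id=request_id, actions=actions)

    # 3. By raw LUID string, for convenience
    job = server.datasources.update_data('9dbd2263-16b5-46e1-9c43-a76bb8ab65fb',
                                         request_id=request_id, actions=actions)

    # The call returns an async JobItem; polling is left to the caller,
    # e.g. via the `wait_for_job` proposed in #903:
    # server.jobs.wait_for_job(job)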
17 changes: 15 additions & 2 deletions tableauserverclient/server/endpoint/endpoint.py
@@ -55,7 +55,9 @@ def _make_request(
    ):
        parameters = parameters or {}
        parameters.update(self.parent_srv.http_options)
        parameters["headers"] = Endpoint._make_common_headers(auth_token, content_type)
        if "headers" not in parameters:
            parameters["headers"] = {}
        parameters["headers"].update(Endpoint._make_common_headers(auth_token, content_type))

        if content is not None:
            parameters["data"] = content
@@ -118,13 +120,14 @@ def delete_request(self, url):
        # We don't return anything for a delete
        self._make_request(self.parent_srv.session.delete, url, auth_token=self.parent_srv.auth_token)

    def put_request(self, url, xml_request=None, content_type="text/xml"):
    def put_request(self, url, xml_request=None, content_type="text/xml", parameters=None):
        return self._make_request(
            self.parent_srv.session.put,
            url,
            content=xml_request,
            auth_token=self.parent_srv.auth_token,
            content_type=content_type,
            parameters=parameters,
        )

    def post_request(self, url, xml_request, content_type="text/xml", parameters=None):
@@ -137,6 +140,16 @@ def post_request(self, url, xml_request, content_type="text/xml", parameters=None):
            parameters=parameters,
        )

    def patch_request(self, url, xml_request, content_type="text/xml", parameters=None):
        return self._make_request(
            self.parent_srv.session.patch,
            url,
            content=xml_request,
            auth_token=self.parent_srv.auth_token,
            content_type=content_type,
            parameters=parameters,
        )


def api(version):
"""Annotate the minimum supported version for an endpoint.
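The _make_request change above is what lets update_data pass its requestid header through: previously, the common headers replaced whatever the caller supplied in parameters. A standalone sketch of the new merge semantics, with hypothetical values:

    # Caller-supplied headers (e.g. from patch_request's `parameters`):
    parameters = {"headers": {"requestid": "test_id"}}

    # Stand-in for Endpoint._make_common_headers(auth_token, content_type):
    common_headers = {"x-tableau-auth": "<token>", "content-type": "application/json"}

    if "headers" not in parameters:
        parameters["headers"] = {}
    parameters["headers"].update(common_headers)

    # Both the per-request id and the common headers survive the merge:
    assert parameters["headers"]["requestid"] == "test_id"
    assert parameters["headers"]["x-tableau-auth"] == "<token>"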
9 changes: 9 additions & 0 deletions test/assets/datasource_data_update.xml
@@ -0,0 +1,9 @@
<?xml version='1.0' encoding='UTF-8'?>
<tsResponse xmlns="http://tableau.com/api" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://tableau.com/api https://help.tableau.com/samples/en-us/rest_api/ts-api_3_14.xsd">
<job id="5c0ba560-c959-424e-b08a-f32ef0bfb737" mode="Asynchronous" type="UpdateUploadedFile" createdAt="2021-09-18T09:40:12Z">
<updateUploadedFileJob>
<datasource id="9dbd2263-16b5-46e1-9c43-a76bb8ab65fb" name="test datasource"/>
<connectionLuid>7ecaccd8-39b0-4875-a77d-094f6e930019</connectionLuid>
</updateUploadedFileJob>
</job>
</tsResponse>
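
The tests below exercise this asset through update_data; as a smaller illustration, parsing it directly into a JobItem might look like this (a sketch - the namespace dict mirrors what self.parent_srv.namespace provides, which is an implementation detail and an assumption here):

    import tableauserverclient as TSC

    with open('test/assets/datasource_data_update.xml', 'rb') as f:
        job = TSC.JobItem.from_response(f.read(), {'t': 'http://tableau.com/api'})[0]

    print(job.id)    # 5c0ba560-c959-424e-b08a-f32ef0bfb737
    print(job.type)  # UpdateUploadedFile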
82 changes: 82 additions & 0 deletions test/test_datasource.py
@@ -1,3 +1,4 @@
from tableauserverclient.server.endpoint.fileuploads_endpoint import Fileuploads
import unittest
from io import BytesIO
import os
@@ -22,6 +23,7 @@
PUBLISH_XML_ASYNC = 'datasource_publish_async.xml'
REFRESH_XML = 'datasource_refresh.xml'
UPDATE_XML = 'datasource_update.xml'
UPDATE_DATA_XML = 'datasource_data_update.xml'
UPDATE_CONNECTION_XML = 'datasource_connection_update.xml'


@@ -355,6 +357,86 @@ def test_refresh_object(self):
        # We only check the `id`; remaining fields are already tested in `test_refresh_id`
        self.assertEqual('7c3d599e-949f-44c3-94a1-f30ba85757e4', new_job.id)

    def test_update_data_datasource_object(self):
        """Calling `update_data` with a `DatasourceItem` should update that datasource"""
        self.server.version = "3.13"
        self.baseurl = self.server.datasources.baseurl

        datasource = TSC.DatasourceItem('')
        datasource._id = '9dbd2263-16b5-46e1-9c43-a76bb8ab65fb'
        response_xml = read_xml_asset(UPDATE_DATA_XML)
        with requests_mock.mock() as m:
            m.patch(self.baseurl + '/9dbd2263-16b5-46e1-9c43-a76bb8ab65fb/data',
                    status_code=202, headers={"requestid": "test_id"}, text=response_xml)
            new_job = self.server.datasources.update_data(datasource, request_id="test_id", actions=[])

        self.assertEqual('5c0ba560-c959-424e-b08a-f32ef0bfb737', new_job.id)
        self.assertEqual('UpdateUploadedFile', new_job.type)
        self.assertEqual(None, new_job.progress)
        self.assertEqual('2021-09-18T09:40:12Z', format_datetime(new_job.created_at))
        self.assertEqual(-1, new_job.finish_code)

    def test_update_data_connection_object(self):
        """Calling `update_data` with a `ConnectionItem` should update that connection"""
        self.server.version = "3.13"
        self.baseurl = self.server.datasources.baseurl

        connection = TSC.ConnectionItem()
        connection._datasource_id = '9dbd2263-16b5-46e1-9c43-a76bb8ab65fb'
        connection._id = '7ecaccd8-39b0-4875-a77d-094f6e930019'
        response_xml = read_xml_asset(UPDATE_DATA_XML)
        with requests_mock.mock() as m:
            m.patch(self.baseurl + '/9dbd2263-16b5-46e1-9c43-a76bb8ab65fb/connections/7ecaccd8-39b0-4875-a77d-094f6e930019/data',
                    status_code=202, headers={"requestid": "test_id"}, text=response_xml)
            new_job = self.server.datasources.update_data(connection, request_id="test_id", actions=[])

        # We only check the `id`; remaining fields are already tested in `test_update_data_datasource_object`
        self.assertEqual('5c0ba560-c959-424e-b08a-f32ef0bfb737', new_job.id)

    def test_update_data_datasource_string(self):
        """For convenience, calling `update_data` with a `str` should update the datasource with the corresponding LUID"""
        self.server.version = "3.13"
        self.baseurl = self.server.datasources.baseurl

        datasource_id = '9dbd2263-16b5-46e1-9c43-a76bb8ab65fb'
        response_xml = read_xml_asset(UPDATE_DATA_XML)
        with requests_mock.mock() as m:
            m.patch(self.baseurl + '/9dbd2263-16b5-46e1-9c43-a76bb8ab65fb/data',
                    status_code=202, headers={"requestid": "test_id"}, text=response_xml)
            new_job = self.server.datasources.update_data(datasource_id, request_id="test_id", actions=[])

        # We only check the `id`; remaining fields are already tested in `test_update_data_datasource_object`
        self.assertEqual('5c0ba560-c959-424e-b08a-f32ef0bfb737', new_job.id)

    def test_update_data_datasource_payload_file(self):
        """If `payload` is present, we upload it and associate the job with it"""
        self.server.version = "3.13"
        self.baseurl = self.server.datasources.baseurl

        datasource_id = '9dbd2263-16b5-46e1-9c43-a76bb8ab65fb'
        mock_upload_id = '10051:c3e56879876842d4b3600f20c1f79876-0:0'
        response_xml = read_xml_asset(UPDATE_DATA_XML)
        with requests_mock.mock() as rm, \
                unittest.mock.patch.object(Fileuploads, "upload", return_value=mock_upload_id):
            rm.patch(self.baseurl + '/9dbd2263-16b5-46e1-9c43-a76bb8ab65fb/data?uploadSessionId=' + mock_upload_id,
                     status_code=202, headers={"requestid": "test_id"}, text=response_xml)
            new_job = self.server.datasources.update_data(datasource_id, request_id="test_id",
                                                          actions=[], payload=asset('World Indicators.hyper'))

        # We only check the `id`; remaining fields are already tested in `test_update_data_datasource_object`
        self.assertEqual('5c0ba560-c959-424e-b08a-f32ef0bfb737', new_job.id)

    def test_update_data_datasource_invalid_payload_file(self):
        """If `payload` points to a non-existing file, we report an error"""
        self.server.version = "3.13"
        self.baseurl = self.server.datasources.baseurl
        datasource_id = '9dbd2263-16b5-46e1-9c43-a76bb8ab65fb'
        with self.assertRaises(IOError) as cm:
            self.server.datasources.update_data(datasource_id, request_id="test_id",
                                                actions=[], payload='no/such/file.missing')
        exception = cm.exception
        self.assertEqual(str(exception), "File path does not lead to an existing file.")

    def test_delete(self):
        with requests_mock.mock() as m:
            m.delete(self.baseurl + '/9dbd2263-16b5-46e1-9c43-a76bb8ab65fb', status_code=204)