diff --git a/tableauserverclient/filesys_helpers.py b/tableauserverclient/filesys_helpers.py
index 3d6417464..11051fdf4 100644
--- a/tableauserverclient/filesys_helpers.py
+++ b/tableauserverclient/filesys_helpers.py
@@ -30,13 +30,33 @@ def get_file_object_size(file):
     return file_size
 
 
-def file_is_compressed(file):
-    # Determine if file is a zip file or not
-    # This reference lists magic file signatures: https://www.garykessler.net/library/file_sigs.html
-
-    zip_file_signature = b'PK\x03\x04'
+def get_file_type(file):
+    # Tableau workbooks (twb) and data sources (tds) are both stored as xml files.
+    # Packaged workbooks (twbx) and data sources (tdsx) are zip files
+    # containing original files accompanied with supporting local files.
 
-    is_zip_file = file.read(len(zip_file_signature)) == zip_file_signature
+    # This reference lists magic file signatures: https://www.garykessler.net/library/file_sigs.html
+    MAGIC_BYTES = {
+        'zip': bytes.fromhex("504b0304"),
+        'tde': bytes.fromhex("20020162"),
+        'xml': bytes.fromhex("3c3f786d6c20"),
+        'hyper': bytes.fromhex("487970657208000001000000")
+    }
+
+    # Peek first bytes of a file
+    first_bytes = file.read(32)
+
+    file_type = None
+    for ft, signature in MAGIC_BYTES.items():
+        if first_bytes.startswith(signature):
+            file_type = ft
+            break
+
+    # Return pointer back to start
     file.seek(0)
 
-    return is_zip_file
+    if file_type is None:
+        error = "Unknown file type!"
+        raise ValueError(error)
+
+    return file_type
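For orientation, a minimal sketch of how the new magic-byte detection behaves on in-memory streams (illustration only, not part of the patch; the import path is the one introduced above):

from io import BytesIO
from zipfile import ZipFile

from tableauserverclient.filesys_helpers import get_file_type

# XML payloads are recognised by the "<?xml " signature (3c3f786d6c20)
with BytesIO(b'<?xml version="1.0"?><datasource/>') as stream:
    print(get_file_type(stream))   # -> 'xml'

# Zip archives (the container format of .tdsx/.twbx) start with "PK\x03\x04"
with BytesIO() as stream:
    with ZipFile(stream, 'w') as archive:
        archive.writestr('dummy.tds', '<?xml version="1.0"?>')
    stream.seek(0)
    print(get_file_type(stream))   # -> 'zip'

# Anything without a known signature raises ValueError
with BytesIO(bytes.fromhex('89504e470d0a1a0a')) as stream:   # PNG header
    try:
        get_file_type(stream)
    except ValueError as error:
        print(error)               # -> Unknown file type!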
diff --git a/tableauserverclient/server/endpoint/datasources_endpoint.py b/tableauserverclient/server/endpoint/datasources_endpoint.py
index 7916d7571..0d1e497b4 100644
--- a/tableauserverclient/server/endpoint/datasources_endpoint.py
+++ b/tableauserverclient/server/endpoint/datasources_endpoint.py
@@ -5,7 +5,7 @@ from .resource_tagger import _ResourceTagger
 from .. import RequestFactory, DatasourceItem, PaginationItem, ConnectionItem
 from ..query import QuerySet
-from ...filesys_helpers import to_filename, make_download_path
+from ...filesys_helpers import to_filename, make_download_path, get_file_type, get_file_object_size
 from ...models.job_item import JobItem
 
 import os
 
@@ -173,22 +173,45 @@ def delete_extract(self, datasource_item):
     @api(version="2.0")
     @parameter_added_in(connections="2.8")
     @parameter_added_in(as_job='3.0')
-    def publish(self, datasource_item, file_path, mode, connection_credentials=None, connections=None, as_job=False):
-        if not os.path.isfile(file_path):
-            error = "File path does not lead to an existing file."
-            raise IOError(error)
-        if not mode or not hasattr(self.parent_srv.PublishMode, mode):
-            error = 'Invalid mode defined.'
-            raise ValueError(error)
+    def publish(self, datasource_item, file, mode, connection_credentials=None, connections=None, as_job=False):
+
+        try:
+
+            if not os.path.isfile(file):
+                error = "File path does not lead to an existing file."
+                raise IOError(error)
+
+            filename = os.path.basename(file)
+            file_extension = os.path.splitext(filename)[1][1:]
+            file_size = os.path.getsize(file)
+
+            # If name is not defined, grab the name from the file to publish
+            if not datasource_item.name:
+                datasource_item.name = os.path.splitext(filename)[0]
+            if file_extension not in ALLOWED_FILE_EXTENSIONS:
+                error = "Only {} files can be published as datasources.".format(', '.join(ALLOWED_FILE_EXTENSIONS))
+                raise ValueError(error)
 
-        filename = os.path.basename(file_path)
-        file_extension = os.path.splitext(filename)[1][1:]
+        except TypeError:
 
-        # If name is not defined, grab the name from the file to publish
-        if not datasource_item.name:
-            datasource_item.name = os.path.splitext(filename)[0]
-        if file_extension not in ALLOWED_FILE_EXTENSIONS:
-            error = "Only {} files can be published as datasources.".format(', '.join(ALLOWED_FILE_EXTENSIONS))
+            if not datasource_item.name:
+                error = "Datasource item must have a name when passing a file object"
+                raise ValueError(error)
+
+            file_type = get_file_type(file)
+            if file_type == 'zip':
+                file_extension = 'tdsx'
+            elif file_type == 'xml':
+                file_extension = 'tds'
+            else:
+                error = "Unsupported file type {}".format(file_type)
+                raise ValueError(error)
+
+            filename = "{}.{}".format(datasource_item.name, file_extension)
+            file_size = get_file_object_size(file)
+
+        if not mode or not hasattr(self.parent_srv.PublishMode, mode):
+            error = 'Invalid mode defined.'
             raise ValueError(error)
 
         # Construct the url with the defined mode
@@ -200,17 +223,22 @@ def publish(self, datasource_item, file_path, mode, connection_credentials=None,
             url += '&{0}=true'.format('asJob')
 
         # Determine if chunking is required (64MB is the limit for single upload method)
-        if os.path.getsize(file_path) >= FILESIZE_LIMIT:
+        if file_size >= FILESIZE_LIMIT:
             logger.info('Publishing {0} to server with chunking method (datasource over 64MB)'.format(filename))
-            upload_session_id = Fileuploads.upload_chunks(self.parent_srv, file_path)
+            upload_session_id = Fileuploads.upload_chunks(self.parent_srv, file)
             url = "{0}&uploadSessionId={1}".format(url, upload_session_id)
             xml_request, content_type = RequestFactory.Datasource.publish_req_chunked(datasource_item,
                                                                                       connection_credentials,
                                                                                       connections)
         else:
             logger.info('Publishing {0} to server'.format(filename))
-            with open(file_path, 'rb') as f:
-                file_contents = f.read()
+
+            try:
+                with open(file, 'rb') as f:
+                    file_contents = f.read()
+            except TypeError:
+                file_contents = file.read()
+
             xml_request, content_type = RequestFactory.Datasource.publish_req(datasource_item,
                                                                               filename,
                                                                               file_contents,
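With this change, publishing a datasource no longer requires a path on disk. A hedged usage sketch (server address, credentials, project id, and file name are placeholders, not values from this patch):

import tableauserverclient as TSC

server = TSC.Server('https://tableau.example.com')
tableau_auth = TSC.TableauAuth('username', 'password', site_id='site')

with server.auth.sign_in(tableau_auth):
    # When a file object is passed, the item must already carry a name;
    # there is no file name on disk to fall back on.
    datasource = TSC.DatasourceItem(project_id='project-id', name='World Indicators')

    with open('World Indicators.tdsx', 'rb') as file_object:
        datasource = server.datasources.publish(datasource, file_object,
                                                mode=server.PublishMode.CreateNew)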
diff --git a/tableauserverclient/server/endpoint/workbooks_endpoint.py b/tableauserverclient/server/endpoint/workbooks_endpoint.py
index c59ae9cce..62f94f99a 100644
--- a/tableauserverclient/server/endpoint/workbooks_endpoint.py
+++ b/tableauserverclient/server/endpoint/workbooks_endpoint.py
@@ -5,7 +5,7 @@ from .resource_tagger import _ResourceTagger
 from .. import RequestFactory, WorkbookItem, ConnectionItem, ViewItem, PaginationItem
 from ...models.job_item import JobItem
-from ...filesys_helpers import to_filename, make_download_path, file_is_compressed, get_file_object_size
+from ...filesys_helpers import to_filename, make_download_path, get_file_type, get_file_object_size
 
 import os
 import logging
@@ -284,7 +284,16 @@ def publish(
         except TypeError:
             # Expect file to be a file object
             file_size = get_file_object_size(file)
-            file_extension = 'twbx' if file_is_compressed(file) else 'twb'
+
+            file_type = get_file_type(file)
+
+            if file_type == 'zip':
+                file_extension = 'twbx'
+            elif file_type == 'xml':
+                file_extension = 'twb'
+            else:
+                error = 'Unsupported file type {}!'.format(file_type)
+                raise ValueError(error)
 
             if not workbook_item.name:
                 error = "Workbook item must have a name when passing a file object"
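The workbook path mirrors the datasource sketch above. Assuming the same signed-in `server` (again, names and ids are placeholders):

# The extension (.twbx vs .twb) is now inferred from the stream's magic bytes
# instead of the old file_is_compressed() zip-only check.
workbook = TSC.WorkbookItem(project_id='project-id', name='Regional Sales')

with open('Regional Sales.twbx', 'rb') as file_object:
    workbook = server.workbooks.publish(workbook, file_object,
                                        mode=server.PublishMode.Overwrite)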
diff --git a/test/assets/World Indicators.hyper b/test/assets/World Indicators.hyper
new file mode 100644
index 000000000..b6b3b543a
Binary files /dev/null and b/test/assets/World Indicators.hyper differ
diff --git a/test/assets/World Indicators.tds b/test/assets/World Indicators.tds
new file mode 100644
index 000000000..958127103
--- /dev/null
+++ b/test/assets/World Indicators.tds
@@ -0,0 +1,406 @@
[406 added lines: the World Indicators.tds test asset, an XML datasource definition (extract connection, column metadata and aliases, field folders, layout, and object-model feature flags). The XML markup was stripped during extraction, so the asset body is not reproduced here.]
diff --git a/test/assets/World Indicators.tdsx b/test/assets/World Indicators.tdsx
new file mode 100644
index 000000000..6e041442b
Binary files /dev/null and b/test/assets/World Indicators.tdsx differ
diff --git a/test/test_datasource.py b/test/test_datasource.py
index 7d3ca0d61..1156069be 100644
--- a/test/test_datasource.py
+++ b/test/test_datasource.py
@@ -1,7 +1,10 @@
 import unittest
+from io import BytesIO
 import os
 import requests_mock
 import xml.etree.ElementTree as ET
+from zipfile import ZipFile
+
 import tableauserverclient as TSC
 from tableauserverclient.datetime_helpers import format_datetime
 from tableauserverclient.server.endpoint.exceptions import InternalServerError
@@ -240,6 +243,56 @@ def test_publish(self):
         self.assertEqual('default', new_datasource.project_name)
         self.assertEqual('5de011f8-5aa9-4d5b-b991-f462c8dd6bb7', new_datasource.owner_id)
 
+    def test_publish_a_non_packaged_file_object(self):
+        response_xml = read_xml_asset(PUBLISH_XML)
+        with requests_mock.mock() as m:
+            m.post(self.baseurl, text=response_xml)
+            new_datasource = TSC.DatasourceItem('SampleDS', 'ee8c6e70-43b6-11e6-af4f-f7b0d8e20760')
+            publish_mode = self.server.PublishMode.CreateNew
+
+            with open(asset('SampleDS.tds'), 'rb') as file_object:
+                new_datasource = self.server.datasources.publish(new_datasource,
+                                                                 file_object,
+                                                                 mode=publish_mode)
+
+        self.assertEqual('e76a1461-3b1d-4588-bf1b-17551a879ad9', new_datasource.id)
+        self.assertEqual('SampleDS', new_datasource.name)
+        self.assertEqual('SampleDS', new_datasource.content_url)
+        self.assertEqual('dataengine', new_datasource.datasource_type)
+        self.assertEqual('2016-08-11T21:22:40Z', format_datetime(new_datasource.created_at))
+        self.assertEqual('2016-08-17T23:37:08Z', format_datetime(new_datasource.updated_at))
+        self.assertEqual('ee8c6e70-43b6-11e6-af4f-f7b0d8e20760', new_datasource.project_id)
+        self.assertEqual('default', new_datasource.project_name)
+        self.assertEqual('5de011f8-5aa9-4d5b-b991-f462c8dd6bb7', new_datasource.owner_id)
+
+    def test_publish_a_packaged_file_object(self):
+        response_xml = read_xml_asset(PUBLISH_XML)
+        with requests_mock.mock() as m:
+            m.post(self.baseurl, text=response_xml)
+            new_datasource = TSC.DatasourceItem('SampleDS', 'ee8c6e70-43b6-11e6-af4f-f7b0d8e20760')
+            publish_mode = self.server.PublishMode.CreateNew
+
+            # Create a dummy tdsx file in memory
+            with BytesIO() as zip_archive:
+                with ZipFile(zip_archive, 'w') as zf:
+                    zf.write(asset('SampleDS.tds'))
+
+                zip_archive.seek(0)
+
+                new_datasource = self.server.datasources.publish(new_datasource,
+                                                                 zip_archive,
+                                                                 mode=publish_mode)
+
+        self.assertEqual('e76a1461-3b1d-4588-bf1b-17551a879ad9', new_datasource.id)
+        self.assertEqual('SampleDS', new_datasource.name)
+        self.assertEqual('SampleDS', new_datasource.content_url)
+        self.assertEqual('dataengine', new_datasource.datasource_type)
+        self.assertEqual('2016-08-11T21:22:40Z', format_datetime(new_datasource.created_at))
+        self.assertEqual('2016-08-17T23:37:08Z', format_datetime(new_datasource.updated_at))
+        self.assertEqual('ee8c6e70-43b6-11e6-af4f-f7b0d8e20760', new_datasource.project_id)
+        self.assertEqual('default', new_datasource.project_name)
+        self.assertEqual('5de011f8-5aa9-4d5b-b991-f462c8dd6bb7', new_datasource.owner_id)
+
     def test_publish_async(self):
         self.server.version = "3.0"
         baseurl = self.server.datasources.baseurl
@@ -260,6 +313,15 @@ def test_publish_async(self):
         self.assertEqual('2018-06-30T00:54:54Z', format_datetime(new_job.created_at))
         self.assertEqual('1', new_job.finish_code)
 
+    def test_publish_unnamed_file_object(self):
+        new_datasource = TSC.DatasourceItem('test')
+        publish_mode = self.server.PublishMode.CreateNew
+
+        with open(asset('SampleDS.tds'), 'rb') as file_object:
+            self.assertRaises(ValueError, self.server.datasources.publish,
+                              new_datasource, file_object, publish_mode
+                              )
+
     def test_refresh_id(self):
         self.server.version = '2.8'
         self.baseurl = self.server.datasources.baseurl
@@ -336,6 +398,29 @@ def test_publish_invalid_file_type(self):
         self.assertRaises(ValueError, self.server.datasources.publish,
                           new_datasource, asset('SampleWB.twbx'), self.server.PublishMode.Append)
 
+    def test_publish_hyper_file_object_raises_exception(self):
+        new_datasource = TSC.DatasourceItem('test', 'ee8c6e70-43b6-11e6-af4f-f7b0d8e20760')
+        with open(asset('World Indicators.hyper'), 'rb') as file_object:
+            self.assertRaises(ValueError, self.server.datasources.publish, new_datasource,
+                              file_object, self.server.PublishMode.Append)
+
+    def test_publish_tde_file_object_raises_exception(self):
+
+        new_datasource = TSC.DatasourceItem('test', 'ee8c6e70-43b6-11e6-af4f-f7b0d8e20760')
+        tds_asset = asset(os.path.join('Data', 'Tableau Samples', 'World Indicators.tde'))
+        with open(tds_asset, 'rb') as file_object:
+            self.assertRaises(ValueError, self.server.datasources.publish, new_datasource,
+                              file_object, self.server.PublishMode.Append)
+
+    def test_publish_file_object_of_unknown_type_raises_exception(self):
+        new_datasource = TSC.DatasourceItem('test', 'ee8c6e70-43b6-11e6-af4f-f7b0d8e20760')
+
+        with BytesIO() as file_object:
+            file_object.write(bytes.fromhex('89504E470D0A1A0A'))
+            file_object.seek(0)
+            self.assertRaises(ValueError, self.server.datasources.publish, new_datasource,
+                              file_object, self.server.PublishMode.Append)
+
     def test_publish_multi_connection(self):
         new_datasource = TSC.DatasourceItem(name='Sample', project_id='ee8c6e70-43b6-11e6-af4f-f7b0d8e20760')
         connection1 = TSC.ConnectionItem()
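The packaged-file-object test above doubles as a recipe for callers: a datasource can be assembled and published without ever touching disk. A hedged sketch of that pattern outside the test harness (the local .tds path and project id are placeholders, and the signed-in `server` is carried over from the earlier sketch):

from io import BytesIO
from zipfile import ZipFile

datasource = TSC.DatasourceItem(project_id='project-id', name='World Indicators')

with BytesIO() as zip_archive:
    # Package a local .tds into an in-memory .tdsx-style zip
    with ZipFile(zip_archive, 'w') as archive:
        archive.write('World Indicators.tds')
    zip_archive.seek(0)   # rewind so the magic bytes are read from the start
    datasource = server.datasources.publish(datasource, zip_archive,
                                            mode=server.PublishMode.CreateNew)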
diff --git a/test/test_filesys_helpers.py b/test/test_filesys_helpers.py
new file mode 100644
index 000000000..82fce8476
--- /dev/null
+++ b/test/test_filesys_helpers.py
@@ -0,0 +1,107 @@
+import unittest
+from io import BytesIO
+import os
+from xml.etree import ElementTree as ET
+from zipfile import ZipFile
+
+from tableauserverclient.filesys_helpers import get_file_object_size, get_file_type
+from ._utils import asset, TEST_ASSET_DIR
+
+
+class FilesysTests(unittest.TestCase):
+
+    def test_get_file_size_returns_correct_size(self):
+
+        target_size = 1000  # bytes
+
+        with BytesIO() as f:
+            f.seek(target_size - 1)
+            f.write(b"\0")
+            file_size = get_file_object_size(f)
+
+        self.assertEqual(file_size, target_size)
+
+    def test_get_file_size_returns_zero_for_empty_file(self):
+
+        with BytesIO() as f:
+            file_size = get_file_object_size(f)
+
+        self.assertEqual(file_size, 0)
+
+    def test_get_file_size_coincides_with_built_in_method(self):
+
+        asset_path = asset('SampleWB.twbx')
+        target_size = os.path.getsize(asset_path)
+        with open(asset_path, 'rb') as f:
+            file_size = get_file_object_size(f)
+
+        self.assertEqual(file_size, target_size)
+
+    def test_get_file_type_identifies_a_zip_file(self):
+
+        with BytesIO() as file_object:
+            with ZipFile(file_object, 'w') as zf:
+                with BytesIO() as stream:
+                    stream.write('This is a zip file'.encode())
+                    zf.writestr('dummy_file', stream.getbuffer())
+            file_object.seek(0)
+            file_type = get_file_type(file_object)
+
+        self.assertEqual(file_type, 'zip')
+
+    def test_get_file_type_identifies_tdsx_as_zip_file(self):
+        with open(asset('World Indicators.tdsx'), 'rb') as file_object:
+            file_type = get_file_type(file_object)
+        self.assertEqual(file_type, 'zip')
+
+    def test_get_file_type_identifies_twbx_as_zip_file(self):
+        with open(asset('SampleWB.twbx'), 'rb') as file_object:
+            file_type = get_file_type(file_object)
+        self.assertEqual(file_type, 'zip')
+
+    def test_get_file_type_identifies_xml_file(self):
+
+        root = ET.Element('root')
+        child = ET.SubElement(root, 'child')
+        child.text = "This is a child element"
+        etree = ET.ElementTree(root)
+
+        with BytesIO() as file_object:
+            etree.write(file_object, encoding='utf-8',
+                        xml_declaration=True)
+
+            file_object.seek(0)
+            file_type = get_file_type(file_object)
+
+        self.assertEqual(file_type, 'xml')
+
+    def test_get_file_type_identifies_tds_as_xml_file(self):
+        with open(asset('World Indicators.tds'), 'rb') as file_object:
+            file_type = get_file_type(file_object)
+        self.assertEqual(file_type, 'xml')
+
+    def test_get_file_type_identifies_twb_as_xml_file(self):
+        with open(asset('RESTAPISample.twb'), 'rb') as file_object:
+            file_type = get_file_type(file_object)
+        self.assertEqual(file_type, 'xml')
+
+    def test_get_file_type_identifies_hyper_file(self):
+        with open(asset('World Indicators.hyper'), 'rb') as file_object:
+            file_type = get_file_type(file_object)
+        self.assertEqual(file_type, 'hyper')
+
+    def test_get_file_type_identifies_tde_file(self):
+        asset_path = os.path.join(TEST_ASSET_DIR, 'Data', 'Tableau Samples', 'World Indicators.tde')
+        with open(asset_path, 'rb') as file_object:
+            file_type = get_file_type(file_object)
+        self.assertEqual(file_type, 'tde')
+
+    def test_get_file_type_handles_unknown_file_type(self):
+
+        # Create a dummy png file
+        with BytesIO() as file_object:
+            png_signature = bytes.fromhex("89504E470D0A1A0A")
+            file_object.write(png_signature)
+            file_object.seek(0)
+
+            self.assertRaises(ValueError, get_file_type, file_object)
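The size tests above exercise the pre-existing get_file_object_size helper, whose body is only partially visible in the first hunk of this patch. The behaviour they assert (0 for an empty stream, agreement with os.path.getsize) is consistent with a seek-to-end/tell implementation along these lines (a sketch, not the actual source):

import os

def get_file_object_size(file):
    # Measure by seeking to the end, then restore the pointer for the caller
    file.seek(0, os.SEEK_END)
    file_size = file.tell()
    file.seek(0)
    return file_size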
diff --git a/test/test_workbook.py b/test/test_workbook.py
index 2613a56d6..f14e4d96f 100644
--- a/test/test_workbook.py
+++ b/test/test_workbook.py
@@ -1,4 +1,5 @@
 import unittest
+from io import BytesIO
 import os
 import re
 import requests_mock
@@ -583,12 +584,20 @@ def test_publish_invalid_file_type(self):
     def test_publish_unnamed_file_object(self):
         new_workbook = TSC.WorkbookItem('test')
 
-        with open(os.path.join(TEST_ASSET_DIR, 'SampleDS.tds')) as f:
+        with open(os.path.join(TEST_ASSET_DIR, 'SampleWB.twbx'), 'rb') as f:
 
             self.assertRaises(ValueError, self.server.workbooks.publish, new_workbook, f,
                               self.server.PublishMode.CreateNew
                               )
 
+    def test_publish_file_object_of_unknown_type_raises_exception(self):
+        new_workbook = TSC.WorkbookItem('test')
+        with BytesIO() as file_object:
+            file_object.write(bytes.fromhex('89504E470D0A1A0A'))
+            file_object.seek(0)
+            self.assertRaises(ValueError, self.server.workbooks.publish, new_workbook,
+                              file_object, self.server.PublishMode.CreateNew)
+
     def test_publish_multi_connection(self):
         new_workbook = TSC.WorkbookItem(name='Sample', show_tabs=False,
                                         project_id='ee8c6e70-43b6-11e6-af4f-f7b0d8e20760')