Skip to content

Commit c0c09d4

Browse files
author
Jesse Whitehouse
committed
Restrict local file operations to descendents of uploads_base_path
Fix lifecycle e2e test so it honours these requirements Signed-off-by: Jesse Whitehouse <[email protected]>
1 parent 36885a4 commit c0c09d4

File tree

2 files changed

+44
-12
lines changed

2 files changed

+44
-12
lines changed

src/databricks/sql/client.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pyarrow
55
import requests
66
import json
7+
import os
78

89
from databricks.sql import __version__
910
from databricks.sql import *
@@ -301,15 +302,23 @@ def _check_not_closed(self):
301302

302303
def _handle_staging_operation(self, uploads_base_path: str):
303304
"""Fetch the HTTP request instruction from a staging ingestion command
304-
and call the designated handler."""
305+
and call the designated handler.
306+
307+
Raise an exception if localFile is specified by the server but the localFile
308+
is not descended from uploads_base_path.
309+
"""
305310

306311
if uploads_base_path is None:
307312
raise Error(
308313
"You must provide an uploads_base_path when initialising a connection to perform ingestion commands"
309314
)
310-
315+
311316
row = self.active_result_set.fetchone()
312317

318+
if getattr(row, "localFile", None):
319+
if os.path.commonpath([row.localFile, uploads_base_path]) != uploads_base_path:
320+
raise Error("Local file operations are restricted to paths within the configured uploads_base_path")
321+
313322
# TODO: Experiment with DBR sending real headers.
314323
# The specification says headers will be in JSON format but the current null value is actually an empty list []
315324
handler_args = {

tests/e2e/driver_tests.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -667,29 +667,31 @@ def test_staging_ingestion_life_cycle(self):
667667
query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE"
668668
cursor.execute(query)
669669

670-
# GET should succeed
670+
# GET should succeed
671671

672-
new_fh, new_temp_path = tempfile.mkstemp()
672+
new_fh, new_temp_path = tempfile.mkstemp()
673673

674+
with self.connection(extra_params={"uploads_base_path": new_temp_path}) as conn:
674675
cursor = conn.cursor()
675676
query = f"GET 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' TO '{new_temp_path}'"
676677
cursor.execute(query)
677678

678-
with open(new_fh, "rb") as fp:
679-
fetched_text = fp.read()
679+
with open(new_fh, "rb") as fp:
680+
fetched_text = fp.read()
680681

681-
assert fetched_text == original_text
682+
assert fetched_text == original_text
682683

683-
# REMOVE should succeed
684+
# REMOVE should succeed
684685

685-
remove_query = (
686-
f"REMOVE 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv'"
687-
)
686+
remove_query = (
687+
f"REMOVE 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv'"
688+
)
688689

690+
with self.connection(extra_params={"uploads_base_path": "/"}) as conn:
689691
cursor = conn.cursor()
690692
cursor.execute(remove_query)
691693

692-
# GET after REMOVE should fail
694+
# GET after REMOVE should fail
693695

694696
with pytest.raises(Error):
695697
cursor = conn.cursor()
@@ -718,6 +720,27 @@ def test_staging_ingestion_put_fails_without_uploadsbasepath(self):
718720
query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE"
719721
cursor.execute(query)
720722

723+
def test_staging_ingestion_put_fails_if_localFile_not_in_uploads_base_path(self):
724+
725+
726+
fh, temp_path = tempfile.mkstemp()
727+
728+
original_text = "hello world!".encode("utf-8")
729+
730+
with open(fh, "wb") as fp:
731+
fp.write(original_text)
732+
733+
base_path, filename = os.path.split(temp_path)
734+
735+
# Add junk to base_path
736+
base_path = os.path.join(base_path, "temp")
737+
738+
with pytest.raises(Error):
739+
with self.connection(extra_params={"uploads_base_path": base_path}) as conn:
740+
cursor = conn.cursor()
741+
query = f"PUT '{temp_path}' INTO 'stage://tmp/{self.staging_ingestion_user}/tmp/11/15/file1.csv' OVERWRITE"
742+
cursor.execute(query)
743+
721744

722745
def main(cli_args):
723746
global get_args_from_env

0 commit comments

Comments
 (0)