From 1128752b28835be04b4031590e63673316ce2317 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 27 Sep 2022 14:39:07 -0500 Subject: [PATCH 01/10] Copied in examples from public documentation Signed-off-by: Jesse Whitehouse --- examples/insert_data.py | 21 +++++++++++++++++++++ examples/query_execute.py | 13 +++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 examples/insert_data.py create mode 100644 examples/query_execute.py diff --git a/examples/insert_data.py b/examples/insert_data.py new file mode 100644 index 000000000..511986aa5 --- /dev/null +++ b/examples/insert_data.py @@ -0,0 +1,21 @@ +from databricks import sql +import os + +with sql.connect(server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME"), + http_path = os.getenv("DATABRICKS_HTTP_PATH"), + access_token = os.getenv("DATABRICKS_TOKEN")) as connection: + + with connection.cursor() as cursor: + cursor.execute("CREATE TABLE IF NOT EXISTS squares (x int, x_squared int)") + + squares = [(i, i * i) for i in range(100)] + values = ",".join([f"({x}, {y})" for (x, y) in squares]) + + cursor.execute(f"INSERT INTO squares VALUES {values}") + + cursor.execute("SELECT * FROM squares LIMIT 10") + + result = cursor.fetchall() + + for row in result: + print(row) \ No newline at end of file diff --git a/examples/query_execute.py b/examples/query_execute.py new file mode 100644 index 000000000..ec79fd0e6 --- /dev/null +++ b/examples/query_execute.py @@ -0,0 +1,13 @@ +from databricks import sql +import os + +with sql.connect(server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME"), + http_path = os.getenv("DATABRICKS_HTTP_PATH"), + access_token = os.getenv("DATABRICKS_TOKEN")) as connection: + + with connection.cursor() as cursor: + cursor.execute("SELECT * FROM default.diamonds LIMIT 2") + result = cursor.fetchall() + + for row in result: + print(row) \ No newline at end of file From 624157e726c06b04d41351536964ffdc3b498c7b Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 27 Sep 
2022 14:39:34 -0500 Subject: [PATCH 02/10] Adapted the unit test tests/e2e/driver_tests::PySQLCoreTestSuite::test_cancel_during_execute into an example script. Signed-off-by: Jesse Whitehouse --- examples/query_cancel.py | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 examples/query_cancel.py diff --git a/examples/query_cancel.py b/examples/query_cancel.py new file mode 100644 index 000000000..041d42f19 --- /dev/null +++ b/examples/query_cancel.py @@ -0,0 +1,49 @@ +from databricks import sql +import os, threading, time + +# The current operation of a cursor may be cancelled by calling its `.cancel()` method as shown in the example below. + +with sql.connect(server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME"), + http_path = os.getenv("DATABRICKS_HTTP_PATH"), + access_token = os.getenv("DATABRICKS_TOKEN")) as connection: + + with connection.cursor() as cursor: + def execute_really_long_query(): + try: + cursor.execute("SELECT SUM(A.id - B.id) " + + "FROM range(1000000000) A CROSS JOIN range(100000000) B " + + "GROUP BY (A.id - B.id)") + except sql.exc.RequestError: + print("It looks like this query was cancelled.") + + exec_thread = threading.Thread(target=execute_really_long_query) + + print("\n Beginning to execute long query") + exec_thread.start() + + # Make sure the query has started before cancelling + print("\n Waiting 15 seconds before canceling", end="", flush=True) + + seconds_waited = 0 + while seconds_waited < 15: + seconds_waited += 1 + print(".", end="", flush=True) + time.sleep(1) + + print("\n Cancelling the cursor's operation. 
This can take a few seconds.") + cursor.cancel() + + print("\n Now checking the cursor status:") + exec_thread.join(5) + + assert not exec_thread.is_alive() + print("\n The previous command was successfully canceled") + + print("\n Now reusing the cursor to run a separate query.") + + # We can still execute a new command on the cursor + cursor.execute("SELECT * FROM range(3)") + + print("\n Execution was successful. Results appear below:") + + print(cursor.fetchall()) From 0a747f553bec4c9280751efd8ddbbb60a873509e Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 30 Sep 2022 08:26:15 -0500 Subject: [PATCH 03/10] Add oauth examples Signed-off-by: Jesse Whitehouse --- examples/interactive_oauth.py | 41 +++++++++++++++++++++ examples/persistent_oauth.py | 69 +++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 examples/interactive_oauth.py create mode 100644 examples/persistent_oauth.py diff --git a/examples/interactive_oauth.py b/examples/interactive_oauth.py new file mode 100644 index 000000000..1969732a2 --- /dev/null +++ b/examples/interactive_oauth.py @@ -0,0 +1,41 @@ +from databricks import sql +import os + +"""Bring Your Own Identity Provider with fine-grained OAuth scopes is currently in public preview on +Databricks in AWS. databricks-sql-connector supports user-to-machine OAuth login which means the +end user has to be present to log in using a browser which will be popped up by the Python process. You +must enable OAuth in your Databricks account to run this example. More information on how to enable +OAuth in your Databricks Account in AWS can be found here: + +https://docs.databricks.com/administration-guide/account-settings-e2/single-sign-on.html + +Pre-requisites: +- You have a Databricks account in AWS. +- You have configured OAuth in Databricks account in AWS using the link above. 
+- You have installed a browser (Chrome, Firefox, Safari, Internet Explorer, etc) that will be + accessible on the machine for performing OAuth login. + +This code does not persist the auth token. Hence after the Python process terminates the +end user will have to login again. See examples/persistent_oauth.py to learn about persisting the +token across script executions. + +Bring Your Own Identity Provider is in public preview. You can monitor these two links to find out +when it will become generally available: + + 1. https://docs.databricks.com/administration-guide/account-settings-e2/single-sign-on.html + 2. https://docs.databricks.com/dev-tools/python-sql-connector.html +""" + +with sql.connect(server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME"), + http_path = os.getenv("DATABRICKS_HTTP_PATH"), + auth_type="databricks-oauth") as connection: + + for x in range(1, 100): + cursor = connection.cursor() + cursor.execute('SELECT 1+1') + result = cursor.fetchall() + for row in result: + print(row) + cursor.close() + + connection.close() diff --git a/examples/persistent_oauth.py b/examples/persistent_oauth.py new file mode 100644 index 000000000..0aabcaf17 --- /dev/null +++ b/examples/persistent_oauth.py @@ -0,0 +1,69 @@ +"""Bring Your Own Identity Provider with fine-grained OAuth scopes is currently in public preview on +Databricks in AWS. databricks-sql-connector supports user-to-machine OAuth login which means the +end user has to be present to log in using a browser which will be popped up by the Python process. You +must enable OAuth in your Databricks account to run this example. More information on how to enable +OAuth in your Databricks Account in AWS can be found here: + +https://docs.databricks.com/administration-guide/account-settings-e2/single-sign-on.html + +Pre-requisites: +- You have a Databricks account in AWS. +- You have configured OAuth in Databricks account in AWS using the link above. 
+- You have installed a browser (Chrome, Firefox, Safari, Internet Explorer, etc) that will be + accessible on the machine for performing OAuth login. + +For security, databricks-sql-connector does not persist OAuth tokens automatically. Hence, after +the Python process terminates the end user will have to log-in again. We provide APIs to be +implemented by the end user for persisting the OAuth token. The SampleOAuthPersistence reference +shows which methods you may implement. + +For this example, the DevOnlyFilePersistence class is provided. Do not use this in production. + +Bring Your Own Identity Provider is in public preview. You can monitor these two links to find out +when it will become generally available: + + 1. https://docs.databricks.com/administration-guide/account-settings-e2/single-sign-on.html + 2. https://docs.databricks.com/dev-tools/python-sql-connector.html +""" + +import os +from typing import Optional + +from databricks import sql +from databricks.sql.experimental.oauth_persistence import OAuthPersistence, OAuthToken, DevOnlyFilePersistence + + +class SampleOAuthPersistence(OAuthPersistence): + def persist(self, hostname: str, oauth_token: OAuthToken): + """To be implemented by the end user to persist in the preferred storage medium. + + OAuthToken has two properties: + 1. OAuthToken.access_token + 2. OAuthToken.refresh_token + + Both should be persisted. + """ + pass + + def read(self, hostname: str) -> Optional[OAuthToken]: + """To be implemented by the end user to fetch token from the preferred storage + + Fetch the access_token and refresh_token for the given hostname. 
+ Return OAuthToken(access_token, refresh_token) + """ + pass + +with sql.connect(server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME"), + http_path = os.getenv("DATABRICKS_HTTP_PATH"), + auth_type="databricks-oauth", + experimental_oauth_persistence=DevOnlyFilePersistence("./sample.json")) as connection: + + for x in range(1, 100): + cursor = connection.cursor() + cursor.execute('SELECT 1+1') + result = cursor.fetchall() + for row in result: + print(row) + cursor.close() + + connection.close() From 92345083fd92290f7486512407be0814a2c9cf56 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 30 Sep 2022 09:16:30 -0500 Subject: [PATCH 04/10] Add README for examples. Signed-off-by: Jesse Whitehouse --- examples/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 examples/README.md diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 000000000..1a9231f77 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,15 @@ +# `databricks-sql-connector` Example Usage + +We provide example scripts so you can see the connector in action for basic usage. You need a Databricks account to run them. The scripts expect to find your Databricks account credentials in these environment variables: + + - DATABRICKS_SERVER_HOSTNAME + - DATABRICKS_HTTP_PATH + - DATABRICKS_TOKEN + +# Table of Contents + +- **`query_execute.py`** connects to the `samples` database of your default catalog, runs a small query, and prints the result to screen. +- **`insert_data.py`** adds a tables called `squares` to your default catalog and inserts one hundred rows of example data. Then it fetches this data and prints it to the screen. +- **`query_cancel.py`** shows how to cancel a query assuming that you can access the `Cursor` executing that query from a different thread. This is necessary because `databricks-sql-connector` does not yet implement an asynchronous API; calling `.execute()` blocks the current thread until execution completes. 
Therefore, the connector can't cancel queries from the same thread where they began. +- **`interactive_oauth.py`** shows the simplest example of authenticating by OAuth (no need for a PAT generated in the DBSQL UI). When you run the script it will open a browser window so you can authenticate. Afterward, the script fetches some sample data from Databricks and prints it to the screen. For this script, the OAuth token is not persisted which means you need to authenticate every time you run the script. +- **`persistent_oauth.py`** shows a more advanced example of authenticating by OAuth. In this case, it shows how to use a sublcass of `OAuthPersistence` to reuse an OAuth token across script executions. \ No newline at end of file From f90b7fb9f754c730de17c5aa1b5c41bdd600a23d Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 30 Sep 2022 13:45:05 -0500 Subject: [PATCH 05/10] Add user_agent set example Signed-off-by: Jesse Whitehouse --- examples/set_user_agent.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 examples/set_user_agent.py diff --git a/examples/set_user_agent.py b/examples/set_user_agent.py new file mode 100644 index 000000000..449692cf6 --- /dev/null +++ b/examples/set_user_agent.py @@ -0,0 +1,14 @@ +from databricks import sql +import os + +with sql.connect(server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME"), + http_path = os.getenv("DATABRICKS_HTTP_PATH"), + access_token = os.getenv("DATABRICKS_TOKEN"), + _user_agent_entry="ExamplePartnerTag") as connection: + + with connection.cursor() as cursor: + cursor.execute("SELECT * FROM default.diamonds LIMIT 2") + result = cursor.fetchall() + + for row in result: + print(row) From b81a2222c40427be5c233555765090e0307f25f4 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 30 Sep 2022 13:46:53 -0500 Subject: [PATCH 06/10] Add user agent entry to README Signed-off-by: Jesse Whitehouse --- examples/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff 
--git a/examples/README.md b/examples/README.md index 1a9231f77..624f605d6 100644 --- a/examples/README.md +++ b/examples/README.md @@ -12,4 +12,6 @@ We provide example scripts so you can see the connector in action for basic usag - **`insert_data.py`** adds a tables called `squares` to your default catalog and inserts one hundred rows of example data. Then it fetches this data and prints it to the screen. - **`query_cancel.py`** shows how to cancel a query assuming that you can access the `Cursor` executing that query from a different thread. This is necessary because `databricks-sql-connector` does not yet implement an asynchronous API; calling `.execute()` blocks the current thread until execution completes. Therefore, the connector can't cancel queries from the same thread where they began. - **`interactive_oauth.py`** shows the simplest example of authenticating by OAuth (no need for a PAT generated in the DBSQL UI). When you run the script it will open a browser window so you can authenticate. Afterward, the script fetches some sample data from Databricks and prints it to the screen. For this script, the OAuth token is not persisted which means you need to authenticate every time you run the script. -- **`persistent_oauth.py`** shows a more advanced example of authenticating by OAuth. In this case, it shows how to use a sublcass of `OAuthPersistence` to reuse an OAuth token across script executions. \ No newline at end of file +- **`persistent_oauth.py`** shows a more advanced example of authenticating by OAuth. In this case, it shows how to use a sublcass of `OAuthPersistence` to reuse an OAuth token across script executions. +- **`set_user_agent.py`** shows how to customize the user agent header used for Thrift commands. In +this example the string `ExamplePartnerTag` will be added to the the user agent on every request. 
\ No newline at end of file From abc5882bc6220701b1b19ae26456d0597ff4d893 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 30 Sep 2022 13:47:01 -0500 Subject: [PATCH 07/10] Format fix for query cancel Signed-off-by: Jesse Whitehouse --- examples/query_cancel.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/query_cancel.py b/examples/query_cancel.py index 041d42f19..59202088a 100644 --- a/examples/query_cancel.py +++ b/examples/query_cancel.py @@ -1,7 +1,9 @@ from databricks import sql import os, threading, time -# The current operation of a cursor may be cancelled by calling its `.cancel()` method as shown in the example below. +""" +The current operation of a cursor may be cancelled by calling its `.cancel()` method as shown in the example below. +""" with sql.connect(server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME"), http_path = os.getenv("DATABRICKS_HTTP_PATH"), From af55a7bef97da7eb77481865cd4555766b702404 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 30 Sep 2022 13:50:47 -0500 Subject: [PATCH 08/10] Add more detail to examples/README.md Signed-off-by: Jesse Whitehouse --- examples/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/README.md b/examples/README.md index 624f605d6..8f786eb6d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -6,6 +6,9 @@ We provide example scripts so you can see the connector in action for basic usag - DATABRICKS_HTTP_PATH - DATABRICKS_TOKEN +Follow the quick start in our [README](../README.md) to install `databricks-sql-connector` and see +how to find the hostname, http path, and access token. Note that for the OAuth examples below a +personal access token is not needed. # Table of Contents - **`query_execute.py`** connects to the `samples` database of your default catalog, runs a small query, and prints the result to screen. 
From b96ea5781b1aeadb0fed94149b14cd7653a10211 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 30 Sep 2022 13:58:18 -0500 Subject: [PATCH 09/10] Add segment about how to run examples Signed-off-by: Jesse Whitehouse --- examples/README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/examples/README.md b/examples/README.md index 8f786eb6d..2390af3d2 100644 --- a/examples/README.md +++ b/examples/README.md @@ -9,6 +9,24 @@ We provide example scripts so you can see the connector in action for basic usag Follow the quick start in our [README](../README.md) to install `databricks-sql-connector` and see how to find the hostname, http path, and access token. Note that for the OAuth examples below a personal access token is not needed. + + +## How to run an example script + +To run all of these examples you can clone the entire repository to your disk. Or you can use `curl` to fetch an individual script. + +### Clone the repo +1. Clone this repository to your local system +2. Follow the quick start in the [README](../README.md) to install the connector and obtain authentication credentials. +3. `cd examples/` +4. Then run any script using the `python` CLI. For example `python query_execute.py` + +### Fetch with `curl` + +1. Follow the quick start in the [README](../README.md) to install the connector and obtain authentication credentials. +2. Use the GitHub UI to find the URL to the **Raw** version of one of these examples. For example: `https://raw.githubusercontent.com/databricks/databricks-sql-python/main/examples/query_execute.py` +3. `curl` this URL to your local file-system: `curl https://raw.githubusercontent.com/databricks/databricks-sql-python/main/examples/query_execute.py > query_execute.py` +4. Then run the script with the `python` CLI. `python query_execute.py` # Table of Contents - **`query_execute.py`** connects to the `samples` database of your default catalog, runs a small query, and prints the result to screen. 
From f8a81e8605448c48f65e769da79f8086d4d3d82b Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 30 Sep 2022 16:43:49 -0500 Subject: [PATCH 10/10] Add clarifying wording that OAuth is experimental with links to see when it becomes GA Signed-off-by: Jesse Whitehouse --- examples/README.md | 4 ++-- examples/interactive_oauth.py | 4 ++-- examples/persistent_oauth.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/README.md b/examples/README.md index 2390af3d2..74446adeb 100644 --- a/examples/README.md +++ b/examples/README.md @@ -32,7 +32,7 @@ To run all of these examples you can clone the entire repository to your disk. O - **`query_execute.py`** connects to the `samples` database of your default catalog, runs a small query, and prints the result to screen. - **`insert_data.py`** adds a tables called `squares` to your default catalog and inserts one hundred rows of example data. Then it fetches this data and prints it to the screen. - **`query_cancel.py`** shows how to cancel a query assuming that you can access the `Cursor` executing that query from a different thread. This is necessary because `databricks-sql-connector` does not yet implement an asynchronous API; calling `.execute()` blocks the current thread until execution completes. Therefore, the connector can't cancel queries from the same thread where they began. -- **`interactive_oauth.py`** shows the simplest example of authenticating by OAuth (no need for a PAT generated in the DBSQL UI). When you run the script it will open a browser window so you can authenticate. Afterward, the script fetches some sample data from Databricks and prints it to the screen. For this script, the OAuth token is not persisted which means you need to authenticate every time you run the script. -- **`persistent_oauth.py`** shows a more advanced example of authenticating by OAuth. 
In this case, it shows how to use a sublcass of `OAuthPersistence` to reuse an OAuth token across script executions. +- **`interactive_oauth.py`** shows the simplest example of authenticating by OAuth (no need for a PAT generated in the DBSQL UI) while Bring Your Own IDP is in public preview. When you run the script it will open a browser window so you can authenticate. Afterward, the script fetches some sample data from Databricks and prints it to the screen. For this script, the OAuth token is not persisted which means you need to authenticate every time you run the script. +- **`persistent_oauth.py`** shows a more advanced example of authenticating by OAuth while Bring Your Own IDP is in public preview. In this case, it shows how to use a subclass of `OAuthPersistence` to reuse an OAuth token across script executions. - **`set_user_agent.py`** shows how to customize the user agent header used for Thrift commands. In this example the string `ExamplePartnerTag` will be added to the the user agent on every request. \ No newline at end of file diff --git a/examples/interactive_oauth.py b/examples/interactive_oauth.py index 1969732a2..c520d96a5 100644 --- a/examples/interactive_oauth.py +++ b/examples/interactive_oauth.py @@ -19,8 +19,8 @@ end user will have to login again. See examples/persistent_oauth.py to learn about persisting the token across script executions. -Bring Your Own Identity Provider is in public preview. You can monitor these two links to find out -when it will become generally available: +Bring Your Own Identity Provider is in public preview. The API may change prior to becoming GA. +You can monitor these two links to find out when it will become generally available: 1. https://docs.databricks.com/administration-guide/account-settings-e2/single-sign-on.html 2. 
https://docs.databricks.com/dev-tools/python-sql-connector.html diff --git a/examples/persistent_oauth.py b/examples/persistent_oauth.py index 0aabcaf17..b5b14d155 100644 --- a/examples/persistent_oauth.py +++ b/examples/persistent_oauth.py @@ -19,8 +19,8 @@ For this example, the DevOnlyFilePersistence class is provided. Do not use this in production. -Bring Your Own Identity Provider is in public preview. You can monitor these two links to find out -when it will become generally available: +Bring Your Own Identity Provider is in public preview. The API may change prior to becoming GA. +You can monitor these two links to find out when it will become generally available: 1. https://docs.databricks.com/administration-guide/account-settings-e2/single-sign-on.html 2. https://docs.databricks.com/dev-tools/python-sql-connector.html