Adding and modifying tests

mohitsingla-db · mohitsingla-db · commit f3cdf24e86ce · 2022-09-27T23:54:52.000+02:00
Signed-off-by: Mohit Singla <mohit.singla@databricks.com>
diff --git a/tests/e2e/common/large_queries_mixin.py b/tests/e2e/common/large_queries_mixin.py
@@ -49,11 +49,13 @@ def test_query_with_large_wide_result_set(self):
         # This is used by PyHive tests to determine the buffer size
         self.arraysize = 1000
         with self.cursor() as cursor:
-            uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)])
-            cursor.execute("SELECT id, {uuids} FROM RANGE({rows})".format(uuids=uuids, rows=rows))
-            for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)):
-                self.assertEqual(row[0], row_id)  # Verify no rows are dropped in the middle.
-                self.assertEqual(len(row[1]), 36)
+            for lz4_compression in [False, True]:
+                cursor.setLZ4Compression(lz4_compression)
+                uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)])
+                cursor.execute("SELECT id, {uuids} FROM RANGE({rows})".format(uuids=uuids, rows=rows))
+                for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)):
+                    self.assertEqual(row[0], row_id)  # Verify no rows are dropped in the middle.
+                    self.assertEqual(len(row[1]), 36)
 
     def test_query_with_large_narrow_result_set(self):
         resultSize = 300 * 1000 * 1000  # 300 MB
@@ -65,9 +67,11 @@ def test_query_with_large_narrow_result_set(self):
         # This is used by PyHive tests to determine the buffer size
         self.arraysize = 10000000
         with self.cursor() as cursor:
-            cursor.execute("SELECT * FROM RANGE({rows})".format(rows=rows))
-            for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)):
-                self.assertEqual(row[0], row_id)
+            for lz4_compression in [False, True]:
+                cursor.setLZ4Compression(lz4_compression)
+                cursor.execute("SELECT * FROM RANGE({rows})".format(rows=rows))
+                for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)):
+                    self.assertEqual(row[0], row_id)
 
     def test_long_running_query(self):
         """ Incrementally increase query size until it takes at least 5 minutes,
@@ -80,21 +84,26 @@ def test_long_running_query(self):
         scale0 = 10000
         scale_factor = 1
         with self.cursor() as cursor:
-            while duration < min_duration:
-                self.assertLess(scale_factor, 512, msg="Detected infinite loop")
-                start = time.time()
+            for lz4_compression in [False, True]:
+                cursor.setLZ4Compression(lz4_compression)
+                # Reset per setting: otherwise the loop below is skipped on the second pass
+                # (duration already >= min_duration) and a stale scale_factor could trip the guard.
+                duration, scale_factor = 0, 1
+                while duration < min_duration:
+                    self.assertLess(scale_factor, 512, msg="Detected infinite loop")
+                    start = time.time()
 
-                cursor.execute("""SELECT count(*)
-                         FROM RANGE({scale}) x
-                         JOIN RANGE({scale0}) y
-                         ON from_unixtime(x.id * y.id, "yyyy-MM-dd") LIKE "%not%a%date%" 
-                         """.format(scale=scale_factor * scale0, scale0=scale0))
+                    cursor.execute("""SELECT count(*)
+                            FROM RANGE({scale}) x
+                            JOIN RANGE({scale0}) y
+                            ON from_unixtime(x.id * y.id, "yyyy-MM-dd") LIKE "%not%a%date%" 
+                            """.format(scale=scale_factor * scale0, scale0=scale0))
 
-                n, = cursor.fetchone()
-                self.assertEqual(n, 0)
+                    n, = cursor.fetchone()
+                    self.assertEqual(n, 0)
 
-                duration = time.time() - start
-                current_fraction = duration / min_duration
-                print('Took {} s with scale factor={}'.format(duration, scale_factor))
-                # Extrapolate linearly to reach 5 min and add 50% padding to push over the limit
-                scale_factor = math.ceil(1.5 * scale_factor / current_fraction)
+                    duration = time.time() - start
+                    current_fraction = duration / min_duration
+                    print('Took {} s with scale factor={}'.format(duration, scale_factor))
+                    # Extrapolate linearly to reach 5 min and add 50% padding to push over the limit
+                    scale_factor = math.ceil(1.5 * scale_factor / current_fraction)
diff --git a/tests/e2e/driver_tests.py b/tests/e2e/driver_tests.py
@@ -510,6 +510,20 @@ def test_timezone_with_timestamp(self):
                 self.assertEqual(arrow_result_table.field(0).type, ts_type)
                 self.assertEqual(arrow_result_value, expected.timestamp() * 1000000)
 
+    @skipUnless(pysql_supports_arrow(), 'arrow test needs arrow support')
+    def test_can_flip_compression(self):
+        with self.cursor() as cursor:
+            cursor.execute("SELECT array(1,2,3,4)")
+            cursor.fetchall()
+            lz4_compressed = cursor.active_result_set.lz4_compressed
+            # Nothing set explicitly yet: expects compression on by default (endpoint must support it)
+            self.assertEqual(lz4_compressed, True)
+            cursor.setLZ4Compression(False)
+            cursor.execute("SELECT array(1,2,3,4)")
+            cursor.fetchall()
+            lz4_compressed = cursor.active_result_set.lz4_compressed
+            self.assertEqual(lz4_compressed, False)
+
     def _should_have_native_complex_types(self):
         return pysql_has_version(">=", 2) and is_thrift_v5_plus(self.arguments)
 
diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py
@@ -309,6 +309,29 @@ def test_handle_execute_response_checks_operation_state_in_direct_results(self):
                     thrift_backend._handle_execute_response(t_execute_resp, Mock())
                 self.assertIn("some information about the error", str(cm.exception))
 
+    def test_handle_execute_response_sets_compression_in_direct_results(self):
+        """The lz4Compressed value in direct-results metadata must reach the execute response."""
+        for resp_type in self.execute_response_types:
+            compression_flag = Mock()  # sentinel; the assertion expects it back unchanged
+            row_set = MagicMock()
+            row_set.results.startRowOffset = 0
+            metadata = ttypes.TGetResultSetMetadataResp(
+                status=self.okay_status,
+                resultFormat=ttypes.TSparkRowSetType.ARROW_BASED_SET,
+                schema=MagicMock(),
+                arrowSchema=MagicMock(),
+                lz4Compressed=compression_flag)
+            direct_results = ttypes.TSparkDirectResults(
+                operationStatus=Mock(),
+                resultSetMetadata=metadata,
+                resultSet=row_set,
+                closeOperation=None)
+            t_execute_resp = resp_type(
+                status=Mock(), operationHandle=Mock(), directResults=direct_results)
+            backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider())
+            response = backend._handle_execute_response(t_execute_resp, Mock())
+            self.assertEqual(response.lz4_compressed, compression_flag)
+
     @patch("databricks.sql.thrift_backend.TCLIService.Client")
     def test_handle_execute_response_checks_operation_state_in_polls(self, tcli_service_class):
         tcli_service_instance = tcli_service_class.return_value