Skip to content

Commit ed7cd94

Browse files
committed
Added VARIANT support to sqlalchemy_example and added a test for the VARIANT literal_processor
1 parent d0cce3f commit ed7cd94

File tree

3 files changed

+70
-7
lines changed

3 files changed

+70
-7
lines changed

sqlalchemy_example.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,12 @@
1717
from datetime import date, datetime, time, timedelta, timezone
1818
from decimal import Decimal
1919
from uuid import UUID
20+
import json
2021

2122
# By convention, backend-specific SQLA types are defined in uppercase
22-
# This dialect exposes Databricks SQL's TIMESTAMP and TINYINT types
23+
# This dialect exposes Databricks SQL's TIMESTAMP, TINYINT, and VARIANT types
2324
# as these are not covered by the generic, camelcase types shown below
24-
from databricks.sqlalchemy import TIMESTAMP, TINYINT
25+
from databricks.sqlalchemy import TIMESTAMP, TINYINT, DatabricksVariant
2526

2627
# Beside the CamelCase types shown below, line comments reflect
2728
# the underlying Databricks SQL / Delta table type
@@ -82,6 +83,12 @@ class SampleObject(Base):
8283
datetime_col_ntz = Column(DateTime)
8384
time_col = Column(Time)
8485
uuid_col = Column(Uuid)
86+
variant_col = Column(DatabricksVariant)
87+
88+
Base.metadata.drop_all(engine)
89+
90+
# Output SQL is:
91+
# DROP TABLE pysql_sqlalchemy_example_table
8592

8693
# This generates a CREATE TABLE statement against the catalog and schema
8794
# specified in the connection string
@@ -100,6 +107,7 @@ class SampleObject(Base):
100107
# datetime_col_ntz TIMESTAMP_NTZ,
101108
# time_col STRING,
102109
# uuid_col STRING,
110+
# variant_col VARIANT,
103111
# PRIMARY KEY (bigint_col)
104112
# ) USING DELTA
105113

@@ -120,6 +128,23 @@ class SampleObject(Base):
120128
"datetime_col_ntz": datetime(1990, 12, 4, 6, 33, 41),
121129
"time_col": time(23, 59, 59),
122130
"uuid_col": UUID(int=255),
131+
"variant_col": {
132+
"name": "John Doe",
133+
"age": 30,
134+
"address": {
135+
"street": "123 Main St",
136+
"city": "San Francisco",
137+
"state": "CA",
138+
"zip": "94105"
139+
},
140+
"hobbies": ["reading", "hiking", "cooking"],
141+
"is_active": True,
142+
"metadata": {
143+
"created_at": "2024-01-15T10:30:00Z",
144+
"version": 1.2,
145+
"tags": ["premium", "verified"]
146+
}
147+
},
123148
}
124149
sa_obj = SampleObject(**sample_object)
125150

@@ -140,7 +165,8 @@ class SampleObject(Base):
140165
# datetime_col,
141166
# datetime_col_ntz,
142167
# time_col,
143-
# uuid_col
168+
# uuid_col,
169+
# variant_col
144170
# )
145171
# VALUES
146172
# (
@@ -154,7 +180,8 @@ class SampleObject(Base):
154180
# :datetime_col,
155181
# :datetime_col_ntz,
156182
# :time_col,
157-
# :uuid_col
183+
# :uuid_col,
184+
# PARSE_JSON(:variant_col)
158185
# )
159186

160187
# Here we build a SELECT query using ORM
@@ -165,6 +192,7 @@ class SampleObject(Base):
165192

166193
# Finally, we read out the input data and compare it to the output
167194
compare = {key: getattr(result, key) for key in sample_object.keys()}
195+
compare['variant_col'] = json.loads(compare['variant_col'])
168196
assert compare == sample_object
169197

170198
# Then we drop the demonstration table

src/databricks/sqlalchemy/_types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ def process(value):
446446
except (TypeError, ValueError) as e:
447447
raise ValueError(f"Cannot serialize value {value} to JSON: {e}")
448448

449-
return f"PARSE_JSON('{process}')"
449+
return process
450450

451451
@compiles(DatabricksVariant, "databricks")
452452
def compile_variant(type_, compiler, **kw):

tests/test_local/e2e/test_complex_types.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ def test_insert_variant_table_sqlalchemy(self):
257257
if compare[key] is not None:
258258
compare[key] = json.loads(compare[key])
259259

260-
assert self._recursive_compare(compare, sample_data)
260+
assert compare == sample_data
261261

262262
def test_variant_table_creation_pandas(self):
263263
table, sample_data = self.sample_variant_table()
@@ -280,4 +280,39 @@ def test_variant_table_creation_pandas(self):
280280
for key in ['variant_simple_col', 'variant_nested_col', 'variant_array_col', 'variant_mixed_col']:
281281
if result_dict[key] is not None:
282282
result_dict[key] = json.loads(result_dict[key])
283-
assert self._recursive_compare(result_dict, sample_data)
283+
284+
assert result_dict == sample_data
285+
286+
def test_variant_literal_processor(self):
287+
table, sample_data = self.sample_variant_table()
288+
289+
with self.table_context(table) as engine:
290+
stmt = table.__table__.insert().values(**sample_data)
291+
292+
try:
293+
compiled = stmt.compile(
294+
dialect=engine.dialect,
295+
compile_kwargs={"literal_binds": True}
296+
)
297+
sql_str = str(compiled)
298+
299+
# Assert that JSON actually got inlined
300+
assert '{"key":"value","number":42}' in sql_str
301+
except NotImplementedError:
302+
raise
303+
304+
with engine.begin() as conn:
305+
conn.execute(stmt)
306+
307+
session = Session(engine)
308+
stmt_select = select(table).where(table.int_col == sample_data["int_col"])
309+
result = session.scalar(stmt_select)
310+
311+
compare = {key: getattr(result, key) for key in sample_data.keys()}
312+
313+
# Parse JSON values back to original Python objects
314+
for key in ['variant_simple_col', 'variant_nested_col', 'variant_array_col', 'variant_mixed_col']:
315+
if compare[key] is not None:
316+
compare[key] = json.loads(compare[key])
317+
318+
assert compare == sample_data

0 commit comments

Comments (0)