|
11 | 11 | DateTime, |
12 | 12 | ) |
13 | 13 | from collections.abc import Sequence |
14 | | -from databricks.sqlalchemy import TIMESTAMP, TINYINT, DatabricksArray, DatabricksMap |
| 14 | +from databricks.sqlalchemy import TIMESTAMP, TINYINT, DatabricksArray, DatabricksMap, DatabricksVariant |
15 | 15 | from sqlalchemy.orm import DeclarativeBase, Session |
16 | 16 | from sqlalchemy import select |
17 | 17 | from datetime import date, datetime, time, timedelta, timezone |
18 | 18 | import pandas as pd |
19 | 19 | import numpy as np |
20 | 20 | import decimal |
21 | | - |
| 21 | +import json |
22 | 22 |
|
23 | 23 | class TestComplexTypes(TestSetup): |
24 | 24 | def _parse_to_common_type(self, value): |
@@ -46,7 +46,7 @@ def _parse_to_common_type(self, value): |
46 | 46 | ): |
47 | 47 | return tuple(value) |
48 | 48 | elif isinstance(value, dict): |
49 | | - return tuple(value.items()) |
| 49 | + return tuple(sorted(value.items())) |
50 | 50 | elif isinstance(value, np.generic): |
51 | 51 | return value.item() |
52 | 52 | elif isinstance(value, decimal.Decimal): |
@@ -152,6 +152,35 @@ class MapTable(Base): |
152 | 152 |
|
153 | 153 | return MapTable, sample_data |
154 | 154 |
|
def sample_variant_table(self) -> tuple[DeclarativeBase, dict]:
    """Build the ORM model and the sample row used by the VARIANT tests.

    Returns:
        A ``(VariantTable, sample_data)`` pair: the mapped class for the
        ``sqlalchemy_variant_table`` table, and a dict mapping each column
        name to the value inserted for it.
    """
    # One payload per JSON shape exercised by the tests: a flat object,
    # a nested object, a heterogeneous array, and an object mixing all kinds.
    flat_payload = {"key": "value", "number": 42}
    nested_payload = {"user": {"name": "John", "age": 30}, "active": True}
    array_payload = [1, 2, 3, "hello", {"nested": "data"}]
    mixed_payload = {
        "string": "test",
        "number": 123,
        "boolean": True,
        "array": [1, 2, 3],
        "object": {"nested": "value"},
    }

    sample_data = {
        "int_col": 1,
        "variant_simple_col": flat_payload,
        "variant_nested_col": nested_payload,
        "variant_array_col": array_payload,
        "variant_mixed_col": mixed_payload,
    }

    # A private declarative base keeps this model's metadata separate from
    # any other model defined in this module.
    class Base(DeclarativeBase):
        pass

    class VariantTable(Base):
        __tablename__ = "sqlalchemy_variant_table"

        int_col = Column(Integer, primary_key=True)
        variant_simple_col = Column(DatabricksVariant())
        variant_nested_col = Column(DatabricksVariant())
        variant_array_col = Column(DatabricksVariant())
        variant_mixed_col = Column(DatabricksVariant())

    return VariantTable, sample_data
| 183 | + |
155 | 184 | def test_insert_array_table_sqlalchemy(self): |
156 | 185 | table, sample_data = self.sample_array_table() |
157 | 186 |
|
@@ -209,3 +238,81 @@ def test_map_table_creation_pandas(self): |
209 | 238 | stmt = select(table) |
210 | 239 | df_result = pd.read_sql(stmt, engine) |
211 | 240 | assert self._recursive_compare(df_result.iloc[0].to_dict(), sample_data) |
| 241 | + |
def test_insert_variant_table_sqlalchemy(self):
    """Insert the sample VARIANT row through the ORM and read it back.

    The row is added with a ``Session``, committed, re-selected by primary
    key, and each VARIANT column is decoded with ``json.loads`` before being
    compared with the original Python objects.
    """
    table, sample_data = self.sample_variant_table()
    variant_columns = (
        "variant_simple_col",
        "variant_nested_col",
        "variant_array_col",
        "variant_mixed_col",
    )

    with self.table_context(table) as engine:
        # Use the session as a context manager so it is closed (and its
        # connection released) before the surrounding table context exits.
        with Session(engine) as session:
            session.add(table(**sample_data))
            session.commit()

            stmt = select(table).where(table.int_col == 1)
            result = session.scalar(stmt)
            # Fail with a clear message instead of an AttributeError on None.
            assert result is not None, "inserted row was not found"

            compare = {key: getattr(result, key) for key in sample_data}

        # Parse JSON values back to original format for comparison
        for key in variant_columns:
            if compare[key] is not None:
                compare[key] = json.loads(compare[key])

        assert compare == sample_data
| 261 | + |
def test_variant_table_creation_pandas(self):
    """Round-trip the sample VARIANT row through pandas ``to_sql`` / ``read_sql``.

    The sample row is appended with every VARIANT column mapped to
    ``DatabricksVariant``, read back with a ``select`` on the table, decoded
    with ``json.loads`` and compared with the original Python objects.
    """
    table, sample_data = self.sample_variant_table()
    variant_columns = (
        "variant_simple_col",
        "variant_nested_col",
        "variant_array_col",
        "variant_mixed_col",
    )

    with self.table_context(table) as engine:
        frame = pd.DataFrame([sample_data])
        frame.to_sql(
            table.__tablename__,
            engine,
            if_exists="append",
            index=False,
            # Every VARIANT column gets the same SQLAlchemy type.
            dtype=dict.fromkeys(variant_columns, DatabricksVariant),
        )

        fetched = pd.read_sql(select(table), engine)
        first_row = fetched.iloc[0].to_dict()

        # Non-null VARIANT cells are run through json.loads so they can be
        # compared with the original Python objects.
        decoded = {
            name: json.loads(first_row[name])
            for name in variant_columns
            if first_row[name] is not None
        }
        first_row.update(decoded)

        assert first_row == sample_data
| 285 | + |
def test_variant_literal_processor(self):
    """Check that VARIANT values render as inline literals and round-trip.

    The insert statement is first compiled with ``literal_binds`` and the
    rendered SQL is checked for the inlined JSON of the simple column. The
    same statement is then executed, and the stored row is read back,
    decoded with ``json.loads`` and compared with the sample data.
    """
    table, sample_data = self.sample_variant_table()
    variant_columns = (
        "variant_simple_col",
        "variant_nested_col",
        "variant_array_col",
        "variant_mixed_col",
    )

    with self.table_context(table) as engine:
        stmt = table.__table__.insert().values(**sample_data)

        # No try/except here: a NotImplementedError from a missing literal
        # processor should simply propagate and fail the test.
        compiled = stmt.compile(
            dialect=engine.dialect,
            compile_kwargs={"literal_binds": True},
        )
        sql_str = str(compiled)

        # Assert that JSON actually got inlined
        assert '{"key":"value","number":42}' in sql_str

        with engine.begin() as conn:
            conn.execute(stmt)

        # Use the session as a context manager so it is closed (and its
        # connection released) before the surrounding table context exits.
        with Session(engine) as session:
            stmt_select = select(table).where(
                table.int_col == sample_data["int_col"]
            )
            result = session.scalar(stmt_select)
            # Fail with a clear message instead of an AttributeError on None.
            assert result is not None, "inserted row was not found"

            compare = {key: getattr(result, key) for key in sample_data}

        # Parse JSON values back to original Python objects
        for key in variant_columns:
            if compare[key] is not None:
                compare[key] = json.loads(compare[key])

        assert compare == sample_data
0 commit comments