Skip to content

Commit 02050b8

Browse files
fix: Add recursive type conversion for nested Mapping objects
- Add a _to_builtin_types helper function to recursively convert Mapping-like and Sequence-like objects to plain Python types.
- This fixes a CI test failure where nested objects (like the address field) were not being properly converted for genson schema inference.
- Genson doesn't handle custom Mapping/Sequence implementations properly, so we need to convert everything to plain dicts, lists, and primitives.

Co-Authored-By: AJ Steers <[email protected]>
1 parent 32b512a commit 02050b8

File tree

1 file changed

+28
-2
lines changed

1 file changed

+28
-2
lines changed

airbyte_cdk/sources/declarative/schema/inferred_schema_loader.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#
44

55
from collections.abc import Mapping as ABCMapping
6+
from collections.abc import Sequence
67
from dataclasses import InitVar, dataclass
78
from typing import Any, Mapping, Optional
89

@@ -13,6 +14,30 @@
1314
from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
1415

1516

17+
def _to_builtin_types(value: Any) -> Any:
18+
"""
19+
Recursively convert Mapping-like and Sequence-like objects to plain Python types.
20+
21+
This is necessary because genson's schema inference doesn't handle custom Mapping
22+
or Sequence implementations properly. We need to convert everything to plain dicts,
23+
lists, and primitive types.
24+
25+
Args:
26+
value: The value to convert
27+
28+
Returns:
29+
The value converted to plain Python types
30+
"""
31+
if isinstance(value, ABCMapping):
32+
return {k: _to_builtin_types(v) for k, v in value.items()}
33+
elif isinstance(value, (list, tuple)):
34+
return [_to_builtin_types(item) for item in value]
35+
elif isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
36+
return [_to_builtin_types(item) for item in value]
37+
else:
38+
return value
39+
40+
1641
@dataclass
1742
class InferredSchemaLoader(SchemaLoader):
1843
"""
@@ -63,8 +88,9 @@ def get_json_schema(self) -> Mapping[str, Any]:
6388
if record_count >= self.record_sample_size:
6489
break
6590

66-
if isinstance(record, ABCMapping) and not isinstance(record, dict):
67-
record = dict(record)
91+
# Convert all Mapping-like and Sequence-like objects to plain Python types
92+
# This is necessary because genson doesn't handle custom implementations properly
93+
record = _to_builtin_types(record)
6894

6995
airbyte_record = AirbyteRecordMessage(
7096
stream=self.stream_name,

0 commit comments

Comments
 (0)