Skip to content

Commit af700c5

Browse files
committed
fix: support JSONDtype on pandas version 1.5
1 parent 36109b1 commit af700c5

File tree

1 file changed

+27
-9
lines changed

1 file changed

+27
-9
lines changed

db_dtypes/json.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,23 @@ class JSONArray(arrays.ArrowExtensionArray):
7272

7373
_dtype = JSONDtype()
7474

75-
def __init__(self, values, dtype=None, copy=False) -> None:
75+
def __init__(self, values) -> None:
76+
super().__init__(values)
7677
self._dtype = JSONDtype()
7778
if isinstance(values, pa.Array):
78-
self._pa_array = pa.chunked_array([values])
79+
pa_data = pa.chunked_array([values])
7980
elif isinstance(values, pa.ChunkedArray):
80-
self._pa_array = values
81+
pa_data = values
8182
else:
8283
raise ValueError(f"Unsupported type '{type(values)}' for JSONArray")
84+
85+
# Ensures compatibility with pandas version 1.5.3
86+
if hasattr(self, '_data'):
87+
self._data = pa_data
88+
elif hasattr(self, '_pa_array'):
89+
self._pa_array = pa_data
90+
else:
91+
raise ValueError(f"Unsupported pandas version: {pd.__version__}")
8392

8493
@classmethod
8594
def _box_pa(
@@ -111,7 +120,7 @@ def _box_pa_scalar(cls, value) -> pa.Scalar:
111120
def _box_pa_array(cls, value, copy: bool = False) -> pa.Array | pa.ChunkedArray:
112121
"""Box value into a pyarrow Array or ChunkedArray."""
113122
if isinstance(value, cls):
114-
pa_array = value._pa_array
123+
pa_array = value.pa_data
115124
else:
116125
value = [JSONArray._serialize_json(x) for x in value]
117126
pa_array = pa.array(value, type=cls._dtype.pyarrow_dtype, from_pandas=True)
@@ -147,11 +156,20 @@ def dtype(self) -> JSONDtype:
147156
"""An instance of JSONDtype"""
148157
return self._dtype
149158

159+
@property
160+
def pa_data(self):
161+
"""An instance of stored pa data"""
162+
# Ensures compatibility with pandas version 1.5.3
163+
if hasattr(self, '_data'):
164+
return self._data
165+
elif hasattr(self, '_pa_array'):
166+
return self._pa_array
167+
150168
def _cmp_method(self, other, op):
151169
if op.__name__ == "eq":
152-
result = pyarrow.compute.equal(self._pa_array, self._box_pa(other))
170+
result = pyarrow.compute.equal(self.pa_data, self._box_pa(other))
153171
elif op.__name__ == "ne":
154-
result = pyarrow.compute.not_equal(self._pa_array, self._box_pa(other))
172+
result = pyarrow.compute.not_equal(self.pa_data, self._box_pa(other))
155173
else:
156174
# Comparison is not a meaningful one. We don't want to support sorting by JSON columns.
157175
raise TypeError(f"{op.__name__} not supported for JSONArray")
@@ -169,7 +187,7 @@ def __getitem__(self, item):
169187
else:
170188
# `check_array_indexer` should verify that the assertion holds true.
171189
assert item.dtype.kind == "b"
172-
return type(self)(self._pa_array.filter(item))
190+
return type(self)(self.pa_data.filter(item))
173191
elif isinstance(item, tuple):
174192
item = indexers.unpack_tuple_and_ellipses(item)
175193

@@ -181,7 +199,7 @@ def __getitem__(self, item):
181199
r"(`None`) and integer or boolean arrays are valid indices"
182200
)
183201

184-
value = self._pa_array[item]
202+
value = self.pa_data[item]
185203
if isinstance(value, pa.ChunkedArray):
186204
return type(self)(value)
187205
else:
@@ -193,7 +211,7 @@ def __getitem__(self, item):
193211

194212
def __iter__(self):
195213
"""Iterate over elements of the array."""
196-
for value in self._pa_array:
214+
for value in self.pa_data:
197215
val = JSONArray._deserialize_json(value.as_py())
198216
if val is None:
199217
yield self._dtype.na_value

0 commit comments

Comments
 (0)