Skip to content

Commit 0c8a1ce

Browse files
committed
Data type converter: Resolve data type ID to converter function upfront
1 parent dc8faae commit 0c8a1ce

File tree

3 files changed

+50
-17
lines changed

3 files changed

+50
-17
lines changed

pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[tool.mypy]
2+
3+
# Needed until `mypy-0.990` for `ConverterDefinition` in `converter.py`.
4+
# https://github.com/python/mypy/issues/731#issuecomment-1260976955
5+
enable_recursive_aliases = true

src/crate/client/converter.py

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@
2727
from copy import deepcopy
2828
from datetime import datetime
2929
from enum import Enum
30-
from typing import Any, Callable, Dict, Optional, Union, List
30+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
3131

3232
InputVal = Any
33+
ConverterFunction = Callable[[Optional[InputVal]], Optional[Any]]
34+
ConverterDefinition = Union[ConverterFunction, Tuple[ConverterFunction, "ConverterDefinition"]]
3335

3436

3537
def _to_ipaddress(value: Optional[str]) -> Optional[Union[ipaddress.IPv4Address, ipaddress.IPv6Address]]:
@@ -87,7 +89,7 @@ class DataType(Enum):
8789

8890

8991
# Map data type identifier to converter function.
90-
_DEFAULT_CONVERTERS: Dict[DataType, Callable[[Optional[InputVal]], Optional[Any]]] = {
92+
_DEFAULT_CONVERTERS: Dict[DataType, ConverterFunction] = {
9193
DataType.IP: _to_ipaddress,
9294
DataType.TIMESTAMP_WITH_TZ: _to_datetime,
9395
DataType.TIMESTAMP_WITHOUT_TZ: _to_datetime,
@@ -97,37 +99,55 @@ class DataType(Enum):
9799
class Converter:
    """
    Registry of converter functions, keyed by `DataType`.

    A converter function takes a single (optional) cell value and returns
    the converted value. Data types without a registered converter fall
    back to the `default` converter function.
    """

    def __init__(
        self,
        mappings: Optional[Dict[DataType, ConverterFunction]] = None,
        default: ConverterFunction = _to_default,
    ) -> None:
        """
        :param mappings: Optional dictionary mapping data types to converter
            functions. When omitted, an empty dictionary is used.
        :param default: Converter function used for data types which have
            no entry in `mappings`.
        """
        # Test against `None` explicitly: `mappings or {}` would replace an
        # empty dictionary handed in by the caller with a fresh one, while a
        # non-empty dictionary would be used as-is.
        self._mappings = mappings if mappings is not None else {}
        self._default = default

    @property
    def mappings(self) -> Dict[DataType, ConverterFunction]:
        """Dictionary mapping data types to converter functions."""
        return self._mappings

    def get(self, type_: DataType) -> ConverterFunction:
        """
        Return the converter function registered for `type_`, or the
        default converter function when there is no such registration.
        """
        return self.mappings.get(type_, self._default)

    def set(self, type_: DataType, converter: ConverterFunction) -> None:
        """Register `converter` for `type_`, replacing any previous entry."""
        self.mappings[type_] = converter

    def convert(
        self,
        converter_definition: ConverterDefinition,
        value: Optional[Any],
    ) -> Optional[Any]:
        """
        Convert a single row cell value using given converter definition.
        Also works recursively on nested values like `ARRAY` collections.
        Invoked from `Cursor._convert_rows`.

        :param converter_definition: Either a converter function, or a
            2-tuple `(outer, inner)` as produced by `col_type_to_converter`,
            where `inner` is the converter definition for the items.
        :param value: The cell value to convert. May be `None`.
        :return: The converted value. For tuple definitions and a value
            other than `None`, a list of the converted items.
        """
        # Plain converter function: Apply it directly.
        if not isinstance(converter_definition, tuple):
            return converter_definition(value)

        # Nested definition: Only the inner definition is applied to the
        # items, the outer converter function is not invoked here.
        _, inner_definition = converter_definition
        if value is None:
            return self.convert(inner_definition, None)
        return [self.convert(inner_definition, item) for item in value]

    def col_type_to_converter(self, type_: Union[int, List[int]]) -> ConverterDefinition:
        """
        Resolve integer data type identifier to its corresponding converter function.
        Also handles nested definitions with a *list* of data type identifiers on the
        right-hand side, describing the inner type of `ARRAY` values.

        It is important to resolve the converter functions first, in order not to
        hog the row loop with redundant lookups to the `mappings` dictionary.

        :param type_: Either an integer data type identifier, or a two-item
            list `[outer, inner]`, where `inner` is again such a definition.
        :return: A converter function, or a tuple of the outer converter
            function and the resolved inner converter definition.
        :raises ValueError: When the outer type of a list definition is not
            `DataType.ARRAY`, or when an identifier is not a valid `DataType`.
        """
        # Scalar type: Resolve to a single converter function.
        if not isinstance(type_, list):
            return self.get(DataType(type_))

        # Collection type: Unpack into dedicated names instead of rebinding
        # the `type_` parameter to a value of a different type.
        outer_id, inner_type = type_
        outer_type = DataType(outer_id)
        if outer_type is not DataType.ARRAY:
            raise ValueError(f"Data type {outer_id} is not implemented as collection type")
        return (self.get(outer_type), self.col_type_to_converter(inner_type))
132152

133153

src/crate/client/cursor.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,11 +226,19 @@ def _convert_rows(self):
226226
"""
227227
assert "col_types" in self._result and self._result["col_types"], \
228228
"Unable to apply type conversion without `col_types` information"
229+
230+
# Resolve `col_types` definition to converter functions. Running the lookup
231+
# redundantly on each row loop iteration would be a huge performance hog.
229232
type_id_list = self._result["col_types"]
233+
converter_definitions = [
234+
self._converter.col_type_to_converter(type_id) for type_id in type_id_list
235+
]
236+
237+
# Process result rows with conversion.
230238
for row in self._result["rows"]:
231239
yield [
232-
self._converter.convert(type_id, value)
233-
for type_id, value in zip(type_id_list, row)
240+
self._converter.convert(converter_definition, value)
241+
for converter_definition, value in zip(converter_definitions, row)
234242
]
235243

236244
@staticmethod

0 commit comments

Comments
 (0)