Skip to content

Commit e3b7be1

Browse files
committed
schema: support encoding=None connections
Several different problems are fixed here, but all have the same root. When a connection encoding is None (it is the default on Python 2 and may be set explicitly on Python 3), all mp_str values are decoded into bytes, not Unicode strings (note that bytes is an alias for str in Python 2). But the database schema parsing code assumes that _vspace / _vindex values are Unicode strings. The resolved problems are the following: 1. The default encoding in the bytes#decode() method is 'ascii'; however, names in tarantool can contain characters outside the ASCII table. Set 'utf-8' for decoding names. 2. Convert all binary values into Unicode strings before parsing or storing them. This allows correct subsequent accesses to the local schema representation. 3. Convert binary parameters such as a space, index or field name into Unicode strings when the schema is accessed, so as not to trigger redundant schema refetching. Those problems are briefly mentioned in [1]. Tested manually with Python 2 and Python 3: my testing tarantool instance has a space named '©', and after the changes I'm able to connect to it when the connection encoding is set to None. I also verified that the schema is not fetched each time I do <connection>.select('©') in Python 2 (where such a string literal is str / bytes, not a Unicode string). Relevant test cases are added in the next commits. [1]: #105 (comment)
1 parent 4f79627 commit e3b7be1

File tree

1 file changed

+55
-11
lines changed

1 file changed

+55
-11
lines changed

tarantool/schema.py

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,63 @@
1616
import tarantool.const as const
1717

1818

19+
def to_unicode(s):
    """Return `s` as a Unicode string if it is a bytes object.

    s: value to convert; bytes are decoded as UTF-8, any other
    value (including an already-Unicode string) is returned as is.

    Raises UnicodeDecodeError if `s` is bytes that are not valid
    UTF-8.
    """
    if isinstance(s, bytes):
        # Decode explicitly as UTF-8 rather than relying on the
        # interpreter's default codec.
        return s.decode('utf-8')
    return s
23+
24+
25+
def to_unicode_recursive(x, max_depth):
    """Same as to_unicode(), but traverses over dictionaries,
    lists and tuples recursively.

    x: value to convert

    max_depth: 1 accepts a scalar, 2 accepts a list of scalars,
    etc.

    Returns a new dict / list / tuple of the same shape as `x` with
    every element passed through this function (dictionary keys
    are converted too); any other value is returned through
    to_unicode().

    Raises AssertionError when a non-empty container is nested
    deeper than `max_depth` allows.
    """
    assert max_depth > 0

    if isinstance(x, dict):
        # Keys and values live one level below the dictionary itself.
        return dict(
            (to_unicode_recursive(key, max_depth - 1),
             to_unicode_recursive(val, max_depth - 1))
            for key, val in x.items()
        )

    if isinstance(x, (list, tuple)):
        res = [to_unicode_recursive(val, max_depth - 1) for val in x]
        # Preserve the container type of the input.
        return tuple(res) if isinstance(x, tuple) else res

    return to_unicode(x)
54+
55+
1956
class SchemaIndex(object):
2057
def __init__(self, index_row, space):
2158
self.iid = index_row[1]
2259
self.name = index_row[2]
23-
if isinstance(self.name, bytes):
24-
self.name = self.name.decode()
60+
self.name = to_unicode(index_row[2])
2561
self.index = index_row[3]
2662
self.unique = index_row[4]
2763
self.parts = []
28-
if isinstance(index_row[5], (list, tuple)):
29-
for val in index_row[5]:
64+
parts_raw = to_unicode_recursive(index_row[5], 3)
65+
if isinstance(parts_raw, (list, tuple)):
66+
for val in parts_raw:
3067
if isinstance(val, dict):
3168
self.parts.append((val['field'], val['type']))
3269
else:
3370
self.parts.append((val[0], val[1]))
3471
else:
35-
for i in range(index_row[5]):
72+
for i in range(parts_raw):
3673
self.parts.append((
37-
index_row[5 + 1 + i * 2],
38-
index_row[5 + 2 + i * 2]
74+
to_unicode(index_row[5 + 1 + i * 2]),
75+
to_unicode(index_row[5 + 2 + i * 2])
3976
))
4077
self.space = space
4178
self.space.indexes[self.iid] = self
@@ -52,16 +89,15 @@ class SchemaSpace(object):
5289
def __init__(self, space_row, schema):
5390
self.sid = space_row[0]
5491
self.arity = space_row[1]
55-
self.name = space_row[2]
56-
if isinstance(self.name, bytes):
57-
self.name = self.name.decode()
92+
self.name = to_unicode(space_row[2])
5893
self.indexes = {}
5994
self.schema = schema
6095
self.schema[self.sid] = self
6196
if self.name:
6297
self.schema[self.name] = self
6398
self.format = dict()
64-
for part_id, part in enumerate(space_row[6]):
99+
format_raw = to_unicode_recursive(space_row[6], 3)
100+
for part_id, part in enumerate(format_raw):
65101
part['id'] = part_id
66102
self.format[part['name']] = part
67103
self.format[part_id ] = part
@@ -78,6 +114,8 @@ def __init__(self, con):
78114
self.con = con
79115

80116
def get_space(self, space):
117+
space = to_unicode(space)
118+
81119
try:
82120
return self.schema[space]
83121
except KeyError:
@@ -135,6 +173,9 @@ def fetch_space_all(self):
135173
SchemaSpace(row, self.schema)
136174

137175
def get_index(self, space, index):
176+
space = to_unicode(space)
177+
index = to_unicode(index)
178+
138179
_space = self.get_space(space)
139180
try:
140181
return _space.indexes[index]
@@ -203,6 +244,9 @@ def fetch_index_from(self, space, index):
203244
return index_row
204245

205246
def get_field(self, space, field):
247+
space = to_unicode(space)
248+
field = to_unicode(field)
249+
206250
_space = self.get_space(space)
207251
try:
208252
return _space.format[field]

0 commit comments

Comments
 (0)