Skip to content

Commit cb74848

Browse files
committed
fixed msgpack codec
1 parent ba47302 commit cb74848

File tree

1 file changed

+12
-31
lines changed

1 file changed

+12
-31
lines changed

frontera/contrib/backends/remote/codecs/msgpack.py

Lines changed: 12 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,11 @@
66
from msgpack import packb, unpackb
77

88
from frontera.core.codec import BaseDecoder, BaseEncoder
9-
import six
109
from w3lib.util import to_native_str
1110

1211

1312
def _prepare_request_message(request):
14-
def serialize(obj):
15-
"""Recursively walk object's hierarchy."""
16-
if isinstance(obj, six.text_type):
17-
return obj.encode('utf8')
18-
if isinstance(obj, (bool, six.integer_types, float, six.binary_type)):
19-
return obj
20-
elif isinstance(obj, dict):
21-
obj = obj.copy()
22-
for key in obj:
23-
obj[key] = serialize(obj[key])
24-
return obj
25-
elif isinstance(obj, list):
26-
return [serialize(item) for item in obj]
27-
elif isinstance(obj, tuple):
28-
return tuple(serialize([item for item in obj]))
29-
elif hasattr(obj, '__dict__'):
30-
return serialize(obj.__dict__)
31-
else:
32-
return None
33-
return [request.url, request.method, request.headers, request.cookies, serialize(request.meta)]
13+
return [request.url, request.method, request.headers, request.cookies, request.meta]
3414

3515

3616
def _prepare_response_message(response, send_body):
@@ -42,28 +22,29 @@ def __init__(self, request_model, *a, **kw):
4222
self.send_body = True if 'send_body' in kw and kw['send_body'] else False
4323

4424
def encode_add_seeds(self, seeds):
45-
return packb([b'as', [_prepare_request_message(seed) for seed in seeds]])
25+
return packb([b'as', [_prepare_request_message(seed) for seed in seeds]], use_bin_type=True, encoding="utf-8")
4626

4727
def encode_page_crawled(self, response):
48-
return packb([b'pc', _prepare_response_message(response, self.send_body)])
28+
return packb([b'pc', _prepare_response_message(response, self.send_body)], use_bin_type=True, encoding="utf-8")
4929

5030
def encode_links_extracted(self, request, links):
51-
return packb([b'le', _prepare_request_message(request), [_prepare_request_message(link) for link in links]])
31+
return packb([b'le', _prepare_request_message(request), [_prepare_request_message(link) for link in links]],
32+
use_bin_type=True, encoding="utf-8")
5233

5334
def encode_request_error(self, request, error):
54-
return packb([b're', _prepare_request_message(request), str(error)])
35+
return packb([b're', _prepare_request_message(request), str(error)], use_bin_type=True, encoding="utf-8")
5536

5637
def encode_request(self, request):
57-
return packb(_prepare_request_message(request))
38+
return packb(_prepare_request_message(request), use_bin_type=True, encoding="utf-8")
5839

5940
def encode_update_score(self, request, score, schedule):
60-
return packb([b'us', _prepare_request_message(request), score, schedule])
41+
return packb([b'us', _prepare_request_message(request), score, schedule], use_bin_type=True, encoding="utf-8")
6142

6243
def encode_new_job_id(self, job_id):
63-
return packb([b'njid', int(job_id)])
44+
return packb([b'njid', int(job_id)], use_bin_type=True, encoding="utf-8")
6445

6546
def encode_offset(self, partition_id, offset):
66-
return packb([b'of', int(partition_id), int(offset)])
47+
return packb([b'of', int(partition_id), int(offset)], use_bin_type=True, encoding="utf-8")
6748

6849

6950
class Decoder(BaseDecoder):
@@ -87,7 +68,7 @@ def _request_from_object(self, obj):
8768
meta=obj[4])
8869

8970
def decode(self, buffer):
90-
obj = unpackb(buffer)
71+
obj = unpackb(buffer, encoding="utf-8")
9172
if obj[0] == b'pc':
9273
return ('page_crawled',
9374
self._response_from_object(obj[1]))
@@ -108,4 +89,4 @@ def decode(self, buffer):
10889
return TypeError('Unknown message type')
10990

11091
def decode_request(self, buffer):
111-
return self._request_from_object(unpackb(buffer))
92+
return self._request_from_object(unpackb(buffer, encoding="utf-8"))

0 commit comments

Comments
 (0)