6
6
from msgpack import packb , unpackb
7
7
8
8
from frontera .core .codec import BaseDecoder , BaseEncoder
9
- import six
10
9
from w3lib .util import to_native_str
11
10
12
11
13
12
def _prepare_request_message (request ):
14
- def serialize (obj ):
15
- """Recursively walk object's hierarchy."""
16
- if isinstance (obj , six .text_type ):
17
- return obj .encode ('utf8' )
18
- if isinstance (obj , (bool , six .integer_types , float , six .binary_type )):
19
- return obj
20
- elif isinstance (obj , dict ):
21
- obj = obj .copy ()
22
- for key in obj :
23
- obj [key ] = serialize (obj [key ])
24
- return obj
25
- elif isinstance (obj , list ):
26
- return [serialize (item ) for item in obj ]
27
- elif isinstance (obj , tuple ):
28
- return tuple (serialize ([item for item in obj ]))
29
- elif hasattr (obj , '__dict__' ):
30
- return serialize (obj .__dict__ )
31
- else :
32
- return None
33
- return [request .url , request .method , request .headers , request .cookies , serialize (request .meta )]
13
+ return [request .url , request .method , request .headers , request .cookies , request .meta ]
34
14
35
15
36
16
def _prepare_response_message (response , send_body ):
@@ -42,28 +22,29 @@ def __init__(self, request_model, *a, **kw):
42
22
self .send_body = True if 'send_body' in kw and kw ['send_body' ] else False
43
23
44
24
def encode_add_seeds (self , seeds ):
45
- return packb ([b'as' , [_prepare_request_message (seed ) for seed in seeds ]])
25
+ return packb ([b'as' , [_prepare_request_message (seed ) for seed in seeds ]], use_bin_type = True , encoding = "utf-8" )
46
26
47
27
def encode_page_crawled (self , response ):
48
- return packb ([b'pc' , _prepare_response_message (response , self .send_body )])
28
+ return packb ([b'pc' , _prepare_response_message (response , self .send_body )], use_bin_type = True , encoding = "utf-8" )
49
29
50
30
def encode_links_extracted (self , request , links ):
51
- return packb ([b'le' , _prepare_request_message (request ), [_prepare_request_message (link ) for link in links ]])
31
+ return packb ([b'le' , _prepare_request_message (request ), [_prepare_request_message (link ) for link in links ]],
32
+ use_bin_type = True , encoding = "utf-8" )
52
33
53
34
def encode_request_error (self , request , error ):
54
- return packb ([b're' , _prepare_request_message (request ), str (error )])
35
+ return packb ([b're' , _prepare_request_message (request ), str (error )], use_bin_type = True , encoding = "utf-8" )
55
36
56
37
def encode_request (self , request ):
57
- return packb (_prepare_request_message (request ))
38
+ return packb (_prepare_request_message (request ), use_bin_type = True , encoding = "utf-8" )
58
39
59
40
def encode_update_score (self , request , score , schedule ):
60
- return packb ([b'us' , _prepare_request_message (request ), score , schedule ])
41
+ return packb ([b'us' , _prepare_request_message (request ), score , schedule ], use_bin_type = True , encoding = "utf-8" )
61
42
62
43
def encode_new_job_id (self , job_id ):
63
- return packb ([b'njid' , int (job_id )])
44
+ return packb ([b'njid' , int (job_id )], use_bin_type = True , encoding = "utf-8" )
64
45
65
46
def encode_offset (self , partition_id , offset ):
66
- return packb ([b'of' , int (partition_id ), int (offset )])
47
+ return packb ([b'of' , int (partition_id ), int (offset )], use_bin_type = True , encoding = "utf-8" )
67
48
68
49
69
50
class Decoder (BaseDecoder ):
@@ -87,7 +68,7 @@ def _request_from_object(self, obj):
87
68
meta = obj [4 ])
88
69
89
70
def decode (self , buffer ):
90
- obj = unpackb (buffer )
71
+ obj = unpackb (buffer , encoding = "utf-8" )
91
72
if obj [0 ] == b'pc' :
92
73
return ('page_crawled' ,
93
74
self ._response_from_object (obj [1 ]))
@@ -108,4 +89,4 @@ def decode(self, buffer):
108
89
return TypeError ('Unknown message type' )
109
90
110
91
def decode_request (self , buffer ):
111
- return self ._request_from_object (unpackb (buffer ))
92
+ return self ._request_from_object (unpackb (buffer , encoding = "utf-8" ))
0 commit comments