@@ -25,24 +25,23 @@ def __init__(self, token: str):
25
25
self .token = token
26
26
self .requests_session = requests .Session ()
27
27
version = scrapingant_client .__version__
28
- user_agent = f'ScrapingAnt Client/{ version } ({ sys .platform } ; Python/{ platform .python_version ()} );'
28
+ self . user_agent = f'ScrapingAnt Client/{ version } ({ sys .platform } ; Python/{ platform .python_version ()} );'
29
29
self .requests_session .headers .update ({
30
30
'x-api-key' : self .token ,
31
- 'User-Agent' : user_agent ,
31
+ 'User-Agent' : self . user_agent ,
32
32
})
33
33
34
- def general_request (
34
+ def _form_payload (
35
35
self ,
36
36
url : str ,
37
37
cookies : Optional [List [Cookie ]] = None ,
38
- headers : Optional [Dict [str , str ]] = None ,
39
38
js_snippet : Optional [str ] = None ,
40
39
proxy_type : ProxyType = ProxyType .datacenter ,
41
40
proxy_country : Optional [str ] = None ,
42
41
return_text : bool = False ,
43
42
wait_for_selector : Optional [str ] = None ,
44
43
browser : bool = True ,
45
- ) -> Response :
44
+ ) -> Dict :
46
45
request_data = {'url' : url }
47
46
if cookies is not None :
48
47
request_data ['cookies' ] = cookies_list_to_string (cookies )
@@ -56,29 +55,97 @@ def general_request(
56
55
request_data ['wait_for_selector' ] = wait_for_selector
57
56
request_data ['return_text' ] = return_text
58
57
request_data ['browser' ] = browser
58
+ return request_data
59
59
60
- response = self .requests_session .post (
61
- SCRAPINGANT_API_BASE_URL + '/general' ,
62
- json = request_data ,
63
- headers = convert_headers (headers ),
64
- )
65
- if response .status_code == 403 :
60
def _parse_response(self, response_status_code: int, response_data: Dict, url: str) -> Response:
    """Translate an API reply into a ``Response`` or raise the matching error.

    :param response_status_code: HTTP status code of the API reply.
    :param response_data: decoded JSON body of the API reply.
    :param url: the URL that was requested; only used for the 404 error.
    :return: ``Response`` built from the ``content``, ``cookies`` and
        ``status_code`` keys of ``response_data``.
    :raises ScrapingantInvalidTokenException: on status 403.
    :raises ScrapingantSiteNotReachableException: on status 404.
    :raises ScrapingantInvalidInputException: on status 422 (carries the body).
    :raises ScrapingantDetectedException: on status 423.
    :raises ScrapingantInternalException: on status 500.
    """
    # Guard clauses: each known error status maps to one exception type.
    if response_status_code == 403:
        raise ScrapingantInvalidTokenException()
    if response_status_code == 404:
        raise ScrapingantSiteNotReachableException(url)
    if response_status_code == 422:
        raise ScrapingantInvalidInputException(response_data)
    if response_status_code == 423:
        raise ScrapingantDetectedException()
    if response_status_code == 500:
        raise ScrapingantInternalException()

    # Read all three keys before converting cookies so that a missing key
    # surfaces first, exactly as in a statement-by-statement read.
    body, raw_cookies, page_status = (
        response_data['content'],
        response_data['cookies'],
        response_data['status_code'],
    )
    return Response(
        content=body,
        cookies=cookies_list_from_string(raw_cookies),
        status_code=page_status,
    )
80
+
81
def general_request(
        self,
        url: str,
        cookies: Optional[List[Cookie]] = None,
        headers: Optional[Dict[str, str]] = None,
        js_snippet: Optional[str] = None,
        proxy_type: ProxyType = ProxyType.datacenter,
        proxy_country: Optional[str] = None,
        return_text: bool = False,
        wait_for_selector: Optional[str] = None,
        browser: bool = True,
) -> Response:
    """Perform a blocking request against the ``/general`` API endpoint.

    Every argument except ``headers`` is forwarded unchanged to
    ``_form_payload`` to build the JSON body. ``headers`` is passed through
    ``convert_headers`` and sent as per-request HTTP headers on top of the
    session defaults.

    :param url: target URL; also handed to ``_parse_response`` for error reporting.
    :param headers: optional extra headers for this single request.
    :return: the ``Response`` produced by ``_parse_response``.
    :raises: whatever ``_parse_response`` raises for the reply's status code.
        Note that the body is JSON-decoded before the status is inspected, so
        a non-JSON reply raises from ``.json()`` regardless of status.
    """
    payload = self._form_payload(
        url=url,
        cookies=cookies,
        js_snippet=js_snippet,
        proxy_type=proxy_type,
        proxy_country=proxy_country,
        return_text=return_text,
        wait_for_selector=wait_for_selector,
        browser=browser,
    )
    endpoint = SCRAPINGANT_API_BASE_URL + '/general'
    api_reply = self.requests_session.post(
        endpoint,
        json=payload,
        headers=convert_headers(headers),
    )
    # Arguments evaluate left to right: status code first, then JSON decoding.
    return self._parse_response(api_reply.status_code, api_reply.json(), url)
112
+
113
async def general_request_async(
        self,
        url: str,
        cookies: Optional[List[Cookie]] = None,
        headers: Optional[Dict[str, str]] = None,
        js_snippet: Optional[str] = None,
        proxy_type: ProxyType = ProxyType.datacenter,
        proxy_country: Optional[str] = None,
        return_text: bool = False,
        wait_for_selector: Optional[str] = None,
        browser: bool = True,
) -> Response:
    """Asynchronous counterpart of ``general_request`` built on ``httpx``.

    Every argument except ``headers`` is forwarded unchanged to
    ``_form_payload`` to build the JSON body. ``headers`` is passed through
    ``convert_headers`` and sent as per-request HTTP headers on top of the
    client-level API key and User-Agent headers.

    :param url: target URL; also handed to ``_parse_response`` for error reporting.
    :param headers: optional extra headers for this single request.
    :return: the ``Response`` produced by ``_parse_response``.
    :raises: whatever ``_parse_response`` raises for the reply's status code.
        The body is JSON-decoded before the status is inspected, so a
        non-JSON reply raises from ``.json()`` regardless of status.
    """
    # Imported lazily so httpx is only required when the async API is used.
    import httpx

    request_data = self._form_payload(
        url=url,
        cookies=cookies,
        js_snippet=js_snippet,
        proxy_type=proxy_type,
        proxy_country=proxy_country,
        return_text=return_text,
        wait_for_selector=wait_for_selector,
        browser=browser,
    )
    async with httpx.AsyncClient(
            headers={
                'x-api-key': self.token,
                'User-Agent': self.user_agent,
            },
            # httpx enforces a 5-second timeout by default, whereas the
            # synchronous path (requests.Session.post without ``timeout``)
            # waits indefinitely. Disable it so slow scrapes behave the same
            # in both variants instead of failing with httpx.ReadTimeout.
            timeout=None,
    ) as client:
        response = await client.post(
            SCRAPINGANT_API_BASE_URL + '/general',
            json=request_data,
            headers=convert_headers(headers),
        )
    response_status_code = response.status_code
    response_data = response.json()
    return self._parse_response(response_status_code, response_data, url)
0 commit comments