
Commit 9ad7928

Review comments.
Update TCH001, TCH002, TCH003 uses.
1 parent e3e45bb commit 9ad7928

File tree: 11 files changed, +115 -120 lines

pyproject.toml (1 addition, 1 deletion)

@@ -142,7 +142,7 @@ docstring-quotes = "double"
 inline-quotes = 'single'

 [tool.ruff.lint.flake8-type-checking]
-runtime-evaluated-base-classes = ["pydantic.BaseModel"]
+runtime-evaluated-base-classes = ["pydantic.BaseModel", "crawlee.configuration.Configuration", "ApifyHttpProxyMiddleware"]

 [tool.ruff.lint.flake8-builtins]
 builtins-ignorelist = ["id"]
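
For context, TCH001, TCH002, and TCH003 are ruff's flake8-type-checking rules that flag first-party, third-party, and standard-library imports used only in type annotations. Pydantic, however, evaluates field annotations at runtime, so classes inheriting from the bases listed above must keep those imports at module level; the expanded setting tells ruff not to move them. A minimal sketch of the situation the setting accounts for (the model below is hypothetical, not part of this commit):

from __future__ import annotations

# This must stay a real runtime import: pydantic resolves the `timedelta`
# annotation when the model class is built. With runtime-evaluated-base-classes
# configured, ruff no longer suggests moving it into an `if TYPE_CHECKING:` block.
from datetime import timedelta

from pydantic import BaseModel


class ExampleTimeouts(BaseModel):  # hypothetical model, for illustration only
    request_timeout: timedelta = timedelta(seconds=30)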

src/apify/_configuration.py (0 additions, 1 deletion)

@@ -1,4 +1,3 @@
-# ruff: noqa: TCH001 TCH002 TCH003 (so that pydantic annotations work)
 from __future__ import annotations

 from datetime import datetime, timedelta

src/apify/_models.py (0 additions, 1 deletion)

@@ -1,4 +1,3 @@
-# ruff: noqa: TCH001 TCH002 TCH003 (Pydantic)
 from __future__ import annotations

 from datetime import datetime, timedelta

src/apify/scrapy/middlewares/apify_proxy.py (4 additions, 2 deletions)

@@ -1,11 +1,13 @@
 from __future__ import annotations

+from typing import TYPE_CHECKING
 from urllib.parse import ParseResult, urlparse

 try:
-    from scrapy import Request, Spider  # noqa: TCH002
+    if TYPE_CHECKING:
+        from scrapy import Request, Spider
+        from scrapy.crawler import Crawler
     from scrapy.core.downloader.handlers.http11 import TunnelError
-    from scrapy.crawler import Crawler  # noqa: TCH002
     from scrapy.exceptions import NotConfigured
 except ImportError as exc:
     raise ImportError(
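
The same refactor repeats in the Scrapy modules below: instead of silencing TCH002 with noqa comments, annotation-only imports move under if TYPE_CHECKING:, which type checkers evaluate but the interpreter skips; because these modules use from __future__ import annotations, the annotations are never resolved at runtime, so the names do not need to exist there. A small standalone sketch of the pattern (the class and method are illustrative, not taken from this file):

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only static type checkers see this import; at runtime the block is skipped,
    # so importing this module does not require scrapy to be importable here.
    from scrapy import Request, Spider


class ExampleMiddleware:  # hypothetical class, for illustration only
    def process_request(self, request: Request, spider: Spider) -> None:
        # With postponed evaluation, `Request` and `Spider` are just strings
        # in the annotations and are never looked up at runtime.
        spider.logger.info(f'Processing {request.url}')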

src/apify/scrapy/pipelines/actor_dataset_push.py (4 additions, 1 deletion)

@@ -1,9 +1,12 @@
 from __future__ import annotations

+from typing import TYPE_CHECKING
+
 from itemadapter.adapter import ItemAdapter

 try:
-    from scrapy import Item, Spider  # noqa: TCH002
+    if TYPE_CHECKING:
+        from scrapy import Item, Spider
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',

src/apify/scrapy/scheduler.py (4 additions, 1 deletion)

@@ -1,15 +1,18 @@
 from __future__ import annotations

 import traceback
+from typing import TYPE_CHECKING

 from apify._configuration import Configuration
 from apify.apify_storage_client import ApifyStorageClient

 try:
     from scrapy import Spider
     from scrapy.core.scheduler import BaseScheduler
-    from scrapy.http.request import Request  # noqa: TCH002
     from scrapy.utils.reactor import is_asyncio_reactor_installed
+
+    if TYPE_CHECKING:
+        from scrapy.http.request import Request
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',

src/apify/scrapy/utils.py (4 additions, 1 deletion)

@@ -2,14 +2,17 @@

 import asyncio
 from base64 import b64encode
+from typing import TYPE_CHECKING
 from urllib.parse import unquote

 from apify_shared.utils import ignore_docs

 try:
-    from scrapy.settings import Settings  # noqa: TCH002
     from scrapy.utils.project import get_project_settings
     from scrapy.utils.python import to_bytes
+
+    if TYPE_CHECKING:
+        from scrapy.settings import Settings
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. For example, if you use pip, run '

src/apify/storages/__init__.py (2 additions, 2 deletions)

@@ -1,5 +1,5 @@
 from crawlee.storages import Dataset, KeyValueStore, RequestQueue

-from ._actor_inputs import create_request_list
+from .request_list import RequestList

-__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'create_request_list']
+__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'RequestList']
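
With the export switched from the create_request_list helper to the RequestList class, callers now go through the RequestList.open() classmethod defined in request_list.py (see the renamed file below). A rough usage sketch against that signature; the name and input values are illustrative, not from this commit:

from apify.storages import RequestList


async def build_request_list() -> RequestList:
    # Assumed shape of the Actor input; the keys mirror the docstring example in request_list.py.
    start_urls_input = [
        {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
        {'url': 'https://crawlee.dev', 'method': 'GET'},
    ]
    return await RequestList.open(name='start-urls', actor_start_urls_input=start_urls_input)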

src/apify/storages/_known_actor_input_keys.py (0 additions, 29 deletions)

This file was deleted.

src/apify/storages/_actor_inputs.py renamed to src/apify/storages/request_list.py (46 additions, 35 deletions)

@@ -4,16 +4,14 @@
 import re
 from asyncio import Task
 from functools import partial
-from typing import Any
+from typing import Any, Union

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, TypeAdapter

 from crawlee import Request
 from crawlee._types import HttpMethod
 from crawlee.http_clients import BaseHttpClient, HttpxHttpClient
-from crawlee.storages import RequestList
-
-from ._known_actor_input_keys import ActorInputKeys
+from crawlee.storages import RequestList as CrawleeRequestList

 URL_NO_COMMAS_REGEX = re.compile(
     r'https?:\/\/(www\.)?([^\W_]|[^\W_][-\w0-9@:%._+~#=]{0,254}[^\W_])\.[a-z]{2,63}(:\d{1,5})?(\/[-\w@:%+.~#?&/=()]*)?'
@@ -24,50 +22,63 @@ class _RequestDetails(BaseModel):
     method: HttpMethod = 'GET'
     payload: str = ''
     headers: dict[str, str] = Field(default_factory=dict)
-    user_data: dict[str, str] = Field(default_factory=dict, alias=ActorInputKeys.startUrls.userData)
+    user_data: dict[str, str] = Field(default_factory=dict, alias='userData')


 class _RequestsFromUrlInput(_RequestDetails):
-    requests_from_url: str = Field(alias=ActorInputKeys.startUrls.requestsFromUrl)
+    requests_from_url: str = Field(alias='requestsFromUrl')


 class _SimpleUrlInput(_RequestDetails):
     url: str


-async def create_request_list(
-    actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None
+url_input_adapter = TypeAdapter(list[Union[_RequestsFromUrlInput, _SimpleUrlInput]])
+
+
+class RequestList(CrawleeRequestList):
+    """Extends crawlee RequestList."""
+
+    @classmethod
+    async def open(
+        cls,
+        name: str | None = None,
+        actor_start_urls_input: list[dict[str, Any]] | None = None,
+        http_client: BaseHttpClient | None = None,
+    ) -> RequestList:
+        """Creates RequestList from Actor input requestListSources.
+
+        name is name of the returned RequestList
+        actor_start_urls_input can contain list dicts with either url or requestsFromUrl key
+        http_client is client that will be used to send get request to url defined in requestsFromUrl
+
+        Example actor_start_urls_input:
+        [
+            # Gather urls from response body.
+            {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
+            # Directly include this url.
+            {'url': 'https://crawlee.dev', 'method': 'GET'}
+        ]
+        """
+        actor_start_urls_input = actor_start_urls_input or []
+        return await _create_request_list(name, actor_start_urls_input, http_client)
+
+
+async def _create_request_list(
+    name: str | None, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None
 ) -> RequestList:
-    """Creates RequestList from Actor input requestListSources.
-
-    actor_start_urls_input can contain list dicts with either url or requestsFromUrl key
-    http_client is client that will be used to send get request to url defined in requestsFromUrl
-
-    Example:
-        actor_start_urls_input = [
-            # Gather urls from response body.
-            {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
-            # Directly include this url.
-            {'url': 'https://crawlee.dev', 'method': 'GET'}
-        ]
-    """
     if not http_client:
         http_client = HttpxHttpClient()
-    simple_url_requests_inputs = [
-        _SimpleUrlInput(**request_input)
-        for request_input in actor_start_urls_input
-        if ActorInputKeys.startUrls.url in request_input
-    ]
-    remote_url_requests_inputs = [
-        _RequestsFromUrlInput(**request_input)
-        for request_input in actor_start_urls_input
-        if ActorInputKeys.startUrls.requestsFromUrl in request_input
-    ]

-    simple_url_requests = _create_requests_from_input(simple_url_requests_inputs)
-    remote_url_requests = await _create_requests_from_url(remote_url_requests_inputs, http_client=http_client)
+    ulr_inputs = url_input_adapter.validate_python(actor_start_urls_input)  # instance of list[Union[...]]
+
+    simple_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _SimpleUrlInput]
+    remote_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _RequestsFromUrlInput]
+
+    simple_url_requests = _create_requests_from_input(simple_url_inputs)
+    remote_url_requests = await _create_requests_from_url(remote_url_inputs, http_client=http_client)

-    return RequestList(requests=simple_url_requests + remote_url_requests)
+    return RequestList(name=name, requests=simple_url_requests + remote_url_requests)


 def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> list[Request]:
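
The TypeAdapter over list[Union[_RequestsFromUrlInput, _SimpleUrlInput]] replaces the earlier manual key checks against ActorInputKeys: pydantic validates each dict against the union members and returns model instances whose concrete type identifies the input kind. A minimal self-contained sketch of that mechanism (simplified stand-in models, not the ones above):

from typing import Union

from pydantic import BaseModel, Field, TypeAdapter


class _FromUrl(BaseModel):  # simplified stand-in for _RequestsFromUrlInput
    requests_from_url: str = Field(alias='requestsFromUrl')


class _Simple(BaseModel):  # simplified stand-in for _SimpleUrlInput
    url: str


adapter = TypeAdapter(list[Union[_FromUrl, _Simple]])

parsed = adapter.validate_python([
    {'requestsFromUrl': 'https://crawlee.dev/file.txt'},
    {'url': 'https://crawlee.dev'},
])

# Each element is validated against whichever union member it satisfies, so the
# results can be split by concrete type, as _create_request_list does above.
assert [type(item).__name__ for item in parsed] == ['_FromUrl', '_Simple']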
