diff --git a/pyrightconfig.stricter.json b/pyrightconfig.stricter.json index 8579b6dd516b..c1d731bd8404 100644 --- a/pyrightconfig.stricter.json +++ b/pyrightconfig.stricter.json @@ -27,7 +27,6 @@ "stubs/antlr4-python3-runtime", "stubs/aws-xray-sdk", "stubs/beautifulsoup4", - "stubs/bleach", "stubs/boltons", "stubs/boto", "stubs/braintree", diff --git a/stubs/bleach/@tests/stubtest_allowlist.txt b/stubs/bleach/@tests/stubtest_allowlist.txt index c3ff453fd202..75c027405b78 100644 --- a/stubs/bleach/@tests/stubtest_allowlist.txt +++ b/stubs/bleach/@tests/stubtest_allowlist.txt @@ -1,2 +1,5 @@ -bleach.css_sanitizer # Requires tinycss2 to be installed -bleach.html5lib_shim.* +# Internal private stuff: +bleach._vendor.* + +# Is a property returning a method, simplified: +bleach.html5lib_shim.InputStreamWithMemory.changeEncoding diff --git a/stubs/bleach/METADATA.toml b/stubs/bleach/METADATA.toml index 43e6309332f7..c5926af4801e 100644 --- a/stubs/bleach/METADATA.toml +++ b/stubs/bleach/METADATA.toml @@ -1,6 +1,6 @@ version = "6.1.*" +requires = ["types-html5lib"] upstream_repository = "https://github.com/mozilla/bleach" -partial_stub = true [tool.stubtest] -ignore_missing_stub = true +extras = ["css"] diff --git a/stubs/bleach/bleach/css_sanitizer.pyi b/stubs/bleach/bleach/css_sanitizer.pyi index 5e3c6f2ba35a..9fa319c6d79c 100644 --- a/stubs/bleach/bleach/css_sanitizer.pyi +++ b/stubs/bleach/bleach/css_sanitizer.pyi @@ -1,7 +1,8 @@ from collections.abc import Container +from typing import Final -ALLOWED_CSS_PROPERTIES: frozenset[str] -ALLOWED_SVG_PROPERTIES: frozenset[str] +ALLOWED_CSS_PROPERTIES: Final[frozenset[str]] +ALLOWED_SVG_PROPERTIES: Final[frozenset[str]] class CSSSanitizer: allowed_css_properties: Container[str] diff --git a/stubs/bleach/bleach/html5lib_shim.pyi b/stubs/bleach/bleach/html5lib_shim.pyi index dbcde6f14a00..792de6da4418 100644 --- a/stubs/bleach/bleach/html5lib_shim.pyi +++ b/stubs/bleach/bleach/html5lib_shim.pyi @@ -1,30 +1,70 @@ -from _typeshed import Incomplete +import re +from codecs import CodecInfo from collections.abc import Generator, Iterable, Iterator +from typing import Any, Final, Protocol -class HTMLParser: # actually html5lib.HTMLParser - def __getattr__(self, __name: str) -> Incomplete: ... +# We don't re-export any `html5lib` types / values here, because they are not +# really public and may change at any time. This is just a helper module, +# import things directly from `html5lib` instead! +from html5lib import HTMLParser +from html5lib._inputstream import HTMLBinaryInputStream, HTMLUnicodeInputStream +from html5lib._tokenizer import HTMLTokenizer +from html5lib._trie import Trie +from html5lib.serializer import HTMLSerializer +from html5lib.treewalkers.base import TreeWalker -class Filter: # actually html5lib.filters.base.Filter - source: Incomplete - def __init__(self, source) -> None: ... - def __iter__(self) -> Iterator[Incomplete]: ... - def __getattr__(self, name: str) -> Incomplete: ... # copy attributes from source +# Is actually webencodings.Encoding +class _Encoding(Protocol): + name: str + codec_info: CodecInfo + def __init__(self, name: str, codec_info: CodecInfo) -> None: ... -class SanitizerFilter: # actually html5lib.filters.sanitizer.Filter - def __getattr__(self, __name: str) -> Incomplete: ... +HTML_TAGS: Final[frozenset[str]] +HTML_TAGS_BLOCK_LEVEL: Final[frozenset[str]] +AMP_SPLIT_RE: Final[re.Pattern[str]] +ENTITIES: Final[dict[str, str]] +ENTITIES_TRIE: Final[Trie] +TAG_TOKEN_TYPES: Final[set[int]] +TAG_TOKEN_TYPE_CHARACTERS: Final[int] +TAG_TOKEN_TYPE_END: Final[int] +TAG_TOKEN_TYPE_PARSEERROR: Final[int] +TAG_TOKEN_TYPE_START: Final[int] -class HTMLSerializer: # actually html5lib.serializer.HTMLSerializer - def __getattr__(self, __name: str) -> Incomplete: ... +class InputStreamWithMemory: + position = HTMLUnicodeInputStream.position + reset = HTMLUnicodeInputStream.reset + def __init__(self, inner_stream: HTMLUnicodeInputStream) -> None: ... + @property + def errors(self) -> list[str]: ... + @property + def charEncoding(self) -> tuple[_Encoding, str]: ... + # If inner_stream wasn't a HTMLBinaryInputStream, this will error at runtime + # Is a property returning a method, simplified: + changeEncoding = HTMLBinaryInputStream.changeEncoding + def char(self) -> str: ... + def charsUntil(self, characters: Iterable[str], opposite: bool = False) -> str: ... + def unget(self, char: str | None) -> None: ... + def get_tag(self) -> str: ... + def start_tag(self) -> None: ... + +class BleachHTMLTokenizer(HTMLTokenizer): + consume_entities: bool + stream: InputStreamWithMemory + emitted_last_token: dict[str, Any] | None + def __init__(self, consume_entities: bool = False, **kwargs: Any) -> None: ... class BleachHTMLParser(HTMLParser): tags: list[str] | None strip: bool consume_entities: bool - def __init__(self, tags: Iterable[str] | None, strip: bool, consume_entities: bool, **kwargs) -> None: ... + def __init__(self, tags: Iterable[str] | None, strip: bool, consume_entities: bool, **kwargs: Any) -> None: ... class BleachHTMLSerializer(HTMLSerializer): escape_rcdata: bool def escape_base_amp(self, stoken: str) -> Generator[str, None, None]: ... - def serialize(self, treewalker, encoding: str | None = None) -> Generator[str, None, None]: ... + def serialize(self, treewalker: TreeWalker, encoding: str | None = None) -> Generator[str, None, None]: ... # type: ignore[override] -def __getattr__(__name: str) -> Incomplete: ... +def convert_entity(value: str) -> str | None: ... +def convert_entities(text: str) -> str: ... +def match_entity(stream: str) -> str | None: ... +def next_possible_entity(text: str) -> Iterator[str]: ... diff --git a/stubs/bleach/bleach/linkifier.pyi b/stubs/bleach/bleach/linkifier.pyi index d9555a53c8a7..efb0c83dbdc0 100644 --- a/stubs/bleach/bleach/linkifier.pyi +++ b/stubs/bleach/bleach/linkifier.pyi @@ -1,22 +1,25 @@ from _typeshed import Incomplete -from collections.abc import Container, Iterable, Iterator +from collections.abc import Container, Iterable, Iterator, Sequence from re import Pattern +from typing import Any, Final +from typing_extensions import TypeAlias -from .callbacks import _Callback -from .html5lib_shim import Filter +from html5lib.filters.base import Filter +from html5lib.treewalkers.base import TreeWalker -DEFAULT_CALLBACKS: list[_Callback] +from .callbacks import _Callback, _HTMLAttrs -TLDS: list[str] +DEFAULT_CALLBACKS: Final[list[_Callback]] +TLDS: Final[list[str]] def build_url_re(tlds: Iterable[str] = ..., protocols: Iterable[str] = ...) -> Pattern[str]: ... -URL_RE: Pattern[str] -PROTO_RE: Pattern[str] +URL_RE: Final[Pattern[str]] +PROTO_RE: Final[Pattern[str]] def build_email_re(tlds: Iterable[str] = ...) -> Pattern[str]: ... -EMAIL_RE: Pattern[str] +EMAIL_RE: Final[Pattern[str]] class Linker: def __init__( @@ -30,6 +33,10 @@ class Linker: ) -> None: ... def linkify(self, text: str) -> str: ... +# TODO: `_Token` might be converted into `TypedDict` +# or `html5lib` token might be reused +_Token: TypeAlias = dict[str, Any] + class LinkifyFilter(Filter): callbacks: Iterable[_Callback] skip_tags: Container[str] @@ -38,18 +45,18 @@ class LinkifyFilter(Filter): email_re: Pattern[str] def __init__( self, - source, + source: TreeWalker, callbacks: Iterable[_Callback] | None = ..., skip_tags: Container[str] | None = None, parse_email: bool = False, url_re: Pattern[str] = ..., email_re: Pattern[str] = ..., ) -> None: ... - def apply_callbacks(self, attrs, is_new): ... - def extract_character_data(self, token_list): ... - def handle_email_addresses(self, src_iter): ... - def strip_non_url_bits(self, fragment): ... - def handle_links(self, src_iter): ... - def handle_a_tag(self, token_buffer): ... - def extract_entities(self, token): ... + def apply_callbacks(self, attrs: _HTMLAttrs, is_new: bool) -> _HTMLAttrs | None: ... + def extract_character_data(self, token_list: Iterable[_Token]) -> str: ... + def handle_email_addresses(self, src_iter: Iterable[_Token]) -> Iterator[_Token]: ... + def strip_non_url_bits(self, fragment: str) -> tuple[str, str, str]: ... + def handle_links(self, src_iter: Iterable[_Token]) -> Iterator[_Token]: ... + def handle_a_tag(self, token_buffer: Sequence[_Token]) -> Iterator[_Token]: ... + def extract_entities(self, token: _Token) -> Iterator[_Token]: ... def __iter__(self) -> Iterator[Incomplete]: ... diff --git a/stubs/bleach/bleach/parse_shim.pyi b/stubs/bleach/bleach/parse_shim.pyi new file mode 100644 index 000000000000..5b1ef5a35a59 --- /dev/null +++ b/stubs/bleach/bleach/parse_shim.pyi @@ -0,0 +1 @@ +from urllib import parse as parse diff --git a/stubs/bleach/bleach/sanitizer.pyi b/stubs/bleach/bleach/sanitizer.pyi index 4ba0144b5739..a29a6a5e1792 100644 --- a/stubs/bleach/bleach/sanitizer.pyi +++ b/stubs/bleach/bleach/sanitizer.pyi @@ -1,20 +1,27 @@ from _typeshed import Incomplete -from collections.abc import Callable, Iterable +from collections.abc import Callable, Container, Iterable, Iterator from re import Pattern -from typing import Protocol +from typing import Final, Protocol from typing_extensions import TypeAlias +from html5lib.filters.base import Filter +from html5lib.filters.sanitizer import Filter as SanitizerFilter +from html5lib.treewalkers.base import TreeWalker + from . import _HTMLAttrKey from .css_sanitizer import CSSSanitizer -from .html5lib_shim import BleachHTMLParser, BleachHTMLSerializer, SanitizerFilter +from .html5lib_shim import BleachHTMLParser, BleachHTMLSerializer +from .linkifier import _Token + +ALLOWED_TAGS: Final[frozenset[str]] +ALLOWED_ATTRIBUTES: Final[dict[str, list[str]]] +ALLOWED_PROTOCOLS: Final[frozenset[str]] -ALLOWED_TAGS: frozenset[str] -ALLOWED_ATTRIBUTES: dict[str, list[str]] -ALLOWED_PROTOCOLS: frozenset[str] +INVISIBLE_CHARACTERS: Final[str] +INVISIBLE_CHARACTERS_RE: Final[Pattern[str]] +INVISIBLE_REPLACEMENT_CHAR: Final = "?" -INVISIBLE_CHARACTERS: str -INVISIBLE_CHARACTERS_RE: Pattern[str] -INVISIBLE_REPLACEMENT_CHAR: str +class NoCssSanitizerWarning(UserWarning): ... # A html5lib Filter class class _Filter(Protocol): @@ -24,18 +31,16 @@ _AttributeFilter: TypeAlias = Callable[[str, str, str], bool] _AttributeDict: TypeAlias = dict[str, list[str] | _AttributeFilter] | dict[str, list[str]] | dict[str, _AttributeFilter] _Attributes: TypeAlias = _AttributeFilter | _AttributeDict | list[str] -_TreeWalker: TypeAlias = Callable[[Incomplete], Incomplete] - class Cleaner: tags: Iterable[str] attributes: _Attributes protocols: Iterable[str] strip: bool strip_comments: bool - filters: Iterable[_Filter] + filters: Iterable[Filter] css_sanitizer: CSSSanitizer | None parser: BleachHTMLParser - walker: _TreeWalker + walker: TreeWalker serializer: BleachHTMLSerializer def __init__( self, @@ -63,7 +68,7 @@ class BleachSanitizerFilter(SanitizerFilter): css_sanitizer: CSSSanitizer | None def __init__( self, - source, + source: TreeWalker, allowed_tags: Iterable[str] = ..., attributes: _Attributes = ..., allowed_protocols: Iterable[str] = ..., @@ -74,12 +79,11 @@ class BleachSanitizerFilter(SanitizerFilter): strip_html_comments: bool = True, css_sanitizer: CSSSanitizer | None = None, ) -> None: ... - def sanitize_stream(self, token_iterator): ... - def merge_characters(self, token_iterator): ... - def __iter__(self): ... - def sanitize_token(self, token): ... - def sanitize_characters(self, token): ... - def sanitize_uri_value(self, value, allowed_protocols): ... - def allow_token(self, token): ... - def disallowed_token(self, token): ... - def sanitize_css(self, style): ... + def sanitize_stream(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ... + def merge_characters(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ... + def __iter__(self) -> Iterator[_Token]: ... + def sanitize_token(self, token: _Token) -> _Token | list[_Token] | None: ... + def sanitize_characters(self, token: _Token) -> _Token | list[_Token]: ... + def sanitize_uri_value(self, value: str, allowed_protocols: Container[str]) -> str | None: ... + def allow_token(self, token: _Token) -> _Token: ... + def disallowed_token(self, token: _Token) -> _Token: ... diff --git a/stubs/html5lib/html5lib/_inputstream.pyi b/stubs/html5lib/html5lib/_inputstream.pyi index 02bb378e77ed..adf8be11be09 100644 --- a/stubs/html5lib/html5lib/_inputstream.pyi +++ b/stubs/html5lib/html5lib/_inputstream.pyi @@ -1,7 +1,14 @@ from _typeshed import Incomplete, SupportsRead -from typing import Any, overload +from codecs import CodecInfo +from typing import Any, Protocol, overload from typing_extensions import TypeAlias +# Is actually webencodings.Encoding +class _Encoding(Protocol): + name: str + codec_info: CodecInfo + def __init__(self, name: str, codec_info: CodecInfo) -> None: ... + _UnicodeInputStream: TypeAlias = str | SupportsRead[str] _BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes] _InputStream: TypeAlias = _UnicodeInputStream # noqa: Y047 # used in other files @@ -42,13 +49,13 @@ def HTMLInputStream( class HTMLUnicodeInputStream: reportCharacterErrors: Any newLines: Any - charEncoding: Any + charEncoding: tuple[_Encoding, str] dataStream: Any def __init__(self, source: _UnicodeInputStream) -> None: ... chunk: str chunkSize: int chunkOffset: int - errors: Any + errors: list[str] prevNumLines: int prevNumCols: int def reset(self) -> None: ... @@ -70,7 +77,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): same_origin_parent_encoding: Any likely_encoding: Any default_encoding: Any - charEncoding: Any + charEncoding: tuple[_Encoding, str] def __init__( self, source: _BinaryInputStream, @@ -85,7 +92,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): def reset(self) -> None: ... def openStream(self, source): ... def determineEncoding(self, chardet: bool = True): ... - def changeEncoding(self, newEncoding) -> None: ... + def changeEncoding(self, newEncoding: str | bytes | None) -> None: ... def detectBOM(self): ... def detectEncodingMeta(self): ...