
Commit 4375fb5
Mark bleach as complete
1 parent 91c72cf · commit 4375fb5

8 files changed: +95 −43 lines

pyrightconfig.stricter.json
Lines changed: 0 additions & 1 deletion

@@ -21,7 +21,6 @@
     "stdlib/xml/sax",
     "stubs/aws-xray-sdk",
     "stubs/babel",
-    "stubs/bleach",
     "stubs/boto",
     "stubs/beautifulsoup4",
     "stubs/braintree",

Lines changed: 13 additions & 2 deletions

@@ -1,2 +1,13 @@
-bleach.css_sanitizer  # Requires tinycss2 to be installed
-bleach.html5lib_shim.*
+# Internal private stuff:
+bleach._vendor.*
+
+# Hacks:
+bleach.html5lib_shim.InputStreamWithMemory.changeEncoding
+bleach.html5lib_shim.InputStreamWithMemory.reset
+
+# Re-exports:
+bleach.html5lib_shim.allowed_svg_properties
+bleach.html5lib_shim.allowed_protocols
+bleach.html5lib_shim.allowed_css_properties
+bleach.html5lib_shim.namespaces
+bleach.html5lib_shim.prefixes

stubs/bleach/METADATA.toml
Lines changed: 5 additions & 0 deletions

@@ -1 +1,6 @@
 version = "5.0.*"
+requires = ["types-html5lib"]
+
+[tool.stubtest]
+ignore_missing_stub = false
+extras = ["css"]
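
The new [tool.stubtest] table installs the package with the "css" extra, which pulls in tinycss2 so that bleach.css_sanitizer can be checked instead of allowlisted, while `requires = ["types-html5lib"]` makes the html5lib stubs available for the new imports below. A minimal runtime sketch of what that extra enables, assuming bleach 5.0 installed as bleach[css]; the tag, attribute, and CSS property choices are illustrative:

# Sketch only: assumes bleach 5.0 installed with the "css" extra (tinycss2 present).
import bleach
from bleach.css_sanitizer import CSSSanitizer

# Allow only the "style" attribute on <p>, and only these CSS properties.
css_sanitizer = CSSSanitizer(allowed_css_properties=["color", "font-weight"])
cleaned = bleach.clean(
    '<p style="color: red; position: fixed">hi</p>',
    tags=["p"],
    attributes={"p": ["style"]},
    css_sanitizer=css_sanitizer,
)
# Expected: the position declaration is dropped, color is kept.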

stubs/bleach/bleach/html5lib_shim.pyi
Lines changed: 45 additions & 13 deletions

@@ -1,17 +1,46 @@
-from _typeshed import Incomplete
-from collections.abc import Generator, Iterable
+from collections.abc import Iterator, Iterable
+from typing import Any
+import re
 
-class HTMLParser:  # actually html5lib.HTMLParser
-    def __getattr__(self, __name: str) -> Incomplete: ...
+# We don't re-export any `html5lib` types / values here, because they are not
+# really public and may change at any time. This is just a helper module,
+# import things directly from `html5lib` instead!
+from html5lib import HTMLParser
+from html5lib.serializer import HTMLSerializer
+from html5lib._tokenizer import HTMLTokenizer
+from html5lib._inputstream import HTMLUnicodeInputStream
 
-class Filter:  # actually html5lib.filters.base.Filter
-    def __getattr__(self, __name: str) -> Incomplete: ...
+HTML_TAGS: list[str]
+HTML_TAGS_BLOCK_LEVEL: frozenset[str]
+AMP_SPLIT_RE: re.Pattern[str]
+ENTITIES: dict[str, str]
+TAG_TOKEN_TYPES: set[int]
+TAG_TOKEN_TYPE_CHARACTERS: int
+TAG_TOKEN_TYPE_END: int
+TAG_TOKEN_TYPE_PARSEERROR: int
+TAG_TOKEN_TYPE_START: int
 
-class SanitizerFilter:  # actually html5lib.filters.sanitizer.Filter
-    def __getattr__(self, __name: str) -> Incomplete: ...
+class InputStreamWithMemory:
+    position: int
+    def __init__(self, inner_stream: HTMLUnicodeInputStream) -> None: ...
+    def reset(self) -> None: ...
+    @property
+    def errors(self) -> list[str]: ...
+    @property
+    def charEncoding(self) -> tuple[str, str]: ...
+    # Is a property returning a method, simplified:
+    def changeEncoding(self, newEncoding: str) -> None: ...
+    def char(self) -> str: ...
+    def charsUntil(self, characters: str, opposite: bool = ...) -> str: ...
+    def unget(self, char: str | None) -> None: ...
+    def get_tag(self) -> str: ...
+    def start_tag(self) -> None: ...
 
-class HTMLSerializer:  # actually html5lib.serializer.HTMLSerializer
-    def __getattr__(self, __name: str) -> Incomplete: ...
+class BleachHTMLTokenizer(HTMLTokenizer):
+    consume_entities: bool
+    stream: InputStreamWithMemory
+    emitted_last_token: dict[str, Any] | None
+    def __init__(self, consume_entities: bool = ..., **kwargs): ...
 
 class BleachHTMLParser(HTMLParser):
     tags: list[str] | None
@@ -21,7 +50,10 @@ class BleachHTMLParser(HTMLParser):
 
 class BleachHTMLSerializer(HTMLSerializer):
     escape_rcdata: bool
-    def escape_base_amp(self, stoken: str) -> Generator[str, None, None]: ...
-    def serialize(self, treewalker, encoding: str | None = ...) -> Generator[str, None, None]: ...
+    def escape_base_amp(self, stoken: str) -> Iterator[str]: ...
+    def serialize(self, treewalker, encoding: str | None = ...) -> Iterator[str]: ...  # type: ignore[override]
 
-def __getattr__(__name: str) -> Incomplete: ...
+def convert_entity(value: str) -> str | None: ...
+def convert_entities(text: str) -> str: ...
+def match_entity(stream: str) -> str | None: ...
+def next_possible_entity(text: str) -> Iterator[str]: ...
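
Replacing the module-level `__getattr__` with concrete signatures also exposes the entity helpers (`convert_entity`, `convert_entities`, `match_entity`, `next_possible_entity`). A small sketch of how they behave at runtime; `bleach.html5lib_shim` is an internal helper module, so treat this as illustrative, and the example strings are assumptions:

# Sketch only: bleach.html5lib_shim is internal, not a supported public API.
from bleach import html5lib_shim

html5lib_shim.convert_entity("amp")          # expected "&"; None for unknown names
html5lib_shim.convert_entities("a &amp; b")  # expected "a & b"
# next_possible_entity() splits text at "&" so each chunk can be checked
# with match_entity() for a leading entity.
for chunk in html5lib_shim.next_possible_entity("x &lt; y"):
    if chunk.startswith("&"):
        print(chunk, html5lib_shim.match_entity(chunk))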

stubs/bleach/bleach/linkifier.pyi
Lines changed: 18 additions & 9 deletions

@@ -1,12 +1,12 @@
-from _typeshed import Incomplete
-from collections.abc import Container, Iterable, MutableMapping
+import re
+from collections.abc import Container, Iterable, MutableMapping, Sequence, Iterator
 from re import Pattern
 from typing import Any, Protocol
 from typing_extensions import TypeAlias
 
-from .html5lib_shim import Filter
+from html5lib.filters.base import Filter
 
-_Attrs: TypeAlias = MutableMapping[Any, str]
+from .callbacks import _Attrs
 
 class _Callback(Protocol):
     def __call__(self, attrs: _Attrs, new: bool = ...) -> _Attrs: ...
@@ -36,13 +36,22 @@ class Linker:
     ) -> None: ...
     def linkify(self, text: str) -> str: ...
 
+# TODO: `_Token` might be converted into `TypedDict`
+# or `html5lib` token might be reused
+_Token: TypeAlias = dict[str, Any]
+
 class LinkifyFilter(Filter):
-    callbacks: Any
+    callbacks: list[_Callback]
     skip_tags: Container[str]
     parse_email: bool
-    url_re: Any
-    email_re: Any
+    url_re: re.Pattern[str]
+    email_re: re.Pattern[str]
     def __init__(
-        self, source, callbacks=..., skip_tags: Container[str] | None = ..., parse_email: bool = ..., url_re=..., email_re=...
+        self, source, callbacks=..., skip_tags: Container[str] | None = ..., parse_email: bool = ..., url_re: re.Pattern[str] = ..., email_re: re.Pattern[str] = ...
     ) -> None: ...
-    def __getattr__(self, item: str) -> Incomplete: ...
+    def apply_callbacks(self, attrs: _Attrs, is_new: bool) -> _Attrs: ...
+    def extract_character_data(self, token_list: _Token) -> str: ...
+    def handle_a_tag(self, token_buffer: Sequence[_Token]) -> Iterator[_Token]: ...
+    def handle_email_addresses(self, src_iter: _Token) -> Iterator[_Token]: ...
+    def handle_links(self, src_iter: Iterable[_Token]) -> Iterator[_Token]: ...
+    def strip_non_url_bits(self, fragment: str) -> tuple[str, str, str]: ...
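
With `callbacks` typed as `list[_Callback]` and `_Attrs` now imported from `.callbacks`, a linkify callback is any callable that takes the attribute mapping plus a `new` flag and returns the (possibly modified) mapping. A minimal sketch of a callback that satisfies the `_Callback` protocol; the added attribute and the input text are illustrative:

# Sketch only: a callback matching _Callback; attribute keys are
# (namespace, name) tuples and values are strings.
from bleach.linkifier import Linker

def set_target(attrs, new=False):
    attrs[(None, "target")] = "_blank"
    return attrs

linker = Linker(callbacks=[set_target])
linker.linkify("see example.com for details")
# Roughly: 'see <a href="http://example.com" target="_blank">example.com</a> for details'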

stubs/bleach/bleach/parse_shim.pyi
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+from urllib import parse as parse

stubs/bleach/bleach/sanitizer.pyi
Lines changed: 11 additions & 16 deletions

@@ -1,10 +1,13 @@
-from collections.abc import Callable, Container, Iterable
+from collections.abc import Callable, Container, Iterable, Iterator
 from re import Pattern
 from typing import Any
 from typing_extensions import TypeAlias
 
 from .css_sanitizer import CSSSanitizer
-from .html5lib_shim import BleachHTMLParser, BleachHTMLSerializer, SanitizerFilter
+from .html5lib_shim import BleachHTMLParser, BleachHTMLSerializer
+from .linkifier import _Token
+from html5lib.filters.sanitizer import Filter as SanitizerFilter
+from html5lib.filters.base import Filter
 
 ALLOWED_TAGS: list[str]
 ALLOWED_ATTRIBUTES: dict[str, list[str]]
@@ -14,16 +17,13 @@ INVISIBLE_CHARACTERS: str
 INVISIBLE_CHARACTERS_RE: Pattern[str]
 INVISIBLE_REPLACEMENT_CHAR: str
 
-# A html5lib Filter class
-_Filter: TypeAlias = Any
-
 class Cleaner:
     tags: Container[str]
     attributes: _Attributes
     protocols: Container[str]
     strip: bool
     strip_comments: bool
-    filters: Iterable[_Filter]
+    filters: Iterable[Filter]
     css_sanitizer: CSSSanitizer | None
     parser: BleachHTMLParser
     walker: Any
@@ -35,7 +35,7 @@ class Cleaner:
         protocols: Container[str] = ...,
         strip: bool = ...,
         strip_comments: bool = ...,
-        filters: Iterable[_Filter] | None = ...,
+        filters: Iterable[Filter] | None = ...,
         css_sanitizer: CSSSanitizer | None = ...,
     ) -> None: ...
     def clean(self, text: str) -> str: ...
@@ -61,12 +61,7 @@ class BleachSanitizerFilter(SanitizerFilter):
         css_sanitizer: CSSSanitizer | None = ...,
         **kwargs,
     ) -> None: ...
-    def sanitize_stream(self, token_iterator): ...
-    def merge_characters(self, token_iterator): ...
-    def __iter__(self): ...
-    def sanitize_token(self, token): ...
-    def sanitize_characters(self, token): ...
-    def sanitize_uri_value(self, value, allowed_protocols): ...
-    def allow_token(self, token): ...
-    def disallowed_token(self, token): ...
-    def sanitize_css(self, style): ...
+    def sanitize_stream(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
+    def merge_characters(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
+    def sanitize_characters(self, token: _Token) -> _Token | None: ...
+    def sanitize_uri_value(self, value: str, allowed_protocols: Container[str]) -> str | None: ...
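
`Cleaner.filters` is now typed against the real `html5lib.filters.base.Filter` instead of the old local `_Filter: TypeAlias = Any`. A short sketch of the documented pattern for passing filters to Cleaner; at runtime the iterable holds filter classes or partials that Cleaner instantiates over its token stream, and the tag list here is illustrative:

# Sketch only: combining Cleaner with LinkifyFilter, following the pattern
# from the bleach docs; Cleaner instantiates each entry of `filters`.
from functools import partial

from bleach.linkifier import LinkifyFilter
from bleach.sanitizer import Cleaner

cleaner = Cleaner(
    tags=["a", "p", "pre"],
    filters=[partial(LinkifyFilter, skip_tags=["pre"])],
)
cleaner.clean("<p>docs at example.com</p><pre>example.com</pre>")
# Links in <p> get wrapped in <a>; text inside <pre> is left alone.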

stubs/html5lib/html5lib/_inputstream.pyi
Lines changed: 2 additions & 2 deletions

@@ -24,13 +24,13 @@ def HTMLInputStream(source, **kwargs): ...
 class HTMLUnicodeInputStream:
     reportCharacterErrors: Any
     newLines: Any
-    charEncoding: Any
+    charEncoding: tuple[str, str]
     dataStream: Any
     def __init__(self, source) -> None: ...
     chunk: str
     chunkSize: int
     chunkOffset: int
-    errors: Any
+    errors: list[str]
     prevNumLines: int
     prevNumCols: int
     def reset(self) -> None: ...
