Skip to content

Commit 78b7dc6

Browse files
html5lib: Add various types (#11429)
I started out investigating comments in #11411 and ended up adding a few other types that were reasonably obvious from the source code. For reference: https://github.com/html5lib/html5lib-python/tree/master/html5lib
1 parent 601587e commit 78b7dc6

File tree

3 files changed

+74
-54
lines changed

3 files changed

+74
-54
lines changed

stubs/html5lib/html5lib/_inputstream.pyi

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1-
from _typeshed import Incomplete
2-
from typing import Any
1+
from _typeshed import Incomplete, SupportsRead
2+
from typing import Any, overload
3+
from typing_extensions import TypeAlias
4+
5+
_UnicodeInputStream: TypeAlias = str | SupportsRead[str]
6+
_BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes]
7+
_InputStream: TypeAlias = _UnicodeInputStream | _BinaryInputStream  # noqa: Y047  # used in other files; covers both text and binary sources, matching the HTMLInputStream overloads
38

49
spaceCharactersBytes: Any
510
asciiLettersBytes: Any
@@ -20,14 +25,26 @@ class BufferedStream:
2025
def seek(self, pos) -> None: ...
2126
def read(self, bytes): ...
2227

23-
def HTMLInputStream(source, **kwargs): ...
28+
@overload
29+
def HTMLInputStream(source: _UnicodeInputStream) -> HTMLUnicodeInputStream: ...
30+
@overload
31+
def HTMLInputStream(
32+
source: _BinaryInputStream,
33+
*,
34+
override_encoding: str | bytes | None = None,
35+
transport_encoding: str | bytes | None = None,
36+
same_origin_parent_encoding: str | bytes | None = None,
37+
likely_encoding: str | bytes | None = None,
38+
default_encoding: str = "windows-1252",
39+
useChardet: bool = True,
40+
) -> HTMLBinaryInputStream: ...
2441

2542
class HTMLUnicodeInputStream:
2643
reportCharacterErrors: Any
2744
newLines: Any
2845
charEncoding: Any
2946
dataStream: Any
30-
def __init__(self, source) -> None: ...
47+
def __init__(self, source: _UnicodeInputStream) -> None: ...
3148
chunk: str
3249
chunkSize: int
3350
chunkOffset: int
@@ -56,11 +73,11 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
5673
charEncoding: Any
5774
def __init__(
5875
self,
59-
source,
60-
override_encoding: Incomplete | None = None,
61-
transport_encoding: Incomplete | None = None,
62-
same_origin_parent_encoding: Incomplete | None = None,
63-
likely_encoding: Incomplete | None = None,
76+
source: _BinaryInputStream,
77+
override_encoding: str | bytes | None = None,
78+
transport_encoding: str | bytes | None = None,
79+
same_origin_parent_encoding: str | bytes | None = None,
80+
likely_encoding: str | bytes | None = None,
6481
default_encoding: str = "windows-1252",
6582
useChardet: bool = True,
6683
) -> None: ...
@@ -108,4 +125,4 @@ class ContentAttrParser:
108125
def __init__(self, data) -> None: ...
109126
def parse(self): ...
110127

111-
def lookupEncoding(encoding): ...
128+
def lookupEncoding(encoding: str | bytes | None) -> str | None: ...

stubs/html5lib/html5lib/_tokenizer.pyi

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from _typeshed import Incomplete
22
from typing import Any
33

4+
from ._inputstream import _InputStream
5+
46
entitiesTrie: Any
57
attributeMap = dict
68

@@ -12,7 +14,7 @@ class HTMLTokenizer:
1214
state: Any
1315
escape: bool
1416
currentToken: Any
15-
def __init__(self, stream, parser: Incomplete | None = None, **kwargs) -> None: ...
17+
def __init__(self, stream: _InputStream, parser: Incomplete | None = None, **kwargs) -> None: ...
1618
tokenQueue: Any
1719
def __iter__(self): ...
1820
def consumeNumberEntity(self, isHex): ...
@@ -36,23 +38,23 @@ class HTMLTokenizer:
3638
def rawtextLessThanSignState(self): ...
3739
def rawtextEndTagOpenState(self): ...
3840
def rawtextEndTagNameState(self): ...
39-
def scriptDataLessThanSignState(self): ...
40-
def scriptDataEndTagOpenState(self): ...
41-
def scriptDataEndTagNameState(self): ...
42-
def scriptDataEscapeStartState(self): ...
43-
def scriptDataEscapeStartDashState(self): ...
44-
def scriptDataEscapedState(self): ...
45-
def scriptDataEscapedDashState(self): ...
46-
def scriptDataEscapedDashDashState(self): ...
47-
def scriptDataEscapedLessThanSignState(self): ...
48-
def scriptDataEscapedEndTagOpenState(self): ...
49-
def scriptDataEscapedEndTagNameState(self): ...
50-
def scriptDataDoubleEscapeStartState(self): ...
51-
def scriptDataDoubleEscapedState(self): ...
52-
def scriptDataDoubleEscapedDashState(self): ...
53-
def scriptDataDoubleEscapedDashDashState(self): ...
54-
def scriptDataDoubleEscapedLessThanSignState(self): ...
55-
def scriptDataDoubleEscapeEndState(self): ...
41+
def scriptDataLessThanSignState(self) -> bool: ...
42+
def scriptDataEndTagOpenState(self) -> bool: ...
43+
def scriptDataEndTagNameState(self) -> bool: ...
44+
def scriptDataEscapeStartState(self) -> bool: ...
45+
def scriptDataEscapeStartDashState(self) -> bool: ...
46+
def scriptDataEscapedState(self) -> bool: ...
47+
def scriptDataEscapedDashState(self) -> bool: ...
48+
def scriptDataEscapedDashDashState(self) -> bool: ...
49+
def scriptDataEscapedLessThanSignState(self) -> bool: ...
50+
def scriptDataEscapedEndTagOpenState(self) -> bool: ...
51+
def scriptDataEscapedEndTagNameState(self) -> bool: ...
52+
def scriptDataDoubleEscapeStartState(self) -> bool: ...
53+
def scriptDataDoubleEscapedState(self) -> bool: ...
54+
def scriptDataDoubleEscapedDashState(self) -> bool: ...
55+
def scriptDataDoubleEscapedDashDashState(self) -> bool: ...
56+
def scriptDataDoubleEscapedLessThanSignState(self) -> bool: ...
57+
def scriptDataDoubleEscapeEndState(self) -> bool: ...
5658
def beforeAttributeNameState(self): ...
5759
def attributeNameState(self): ...
5860
def afterAttributeNameState(self): ...
@@ -64,17 +66,17 @@ class HTMLTokenizer:
6466
def selfClosingStartTagState(self): ...
6567
def bogusCommentState(self): ...
6668
def markupDeclarationOpenState(self): ...
67-
def commentStartState(self): ...
68-
def commentStartDashState(self): ...
69-
def commentState(self): ...
70-
def commentEndDashState(self): ...
71-
def commentEndState(self): ...
72-
def commentEndBangState(self): ...
73-
def doctypeState(self): ...
74-
def beforeDoctypeNameState(self): ...
75-
def doctypeNameState(self): ...
76-
def afterDoctypeNameState(self): ...
77-
def afterDoctypePublicKeywordState(self): ...
69+
def commentStartState(self) -> bool: ...
70+
def commentStartDashState(self) -> bool: ...
71+
def commentState(self) -> bool: ...
72+
def commentEndDashState(self) -> bool: ...
73+
def commentEndState(self) -> bool: ...
74+
def commentEndBangState(self) -> bool: ...
75+
def doctypeState(self) -> bool: ...
76+
def beforeDoctypeNameState(self) -> bool: ...
77+
def doctypeNameState(self) -> bool: ...
78+
def afterDoctypeNameState(self) -> bool: ...
79+
def afterDoctypePublicKeywordState(self) -> bool: ...
7880
def beforeDoctypePublicIdentifierState(self): ...
7981
def doctypePublicIdentifierDoubleQuotedState(self): ...
8082
def doctypePublicIdentifierSingleQuotedState(self): ...

stubs/html5lib/html5lib/html5parser.pyi

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,47 @@
1-
from _typeshed import Incomplete, SupportsRead
1+
from _typeshed import Incomplete
22
from typing import Any, Literal, overload
33
from xml.etree.ElementTree import Element
44

5+
from ._inputstream import _InputStream
6+
from ._tokenizer import HTMLTokenizer
7+
58
@overload
69
def parse(
7-
doc: str | bytes | SupportsRead[str] | SupportsRead[bytes],
8-
treebuilder: Literal["etree"] = "etree",
9-
namespaceHTMLElements: bool = True,
10-
**kwargs,
10+
doc: _InputStream, treebuilder: Literal["etree"] = "etree", namespaceHTMLElements: bool = True, **kwargs
1111
) -> Element: ...
1212
@overload
13-
def parse(
14-
doc: str | bytes | SupportsRead[str] | SupportsRead[bytes], treebuilder: str, namespaceHTMLElements: bool = True, **kwargs
13+
def parse(doc: _InputStream, treebuilder: str, namespaceHTMLElements: bool = True, **kwargs): ...
14+
def parseFragment(
15+
doc: _InputStream, container: str = "div", treebuilder: str = "etree", namespaceHTMLElements: bool = True, **kwargs
1516
): ...
16-
def parseFragment(doc, container: str = "div", treebuilder: str = "etree", namespaceHTMLElements: bool = True, **kwargs): ...
1717
def method_decorator_metaclass(function): ...
1818

1919
class HTMLParser:
20-
strict: Any
20+
strict: bool
2121
tree: Any
22-
errors: Any
22+
errors: list[Incomplete]
2323
phases: Any
2424
def __init__(
2525
self, tree: Incomplete | None = None, strict: bool = False, namespaceHTMLElements: bool = True, debug: bool = False
2626
) -> None: ...
2727
firstStartTag: bool
2828
log: Any
2929
compatMode: str
30+
container: str
3031
innerHTML: Any
3132
phase: Any
3233
lastPhase: Any
3334
beforeRCDataPhase: Any
3435
framesetOK: bool
35-
tokenizer: Any
36+
tokenizer: HTMLTokenizer
3637
def reset(self) -> None: ...
3738
@property
3839
def documentEncoding(self) -> str | None: ...
39-
def isHTMLIntegrationPoint(self, element) -> bool: ...
40-
def isMathMLTextIntegrationPoint(self, element) -> bool: ...
40+
def isHTMLIntegrationPoint(self, element: Element) -> bool: ...
41+
def isMathMLTextIntegrationPoint(self, element: Element) -> bool: ...
4142
def mainLoop(self) -> None: ...
42-
def parse(self, stream, scripting: bool = ..., **kwargs): ...
43-
def parseFragment(self, stream, *args, **kwargs): ...
43+
def parse(self, stream: _InputStream, scripting: bool = ..., **kwargs): ...
44+
def parseFragment(self, stream: _InputStream, *args, **kwargs): ...
4445
def parseError(self, errorcode: str = "XXX-undefined-error", datavars: Incomplete | None = None) -> None: ...
4546
def adjustMathMLAttributes(self, token) -> None: ...
4647
def adjustSVGAttributes(self, token) -> None: ...

0 commit comments

Comments
 (0)