@@ -80,3 +80,65 @@ def test_serializer(data, expected):
8080 serialized = serializer .render (walker (dom ))
8181
8282 assert serialized == expected
83+
84+
@pytest.mark.parametrize('parser_args, data, expected', [
    # Make sure InputStreamWithMemory has charEncoding and changeEncoding
    (
        {},
        '<meta charset="utf-8">',
        '<meta charset="utf-8">',
    ),
    # Handle consume entities False--all entities are passed along and then
    # escaped when serialized
    # NOTE(review): the input must actually contain entities for this case to
    # exercise anything; each "&" of the passed-along entity text is escaped
    # to "&amp;" in the output -- confirm literals against upstream.
    (
        {'consume_entities': False},
        'text &amp;&gt;&quot;',
        'text &amp;amp;&amp;gt;&amp;quot;',
    ),
    # Handle consume entities True--all entities are consumed and converted
    # to their character equivalents and then &, <, and > are escaped when
    # serialized
    # The double quote is not among the escaped characters, so it comes out
    # as a literal '"'.
    (
        {'consume_entities': True},
        'text &amp;&gt;&quot;',
        'text &amp;&gt;"',
    ),
    # Test that "invalid-character-in-attribute-name" errors in tokenizing
    # result in attributes with invalid names getting dropped
    (
        {},
        '<a href="http://example.com"">',
        '<a href="http://example.com"></a>',
    ),
    # Same as above, but with a stray single quote directly after a
    # single-quoted attribute value (no whitespace in between).
    (
        {},
        '<a href=\'http://example.com\'\'>',
        '<a href="http://example.com"></a>',
    ),
])
def test_bleach_html_parser(parser_args, data, expected):
    """Round-trip ``data`` through the Bleach parser and serializer.

    A ``BleachHTMLParser`` is built from a set of default arguments
    overridden by ``parser_args``; ``data`` is parsed as a fragment, walked
    with the ``etree`` tree walker, rendered with a ``BleachHTMLSerializer``,
    and the rendered text is compared against ``expected``.

    :arg dict parser_args: keyword overrides for the parser defaults
    :arg str data: the HTML fragment to parse
    :arg str expected: the serialized output the round trip must produce
    """
    # Defaults for the parser; each parametrized case overrides what it needs.
    args = {
        'tags': None,
        'strip': True,
        'consume_entities': True,
    }
    args.update(parser_args)

    # Build a parser, walker, and serializer just like we do in clean()
    parser = html5lib_shim.BleachHTMLParser(**args)
    walker = html5lib_shim.getTreeWalker('etree')
    serializer = html5lib_shim.BleachHTMLSerializer(
        quote_attr_values='always',
        omit_optional_tags=False,
        escape_lt_in_attrs=True,
        resolve_entities=False,
        sanitize=False,
        alphabetical_attributes=False,
    )

    # Parse, walk, and then serialize the output
    dom = parser.parseFragment(data)
    serialized = serializer.render(walker(dom))

    assert serialized == expected
0 commit comments