@@ -10,35 +10,9 @@ import {isDevMode} from '@angular/core';
1010
1111import { DomAdapter , getDOM } from '../dom/dom_adapter' ;
1212
13+ import { InertBodyHelper } from './inert_body' ;
1314import { sanitizeSrcset , sanitizeUrl } from './url_sanitizer' ;
1415
15- /** A <body> element that can be safely used to parse untrusted HTML. Lazily initialized below. */
16- let inertElement : HTMLElement | null = null ;
17- /** Lazily initialized to make sure the DOM adapter gets set before use. */
18- let DOM : DomAdapter = null ! ;
19-
20- /** Returns an HTML element that is guaranteed to not execute code when creating elements in it. */
21- function getInertElement ( ) {
22- if ( inertElement ) return inertElement ;
23- DOM = getDOM ( ) ;
24-
25- // Prefer using <template> element if supported.
26- const templateEl = DOM . createElement ( 'template' ) ;
27- if ( 'content' in templateEl ) return templateEl ;
28-
29- const doc = DOM . createHtmlDocument ( ) ;
30- inertElement = DOM . querySelector ( doc , 'body' ) ;
31- if ( inertElement == null ) {
32- // usually there should be only one body element in the document, but IE doesn't have any, so we
33- // need to create one.
34- const html = DOM . createElement ( 'html' , doc ) ;
35- inertElement = DOM . createElement ( 'body' , doc ) ;
36- DOM . appendChild ( html , inertElement ) ;
37- DOM . appendChild ( doc , html ) ;
38- }
39- return inertElement ;
40- }
41-
4216function tagSet ( tags : string ) : { [ k : string ] : boolean } {
4317 const res : { [ k : string ] : boolean } = { } ;
4418 for ( const t of tags . split ( ',' ) ) res [ t ] = true ;
@@ -121,53 +95,54 @@ class SanitizingHtmlSerializer {
12195 // because characters were re-encoded.
12296 public sanitizedSomething = false ;
12397 private buf : string [ ] = [ ] ;
98+ private DOM = getDOM ( ) ;
12499
125100 sanitizeChildren ( el : Element ) : string {
126101 // This cannot use a TreeWalker, as it has to run on Angular's various DOM adapters.
127102 // However this code never accesses properties off of `document` before deleting its contents
128103 // again, so it shouldn't be vulnerable to DOM clobbering.
129- let current : Node = el . firstChild ! ;
104+ let current : Node = this . DOM . firstChild ( el ) ! ;
130105 while ( current ) {
131- if ( DOM . isElementNode ( current ) ) {
106+ if ( this . DOM . isElementNode ( current ) ) {
132107 this . startElement ( current as Element ) ;
133- } else if ( DOM . isTextNode ( current ) ) {
134- this . chars ( DOM . nodeValue ( current ) ! ) ;
108+ } else if ( this . DOM . isTextNode ( current ) ) {
109+ this . chars ( this . DOM . nodeValue ( current ) ! ) ;
135110 } else {
136111 // Strip non-element, non-text nodes.
137112 this . sanitizedSomething = true ;
138113 }
139- if ( DOM . firstChild ( current ) ) {
140- current = DOM . firstChild ( current ) ! ;
114+ if ( this . DOM . firstChild ( current ) ) {
115+ current = this . DOM . firstChild ( current ) ! ;
141116 continue ;
142117 }
143118 while ( current ) {
144119 // Leaving the element. Walk up and to the right, closing tags as we go.
145- if ( DOM . isElementNode ( current ) ) {
120+ if ( this . DOM . isElementNode ( current ) ) {
146121 this . endElement ( current as Element ) ;
147122 }
148123
149- let next = checkClobberedElement ( current , DOM . nextSibling ( current ) ! ) ;
124+ let next = this . checkClobberedElement ( current , this . DOM . nextSibling ( current ) ! ) ;
150125
151126 if ( next ) {
152127 current = next ;
153128 break ;
154129 }
155130
156- current = checkClobberedElement ( current , DOM . parentElement ( current ) ! ) ;
131+ current = this . checkClobberedElement ( current , this . DOM . parentElement ( current ) ! ) ;
157132 }
158133 }
159134 return this . buf . join ( '' ) ;
160135 }
161136
162137 private startElement ( element : Element ) {
163- const tagName = DOM . nodeName ( element ) . toLowerCase ( ) ;
138+ const tagName = this . DOM . nodeName ( element ) . toLowerCase ( ) ;
164139 if ( ! VALID_ELEMENTS . hasOwnProperty ( tagName ) ) {
165140 this . sanitizedSomething = true ;
166141 return ;
167142 }
168143 this . buf . push ( '<' ) ;
169144 this . buf . push ( tagName ) ;
170- DOM . attributeMap ( element ) . forEach ( ( value : string , attrName : string ) => {
145+ this . DOM . attributeMap ( element ) . forEach ( ( value : string , attrName : string ) => {
171146 const lower = attrName . toLowerCase ( ) ;
172147 if ( ! VALID_ATTRS . hasOwnProperty ( lower ) ) {
173148 this . sanitizedSomething = true ;
@@ -186,7 +161,7 @@ class SanitizingHtmlSerializer {
186161 }
187162
188163 private endElement ( current : Element ) {
189- const tagName = DOM . nodeName ( current ) . toLowerCase ( ) ;
164+ const tagName = this . DOM . nodeName ( current ) . toLowerCase ( ) ;
190165 if ( VALID_ELEMENTS . hasOwnProperty ( tagName ) && ! VOID_ELEMENTS . hasOwnProperty ( tagName ) ) {
191166 this . buf . push ( '</' ) ;
192167 this . buf . push ( tagName ) ;
@@ -195,14 +170,14 @@ class SanitizingHtmlSerializer {
195170 }
196171
197172 private chars ( chars : string ) { this . buf . push ( encodeEntities ( chars ) ) ; }
198- }
199173
200- function checkClobberedElement ( node : Node , nextNode : Node ) : Node {
201- if ( nextNode && DOM . contains ( node , nextNode ) ) {
202- throw new Error (
203- `Failed to sanitize html because the element is clobbered: ${ DOM . getOuterHTML ( node ) } ` ) ;
174+ checkClobberedElement ( node : Node , nextNode : Node ) : Node {
175+ if ( nextNode && this . DOM . contains ( node , nextNode ) ) {
176+ throw new Error (
177+ `Failed to sanitize html because the element is clobbered: ${ this . DOM . getOuterHTML ( node ) } ` ) ;
178+ }
179+ return nextNode ;
204180 }
205- return nextNode ;
206181}
207182
208183// Regular Expressions for parsing tags and attributes
@@ -232,33 +207,20 @@ function encodeEntities(value: string) {
232207 . replace ( / > / g, '>' ) ;
233208}
234209
235- /**
236- * When IE9-11 comes across an unknown namespaced attribute e.g. 'xlink:foo' it adds 'xmlns:ns1'
237- * attribute to declare ns1 namespace and prefixes the attribute with 'ns1' (e.g. 'ns1:xlink:foo').
238- *
239- * This is undesirable since we don't want to allow any of these custom attributes. This method
240- * strips them all.
241- */
242- function stripCustomNsAttrs ( el : Element ) {
243- DOM . attributeMap ( el ) . forEach ( ( _ , attrName ) => {
244- if ( attrName === 'xmlns:ns1' || attrName . indexOf ( 'ns1:' ) === 0 ) {
245- DOM . removeAttribute ( el , attrName ) ;
246- }
247- } ) ;
248- for ( const n of DOM . childNodesAsList ( el ) ) {
249- if ( DOM . isElementNode ( n ) ) stripCustomNsAttrs ( n as Element ) ;
250- }
251- }
210+ let inertBodyHelper : InertBodyHelper ;
252211
253212/**
254213 * Sanitizes the given unsafe, untrusted HTML fragment, and returns HTML text that is safe to add to
255214 * the DOM in a browser environment.
256215 */
257216export function sanitizeHtml ( defaultDoc : any , unsafeHtmlInput : string ) : string {
217+ const DOM = getDOM ( ) ;
218+ let inertBodyElement : HTMLElement | null = null ;
258219 try {
259- const containerEl = getInertElement ( ) ;
220+ inertBodyHelper = inertBodyHelper || new InertBodyHelper ( defaultDoc , DOM ) ;
260221 // Make sure unsafeHtml is actually a string (TypeScript types are not enforced at runtime).
261222 let unsafeHtml = unsafeHtmlInput ? String ( unsafeHtmlInput ) : '' ;
223+ inertBodyElement = inertBodyHelper . getInertBodyElement ( unsafeHtml ) ;
262224
263225 // mXSS protection. Repeatedly parse the document to make sure it stabilizes, so that a browser
264226 // trying to auto-correct incorrect HTML cannot cause formerly inert HTML to become dangerous.
@@ -272,31 +234,25 @@ export function sanitizeHtml(defaultDoc: any, unsafeHtmlInput: string): string {
272234 mXSSAttempts -- ;
273235
274236 unsafeHtml = parsedHtml ;
275- DOM . setInnerHTML ( containerEl , unsafeHtml ) ;
276- if ( defaultDoc . documentMode ) {
277- // strip custom-namespaced attributes on IE<=11
278- stripCustomNsAttrs ( containerEl ) ;
279- }
280- parsedHtml = DOM . getInnerHTML ( containerEl ) ;
237+ parsedHtml = DOM . getInnerHTML ( inertBodyElement ) ;
238+ inertBodyElement = inertBodyHelper . getInertBodyElement ( unsafeHtml ) ;
281239 } while ( unsafeHtml !== parsedHtml ) ;
282240
283241 const sanitizer = new SanitizingHtmlSerializer ( ) ;
284- const safeHtml = sanitizer . sanitizeChildren ( DOM . getTemplateContent ( containerEl ) || containerEl ) ;
285-
286- // Clear out the body element.
287- const parent = DOM . getTemplateContent ( containerEl ) || containerEl ;
288- for ( const child of DOM . childNodesAsList ( parent ) ) {
289- DOM . removeChild ( parent , child ) ;
290- }
291-
242+ const safeHtml =
243+ sanitizer . sanitizeChildren ( DOM . getTemplateContent ( inertBodyElement ) || inertBodyElement ) ;
292244 if ( isDevMode ( ) && sanitizer . sanitizedSomething ) {
293245 DOM . log ( 'WARNING: sanitizing HTML stripped some content (see http://g.co/ng/security#xss).' ) ;
294246 }
295247
296248 return safeHtml ;
297- } catch ( e ) {
249+ } finally {
298250 // In case anything goes wrong, clear out inertElement to reset the entire DOM structure.
299- inertElement = null ;
300- throw e ;
251+ if ( inertBodyElement ) {
252+ const parent = DOM . getTemplateContent ( inertBodyElement ) || inertBodyElement ;
253+ for ( const child of DOM . childNodesAsList ( parent ) ) {
254+ DOM . removeChild ( parent , child ) ;
255+ }
256+ }
301257 }
302258}
0 commit comments