From eea03801e05ec5de4e66da1d0438c07d5a77d568 Mon Sep 17 00:00:00 2001 From: Andy Hanson Date: Wed, 7 Sep 2016 06:34:28 -0700 Subject: [PATCH 1/2] When emitting react code, replace HTML numeric entities with their encoded characters --- src/compiler/transformers/jsx.ts | 16 +++++++++++----- .../baselines/reference/tsxReactEmitEntities.js | 2 ++ .../reference/tsxReactEmitEntities.symbols | 4 ++++ .../reference/tsxReactEmitEntities.types | 5 +++++ .../conformance/jsx/tsxReactEmitEntities.tsx | 1 + 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/compiler/transformers/jsx.ts b/src/compiler/transformers/jsx.ts index 4bc523a262f94..8982c0faf2478 100644 --- a/src/compiler/transformers/jsx.ts +++ b/src/compiler/transformers/jsx.ts @@ -210,15 +210,21 @@ namespace ts { } /** - * Decodes JSX entities. + * Replace entities like "&nbsp;", "&#123;", and "&#xDEADBEEF;" with the characters they encode. + * See https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references */ function decodeEntities(text: string) { - return text.replace(/&(\w+);/g, function(s: any, m: string) { - if (entities[m] !== undefined) { - return String.fromCharCode(entities[m]); + return text.replace(/&((#((\d+)|x([\da-fA-F]+)))|(\w+));/g, (match, _all, _number, _digits, decimal, hex, word) => { + if (decimal) { + return String.fromCharCode(parseInt(decimal, 10)); + } + else if (hex) { + return String.fromCharCode(parseInt(hex, 16)); } else { - return s; + const ch = entities[word]; + // If this is not a valid entity, then just use `match` (replace it with itself, i.e. don't replace) + return ch ? String.fromCharCode(ch) : match; } }); } diff --git a/tests/baselines/reference/tsxReactEmitEntities.js b/tests/baselines/reference/tsxReactEmitEntities.js index 2a5c1ec496388..8fbf59a376211 100644 --- a/tests/baselines/reference/tsxReactEmitEntities.js +++ b/tests/baselines/reference/tsxReactEmitEntities.js @@ -9,8 +9,10 @@ declare var React: any;
<div>Dot goes here: &middot; &notAnEntity; </div>
;
<div>Be careful of &quot;-ed strings!</div>
; +
<div>&#123;&#123;braces&#x7d;&#x7D;</div>
; //// [file.js] React.createElement("div", null, "Dot goes here: \u00B7 ¬AnEntity; "); React.createElement("div", null, "Be careful of \"-ed strings!"); +React.createElement("div", null, "{{braces}}"); diff --git a/tests/baselines/reference/tsxReactEmitEntities.symbols b/tests/baselines/reference/tsxReactEmitEntities.symbols index 5a0274029e4c2..873830773b6b0 100644 --- a/tests/baselines/reference/tsxReactEmitEntities.symbols +++ b/tests/baselines/reference/tsxReactEmitEntities.symbols @@ -23,3 +23,7 @@ declare var React: any; >div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) >div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) +
<div>&#123;&#123;braces&#x7d;&#x7D;</div>
; +>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) +>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) + diff --git a/tests/baselines/reference/tsxReactEmitEntities.types b/tests/baselines/reference/tsxReactEmitEntities.types index 111653ea140c5..7fe68df5f47fd 100644 --- a/tests/baselines/reference/tsxReactEmitEntities.types +++ b/tests/baselines/reference/tsxReactEmitEntities.types @@ -25,3 +25,8 @@ declare var React: any; >div : any >div : any +
<div>&#123;&#123;braces&#x7d;&#x7D;</div>
; +>
<div>&#123;&#123;braces&#x7d;&#x7D;</div>
: JSX.Element +>div : any +>div : any + diff --git a/tests/cases/conformance/jsx/tsxReactEmitEntities.tsx b/tests/cases/conformance/jsx/tsxReactEmitEntities.tsx index 4726008d6bea8..8af5bf206e25d 100644 --- a/tests/cases/conformance/jsx/tsxReactEmitEntities.tsx +++ b/tests/cases/conformance/jsx/tsxReactEmitEntities.tsx @@ -10,3 +10,4 @@ declare var React: any;
<div>Dot goes here: &middot; &notAnEntity; </div>
;
<div>Be careful of &quot;-ed strings!</div>
; +
<div>&#123;&#123;braces&#x7d;&#x7D;</div>
; From a8eb4a20dff9107752f0bce2504c2bfc0a2f6af3 Mon Sep 17 00:00:00 2001 From: Andy Hanson Date: Thu, 8 Sep 2016 07:49:29 -0700 Subject: [PATCH 2/2] Also decode entities when emitting attributes. Also, lexer should not process string escapes in jsx attributes. --- src/compiler/emitter.ts | 2 +- src/compiler/parser.ts | 8 ++++-- src/compiler/scanner.ts | 20 +++++++++++-- src/compiler/transformers/jsx.ts | 11 ++++++-- .../reference/tsxReactEmitEntities.js | 17 +++++++++++ .../reference/tsxReactEmitEntities.symbols | 23 +++++++++++++++ .../reference/tsxReactEmitEntities.types | 28 +++++++++++++++++++ .../conformance/jsx/tsxReactEmitEntities.tsx | 9 ++++++ 8 files changed, 111 insertions(+), 7 deletions(-) diff --git a/src/compiler/emitter.ts b/src/compiler/emitter.ts index 680c446c0d53e..0f96b04df9ab4 100644 --- a/src/compiler/emitter.ts +++ b/src/compiler/emitter.ts @@ -2030,7 +2030,7 @@ const _super = (function (geti, seti) { emitTrailingCommentsOfPosition(commentRange.pos); } - emitExpression(node.initializer); + emitExpression(initializer); } function emitShorthandPropertyAssignment(node: ShorthandPropertyAssignment) { diff --git a/src/compiler/parser.ts b/src/compiler/parser.ts index ac43e0fb63c03..e14a60eb10ca3 100644 --- a/src/compiler/parser.ts +++ b/src/compiler/parser.ts @@ -905,6 +905,10 @@ namespace ts { return currentToken = scanner.scanJsxToken(); } + function scanJsxAttributeValue(): SyntaxKind { + return currentToken = scanner.scanJsxAttributeValue(); + } + function speculationHelper(callback: () => T, isLookAhead: boolean): T { // Keep track of the state we'll need to rollback to if lookahead fails (or if the // caller asked us to always reset our state). 
@@ -3831,8 +3835,8 @@ namespace ts { scanJsxIdentifier(); const node = createNode(SyntaxKind.JsxAttribute); node.name = parseIdentifierName(); - if (parseOptional(SyntaxKind.EqualsToken)) { - switch (token()) { + if (token() === SyntaxKind.EqualsToken) { + switch (scanJsxAttributeValue()) { case SyntaxKind.StringLiteral: node.initializer = parseLiteralNode(); break; diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 2d07c2998e0d8..bfc9591baa7ea 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -27,6 +27,7 @@ namespace ts { reScanSlashToken(): SyntaxKind; reScanTemplateToken(): SyntaxKind; scanJsxIdentifier(): SyntaxKind; + scanJsxAttributeValue(): SyntaxKind; reScanJsxToken(): SyntaxKind; scanJsxToken(): SyntaxKind; scanJSDocToken(): SyntaxKind; @@ -817,6 +818,7 @@ namespace ts { reScanSlashToken, reScanTemplateToken, scanJsxIdentifier, + scanJsxAttributeValue, reScanJsxToken, scanJsxToken, scanJSDocToken, @@ -911,7 +913,7 @@ namespace ts { return value; } - function scanString(): string { + function scanString(allowEscapes = true): string { const quote = text.charCodeAt(pos); pos++; let result = ""; @@ -929,7 +931,7 @@ namespace ts { pos++; break; } - if (ch === CharacterCodes.backslash) { + if (ch === CharacterCodes.backslash && allowEscapes) { result += text.substring(start, pos); result += scanEscapeSequence(); start = pos; @@ -1737,6 +1739,20 @@ namespace ts { return token; } + function scanJsxAttributeValue(): SyntaxKind { + startPos = pos; + + switch (text.charCodeAt(pos)) { + case CharacterCodes.doubleQuote: + case CharacterCodes.singleQuote: + tokenValue = scanString(/*allowEscapes*/ false); + return token = SyntaxKind.StringLiteral; + default: + // If this scans anything other than `{`, it's a parse error. 
+ return scan(); + } + } + function scanJSDocToken(): SyntaxKind { if (pos >= end) { return token = SyntaxKind.EndOfFileToken; diff --git a/src/compiler/transformers/jsx.ts b/src/compiler/transformers/jsx.ts index 8982c0faf2478..9e6aa507cce5a 100644 --- a/src/compiler/transformers/jsx.ts +++ b/src/compiler/transformers/jsx.ts @@ -140,7 +140,8 @@ namespace ts { return createLiteral(true); } else if (node.kind === SyntaxKind.StringLiteral) { - return node; + const decoded = tryDecodeEntities((node).text); + return decoded ? createLiteral(decoded, /*location*/ node) : node; } else if (node.kind === SyntaxKind.JsxExpression) { return visitJsxExpression(node); @@ -213,7 +214,7 @@ namespace ts { * Replace entities like "&nbsp;", "&#123;", and "&#xDEADBEEF;" with the characters they encode. * See https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references */ - function decodeEntities(text: string) { + function decodeEntities(text: string): string { return text.replace(/&((#((\d+)|x([\da-fA-F]+)))|(\w+));/g, (match, _all, _number, _digits, decimal, hex, word) => { if (decimal) { return String.fromCharCode(parseInt(decimal, 10)); @@ -229,6 +230,12 @@ namespace ts { }); } + /** Like `decodeEntities` but returns `undefined` if there were no entities to decode. */ + function tryDecodeEntities(text: string): string | undefined { + const decoded = decodeEntities(text); + return decoded === text ? undefined : decoded; + } + function getTagName(node: JsxElement | JsxOpeningLikeElement): Expression { if (node.kind === SyntaxKind.JsxElement) { return getTagName((node).openingElement); diff --git a/tests/baselines/reference/tsxReactEmitEntities.js b/tests/baselines/reference/tsxReactEmitEntities.js index 8fbf59a376211..41c294b8baba8 100644 --- a/tests/baselines/reference/tsxReactEmitEntities.js +++ b/tests/baselines/reference/tsxReactEmitEntities.js @@ -10,9 +10,26 @@ declare var React: any;
<div>Dot goes here: &middot; &notAnEntity; </div>
;
<div>Be careful of &quot;-ed strings!</div>
;
<div>&#123;&#123;braces&#x7d;&#x7D;</div>
; +// Escapes do nothing +
<div>\n</div>
; + +// Also works in string literal attributes +
; +// Does not happen for a string literal that happens to be inside an attribute (and escapes then work) +
; +// Preserves single quotes +
//// [file.js] React.createElement("div", null, "Dot goes here: \u00B7 ¬AnEntity; "); React.createElement("div", null, "Be careful of \"-ed strings!"); React.createElement("div", null, "{{braces}}"); +// Escapes do nothing +React.createElement("div", null, "\\n"); +// Also works in string literal attributes +React.createElement("div", { attr: "{\u2026}\\" }); +// Does not happen for a string literal that happens to be inside an attribute (and escapes then work) +React.createElement("div", { attr: "{…}\"" }); +// Preserves single quotes +React.createElement("div", { attr: '"' }); diff --git a/tests/baselines/reference/tsxReactEmitEntities.symbols b/tests/baselines/reference/tsxReactEmitEntities.symbols index 873830773b6b0..470c6177842db 100644 --- a/tests/baselines/reference/tsxReactEmitEntities.symbols +++ b/tests/baselines/reference/tsxReactEmitEntities.symbols @@ -27,3 +27,26 @@ declare var React: any; >div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) >div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) +// Escapes do nothing +
<div>\n</div>
; +>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) +>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) + +// Also works in string literal attributes +
; +>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) +>attr : Symbol(unknown) +>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) + +// Does not happen for a string literal that happens to be inside an attribute (and escapes then work) +
; +>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) +>attr : Symbol(unknown) +>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) + +// Preserves single quotes +
+>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) +>attr : Symbol(unknown) +>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22)) + diff --git a/tests/baselines/reference/tsxReactEmitEntities.types b/tests/baselines/reference/tsxReactEmitEntities.types index 7fe68df5f47fd..3127e97afc3ad 100644 --- a/tests/baselines/reference/tsxReactEmitEntities.types +++ b/tests/baselines/reference/tsxReactEmitEntities.types @@ -30,3 +30,31 @@ declare var React: any; >div : any >div : any +// Escapes do nothing +
<div>\n</div>
; +>
<div>\n</div>
: JSX.Element +>div : any +>div : any + +// Also works in string literal attributes +
; +>
: JSX.Element +>div : any +>attr : any +>div : any + +// Does not happen for a string literal that happens to be inside an attribute (and escapes then work) +
; +>
: JSX.Element +>div : any +>attr : any +>"{…}\"" : string +>div : any + +// Preserves single quotes +
+>
: JSX.Element +>div : any +>attr : any +>div : any + diff --git a/tests/cases/conformance/jsx/tsxReactEmitEntities.tsx b/tests/cases/conformance/jsx/tsxReactEmitEntities.tsx index 8af5bf206e25d..e4b83ebb34b6e 100644 --- a/tests/cases/conformance/jsx/tsxReactEmitEntities.tsx +++ b/tests/cases/conformance/jsx/tsxReactEmitEntities.tsx @@ -11,3 +11,12 @@ declare var React: any;
<div>Dot goes here: &middot; &notAnEntity; </div>
;
<div>Be careful of &quot;-ed strings!</div>
;
<div>&#123;&#123;braces&#x7d;&#x7D;</div>
; +// Escapes do nothing +
<div>\n</div>
; + +// Also works in string literal attributes +
; +// Does not happen for a string literal that happens to be inside an attribute (and escapes then work) +
; +// Preserves single quotes +