1
- import { clamp , isInRange } from '../../utils' ;
1
+ import { clamp , isInRange , regexLastIndexOf } from '../../utils' ;
2
2
import { Position , Range } from 'vscode-languageserver' ;
3
- import parse5 , { Location } from 'parse5 ' ;
3
+ import { Node , getLanguageService } from 'vscode-html-languageservice ' ;
4
4
5
5
export interface TagInformation {
6
6
content : string ;
@@ -12,43 +12,44 @@ export interface TagInformation {
12
12
container : { start : number ; end : number } ;
13
13
}
14
14
15
/**
 * Normalizes the raw attribute map of a parsed node into a plain
 * name -> value record.
 *
 * - An attribute without a value (reported as `null`) is mapped to its own
 *   name, in order to support boolean attributes.
 * - A value wrapped in a matching pair of double or single quotes has that
 *   pair removed; any other value is kept as is.
 *
 * @param rawAttrs attribute map of the node, or `undefined` if it has none
 * @returns a record containing one string entry per attribute
 */
function parseAttributes(
    rawAttrs: Record<string, string | null> | undefined,
): Record<string, string> {
    const attrs: Record<string, string> = {};
    if (!rawAttrs) {
        return attrs;
    }

    for (const [attrName, attrValue] of Object.entries(rawAttrs)) {
        // `== null` also covers an unexpectedly undefined entry, which would
        // otherwise blow up inside removeOuterQuotes.
        attrs[attrName] = attrValue == null ? attrName : removeOuterQuotes(attrValue);
    }
    return attrs;

    /**
     * Strips one pair of matching surrounding quotes (`"…"` or `'…'`).
     * A lone quote character is not a pair and is returned unchanged.
     */
    function removeOuterQuotes(attrValue: string): string {
        const first = attrValue.charAt(0);
        const isQuoted =
            attrValue.length >= 2 &&
            (first === '"' || first === "'") &&
            attrValue.endsWith(first);
        return isQuoted ? attrValue.slice(1, -1) : attrValue;
    }
}
27
39
28
- // node name equals tag, but we still have to check for case sensitivity
29
- const orgStart = node . sourceCodeLocation ?. startTag . startOffset || 0 ;
30
- const orgEnd = node . sourceCodeLocation ?. startTag . endOffset || 0 ;
31
- const tagHtml = source . substring ( orgStart , orgEnd ) ;
32
- return tagHtml . startsWith ( `<${ tag } ` ) ;
40
+ const parser = getLanguageService ( ) ;
41
/**
 * Runs the shared HTML language service parser over the given text.
 *
 * @param text the document content to parse
 * @returns the document produced by `parser.parseHTMLDocument`
 */
function parseHtml(text: string) {
    // Parsing only ever asks the document for its text, so a stub that
    // exposes nothing but getText() is enough here.
    const documentStub = { getText: () => text };
    return parser.parseHTMLDocument(documentStub as any);
}
34
45
35
- // parse5's DefaultTreeNode type is insufficient; make our own type to make TS happy
36
- type ParsedNode = {
37
- nodeName : string ;
38
- tagName : string ;
39
- value ?: string ;
40
- attrs : { name : string ; value : string } [ ] ;
41
- childNodes : ParsedNode [ ] ;
42
- parentNode : ParsedNode ;
43
- sourceCodeLocation : Location & { startTag : Location ; endTag : Location } ;
44
- } ;
45
-
46
// Patterns for locating Svelte block tags and `{@html` tags in markup text.
// All of them are created with the flags `igms`; because of `g`, exec() keeps
// state in `lastIndex` between calls.
// NOTE(review): this span is a diff rendering - lines marked `-` are the
// superseded patterns, lines marked `+` are their replacements.
- const regexIf = new RegExp ( '{#if\\s(.*?)*}' , 'igms' ) ;
46
// Opening `{#if ...}`: lazily matches up to the first `}`.
+ const regexIf = new RegExp ( '{#if\\s.*?}' , 'igms' ) ;
47
47
// Closing `{/if}`.
const regexIfEnd = new RegExp ( '{/if}' , 'igms' ) ;
48
- const regexEach = new RegExp ( '{#each\\s( .*?)* }' , 'igms' ) ;
48
// Opening `{#each ...}`: lazily matches up to the first `}`.
+ const regexEach = new RegExp ( '{#each\\s.*?}' , 'igms' ) ;
49
49
// Closing `{/each}`.
const regexEachEnd = new RegExp ( '{/each}' , 'igms' ) ;
50
- const regexAwait = new RegExp ( '{#await\\s( .*?)* }' , 'igms' ) ;
50
// Opening `{#await ...}`: lazily matches up to the first `}`.
+ const regexAwait = new RegExp ( '{#await\\s.*?}' , 'igms' ) ;
51
51
// Closing `{/await}`.
const regexAwaitEnd = new RegExp ( '{/await}' , 'igms' ) ;
52
// Start of an `{@html ` tag only: the pattern has no closing `}`, and the
// trailing lazy `.*?` matches the empty string, so just `{@html` plus one
// whitespace character is consumed.
+ const regexHtml = new RegExp ( '{@html\\s.*?' , 'igms' ) ;
52
53
53
54
/**
54
55
* Extracts a tag (style or script) from the given text
@@ -57,76 +58,72 @@ const regexAwaitEnd = new RegExp('{/await}', 'igms');
57
58
* @param source text content to extract tag from
58
59
* @param tag the tag to extract
59
60
*/
60
- function extractTags ( source : string , tag : 'script' | 'style' ) : TagInformation [ ] {
61
- const { childNodes } = parse5 . parseFragment ( source , {
62
- sourceCodeLocationInfo : true ,
63
- } ) as { childNodes : ParsedNode [ ] } ;
64
-
65
- const matchedNodes : ParsedNode [ ] = [ ] ;
66
- let currentSvelteDirective ;
67
- for ( const node of childNodes ) {
68
- /**
69
- * skip matching tags if we are inside a directive
70
- *
71
- * extractTag's goal is solely to identify the top level <script> or <style>.
72
- *
73
- * therefore only iterating through top level childNodes is a feature we want!
74
- *
75
- * however, we cannot do a naive childNodes.find() because context matters.
76
- * if we have a <script> tag inside an {#if}, we want to skip that until the {/if}.
77
- * if we have a <script> tag inside an {#each}, we want to skip that until the {/each}.
78
- * if we have a <script> tag inside an {#await}, we want to skip that until the {/await}.
79
- *
80
- * and so on. So we use a tiny inSvelteDirective 'state machine' to track this
81
- * and use regex to detect the svelte directives.
82
- * We might need to improve this regex in future.
83
- */
84
- if ( currentSvelteDirective ) {
85
- if ( node . value && node . nodeName === '#text' ) {
86
- if (
87
- ( currentSvelteDirective === 'if' && regexIfEnd . exec ( node . value ) ) ||
88
- ( currentSvelteDirective === 'each' && regexEachEnd . exec ( node . value ) ) ||
89
- ( currentSvelteDirective === 'await' && regexAwaitEnd . exec ( node . value ) )
90
- ) {
91
- currentSvelteDirective = undefined ;
92
- }
93
- }
94
- } else {
95
- if ( node . value && node . nodeName === '#text' ) {
96
- // potentially a svelte directive
97
- if ( regexIf . exec ( node . value ) ) currentSvelteDirective = 'if' ;
98
- else if ( regexEach . exec ( node . value ) ) currentSvelteDirective = 'each' ;
99
- else if ( regexAwait . exec ( node . value ) ) currentSvelteDirective = 'await' ;
100
- } else if ( isMatchingTag ( source , node , tag ) ) {
101
- matchedNodes . push ( node ) ;
102
- }
103
- }
61
+ function extractTags ( text : string , tag : 'script' | 'style' ) : TagInformation [ ] {
62
+ const rootNodes = parseHtml ( text ) . roots ;
63
+ const matchedNodes = rootNodes
64
+ . filter ( ( node ) => node . tag === tag )
65
+ . filter ( ( tag ) => {
66
+ return isNotInsideControlFlowTag ( tag ) && isNotInsideHtmlTag ( tag ) ;
67
+ } ) ;
68
+ return matchedNodes . map ( transformToTagInfo ) ;
69
+
70
/**
 * Checks that the given root node is not wrapped in an `{#if}`, `{#each}`
 * or `{#await}` block.
 *
 * Only the text between root nodes from the tag onwards is inspected. If,
 * for one of the block kinds, a closing `{/X}` shows up there earlier than
 * the next opening `{#X ...}`, the tag counts as being inside such a block.
 *
 * @param tag a root node of the parsed document
 * @returns `false` if a block is closed after the tag before being opened
 */
function isNotInsideControlFlowTag(tag: Node) {
    const nodesFromTag = rootNodes.slice(rootNodes.indexOf(tag));

    // Gather the text following each node up to the start of the next one
    // (up to the end of the text for the last node).
    let rootContentAfterTag = '';
    nodesFromTag.forEach((current, i) => {
        rootContentAfterTag += text.substring(current.end, nodesFromTag[i + 1]?.start);
    });

    const blockPairs: [RegExp, RegExp][] = [
        [regexIf, regexIfEnd],
        [regexEach, regexEachEnd],
        [regexAwait, regexAwaitEnd],
    ];
    for (const [opening, closing] of blockPairs) {
        // Rewind both regexes so that exec() searches from the beginning.
        opening.lastIndex = 0;
        closing.lastIndex = 0;
        const openMatch = opening.exec(rootContentAfterTag);
        const closeMatch = closing.exec(rootContentAfterTag);
        // A missing match is treated as lying behind everything else.
        const openAt = openMatch?.index ?? text.length;
        const closeAt = closeMatch?.index ?? text.length;
        if (closeAt < openAt) {
            return false;
        }
    }
    return true;
}
105
94
106
- return matchedNodes . map ( transformToTagInfo ) ;
95
/**
 * Checks that the given root node is not part of an unclosed `{@html ...`
 * expression.
 *
 * Only the text between root nodes BEFORE the tag is inspected. If the
 * position reported by `regexLastIndexOf` for `{@html ` (presumably the
 * index of its last occurrence - helper not visible here) lies behind the
 * last `}`, the moustache tag is still open and the node is inside it.
 *
 * @param tag a root node of the parsed document
 * @returns `false` if the tag follows an `{@html ` that is not closed yet
 */
function isNotInsideHtmlTag(tag: Node) {
    const nodesBeforeTag = rootNodes.slice(0, rootNodes.indexOf(tag));
    const rootContentBeforeTag = [{ start: 0, end: 0 }, ...nodesBeforeTag]
        .map((node, idx) => {
            // The last gap has no following node in `nodesBeforeTag`; it has
            // to stop at the tag itself. Without this fallback substring()
            // would run to the end of the text and drag in the tag, its
            // content and everything after it, so a later `}` could hide an
            // `{@html ` that is still open in front of the tag.
            const gapEnd = nodesBeforeTag[idx]?.start ?? tag.start;
            return text.substring(node.end, gapEnd);
        })
        .join('');

    return !(
        regexLastIndexOf(rootContentBeforeTag, regexHtml) >
        rootContentBeforeTag.lastIndexOf('}')
    );
}
107
112
108
- function transformToTagInfo ( matchedNode : ParsedNode ) {
109
- const SCL = matchedNode . sourceCodeLocation ; // shorthand
110
- const attributes = parseAttributes ( matchedNode . attrs ) ;
111
- /**
112
- * Note: `content` will only show top level child node content.
113
- * This is ok given that extractTag is only meant to extract top level
114
- * <style> and <script> tags. But if that ever changes we may have to make this
115
- * recurse and concat all childnodes.
116
- */
117
- const content = matchedNode . childNodes [ 0 ] ?. value || '' ;
118
- const start = SCL . startTag . endOffset ;
119
- const end = SCL . endTag . startOffset ;
120
- const startPos = positionAt ( start , source ) ;
121
- const endPos = positionAt ( end , source ) ;
113
+ function transformToTagInfo ( matchedNode : Node ) {
114
+ const start = matchedNode . startTagEnd ?? matchedNode . start ;
115
+ const end = matchedNode . endTagStart ?? matchedNode . end ;
116
+ const startPos = positionAt ( start , text ) ;
117
+ const endPos = positionAt ( end , text ) ;
122
118
const container = {
123
- start : SCL . startTag . startOffset ,
124
- end : SCL . endTag . endOffset ,
119
+ start : matchedNode . start ,
120
+ end : matchedNode . end ,
125
121
} ;
122
+ const content = text . substring ( start , end ) ;
126
123
127
124
return {
128
125
content,
129
- attributes,
126
+ attributes : parseAttributes ( matchedNode . attributes ) ,
130
127
start,
131
128
end,
132
129
startPos,
0 commit comments