Skip to content

Commit 944ec04

Browse files
authored
(fix) can extract with self-closing component before it (#251)
* (fix) can extract with self-closing component before it (#194). This now uses the HTML language service, which provides a simple parse method that handles the tag-nesting logic for us. On top of that, we only have to check additionally that we are not inside a moustache tag.
1 parent 87dc5c6 commit 944ec04

File tree

3 files changed

+118
-100
lines changed

3 files changed

+118
-100
lines changed

packages/language-server/package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
"estree-walker": "^2.0.1",
5151
"lodash": "^4.17.10",
5252
"magic-string": "^0.25.3",
53-
"parse5": "^5.1.0",
5453
"prettier": "2.0.5",
5554
"prettier-plugin-svelte": "1.1.0",
5655
"source-map": "^0.7.3",

packages/language-server/src/lib/documents/utils.ts

Lines changed: 87 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import { clamp, isInRange } from '../../utils';
1+
import { clamp, isInRange, regexLastIndexOf } from '../../utils';
22
import { Position, Range } from 'vscode-languageserver';
3-
import parse5, { Location } from 'parse5';
3+
import { Node, getLanguageService } from 'vscode-html-languageservice';
44

55
export interface TagInformation {
66
content: string;
@@ -12,43 +12,44 @@ export interface TagInformation {
1212
container: { start: number; end: number };
1313
}
1414

15-
function parseAttributes(attrlist: { name: string; value: string }[]): Record<string, string> {
15+
function parseAttributes(
16+
rawAttrs: Record<string, string | null> | undefined,
17+
): Record<string, string> {
1618
const attrs: Record<string, string> = {};
17-
attrlist.forEach((attr) => {
18-
attrs[attr.name] = attr.value === '' ? attr.name : attr.value; // in order to support boolean attributes (see utils.test.ts)
19+
if (!rawAttrs) {
20+
return attrs;
21+
}
22+
23+
Object.keys(rawAttrs).forEach((attrName) => {
24+
const attrValue = rawAttrs[attrName];
25+
attrs[attrName] = attrValue === null ? attrName : removeOuterQuotes(attrValue);
1926
});
2027
return attrs;
21-
}
2228

23-
function isMatchingTag(source: string, node: ParsedNode, tag: string): boolean {
24-
if (node.nodeName !== tag) {
25-
return false;
29+
function removeOuterQuotes(attrValue: string) {
30+
if (
31+
(attrValue.startsWith('"') && attrValue.endsWith('"')) ||
32+
(attrValue.startsWith("'") && attrValue.endsWith("'"))
33+
) {
34+
return attrValue.slice(1, attrValue.length - 1);
35+
}
36+
return attrValue;
2637
}
38+
}
2739

28-
// node name equals tag, but we still have to check for case sensitivity
29-
const orgStart = node.sourceCodeLocation?.startTag.startOffset || 0;
30-
const orgEnd = node.sourceCodeLocation?.startTag.endOffset || 0;
31-
const tagHtml = source.substring(orgStart, orgEnd);
32-
return tagHtml.startsWith(`<${tag}`);
40+
const parser = getLanguageService();
41+
function parseHtml(text: string) {
42+
// We can safely only set getText because only this is used for parsing
43+
return parser.parseHTMLDocument(<any>{ getText: () => text });
3344
}
3445

35-
// parse5's DefaultTreeNode type is insufficient; make our own type to make TS happy
36-
type ParsedNode = {
37-
nodeName: string;
38-
tagName: string;
39-
value?: string;
40-
attrs: { name: string; value: string }[];
41-
childNodes: ParsedNode[];
42-
parentNode: ParsedNode;
43-
sourceCodeLocation: Location & { startTag: Location; endTag: Location };
44-
};
45-
46-
const regexIf = new RegExp('{#if\\s(.*?)*}', 'igms');
46+
const regexIf = new RegExp('{#if\\s.*?}', 'igms');
4747
const regexIfEnd = new RegExp('{/if}', 'igms');
48-
const regexEach = new RegExp('{#each\\s(.*?)*}', 'igms');
48+
const regexEach = new RegExp('{#each\\s.*?}', 'igms');
4949
const regexEachEnd = new RegExp('{/each}', 'igms');
50-
const regexAwait = new RegExp('{#await\\s(.*?)*}', 'igms');
50+
const regexAwait = new RegExp('{#await\\s.*?}', 'igms');
5151
const regexAwaitEnd = new RegExp('{/await}', 'igms');
52+
const regexHtml = new RegExp('{@html\\s.*?', 'igms');
5253

5354
/**
5455
* Extracts a tag (style or script) from the given text
@@ -57,76 +58,72 @@ const regexAwaitEnd = new RegExp('{/await}', 'igms');
5758
* @param source text content to extract tag from
5859
* @param tag the tag to extract
5960
*/
60-
function extractTags(source: string, tag: 'script' | 'style'): TagInformation[] {
61-
const { childNodes } = parse5.parseFragment(source, {
62-
sourceCodeLocationInfo: true,
63-
}) as { childNodes: ParsedNode[] };
64-
65-
const matchedNodes: ParsedNode[] = [];
66-
let currentSvelteDirective;
67-
for (const node of childNodes) {
68-
/**
69-
* skip matching tags if we are inside a directive
70-
*
71-
* extractTag's goal is solely to identify the top level <script> or <style>.
72-
*
73-
* therefore only iterating through top level childNodes is a feature we want!
74-
*
75-
* however, we cannot do a naive childNodes.find() because context matters.
76-
* if we have a <script> tag inside an {#if}, we want to skip that until the {/if}.
77-
* if we have a <script> tag inside an {#each}, we want to skip that until the {/each}.
78-
* if we have a <script> tag inside an {#await}, we want to skip that until the {/await}.
79-
*
80-
* and so on. So we use a tiny inSvelteDirective 'state machine' to track this
81-
* and use regex to detect the svelte directives.
82-
* We might need to improve this regex in future.
83-
*/
84-
if (currentSvelteDirective) {
85-
if (node.value && node.nodeName === '#text') {
86-
if (
87-
(currentSvelteDirective === 'if' && regexIfEnd.exec(node.value)) ||
88-
(currentSvelteDirective === 'each' && regexEachEnd.exec(node.value)) ||
89-
(currentSvelteDirective === 'await' && regexAwaitEnd.exec(node.value))
90-
) {
91-
currentSvelteDirective = undefined;
92-
}
93-
}
94-
} else {
95-
if (node.value && node.nodeName === '#text') {
96-
// potentially a svelte directive
97-
if (regexIf.exec(node.value)) currentSvelteDirective = 'if';
98-
else if (regexEach.exec(node.value)) currentSvelteDirective = 'each';
99-
else if (regexAwait.exec(node.value)) currentSvelteDirective = 'await';
100-
} else if (isMatchingTag(source, node, tag)) {
101-
matchedNodes.push(node);
102-
}
103-
}
61+
function extractTags(text: string, tag: 'script' | 'style'): TagInformation[] {
62+
const rootNodes = parseHtml(text).roots;
63+
const matchedNodes = rootNodes
64+
.filter((node) => node.tag === tag)
65+
.filter((tag) => {
66+
return isNotInsideControlFlowTag(tag) && isNotInsideHtmlTag(tag);
67+
});
68+
return matchedNodes.map(transformToTagInfo);
69+
70+
/**
71+
* For every match AFTER the tag do a search for `{/X`.
72+
* If that is BEFORE `{#X`, we are inside a moustache tag.
73+
*/
74+
function isNotInsideControlFlowTag(tag: Node) {
75+
const nodes = rootNodes.slice(rootNodes.indexOf(tag));
76+
const rootContentAfterTag = nodes
77+
.map((node, idx) => {
78+
return text.substring(node.end, nodes[idx + 1]?.start);
79+
})
80+
.join('');
81+
82+
return ![
83+
[regexIf, regexIfEnd],
84+
[regexEach, regexEachEnd],
85+
[regexAwait, regexAwaitEnd],
86+
].some((pair) => {
87+
pair[0].lastIndex = 0;
88+
pair[1].lastIndex = 0;
89+
const start = pair[0].exec(rootContentAfterTag);
90+
const end = pair[1].exec(rootContentAfterTag);
91+
return (end?.index ?? text.length) < (start?.index ?? text.length);
92+
});
10493
}
10594

106-
return matchedNodes.map(transformToTagInfo);
95+
/**
96+
* For every match BEFORE the tag do a search for `{@html`.
97+
* If the last `{@html` comes AFTER the last `}`, it is still unclosed and we are inside a moustache tag.
98+
*/
99+
function isNotInsideHtmlTag(tag: Node) {
100+
const nodes = rootNodes.slice(0, rootNodes.indexOf(tag));
101+
const rootContentBeforeTag = [{ start: 0, end: 0 }, ...nodes]
102+
.map((node, idx) => {
103+
return text.substring(node.end, nodes[idx]?.start);
104+
})
105+
.join('');
106+
107+
return !(
108+
regexLastIndexOf(rootContentBeforeTag, regexHtml) >
109+
rootContentBeforeTag.lastIndexOf('}')
110+
);
111+
}
107112

108-
function transformToTagInfo(matchedNode: ParsedNode) {
109-
const SCL = matchedNode.sourceCodeLocation; // shorthand
110-
const attributes = parseAttributes(matchedNode.attrs);
111-
/**
112-
* Note: `content` will only show top level child node content.
113-
* This is ok given that extractTag is only meant to extract top level
114-
* <style> and <script> tags. But if that ever changes we may have to make this
115-
* recurse and concat all childnodes.
116-
*/
117-
const content = matchedNode.childNodes[0]?.value || '';
118-
const start = SCL.startTag.endOffset;
119-
const end = SCL.endTag.startOffset;
120-
const startPos = positionAt(start, source);
121-
const endPos = positionAt(end, source);
113+
function transformToTagInfo(matchedNode: Node) {
114+
const start = matchedNode.startTagEnd ?? matchedNode.start;
115+
const end = matchedNode.endTagStart ?? matchedNode.end;
116+
const startPos = positionAt(start, text);
117+
const endPos = positionAt(end, text);
122118
const container = {
123-
start: SCL.startTag.startOffset,
124-
end: SCL.endTag.endOffset,
119+
start: matchedNode.start,
120+
end: matchedNode.end,
125121
};
122+
const content = text.substring(start, end);
126123

127124
return {
128125
content,
129-
attributes,
126+
attributes: parseAttributes(matchedNode.attributes),
130127
start,
131128
end,
132129
startPos,

packages/language-server/test/lib/documents/utils.test.ts

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,28 @@ describe('document/utils', () => {
6868
assert.deepStrictEqual(attributes, { type: 'typescript' });
6969
});
7070

71+
it('can extract with self-closing component before it', () => {
72+
const extracted = extractStyleTag('<SelfClosing /><style></style>');
73+
assert.deepStrictEqual(extracted, {
74+
start: 22,
75+
end: 22,
76+
startPos: {
77+
character: 22,
78+
line: 0,
79+
},
80+
endPos: {
81+
character: 22,
82+
line: 0,
83+
},
84+
attributes: {},
85+
content: '',
86+
container: {
87+
end: 30,
88+
start: 15,
89+
},
90+
});
91+
});
92+
7193
it('extracts style tag', () => {
7294
const text = `
7395
<p>bla</p>
@@ -118,30 +140,30 @@ describe('document/utils', () => {
118140
const text = `
119141
{#if name}
120142
<script>
121-
console.log('not top level')
143+
console.log('if not top level')
122144
</script>
123145
{/if}
124146
<ul>
125147
{#each cats as cat}
126148
<script>
127-
console.log('not top level')
149+
console.log('each not top level')
128150
</script>
129151
{/each}
130152
</ul>
131153
{#await promise}
132154
<script>
133-
console.log('not top level')
155+
console.log('await not top level')
134156
</script>
135157
{:then number}
136158
<script>
137-
console.log('not top level')
159+
console.log('then not top level')
138160
</script>
139161
{:catch error}
140162
<script>
141-
console.log('not top level')
163+
console.log('catch not top level')
142164
</script>
143165
{/await}
144-
<p>{@html <script> consolelog('not top level')</script>}</p>
166+
<p>{@html <script> console.log('html not top level')</script>}</p>
145167
{@html mycontent}
146168
{@debug myvar}
147169
<!-- p{ color: blue; }</script> -->
@@ -156,11 +178,11 @@ describe('document/utils', () => {
156178
assert.deepStrictEqual(extractScriptTags(text)?.script, {
157179
content: 'top level script',
158180
attributes: {},
159-
start: 1212,
160-
end: 1228,
181+
start: 1243,
182+
end: 1259,
161183
startPos: Position.create(34, 24),
162184
endPos: Position.create(34, 40),
163-
container: { start: 1204, end: 1237 },
185+
container: { start: 1235, end: 1268 },
164186
});
165187
});
166188

0 commit comments

Comments
 (0)