Skip to content

Commit a880e5f

Browse files
committed
Update parse5
1 parent 8213a17 commit a880e5f

File tree

3 files changed

+95
-96
lines changed

3 files changed

+95
-96
lines changed

index.js

Lines changed: 84 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,24 @@ var toParse5 = require('hast-util-to-parse5');
77
var voids = require('html-void-elements');
88
var ns = require('web-namespaces');
99
var zwitch = require('zwitch');
10+
var xtend = require('xtend');
1011

1112
module.exports = wrap;
1213

1314
var IN_TEMPLATE_MODE = 'IN_TEMPLATE_MODE';
1415
var CHARACTER_TOKEN = 'CHARACTER_TOKEN';
1516
var START_TAG_TOKEN = 'START_TAG_TOKEN';
1617
var END_TAG_TOKEN = 'END_TAG_TOKEN';
17-
var HIBERNATION_TOKEN = 'HIBERNATION_TOKEN';
1818
var COMMENT_TOKEN = 'COMMENT_TOKEN';
1919
var DOCTYPE_TOKEN = 'DOCTYPE_TOKEN';
20-
var DOCUMENT = 'document';
21-
var FRAGMENT = 'fragment';
2220

2321
function wrap(tree, file) {
24-
var parser = new Parser({locationInfo: true});
22+
var parser = new Parser({sourceCodeLocationInfo: true, scriptingEnabled: false});
2523
var one = zwitch('type');
26-
var mode = inferMode(tree);
24+
var tokenizer;
2725
var preprocessor;
26+
var posTracker;
27+
var locationTracker;
2828
var result;
2929

3030
one.handlers.root = root;
@@ -35,7 +35,7 @@ function wrap(tree, file) {
3535
one.handlers.raw = raw;
3636
one.unknown = unknown;
3737

38-
result = fromParse5(mode === FRAGMENT ? fragment() : document(), file);
38+
result = fromParse5(documentMode(tree) ? document() : fragment(), file);
3939

4040
/* Unpack if possible and when not given a `root`. */
4141
if (tree.type !== 'root' && result.children.length === 1) {
@@ -77,7 +77,10 @@ function wrap(tree, file) {
7777
parser._resetInsertionMode();
7878
parser._findFormInFragmentContext();
7979

80-
preprocessor = parser.tokenizer.preprocessor;
80+
tokenizer = parser.tokenizer;
81+
preprocessor = tokenizer.preprocessor;
82+
locationTracker = tokenizer.__mixins[0];
83+
posTracker = locationTracker.posTracker;
8184

8285
one(tree);
8386

@@ -90,6 +93,10 @@ function wrap(tree, file) {
9093
var doc = parser.treeAdapter.createDocument();
9194

9295
parser._bootstrap(doc, null);
96+
tokenizer = parser.tokenizer;
97+
preprocessor = tokenizer.preprocessor;
98+
locationTracker = tokenizer.__mixins[0];
99+
posTracker = locationTracker.posTracker;
93100

94101
one(tree);
95102

@@ -100,7 +107,7 @@ function wrap(tree, file) {
100107
var length = 0;
101108
var index = -1;
102109

103-
/* istanbul ignore else - invalid nodes, see wooorm/rehype-raw#7. */
110+
/* istanbul ignore else - invalid nodes, see rehypejs/rehype-raw#7. */
104111
if (nodes) {
105112
length = nodes.length;
106113
}
@@ -127,145 +134,136 @@ function wrap(tree, file) {
127134
}
128135

129136
function text(node) {
130-
var start = pos.start(node);
131137
parser._processToken({
132138
type: CHARACTER_TOKEN,
133139
chars: node.value,
134-
location: {
135-
line: start.line,
136-
col: start.column,
137-
startOffset: start.offset,
138-
endOffset: pos.end(node).offset
139-
}
140+
location: createParse5Location(node)
140141
});
141142
}
142143

143144
function doctype(node) {
144145
var p5 = toParse5(node);
146+
145147
parser._processToken({
146148
type: DOCTYPE_TOKEN,
147149
name: p5.name,
148150
forceQuirks: false,
149151
publicId: p5.publicId,
150-
systemId: p5.systemId
152+
systemId: p5.systemId,
153+
location: createParse5Location(node)
151154
});
152155
}
153156

154157
function comment(node) {
155-
var start = pos.start(node);
156158
parser._processToken({
157159
type: COMMENT_TOKEN,
158160
data: node.value,
159-
location: {
160-
line: start.line,
161-
col: start.column,
162-
startOffset: start.offset,
163-
endOffset: pos.end(node).offset
164-
}
161+
location: createParse5Location(node)
165162
});
166163
}
167164

168165
function raw(node) {
169-
var start = pos.start(node).offset;
166+
var start = pos.start(node);
167+
var token;
170168

169+
// Reset preprocessor:
170+
// https://github.com/inikulin/parse5/blob/0491902/packages/parse5/lib/tokenizer/preprocessor.js
171171
preprocessor.html = null;
172+
preprocessor.endOfChunkHit = false;
173+
preprocessor.lastChunkWritten = false;
172174
preprocessor.lastCharPos = -1;
173175
preprocessor.pos = -1;
174176

175-
if (start !== null) {
176-
preprocessor.__locTracker.droppedBufferSize = start;
177-
}
178-
179-
parser.tokenizer.write(node.value);
180-
181-
run(parser);
182-
}
183-
}
184-
185-
function run(p) {
186-
var tokenizer = p.tokenizer;
187-
var token;
188-
189-
while (!p.stopped) {
190-
p._setupTokenizerCDATAMode();
191-
192-
token = tokenizer.getNextToken();
193-
194-
if (token.type === HIBERNATION_TOKEN) {
195-
token = tokenizer.currentCharacterToken || tokenizer.currentToken;
196-
197-
if (token) {
198-
p._processInputToken(token);
199-
}
200-
201-
tokenizer.currentToken = null;
202-
tokenizer.currentCharacterToken = null;
203-
204-
break;
177+
// Reset preprocessor mixin:
178+
// https://github.com/inikulin/parse5/blob/0491902/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js
179+
posTracker.droppedBufferSize = 0;
180+
posTracker.line = start.line;
181+
posTracker.col = 1;
182+
posTracker.offset = 0;
183+
posTracker.lineStartPos = -start.column + 1;
184+
posTracker.droppedBufferSize = start.offset;
185+
186+
// Reset location tracker:
187+
// https://github.com/inikulin/parse5/blob/0491902/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js
188+
locationTracker.currentAttrLocation = null;
189+
locationTracker.ctLoc = createParse5Location(node);
190+
191+
// See the code for `parse` and `parseFragment`:
192+
// https://github.com/inikulin/parse5/blob/0491902/packages/parse5/lib/parser/index.js#L371
193+
tokenizer.write(node.value);
194+
parser._runParsingLoop(null);
195+
196+
// Process final characters if they’re still there after hibernating.
197+
// Similar to:
198+
// https://github.com/inikulin/parse5/blob/3bfa7d9/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js#L95
199+
token = tokenizer.currentCharacterToken;
200+
201+
if (token) {
202+
token.location.endLine = posTracker.line;
203+
token.location.endCol = posTracker.col + 1;
204+
token.location.endOffset = posTracker.offset + 1;
205+
parser._processToken(token);
205206
}
206207

207-
p._processInputToken(token);
208+
// Reset tokenizer:
209+
// https://github.com/inikulin/parse5/blob/8b0048e/packages/parse5/lib/tokenizer/index.js#L215
210+
tokenizer.currentToken = null;
211+
tokenizer.currentCharacterToken = null;
212+
tokenizer.currentAttr = null;
208213
}
209214
}
210215

211216
function startTag(node) {
212-
var start = pos.start(node);
213-
var end = pos.end(node);
217+
var location = createParse5Location(node);
218+
219+
location.startTag = xtend(location);
214220

215221
return {
216222
type: START_TAG_TOKEN,
217223
tagName: node.tagName,
218224
selfClosing: false,
219225
attrs: attributes(node),
220-
location: {
221-
line: start.line,
222-
col: start.column,
223-
startOffset: start.offset,
224-
endOffset: end.offset,
225-
attrs: {},
226-
startTag: {
227-
line: start.line,
228-
col: start.column,
229-
startOffset: start.offset,
230-
endOffset: end.offset
231-
}
232-
}
226+
location: location
233227
};
234228
}
235229

236230
function attributes(node) {
237-
return toParse5({
238-
type: 'element',
239-
properties: node.properties
240-
}).attrs;
231+
return toParse5({type: 'element', properties: node.properties}).attrs;
241232
}
242233

243234
function endTag(node) {
244-
var end = pos.end(node);
235+
var location = createParse5Location(node);
236+
237+
location.endTag = xtend(location);
245238

246239
return {
247240
type: END_TAG_TOKEN,
248241
tagName: node.tagName,
249242
attrs: [],
250-
location: {
251-
line: end.line,
252-
col: end.column,
253-
startOffset: end.offset,
254-
endOffset: end.offset
255-
}
243+
location: location
256244
};
257245
}
258246

259247
function unknown(node) {
260248
throw new Error('Cannot compile `' + node.type + '` node');
261249
}
262250

263-
function inferMode(node) {
251+
function documentMode(node) {
264252
var head = node.type === 'root' ? node.children[0] : node;
265253

266-
if (head && (head.type === 'doctype' || head.tagName === 'html')) {
267-
return DOCUMENT;
268-
}
254+
return head && (head.type === 'doctype' || head.tagName === 'html');
255+
}
269256

270-
return FRAGMENT;
257+
function createParse5Location(node) {
258+
var start = pos.start(node);
259+
var end = pos.end(node);
260+
261+
return {
262+
startLine: start.line,
263+
startCol: start.column,
264+
startOffset: start.offset,
265+
endLine: end.line,
266+
endCol: end.column,
267+
endOffset: end.offset
268+
};
271269
}

package.json

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@
2020
"index.js"
2121
],
2222
"dependencies": {
23-
"hast-util-from-parse5": "^2.0.0",
24-
"hast-util-to-parse5": "^2.0.0",
23+
"hast-util-from-parse5": "^3.0.0",
24+
"hast-util-to-parse5": "^3.0.0",
2525
"html-void-elements": "^1.0.1",
26-
"parse5": "^3.0.3",
26+
"parse5": "^5.0.0",
2727
"unist-util-position": "^3.0.0",
2828
"web-namespaces": "^1.0.0",
29+
"xtend": "^4.0.1",
2930
"zwitch": "^1.0.0"
3031
},
3132
"devDependencies": {

test.js

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ test('raw', function (t) {
6060
h('head'),
6161
h('body')
6262
]),
63-
'should pass documents through'
63+
'should pass documents through (#1)'
6464
);
6565

6666
t.deepEqual(
@@ -75,7 +75,7 @@ test('raw', function (t) {
7575
h('body')
7676
])
7777
]),
78-
'should pass documents through'
78+
'should pass documents through (#2)'
7979
);
8080

8181
t.deepEqual(
@@ -87,7 +87,7 @@ test('raw', function (t) {
8787
h('img', {alt: 'foo', src: 'bar.jpg'}),
8888
h('img', {alt: 'foo', src: 'bar.jpg'})
8989
]),
90-
'should pass raw nodes through'
90+
'should pass raw nodes through (#1)'
9191
);
9292

9393
t.deepEqual(
@@ -99,7 +99,7 @@ test('raw', function (t) {
9999
h('p', 'Foo, bar!'),
100100
h('ol', h('li', 'baz'))
101101
]),
102-
'should pass raw nodes through'
102+
'should pass raw nodes through (#2)'
103103
);
104104

105105
t.end();
@@ -161,7 +161,7 @@ test('integration', function (t) {
161161
}],
162162
position: {
163163
start: {line: 3, column: 1, offset: 19},
164-
end: null
164+
end: {line: 5, column: 1, offset: 37}
165165
}
166166
},
167167
{
@@ -422,12 +422,12 @@ test('integration', function (t) {
422422
value: 'Hello, world!',
423423
position: {
424424
start: {line: 20, column: 4, offset: 256},
425-
end: {}
425+
end: {line: 20, column: 17, offset: 270}
426426
}
427427
}],
428428
position: {
429429
start: {line: 20, column: 1, offset: 253},
430-
end: null
430+
end: {line: 20, column: 17, offset: 270}
431431
}
432432
}
433433
],

0 commit comments

Comments
 (0)