31
31
* parse(value: string | null | undefined): NlcstRoot
32
32
* tokenize(value: string | null | undefined): Array<NlcstSentenceContent>
33
33
* }} ParserInstance
34
+ * nlcst parser.
35
+ *
36
+ * For example, `parse-dutch`, `parse-english`, or `parse-latin`.
34
37
* @typedef {new () => ParserInstance } ParserConstructor
38
+ * Create a new parser.
35
39
*
36
40
* @typedef Options
37
- * @property {Array<string> } [ignore]
41
+ * Configuration.
42
+ * @property {Array<string> | null | undefined } [ignore]
38
43
* List of mdast node types to ignore.
39
- * @property {Array<string> } [source]
44
+ * @property {Array<string> | null | undefined } [source]
40
45
* List of mdast node types to mark as `source`.
41
46
*
42
- * @typedef Context
47
+ * @typedef State
48
+ * Info passed around.
43
49
* @property {string } doc
50
+ * Whole document.
44
51
* @property {Location } place
52
+ * Location info.
45
53
* @property {ParserInstance } parser
54
+ * Parser.
46
55
* @property {Array<string> } ignore
56
+ * List of mdast node types to ignore.
47
57
* @property {Array<string> } source
58
+ * List of mdast node types to mark as source.
48
59
*/
49
60
50
61
import { toString } from 'nlcst-to-string'
@@ -63,12 +74,19 @@ const terminalMarker = /^([!.?\u2026\u203D]+)$/
63
74
* Transform a `tree` in mdast to nlcst.
64
75
*
65
76
* @param {MdastNode } tree
77
+ * mdast tree to transform.
66
78
* @param {VFile } file
67
- * @param {ParserInstance|ParserConstructor } Parser
68
- * @param {Options } [options]
79
+ * Virtual file.
80
+ * @param {ParserInstance | ParserConstructor } Parser
81
+ * Parser to use.
82
+ * @param {Options | null | undefined } [options]
83
+ * Configuration.
84
+ * @returns {NlcstRoot }
85
+ * nlcst tree.
69
86
*/
70
- // eslint-disable-next-line complexity
71
- export function toNlcst ( tree , file , Parser , options = { } ) {
87
+ export function toNlcst ( tree , file , Parser , options ) {
88
+ const options_ = options || { }
89
+
72
90
// Crash on invalid parameters.
73
91
if ( ! tree || ! tree . type ) {
74
92
throw new Error ( 'mdast-util-to-nlcst expected node' )
@@ -83,140 +101,151 @@ export function toNlcst(tree, file, Parser, options = {}) {
83
101
throw new Error ( 'mdast-util-to-nlcst expected parser' )
84
102
}
85
103
86
- if (
87
- ! tree . position ||
88
- ! tree . position . start ||
89
- ! tree . position . start . column ||
90
- ! tree . position . start . line
91
- ) {
104
+ if ( ! pointStart ( tree ) . line || ! pointStart ( tree ) . column ) {
92
105
throw new Error ( 'mdast-util-to-nlcst expected position on nodes' )
93
106
}
94
107
95
- const parser = 'parse' in Parser ? Parser : new Parser ( )
96
-
97
- /** @type {Context } */
98
- const context = {
108
+ /** @type {State } */
109
+ const state = {
99
110
doc : String ( file ) ,
100
111
place : location ( file ) ,
101
- parser,
102
- ignore : options . ignore
103
- ? defaultIgnore . concat ( options . ignore )
112
+ parser : 'parse' in Parser ? Parser : new Parser ( ) ,
113
+ ignore : options_ . ignore
114
+ ? [ ... defaultIgnore , ... options_ . ignore ]
104
115
: defaultIgnore ,
105
- source : options . source
106
- ? defaultSource . concat ( options . source )
116
+ source : options_ . source
117
+ ? [ ... defaultSource , ... options_ . source ]
107
118
: defaultSource
108
119
}
109
120
110
- const result = one ( context , tree )
121
+ return sentenceContentToRoot ( state , one ( state , tree ) || [ ] )
122
+ }
111
123
112
- if ( result && result . length > 0 ) {
113
- const start = pointStart ( result [ 0 ] )
114
- const end = pointEnd ( result [ result . length - 1 ] )
124
+ /**
125
+ * Turn sentence content into an nlcst root.
126
+ *
127
+ * @param {State } state
128
+ * State.
129
+ * @param {Array<NlcstSentenceContent> } nodes
130
+ * Sentence content.
131
+ * @returns {NlcstRoot }
132
+ * Root.
133
+ */
134
+ function sentenceContentToRoot ( state , nodes ) {
135
+ if ( nodes . length === 0 ) {
136
+ return { type : 'RootNode' , children : [ ] }
137
+ }
115
138
116
- // Turn into a sentence.
117
- /** @type {NlcstSentence } */
118
- const sentence = { type : 'SentenceNode' , children : result }
139
+ const start = pointStart ( nodes [ 0 ] )
140
+ const end = pointEnd ( nodes [ nodes . length - 1 ] )
119
141
120
- if ( start && start . line && end && end . line ) {
121
- sentence . position = { start , end }
122
- }
142
+ // Turn into a sentence.
143
+ /** @type { NlcstSentence } */
144
+ const sentence = { type : 'SentenceNode' , children : nodes }
123
145
124
- let index = - 1
125
- while ( parser . tokenizeSentencePlugins [ ++ index ] ) {
126
- parser . tokenizeSentencePlugins [ index ] ( sentence )
127
- }
146
+ if ( start && start . line && end && end . line ) {
147
+ sentence . position = { start, end}
148
+ }
128
149
129
- // Turn into a paragraph.
130
- /** @type {NlcstParagraph } */
131
- const paragraph = {
132
- type : 'ParagraphNode' ,
133
- children : splitNode ( sentence , 'PunctuationNode' , terminalMarker )
134
- }
135
- if ( start && start . line && end && end . line ) {
136
- paragraph . position = { start : { ...start } , end : { ...end } }
137
- }
150
+ let index = - 1
151
+ while ( state . parser . tokenizeSentencePlugins [ ++ index ] ) {
152
+ state . parser . tokenizeSentencePlugins [ index ] ( sentence )
153
+ }
138
154
139
- index = - 1
140
- while ( parser . tokenizeParagraphPlugins [ ++ index ] ) {
141
- parser . tokenizeParagraphPlugins [ index ] ( paragraph )
142
- }
155
+ // Turn into a paragraph.
156
+ /** @type {NlcstParagraph } */
157
+ const paragraph = {
158
+ type : 'ParagraphNode' ,
159
+ children : splitNode ( sentence , 'PunctuationNode' , terminalMarker )
160
+ }
161
+ if ( start && start . line && end && end . line ) {
162
+ paragraph . position = { start : { ...start } , end : { ...end } }
163
+ }
143
164
144
- /** @type {NlcstRoot } */
145
- const root = {
146
- type : 'RootNode' ,
147
- children : splitNode ( paragraph , 'WhiteSpaceNode' , newLine )
148
- }
149
- if ( start && start . line && end && end . line ) {
150
- root . position = { start : { ...start } , end : { ...end } }
151
- }
165
+ index = - 1
166
+ while ( state . parser . tokenizeParagraphPlugins [ ++ index ] ) {
167
+ state . parser . tokenizeParagraphPlugins [ index ] ( paragraph )
168
+ }
152
169
153
- index = - 1
154
- while ( parser . tokenizeRootPlugins [ ++ index ] ) {
155
- parser . tokenizeRootPlugins [ index ] ( root )
156
- }
170
+ // Turn into a root.
171
+ /** @type {NlcstRoot } */
172
+ const root = {
173
+ type : 'RootNode' ,
174
+ children : splitNode ( paragraph , 'WhiteSpaceNode' , newLine )
175
+ }
176
+ if ( start && start . line && end && end . line ) {
177
+ root . position = { start : { ...start } , end : { ...end } }
178
+ }
157
179
158
- return root
180
+ index = - 1
181
+ while ( state . parser . tokenizeRootPlugins [ ++ index ] ) {
182
+ state . parser . tokenizeRootPlugins [ index ] ( root )
159
183
}
160
184
161
- return { type : 'RootNode' , children : [ ] }
185
+ return root
162
186
}
163
187
164
188
/**
165
189
* Transform a single node.
166
- * @param {Context } config
190
+ *
191
+ * @param {State } state
192
+ * State.
167
193
* @param {MdastNode } node
168
- * @returns {Array<NlcstSentenceContent>|undefined }
194
+ * mdast node.
195
+ * @returns {Array<NlcstSentenceContent> | undefined }
196
+ * nlcst sentence content.
169
197
*/
170
- function one ( config , node ) {
171
- const start = node . position ? node . position . start . offset : undefined
172
-
173
- if ( ! config . ignore . includes ( node . type ) ) {
174
- if ( config . source . includes ( node . type ) && start && node . position ) {
175
- return patch (
176
- config ,
177
- [
178
- {
179
- type : 'SourceNode' ,
180
- value : config . doc . slice ( start , node . position . end . offset )
181
- }
182
- ] ,
183
- start
184
- )
185
- }
198
+ function one ( state , node ) {
199
+ if ( state . ignore . includes ( node . type ) ) {
200
+ return
201
+ }
186
202
187
- if ( 'children' in node ) {
188
- return all ( config , node )
189
- }
203
+ let start = node . position ? node . position . start . offset : undefined
204
+ const end = node . position ? node . position . end . offset : undefined
205
+ /** @type {Array<NlcstSentenceContent> | undefined } */
206
+ let results
190
207
191
- if ( ( node . type === 'image' || node . type === 'imageReference' ) && node . alt ) {
192
- return patch (
193
- config ,
194
- config . parser . tokenize ( node . alt ) ,
195
- typeof start === 'number' ? start + 2 : undefined
196
- )
208
+ if ( state . source . includes ( node . type ) ) {
209
+ if ( start !== undefined && end !== undefined ) {
210
+ results = [ { type : 'SourceNode' , value : state . doc . slice ( start , end ) } ]
197
211
}
198
-
199
- if ( node . type === 'break' ) {
200
- return patch ( config , [ { type : 'WhiteSpaceNode' , value : '\n' } ] , start )
212
+ } else if ( 'children' in node ) {
213
+ return all ( state , node )
214
+ } else if ( node . type === 'image' || node . type === 'imageReference' ) {
215
+ if ( node . alt ) {
216
+ results = state . parser . tokenize ( node . alt )
217
+
218
+ if ( typeof start === 'number' ) {
219
+ start += 2
220
+ }
201
221
}
222
+ } else if ( node . type === 'break' ) {
223
+ results = [ { type : 'WhiteSpaceNode' , value : '\n' } ]
224
+ } else if ( node . type === 'text' ) {
225
+ results = state . parser . tokenize ( node . value )
226
+ }
202
227
203
- if ( node . type === 'text' ) {
204
- return patch ( config , config . parser . tokenize ( node . value ) , start )
205
- }
228
+ if ( results ) {
229
+ patch ( state , results , start )
230
+ return results
206
231
}
207
232
}
208
233
209
234
/**
210
235
* Transform all nodes in `parent`.
211
- * @param {Context } config
236
+ *
237
+ * @param {State } state
238
+ * State.
212
239
* @param {MdastParent } parent
240
+ * mdast parent node.
213
241
* @returns {Array<NlcstSentenceContent> }
242
+ * nlcst sentence content.
214
243
*/
215
- function all ( config , parent ) {
244
+ function all ( state , parent ) {
216
245
let index = - 1
217
246
/** @type {Array<NlcstSentenceContent> } */
218
247
const results = [ ]
219
- /** @type {Point| undefined } */
248
+ /** @type {Point | undefined } */
220
249
let end
221
250
222
251
while ( ++ index < parent . children . length ) {
@@ -234,16 +263,17 @@ function all(config, parent) {
234
263
type : 'WhiteSpaceNode' ,
235
264
value : '\n' . repeat ( start . line - end . line )
236
265
}
237
- patch ( config , [ lineEnding ] , end . offset )
266
+ patch ( state , [ lineEnding ] , end . offset )
238
267
268
+ // Make sure it’ll be seen as a break between paragraphs.
239
269
if ( lineEnding . value . length < 2 ) {
240
270
lineEnding . value = '\n\n'
241
271
}
242
272
243
273
results . push ( lineEnding )
244
274
}
245
275
246
- const result = one ( config , child )
276
+ const result = one ( state , child )
247
277
if ( result ) results . push ( ...result )
248
278
end = pointEnd ( child )
249
279
}
@@ -253,40 +283,39 @@ function all(config, parent) {
253
283
254
284
/**
255
285
* Patch a position on each node in `nodes`.
286
+ *
256
287
* `offset` is the offset in `file` this run of content starts at.
257
288
*
258
- * @template {Array<NlcstContent>} T
259
- * @param {Context } config
260
- * @param {T } nodes
261
- * @param {number|undefined } offset
262
- * @returns {T }
289
+ * @param {State } state
290
+ * State.
291
+ * @param {Array<NlcstContent> } nodes
292
+ * nlcst sentence content.
293
+ * @param {number | undefined } offset
294
+ * Offset.
295
+ * @returns {void }
296
+ * Nothing.
263
297
*/
264
- function patch ( config , nodes , offset ) {
298
+ function patch ( state , nodes , offset ) {
265
299
let index = - 1
266
300
let start = offset
267
301
268
302
while ( ++ index < nodes . length ) {
269
303
const node = nodes [ index ]
270
304
271
305
if ( 'children' in node ) {
272
- patch ( config , node . children , start )
306
+ patch ( state , node . children , start )
273
307
}
274
308
275
309
const end =
276
310
typeof start === 'number' ? start + toString ( node ) . length : undefined
277
311
278
312
node . position =
279
313
start !== undefined && end !== undefined
280
- ? {
281
- start : config . place . toPoint ( start ) ,
282
- end : config . place . toPoint ( end )
283
- }
314
+ ? { start : state . place . toPoint ( start ) , end : state . place . toPoint ( end ) }
284
315
: undefined
285
316
286
317
start = end
287
318
}
288
-
289
- return nodes
290
319
}
291
320
292
321
// Ported from:
0 commit comments