Skip to content

Commit dddf9ae

Browse files
authored
Merge pull request #1686 from calculuschild/EmphasisFixes
2 parents a761316 + 6b729ed commit dddf9ae

File tree

9 files changed

+210
-129
lines changed

9 files changed

+210
-129
lines changed

src/Lexer.js

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,29 @@ module.exports = class Lexer {
319319
/**
320320
* Lexing/Compiling
321321
*/
322-
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
322+
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false, prevChar = '') {
323323
let token;
324324

325+
// String with links masked to avoid interference with em and strong
326+
let maskedSrc = src;
327+
let match;
328+
329+
// Mask out reflinks
330+
if (this.tokens.links) {
331+
const links = Object.keys(this.tokens.links);
332+
if (links.length > 0) {
333+
while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
334+
if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
335+
maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
336+
}
337+
}
338+
}
339+
}
340+
// Mask out other blocks
341+
while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
342+
maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
343+
}
344+
325345
while (src) {
326346
// escape
327347
if (token = this.tokenizer.escape(src)) {
@@ -360,15 +380,15 @@ module.exports = class Lexer {
360380
}
361381

362382
// strong
363-
if (token = this.tokenizer.strong(src)) {
383+
if (token = this.tokenizer.strong(src, maskedSrc, prevChar)) {
364384
src = src.substring(token.raw.length);
365385
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
366386
tokens.push(token);
367387
continue;
368388
}
369389

370390
// em
371-
if (token = this.tokenizer.em(src)) {
391+
if (token = this.tokenizer.em(src, maskedSrc, prevChar)) {
372392
src = src.substring(token.raw.length);
373393
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
374394
tokens.push(token);
@@ -414,6 +434,7 @@ module.exports = class Lexer {
414434
// text
415435
if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
416436
src = src.substring(token.raw.length);
437+
prevChar = token.raw.slice(-1);
417438
tokens.push(token);
418439
continue;
419440
}

src/Tokenizer.js

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -490,25 +490,49 @@ module.exports = class Tokenizer {
490490
}
491491
}
492492

493-
strong(src) {
494-
const cap = this.rules.inline.strong.exec(src);
495-
if (cap) {
496-
return {
497-
type: 'strong',
498-
raw: cap[0],
499-
text: cap[4] || cap[3] || cap[2] || cap[1]
500-
};
493+
/**
 * Try to read a strong-emphasis token from the start of `src`.
 *
 * @param {string} src - remaining inline source; `raw` and `text` are sliced from it
 * @param {string} maskedSrc - masked text used for delimiter matching; only its
 *   last `src.length` characters are used (assumes it is a same-length masked
 *   copy of the original input — confirm against the caller)
 * @param {string} [prevChar=''] - character preceding `src`, or '' when there is none
 * @returns {{type: string, raw: string, text: string}|undefined} the token, or
 *   undefined (implicit) when no strong span is found
 */
strong(src, maskedSrc, prevChar = '') {
494+
let match = this.rules.inline.strong.start.exec(src);
495+
496+
// match[1] is the optional capture group of the start rule. When it is set,
// continue only if there is no previous character or the previous character
// matches the inline `punctuation` rule.
if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
497+
// Keep only the tail of the masked string that has the same length as `src`.
maskedSrc = maskedSrc.slice(-1 * src.length);
498+
// Asterisk and underscore delimiters have separate closing-delimiter rules.
const endReg = match[0] === '**' ? this.rules.inline.strong.endAst : this.rules.inline.strong.endUnd;
499+
500+
// Restart the search from the beginning; exec() on a regex with the `g` flag
// resumes from lastIndex, which may be left over from an earlier call.
endReg.lastIndex = 0;
501+
502+
let cap;
// Try each candidate closing delimiter in turn until `middle` accepts the span.
while ((match = endReg.exec(maskedSrc)) != null) {
503+
// match.index + 3: the one character matched before the delimiter plus the
// two delimiter characters.
cap = this.rules.inline.strong.middle.exec(maskedSrc.slice(0, match.index + 3));
504+
if (cap) {
505+
// Lengths come from the masked match, content comes from the unmasked `src`.
return {
506+
type: 'strong',
507+
raw: src.slice(0, cap[0].length),
508+
text: src.slice(2, cap[0].length - 2)
509+
};
510+
}
511+
}
512+
}
501513
}
502514
}
503515

504-
em(src) {
505-
const cap = this.rules.inline.em.exec(src);
506-
if (cap) {
507-
return {
508-
type: 'em',
509-
raw: cap[0],
510-
text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1]
511-
};
516+
/**
 * Try to read an emphasis token from the start of `src`.
 *
 * @param {string} src - remaining inline source; `raw` and `text` are sliced from it
 * @param {string} maskedSrc - masked text used for delimiter matching; only its
 *   last `src.length` characters are used (assumes it is a same-length masked
 *   copy of the original input — confirm against the caller)
 * @param {string} [prevChar=''] - character preceding `src`, or '' when there is none
 * @returns {{type: string, raw: string, text: string}|undefined} the token, or
 *   undefined (implicit) when no emphasis span is found
 */
em(src, maskedSrc, prevChar = '') {
517+
let match = this.rules.inline.em.start.exec(src);
518+
519+
// match[1] is the optional capture group of the start rule. When it is set,
// continue only if there is no previous character or the previous character
// matches the inline `punctuation` rule.
if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
520+
// Keep only the tail of the masked string that has the same length as `src`.
maskedSrc = maskedSrc.slice(-1 * src.length);
521+
// Asterisk and underscore delimiters have separate closing-delimiter rules.
const endReg = match[0] === '*' ? this.rules.inline.em.endAst : this.rules.inline.em.endUnd;
522+
523+
// Restart the search from the beginning; exec() on a regex with the `g` flag
// resumes from lastIndex, which may be left over from an earlier call.
endReg.lastIndex = 0;
524+
525+
let cap;
// Try each candidate closing delimiter in turn until `middle` accepts the span.
while ((match = endReg.exec(maskedSrc)) != null) {
526+
// match.index + 2: the one character matched before the delimiter plus the
// single delimiter character.
cap = this.rules.inline.em.middle.exec(maskedSrc.slice(0, match.index + 2));
527+
if (cap) {
528+
// Lengths come from the masked match, content comes from the unmasked `src`.
return {
529+
type: 'em',
530+
raw: src.slice(0, cap[0].length),
531+
text: src.slice(1, cap[0].length - 1)
532+
};
533+
}
534+
}
535+
}
512536
}
513537
}
514538

src/rules.js

Lines changed: 79 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -168,19 +168,74 @@ const inline = {
168168
link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
169169
reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
170170
nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
171-
strong: /^__([^\s_])__(?!_)|^\*\*([^\s*])\*\*(?!\*)|^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)/,
172-
em: /^_([^\s_])_(?!_)|^_([^\s_<][\s\S]*?[^\s_])_(?!_|[^\s,punctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\s,punctuation])|^\*([^\s*<\[])\*(?!\*)|^\*([^\s<"][\s\S]*?[^\s\[\*])\*(?![\]`punctuation])|^\*([^\s*"<\[][\s\S]*[^\s])\*(?!\*)/,
171+
reflinkSearch: 'reflink|nolink(?!\\()',
172+
// Rule set for strong emphasis (`**…**` / `__…__`). The word `punctuation`
// inside these patterns is a placeholder that is filled in from
// `inline._punctuation` when the final regexes are built further down.
strong: {
173+
start: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])|__/, // capture group 1 is set when the opening ** is followed by * or punctuation
174+
middle: /^\*\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*\*$|^__(?![\s])((?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?)__$/,
175+
endAst: /[^punctuation\s]\*\*(?!\*)|[punctuation]\*\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punctuation, or else the closing ** must be followed by punctuation, whitespace, or end of input
176+
endUnd: /[^\s]__(?!_)(?:(?=[punctuation\s])|$)/ // last char can't be whitespace, and the closing __ must precede punctuation, whitespace, or end of input
177+
},
178+
// Rule set for emphasis (`*…*` / `_…_`). The words `punctuation` and
// `overlapSkip` inside these patterns are placeholders that are filled in
// from `inline._punctuation` and `inline._overlapSkip` when the final regexes
// are built further down.
em: {
179+
start: /^(?:(\*(?=[punctuation]))|\*)(?![*\s])|_/, // capture group 1 is set when the opening * is followed by punctuation
180+
middle: /^\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*$|^_(?![_\s])(?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?_$/,
181+
endAst: /[^punctuation\s]\*(?!\*)|[punctuation]\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punctuation, or else the closing * must be followed by punctuation, whitespace, or end of input
182+
endUnd: /[^\s]_(?!_)(?:(?=[punctuation\s])|$)/ // last char can't be whitespace, and the closing _ must precede punctuation, whitespace, or end of input
183+
},
173184
code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
174185
br: /^( {2,}|\\)\n(?!\s*$)/,
175186
del: noopTest,
176-
text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n))|(?= {2,}\n))/
187+
text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n))|(?= {2,}\n))/,
188+
punctuation: /^([\s*punctuation])/
177189
};
178190

179191
// list of punctuation marks from common mark spec
180-
// without ` and ] to workaround Rule 17 (inline code blocks/links)
181-
// without , to work around example 393
182-
inline._punctuation = '!"#$%&\'()*+\\-./:;<=>?@\\[^_{|}~';
183-
inline.em = edit(inline.em).replace(/punctuation/g, inline._punctuation).getRegex();
192+
// without * and _ to workaround cases with double emphasis
193+
inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';
194+
inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();
195+
196+
// sequences em should skip over [title](link), `code`, <html>
197+
inline._blockSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
198+
// Pattern source for a complete double-delimiter run (`__…__` or `**…**`) that
// the `middle` rules step over as a unit via their `overlapSkip` placeholder.
// The asterisk branch must use a real character class `[^*]`, mirroring the
// underscore branch `[^_]`. Escaping the brackets (`\[^\*\]`) would turn them
// into literal `[` / `]` around a mid-pattern `^` assertion, and that branch
// could then never match.
inline._overlapSkip = '__[^_]*?__|\\*\\*[^*]*?\\*\\*';
199+
200+
inline.em.start = edit(inline.em.start)
201+
.replace(/punctuation/g, inline._punctuation)
202+
.getRegex();
203+
204+
inline.em.middle = edit(inline.em.middle)
205+
.replace(/punctuation/g, inline._punctuation)
206+
.replace(/overlapSkip/g, inline._overlapSkip)
207+
.getRegex();
208+
209+
inline.em.endAst = edit(inline.em.endAst, 'g')
210+
.replace(/punctuation/g, inline._punctuation)
211+
.getRegex();
212+
213+
inline.em.endUnd = edit(inline.em.endUnd, 'g')
214+
.replace(/punctuation/g, inline._punctuation)
215+
.getRegex();
216+
217+
inline.strong.start = edit(inline.strong.start)
218+
.replace(/punctuation/g, inline._punctuation)
219+
.getRegex();
220+
221+
// Build the final `strong.middle` regex by expanding the placeholders used in
// its template. The template contains `punctuation` and `overlapSkip` (it has
// no `blockSkip` token), so `overlapSkip` is the name that must be substituted
// here, matching how `inline.em.middle` is built. Substituting any other name
// leaves the word "overlapSkip" as literal text in the compiled pattern.
inline.strong.middle = edit(inline.strong.middle)
  .replace(/punctuation/g, inline._punctuation)
  .replace(/overlapSkip/g, inline._overlapSkip)
  .getRegex();
225+
226+
inline.strong.endAst = edit(inline.strong.endAst, 'g')
227+
.replace(/punctuation/g, inline._punctuation)
228+
.getRegex();
229+
230+
inline.strong.endUnd = edit(inline.strong.endUnd, 'g')
231+
.replace(/punctuation/g, inline._punctuation)
232+
.getRegex();
233+
234+
inline.blockSkip = edit(inline._blockSkip, 'g')
235+
.getRegex();
236+
237+
inline.overlapSkip = edit(inline._overlapSkip, 'g')
238+
.getRegex();
184239

185240
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;
186241

@@ -212,6 +267,11 @@ inline.reflink = edit(inline.reflink)
212267
.replace('label', inline._label)
213268
.getRegex();
214269

270+
inline.reflinkSearch = edit(inline.reflinkSearch, 'g')
271+
.replace('reflink', inline.reflink)
272+
.replace('nolink', inline.nolink)
273+
.getRegex();
274+
215275
/**
216276
* Normal Inline Grammar
217277
*/
@@ -223,8 +283,18 @@ inline.normal = merge({}, inline);
223283
*/
224284

225285
inline.pedantic = merge({}, inline.normal, {
226-
strong: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
227-
em: /^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/,
286+
strong: {
287+
start: /^__|\*\*/,
288+
middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
289+
endAst: /\*\*(?!\*)/g,
290+
endUnd: /__(?!_)/g
291+
},
292+
em: {
293+
start: /^_|\*/,
294+
middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
295+
endAst: /\*(?!\*)/g,
296+
endUnd: /_(?!_)/g
297+
},
228298
link: edit(/^!?\[(label)\]\((.*?)\)/)
229299
.replace('label', inline._label)
230300
.getRegex(),

0 commit comments

Comments
 (0)