diff --git a/CHANGES.md b/CHANGES.md index e3f59f7aec..cc8ba35a0c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ Improvements: - added a function to default export to generate a fresh highlighter instance to be used by extensions [WisamMechano][] +- added BETA `__emitTokens` key to grammars to allow them to direct their own parsing, only using Highlight.js for the HTML rendering [Josh Goebel][] New Grammars: @@ -24,6 +25,7 @@ Parser: - add removePlugin api [faga295][] +[Josh Goebel]: https://github.com/joshgoebel [Timur Kamaev]: https://github.com/doiftrue [Leopard20]: https://github.com/Leopard20/ [WisamMechano]: https://github.com/wisammechano diff --git a/docs/mode-reference.rst b/docs/mode-reference.rst index 960b999fd5..764e44ac38 100644 --- a/docs/mode-reference.rst +++ b/docs/mode-reference.rst @@ -105,6 +105,52 @@ Disables autodetection for this language. (defaults to false, meaning auto-detect is enabled) +__emitTokens +^^^^^^^^^^^^ + +.. warning:: + + **This is currently still private/beta API**, though it's expected to be fairly stable. + + It should land in version 12.0. + +Allows grammars to bundle custom parsers - bypassing the default parser and grammar mode definitions. This should be a function that accepts the raw source code as the first argument and an "Emitter" object as the second. + +A custom parser may parse the source as it sees fit - making calls to the Emitter along the way - allowing Highlight.js to generate and theme the final HTML. + +The **Emitter** API is trivial: + +- ``addText(text)`` +- ``startScope(name)`` +- ``endScope()`` + +Given: + +:: + + hello beautiful world! + + +Assuming beautiful is a keyword, our Emitter calls might look something like: + +:: + + addText("hello ") + startScope("keyword") + addText("beautiful") + endScope() + addText(" world!") + +Resulting in the following generated HTML: + +.. code-block:: html + + hello <span class="hljs-keyword">beautiful</span> world! + +.. 
note:: + + The intended use of ``addText`` is larger chunks of plain text, not individual characters. Custom parsers should buffer plain text output into complete strings rather than sending output one character at a time. + compilerExtensions ^^^^^^^^^^^^^^^^^^ diff --git a/src/highlight.js b/src/highlight.js index b27e169320..d0ae2a850f 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -15,6 +15,7 @@ import * as packageJSON from '../package.json'; import * as logger from "./lib/logger.js"; import HTMLInjectionError from "./lib/html_injection_error.js"; + /** @typedef {import('highlight.js').Mode} Mode @typedef {import('highlight.js').CompiledMode} CompiledMode @@ -224,7 +225,7 @@ const HLJS = function(hljs) { buf += match[0]; } else { const cssClass = language.classNameAliases[kind] || kind; - emitter.addKeyword(match[0], cssClass); + emitKeyword(match[0], cssClass); } } else { buf += match[0]; @@ -259,7 +260,7 @@ const HLJS = function(hljs) { if (top.relevance > 0) { relevance += result.relevance; } - emitter.addSublanguage(result._emitter, result.language); + emitter.__addSublanguage(result._emitter, result.language); } function processBuffer() { @@ -271,6 +272,18 @@ const HLJS = function(hljs) { modeBuffer = ''; } + /** + * @param {string} keyword + * @param {string} scope + */ + function emitKeyword(keyword, scope) { + if (keyword === "") return; + + emitter.startScope(scope); + emitter.addText(keyword); + emitter.endScope(); + } + /** * @param {CompiledScope} scope * @param {RegExpMatchArray} match @@ -283,7 +296,7 @@ const HLJS = function(hljs) { const klass = language.classNameAliases[scope[i]] || scope[i]; const text = match[i]; if (klass) { - emitter.addKeyword(text, klass); + emitKeyword(text, klass); } else { modeBuffer = text; processKeywords(); @@ -304,7 +317,7 @@ const HLJS = function(hljs) { if (mode.beginScope) { // beginScope just wraps the begin match itself in a scope if (mode.beginScope._wrap) { - emitter.addKeyword(modeBuffer, 
language.classNameAliases[mode.beginScope._wrap] || mode.beginScope._wrap); + emitKeyword(modeBuffer, language.classNameAliases[mode.beginScope._wrap] || mode.beginScope._wrap); modeBuffer = ""; } else if (mode.beginScope._multi) { // at this point modeBuffer should just be the match @@ -415,7 +428,7 @@ const HLJS = function(hljs) { const origin = top; if (top.endScope && top.endScope._wrap) { processBuffer(); - emitter.addKeyword(lexeme, top.endScope._wrap); + emitKeyword(lexeme, top.endScope._wrap); } else if (top.endScope && top.endScope._multi) { processBuffer(); emitMultiClass(top.endScope, match); @@ -558,30 +571,34 @@ const HLJS = function(hljs) { let resumeScanAtSamePosition = false; try { - top.matcher.considerAll(); - - for (;;) { - iterations++; - if (resumeScanAtSamePosition) { - // only regexes not matched previously will now be - // considered for a potential match - resumeScanAtSamePosition = false; - } else { - top.matcher.considerAll(); - } - top.matcher.lastIndex = index; + if (!language.__emitTokens) { + top.matcher.considerAll(); + + for (;;) { + iterations++; + if (resumeScanAtSamePosition) { + // only regexes not matched previously will now be + // considered for a potential match + resumeScanAtSamePosition = false; + } else { + top.matcher.considerAll(); + } + top.matcher.lastIndex = index; - const match = top.matcher.exec(codeToHighlight); - // console.log("match", match[0], match.rule && match.rule.begin) + const match = top.matcher.exec(codeToHighlight); + // console.log("match", match[0], match.rule && match.rule.begin) - if (!match) break; + if (!match) break; - const beforeMatch = codeToHighlight.substring(index, match.index); - const processedCount = processLexeme(beforeMatch, match); - index = match.index + processedCount; + const beforeMatch = codeToHighlight.substring(index, match.index); + const processedCount = processLexeme(beforeMatch, match); + index = match.index + processedCount; + } + 
processLexeme(codeToHighlight.substring(index)); + } else { + language.__emitTokens(codeToHighlight, emitter); } - processLexeme(codeToHighlight.substring(index)); - emitter.closeAllNodes(); + emitter.finalize(); result = emitter.toHTML(); diff --git a/src/lib/html_renderer.js b/src/lib/html_renderer.js index 51ba5a140c..c37971244d 100644 --- a/src/lib/html_renderer.js +++ b/src/lib/html_renderer.js @@ -21,7 +21,7 @@ const SPAN_CLOSE = ''; const emitsWrappingTags = (node) => { // rarely we can have a sublanguage where language is undefined // TODO: track down why - return !!node.scope || (node.sublanguage && node.language); + return !!node.scope; }; /** @@ -30,6 +30,11 @@ const emitsWrappingTags = (node) => { * @param {{prefix:string}} options */ const scopeToCSSClass = (name, { prefix }) => { + // sub-language + if (name.startsWith("language:")) { + return name.replace("language:", "language-"); + } + // tiered scope: comment.line if (name.includes(".")) { const pieces = name.split("."); return [ @@ -37,6 +42,7 @@ const scopeToCSSClass = (name, { prefix }) => { ...(pieces.map((x, i) => `${x}${"_".repeat(i + 1)}`)) ].join(" "); } + // simple scope return `${prefix}${name}`; }; @@ -69,12 +75,8 @@ export default class HTMLRenderer { openNode(node) { if (!emitsWrappingTags(node)) return; - let className = ""; - if (node.sublanguage) { - className = `language-${node.language}`; - } else { - className = scopeToCSSClass(node.scope, { prefix: this.classPrefix }); - } + const className = scopeToCSSClass(node.scope, + { prefix: this.classPrefix }); this.span(className); } diff --git a/src/lib/token_tree.js b/src/lib/token_tree.js index f6b7ba70e1..30c793a298 100644 --- a/src/lib/token_tree.js +++ b/src/lib/token_tree.js @@ -106,13 +106,11 @@ class TokenTree { Minimal interface: - - addKeyword(text, scope) - addText(text) - - addSublanguage(emitter, subLanguageName) + - __addSublanguage(emitter, subLanguageName) + - startScope(scope) + - endScope() - finalize() - - 
openNode(scope) - - closeNode() - - closeAllNodes() - toHTML() */ @@ -131,34 +129,31 @@ export default class TokenTreeEmitter extends TokenTree { /** * @param {string} text - * @param {string} scope */ - addKeyword(text, scope) { + addText(text) { if (text === "") { return; } - this.openNode(scope); - this.addText(text); - this.closeNode(); + this.add(text); } - /** - * @param {string} text - */ - addText(text) { - if (text === "") { return; } + /** @param {string} scope */ + startScope(scope) { + this.openNode(scope); + } - this.add(text); + endScope() { + this.closeNode(); } /** * @param {Emitter & {root: DataNode}} emitter * @param {string} name */ - addSublanguage(emitter, name) { + __addSublanguage(emitter, name) { /** @type DataNode */ const node = emitter.root; - node.sublanguage = true; - node.language = name; + if (name) node.scope = `language:${name}`; + this.add(node); } @@ -168,6 +163,7 @@ export default class TokenTreeEmitter extends TokenTree { } finalize() { + this.closeAllNodes(); return true; } }