From e53ce2852fc003b760ef8ea19dee7bb80bbc61fe Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 13 Sep 2022 11:01:39 -0400 Subject: [PATCH 1/6] private __emitTokens API --- src/highlight.js | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/highlight.js b/src/highlight.js index b27e169320..5188b06be1 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -558,29 +558,35 @@ const HLJS = function(hljs) { let resumeScanAtSamePosition = false; try { - top.matcher.considerAll(); - - for (;;) { - iterations++; - if (resumeScanAtSamePosition) { - // only regexes not matched previously will now be - // considered for a potential match - resumeScanAtSamePosition = false; - } else { - top.matcher.considerAll(); - } - top.matcher.lastIndex = index; - const match = top.matcher.exec(codeToHighlight); - // console.log("match", match[0], match.rule && match.rule.begin) + if (!language.__emitTokens) { + top.matcher.considerAll(); + + for (;;) { + iterations++; + if (resumeScanAtSamePosition) { + // only regexes not matched previously will now be + // considered for a potential match + resumeScanAtSamePosition = false; + } else { + top.matcher.considerAll(); + } + top.matcher.lastIndex = index; - if (!match) break; + const match = top.matcher.exec(codeToHighlight); + // console.log("match", match[0], match.rule && match.rule.begin) - const beforeMatch = codeToHighlight.substring(index, match.index); - const processedCount = processLexeme(beforeMatch, match); - index = match.index + processedCount; + if (!match) break; + + const beforeMatch = codeToHighlight.substring(index, match.index); + const processedCount = processLexeme(beforeMatch, match); + index = match.index + processedCount; + } + processLexeme(codeToHighlight.substring(index)); + } else { + language.__emitTokens(codeToHighlight, emitter) } - processLexeme(codeToHighlight.substring(index)); + emitter.closeAllNodes(); emitter.finalize(); result = 
emitter.toHTML(); From cbf82138cd05134a2d5b8b9a6c06f3d0c1fb4175 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 13 Sep 2022 12:41:22 -0400 Subject: [PATCH 2/6] remove addKeyword from Emitter API --- src/highlight.js | 21 ++++++++++++++++----- src/lib/token_tree.js | 26 +++++++++++--------------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/highlight.js b/src/highlight.js index 5188b06be1..66ab12116e 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -224,7 +224,7 @@ const HLJS = function(hljs) { buf += match[0]; } else { const cssClass = language.classNameAliases[kind] || kind; - emitter.addKeyword(match[0], cssClass); + emitKeyword(match[0], cssClass); } } else { buf += match[0]; @@ -271,6 +271,18 @@ const HLJS = function(hljs) { modeBuffer = ''; } + /** + * @param {string} keyword + * @param {string} scope + */ + function emitKeyword(keyword, scope) { + if (keyword === "") return; + + emitter.startScope(scope); + emitter.addText(keyword); + emitter.endScope(); + } + /** * @param {CompiledScope} scope * @param {RegExpMatchArray} match @@ -283,7 +295,7 @@ const HLJS = function(hljs) { const klass = language.classNameAliases[scope[i]] || scope[i]; const text = match[i]; if (klass) { - emitter.addKeyword(text, klass); + emitKeyword(text, klass); } else { modeBuffer = text; processKeywords(); @@ -304,7 +316,7 @@ const HLJS = function(hljs) { if (mode.beginScope) { // beginScope just wraps the begin match itself in a scope if (mode.beginScope._wrap) { - emitter.addKeyword(modeBuffer, language.classNameAliases[mode.beginScope._wrap] || mode.beginScope._wrap); + emitKeyword(modeBuffer, language.classNameAliases[mode.beginScope._wrap] || mode.beginScope._wrap); modeBuffer = ""; } else if (mode.beginScope._multi) { // at this point modeBuffer should just be the match @@ -415,7 +427,7 @@ const HLJS = function(hljs) { const origin = top; if (top.endScope && top.endScope._wrap) { processBuffer(); - emitter.addKeyword(lexeme, top.endScope._wrap);
+ emitKeyword(lexeme, top.endScope._wrap); } else if (top.endScope && top.endScope._multi) { processBuffer(); emitMultiClass(top.endScope, match); @@ -587,7 +599,6 @@ const HLJS = function(hljs) { language.__emitTokens(codeToHighlight, emitter) } - emitter.closeAllNodes(); emitter.finalize(); result = emitter.toHTML(); diff --git a/src/lib/token_tree.js b/src/lib/token_tree.js index f6b7ba70e1..f7ab38541f 100644 --- a/src/lib/token_tree.js +++ b/src/lib/token_tree.js @@ -106,13 +106,11 @@ class TokenTree { Minimal interface: - - addKeyword(text, scope) - addText(text) - addSublanguage(emitter, subLanguageName) + - startScope(scope) + - endScope() - finalize() - - openNode(scope) - - closeNode() - - closeAllNodes() - toHTML() */ @@ -131,23 +129,20 @@ export default class TokenTreeEmitter extends TokenTree { /** * @param {string} text - * @param {string} scope */ - addKeyword(text, scope) { + addText(text) { if (text === "") { return; } - this.openNode(scope); - this.addText(text); - this.closeNode(); + this.add(text); } - /** - * @param {string} text - */ - addText(text) { - if (text === "") { return; } + /** @param {string} scope */ + startScope(scope) { + this.openNode(scope); + } - this.add(text); + endScope() { + this.closeNode() } /** @@ -168,6 +163,7 @@ export default class TokenTreeEmitter extends TokenTree { } finalize() { + this.closeAllNodes() return true; } } From f418acaa2a4b6d21e28797b7f9e77ebb262934f7 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Wed, 14 Sep 2022 02:46:37 -0400 Subject: [PATCH 3/6] use language: namespace scope prefix to handle sublanguages --- src/highlight.js | 2 +- src/lib/html_renderer.js | 16 +++++++++------- src/lib/token_tree.js | 8 ++++---- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/highlight.js b/src/highlight.js index 66ab12116e..83fa037a51 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -259,7 +259,7 @@ const HLJS = function(hljs) { if (top.relevance > 0) { relevance += result.relevance; } 
- emitter.addSublanguage(result._emitter, result.language); + emitter.__addSublanguage(result._emitter, result.language); } function processBuffer() { diff --git a/src/lib/html_renderer.js b/src/lib/html_renderer.js index 51ba5a140c..48444ef8c9 100644 --- a/src/lib/html_renderer.js +++ b/src/lib/html_renderer.js @@ -21,7 +21,7 @@ const SPAN_CLOSE = ''; const emitsWrappingTags = (node) => { // rarely we can have a sublanguage where language is undefined // TODO: track down why - return !!node.scope || (node.sublanguage && node.language); + return !!node.scope; }; /** @@ -30,6 +30,11 @@ const emitsWrappingTags = (node) => { * @param {{prefix:string}} options */ const scopeToCSSClass = (name, { prefix }) => { + // sub-language + if (name.startsWith("language:")) { + return name.replace("language:","language-") + } + // tiered scope: comment.line if (name.includes(".")) { const pieces = name.split("."); return [ @@ -37,6 +42,7 @@ const scopeToCSSClass = (name, { prefix }) => { ...(pieces.map((x, i) => `${x}${"_".repeat(i + 1)}`)) ].join(" "); } + // simple scope return `${prefix}${name}`; }; @@ -69,12 +75,8 @@ export default class HTMLRenderer { openNode(node) { if (!emitsWrappingTags(node)) return; - let className = ""; - if (node.sublanguage) { - className = `language-${node.language}`; - } else { - className = scopeToCSSClass(node.scope, { prefix: this.classPrefix }); - } + const className = scopeToCSSClass(node.scope, + { prefix: this.classPrefix }); this.span(className); } diff --git a/src/lib/token_tree.js b/src/lib/token_tree.js index f7ab38541f..9ae4ffe8f4 100644 --- a/src/lib/token_tree.js +++ b/src/lib/token_tree.js @@ -107,7 +107,7 @@ class TokenTree { Minimal interface: - addText(text) - - addSublanguage(emitter, subLanguageName) + - __addSublanguage(emitter, subLanguageName) - startScope(scope) - endScope() - finalize() @@ -149,11 +149,11 @@ export default class TokenTreeEmitter extends TokenTree { * @param {Emitter & {root: DataNode}} emitter * @param 
{string} name */ - addSublanguage(emitter, name) { + __addSublanguage(emitter, name) { /** @type DataNode */ const node = emitter.root; - node.sublanguage = true; - node.language = name; + if (name) node.scope = `language:${name}` + this.add(node); } From a662a6cd946dcbc6d2fa9755fb778c3977a483a6 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Wed, 14 Sep 2022 03:00:43 -0400 Subject: [PATCH 4/6] linting --- src/highlight.js | 4 ++-- src/lib/html_renderer.js | 4 ++-- src/lib/token_tree.js | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/highlight.js b/src/highlight.js index 83fa037a51..d0ae2a850f 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -15,6 +15,7 @@ import * as packageJSON from '../package.json'; import * as logger from "./lib/logger.js"; import HTMLInjectionError from "./lib/html_injection_error.js"; + /** @typedef {import('highlight.js').Mode} Mode @typedef {import('highlight.js').CompiledMode} CompiledMode @@ -570,7 +571,6 @@ const HLJS = function(hljs) { let resumeScanAtSamePosition = false; try { - if (!language.__emitTokens) { top.matcher.considerAll(); @@ -596,7 +596,7 @@ const HLJS = function(hljs) { } processLexeme(codeToHighlight.substring(index)); } else { - language.__emitTokens(codeToHighlight, emitter) + language.__emitTokens(codeToHighlight, emitter); } emitter.finalize(); diff --git a/src/lib/html_renderer.js b/src/lib/html_renderer.js index 48444ef8c9..c37971244d 100644 --- a/src/lib/html_renderer.js +++ b/src/lib/html_renderer.js @@ -32,7 +32,7 @@ const emitsWrappingTags = (node) => { const scopeToCSSClass = (name, { prefix }) => { // sub-language if (name.startsWith("language:")) { - return name.replace("language:","language-") + return name.replace("language:", "language-"); } // tiered scope: comment.line if (name.includes(".")) { @@ -76,7 +76,7 @@ export default class HTMLRenderer { if (!emitsWrappingTags(node)) return; const className = scopeToCSSClass(node.scope, - { prefix: this.classPrefix }); + 
{ prefix: this.classPrefix }); this.span(className); } diff --git a/src/lib/token_tree.js b/src/lib/token_tree.js index 9ae4ffe8f4..30c793a298 100644 --- a/src/lib/token_tree.js +++ b/src/lib/token_tree.js @@ -142,7 +142,7 @@ export default class TokenTreeEmitter extends TokenTree { } endScope() { - this.closeNode() + this.closeNode(); } /** @@ -152,7 +152,7 @@ export default class TokenTreeEmitter extends TokenTree { __addSublanguage(emitter, name) { /** @type DataNode */ const node = emitter.root; - if (name) node.scope = `language:${name}` + if (name) node.scope = `language:${name}`; this.add(node); } @@ -163,7 +163,7 @@ export default class TokenTreeEmitter extends TokenTree { } finalize() { - this.closeAllNodes() + this.closeAllNodes(); return true; } } From 5c182ac13a60dc7507c4fe6ce2996ca93b3e2edc Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Wed, 14 Sep 2022 04:01:50 -0400 Subject: [PATCH 5/6] add emitTokens docs --- docs/mode-reference.rst | 46 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/docs/mode-reference.rst b/docs/mode-reference.rst index 960b999fd5..764e44ac38 100644 --- a/docs/mode-reference.rst +++ b/docs/mode-reference.rst @@ -105,6 +105,52 @@ Disables autodetection for this language. (defaults to false, meaning auto-detect is enabled) +__emitTokens +^^^^^^^^^^^^ + +.. warning:: + + **This is currently still private/beta API**, though it's expected to be fairly stable. + + It should land in version 12.0. + +Allows grammars to bundle custom parsers - bypassing the default parser and grammar mode definitions. This should be a function that accepts the raw source code as the first argument and an "Emitter" object as the second. + +A custom parser may parse the source as it sees fit - making calls to the Emitter along the way - allowing Highlight.js to generate and theme the final HTML. 
+ +The **Emitter** API is trivial: + +- ``addText(text)`` +- ``startScope(name)`` +- ``endScope()`` + +Given: + +:: + + hello beautiful world! + + +Assuming ``beautiful`` is a keyword, our Emitter calls might look something like: + +:: + + addText("hello ") + startScope("keyword") + addText("beautiful") + endScope() + addText(" world!") + +Resulting in the following generated HTML: + +.. code-block:: html + + hello <span class="hljs-keyword">beautiful</span> world! + +.. note:: + + The intended use of ``addText`` is larger chunks of plain text, not individual characters. Custom parsers should buffer plain text output into complete strings rather than sending output one character at a time. + compilerExtensions ^^^^^^^^^^^^^^^^^^ From 2211b778357f9e4e20c04fdddb33dbe85bf947a5 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sun, 19 Mar 2023 16:44:32 -0400 Subject: [PATCH 6/6] add changelog entry --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index e3f59f7aec..cc8ba35a0c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ Improvements: - added a function to default export to generate a fresh highlighter instance to be used by extensions [WisamMechano][] +- added BETA `__emitTokens` key to grammars to allow them to direct their own parsing, only using Highlight.js for the HTML rendering [Josh Goebel][] New Grammars: @@ -24,6 +25,7 @@ Parser: - add removePlugin api [faga295][] +[Josh Goebel]: https://github.com/joshgoebel [Timur Kamaev]: https://github.com/doiftrue [Leopard20]: https://github.com/Leopard20/ [WisamMechano]: https://github.com/wisammechano