diff --git a/lib/model/content.dart b/lib/model/content.dart index c98a5ceaaf..e228163a2e 100644 --- a/lib/model/content.dart +++ b/lib/model/content.dart @@ -786,7 +786,7 @@ class MathInlineNode extends InlineContentNode { class GlobalTimeNode extends InlineContentNode { const GlobalTimeNode({super.debugHtmlNode, required this.datetime}); - /// Always in UTC, enforced in [_ZulipContentParser.parseInlineContent]. + /// Always in UTC, enforced in [_ZulipInlineContentParser.parseInlineContent]. final DateTime datetime; @override @@ -806,72 +806,68 @@ class GlobalTimeNode extends InlineContentNode { //////////////////////////////////////////////////////////////// -/// What sort of nodes a [_ZulipContentParser] is currently expecting to find. -enum _ParserContext { - /// The parser is currently looking for block nodes. - block, +String? _parseMath(dom.Element element, {required bool block}) { + final dom.Element katexElement; + if (!block) { + assert(element.localName == 'span' && element.className == 'katex'); - /// The parser is currently looking for inline nodes. - inline, -} - -class _ZulipContentParser { - /// The current state of what sort of nodes the parser is looking for. - /// - /// This exists for the sake of debug-mode checks, - /// and should be read or updated only inside an assertion. - _ParserContext _debugParserContext = _ParserContext.block; - - String? parseMath(dom.Element element, {required bool block}) { - assert(block == (_debugParserContext == _ParserContext.block)); - - final dom.Element katexElement; - if (!block) { - assert(element.localName == 'span' && element.className == 'katex'); + katexElement = element; + } else { + assert(element.localName == 'span' && element.className == 'katex-display'); - katexElement = element; - } else { - assert(element.localName == 'span' && element.className == 'katex-display'); - - if (element.nodes.length != 1) return null; - final child = element.nodes.single; - if (child is! dom.Element) return null; - if (child.localName != 'span') return null; - if (child.className != 'katex') return null; - katexElement = child; - } - - // Expect two children span.katex-mathml, span.katex-html . - // For now we only care about the .katex-mathml . - if (katexElement.nodes.isEmpty) return null; - final child = katexElement.nodes.first; + if (element.nodes.length != 1) return null; + final child = element.nodes.single; if (child is! dom.Element) return null; if (child.localName != 'span') return null; - if (child.className != 'katex-mathml') return null; - - if (child.nodes.length != 1) return null; - final grandchild = child.nodes.single; - if (grandchild is! dom.Element) return null; - if (grandchild.localName != 'math') return null; - if (grandchild.attributes['display'] != (block ? 'block' : null)) return null; - if (grandchild.namespaceUri != 'http://www.w3.org/1998/Math/MathML') return null; - - if (grandchild.nodes.length != 1) return null; - final greatgrand = grandchild.nodes.single; - if (greatgrand is! dom.Element) return null; - if (greatgrand.localName != 'semantics') return null; - - if (greatgrand.nodes.isEmpty) return null; - final descendant4 = greatgrand.nodes.last; - if (descendant4 is! dom.Element) return null; - if (descendant4.localName != 'annotation') return null; - if (descendant4.attributes['encoding'] != 'application/x-tex') return null; - - return descendant4.text.trim(); + if (child.className != 'katex') return null; + katexElement = child; + } + + // Expect two children span.katex-mathml, span.katex-html . + // For now we only care about the .katex-mathml . + if (katexElement.nodes.isEmpty) return null; + final child = katexElement.nodes.first; + if (child is! dom.Element) return null; + if (child.localName != 'span') return null; + if (child.className != 'katex-mathml') return null; + + if (child.nodes.length != 1) return null; + final grandchild = child.nodes.single; + if (grandchild is! dom.Element) return null; + if (grandchild.localName != 'math') return null; + if (grandchild.attributes['display'] != (block ? 'block' : null)) return null; + if (grandchild.namespaceUri != 'http://www.w3.org/1998/Math/MathML') return null; + + if (grandchild.nodes.length != 1) return null; + final greatgrand = grandchild.nodes.single; + if (greatgrand is! dom.Element) return null; + if (greatgrand.localName != 'semantics') return null; + + if (greatgrand.nodes.isEmpty) return null; + final descendant4 = greatgrand.nodes.last; + if (descendant4 is! dom.Element) return null; + if (descendant4.localName != 'annotation') return null; + if (descendant4.attributes['encoding'] != 'application/x-tex') return null; + + return descendant4.text.trim(); +} + +/// Parser for the inline-content subtrees within Zulip content HTML. +/// +/// The only entry point to this class is [parseBlockInline]. +/// +/// After a call to [parseBlockInline] returns, the [_ZulipInlineContentParser] +/// instance has been reset to its starting state, and can be re-used for +/// parsing other subtrees. +class _ZulipInlineContentParser { + InlineContentNode? parseInlineMath(dom.Element element) { + final debugHtmlNode = kDebugMode ? element : null; + final texSource = _parseMath(element, block: false); + if (texSource == null) return null; + return MathInlineNode(texSource: texSource, debugHtmlNode: debugHtmlNode); } UserMentionNode? parseUserMention(dom.Element element) { - assert(_debugParserContext == _ParserContext.inline); assert(element.localName == 'span'); final debugHtmlNode = kDebugMode ? element : null; @@ -945,7 +941,6 @@ class _ZulipContentParser { static final _emojiCodeFromClassNameRegexp = RegExp(r"emoji-([^ ]+)"); InlineContentNode parseInlineContent(dom.Node node) { - assert(_debugParserContext == _ParserContext.inline); final debugHtmlNode = kDebugMode ? node : null; InlineContentNode unimplemented() => UnimplementedInlineContentNode(htmlNode: node); @@ -1025,9 +1020,7 @@ class _ZulipContentParser { } if (localName == 'span' && className == 'katex') { - final texSource = parseMath(element, block: false); - if (texSource == null) return unimplemented(); - return MathInlineNode(texSource: texSource, debugHtmlNode: debugHtmlNode); + return parseInlineMath(element) ?? unimplemented(); } // TODO more types of node @@ -1035,26 +1028,41 @@ class _ZulipContentParser { } List parseInlineContentList(List nodes) { - assert(_debugParserContext == _ParserContext.inline); return nodes.map(parseInlineContent).toList(growable: false); } + /// Parse the children of a [BlockInlineContainerNode], making up a + /// complete subtree of inline content with no further inline ancestors. ({List nodes, List? links}) parseBlockInline(List nodes) { - assert(_debugParserContext == _ParserContext.block); - assert(() { - _debugParserContext = _ParserContext.inline; - return true; - }()); final resultNodes = parseInlineContentList(nodes); - assert(() { - _debugParserContext = _ParserContext.block; - return true; - }()); return (nodes: resultNodes, links: _takeLinkNodes()); } +} + +/// Parser for a complete piece of Zulip HTML content, a [ZulipContent]. +/// +/// The only entry point to this class is [parse]. +class _ZulipContentParser { + /// The single inline-content parser used and re-used throughout parsing of + /// a complete piece of Zulip HTML content. + /// + /// Because block content can never appear nested inside inline content, + /// there's never a need for more than one of these at a time, + /// so we can allocate just one up front. + final inlineParser = _ZulipInlineContentParser(); + + ({List nodes, List? links}) parseBlockInline(List nodes) { + return inlineParser.parseBlockInline(nodes); + } + + BlockContentNode parseMathBlock(dom.Element element) { + final debugHtmlNode = kDebugMode ? element : null; + final texSource = _parseMath(element, block: true); + if (texSource == null) return UnimplementedBlockContentNode(htmlNode: element); + return MathBlockNode(texSource: texSource, debugHtmlNode: debugHtmlNode); + } BlockContentNode parseListNode(dom.Element element) { - assert(_debugParserContext == _ParserContext.block); ListStyle? listStyle; switch (element.localName) { case 'ol': listStyle = ListStyle.ordered; break; @@ -1077,7 +1085,6 @@ class _ZulipContentParser { } BlockContentNode parseSpoilerNode(dom.Element divElement) { - assert(_debugParserContext == _ParserContext.block); assert(divElement.localName == 'div' && divElement.className == 'spoiler-block'); @@ -1097,7 +1104,6 @@ class _ZulipContentParser { } BlockContentNode parseCodeBlock(dom.Element divElement) { - assert(_debugParserContext == _ParserContext.block); final mainElement = () { assert(divElement.localName == 'div' && divElement.className == "codehilite"); @@ -1180,7 +1186,6 @@ class _ZulipContentParser { static final _imageDimensionsRegExp = RegExp(r'^(\d+)x(\d+)$'); BlockContentNode parseImageNode(dom.Element divElement) { - assert(_debugParserContext == _ParserContext.block); final elements = () { assert(divElement.localName == 'div' && divElement.className == 'message_inline_image'); @@ -1272,7 +1277,6 @@ class _ZulipContentParser { }(); BlockContentNode parseInlineVideoNode(dom.Element divElement) { - assert(_debugParserContext == _ParserContext.block); assert(divElement.localName == 'div' && _videoClassNameRegexp.hasMatch(divElement.className)); @@ -1305,7 +1309,6 @@ class _ZulipContentParser { } BlockContentNode parseEmbedVideoNode(dom.Element divElement) { - assert(_debugParserContext == _ParserContext.block); assert(divElement.localName == 'div' && _videoClassNameRegexp.hasMatch(divElement.className)); @@ -1344,7 +1347,6 @@ class _ZulipContentParser { } BlockContentNode parseTableContent(dom.Element tableElement) { - assert(_debugParserContext == _ParserContext.block); assert(tableElement.localName == 'table' && tableElement.className.isEmpty); @@ -1452,7 +1454,6 @@ class _ZulipContentParser { } BlockContentNode parseBlockContent(dom.Node node) { - assert(_debugParserContext == _ParserContext.block); final debugHtmlNode = kDebugMode ? node : null; if (node is! dom.Element) { return UnimplementedBlockContentNode(htmlNode: node); @@ -1480,9 +1481,7 @@ class _ZulipContentParser { // The case with the `
\n` can happen when at the end of a quote; // it seems like a glitch in the server's Markdown processing, // so hopefully there just aren't any further such glitches. - final texSource = parseMath(child, block: true); - if (texSource == null) return UnimplementedBlockContentNode(htmlNode: node); - return MathBlockNode(texSource: texSource, debugHtmlNode: debugHtmlNode); + return parseMathBlock(child); } } } @@ -1579,10 +1578,15 @@ class _ZulipContentParser { /// /// See [ParagraphNode]. List parseImplicitParagraphBlockContentList(dom.NodeList nodes) { - assert(_debugParserContext == _ParserContext.block); final List result = []; - final List currentParagraph = []; + List imageNodes = []; + void consumeImageNodes() { + result.add(ImageNodeList(imageNodes)); + imageNodes = []; + } + + final List currentParagraph = []; void consumeParagraph() { final parsed = parseBlockInline(currentParagraph); result.add(ParagraphNode( @@ -1597,8 +1601,7 @@ class _ZulipContentParser { if (_isPossibleInlineNode(node)) { if (imageNodes.isNotEmpty) { - result.add(ImageNodeList(imageNodes)); - imageNodes = []; + consumeImageNodes(); // In a context where paragraphs are implicit it should be impossible // to have more paragraph content after image previews. result.add(UnimplementedBlockContentNode(htmlNode: node)); @@ -1613,24 +1616,25 @@ class _ZulipContentParser { imageNodes.add(block); continue; } - if (imageNodes.isNotEmpty) { - result.add(ImageNodeList(imageNodes)); - imageNodes = []; - } + if (imageNodes.isNotEmpty) consumeImageNodes(); result.add(block); } if (currentParagraph.isNotEmpty) consumeParagraph(); - if (imageNodes.isNotEmpty) result.add(ImageNodeList(imageNodes)); - + if (imageNodes.isNotEmpty) consumeImageNodes(); return result; } static final _redundantLineBreaksRegexp = RegExp(r'^\n+$'); List parseBlockContentList(dom.NodeList nodes) { - assert(_debugParserContext == _ParserContext.block); final List result = []; + List imageNodes = []; + void consumeImageNodes() { + result.add(ImageNodeList(imageNodes)); + imageNodes = []; + } + for (final node in nodes) { // We get a bunch of newline Text nodes between paragraphs. // A browser seems to ignore these; let's do the same. @@ -1643,13 +1647,10 @@ class _ZulipContentParser { imageNodes.add(block); continue; } - if (imageNodes.isNotEmpty) { - result.add(ImageNodeList(imageNodes)); - imageNodes = []; - } + if (imageNodes.isNotEmpty) consumeImageNodes(); result.add(block); } - if (imageNodes.isNotEmpty) result.add(ImageNodeList(imageNodes)); + if (imageNodes.isNotEmpty) consumeImageNodes(); return result; } @@ -1660,6 +1661,8 @@ class _ZulipContentParser { } } +/// Parse a complete piece of Zulip HTML content, +/// such as an entire value of [Message.content]. ZulipContent parseContent(String html) { return _ZulipContentParser().parse(html); }