@@ -17,6 +17,8 @@ limitations under the License.
1717
1818import * as commonmark from 'commonmark' ;
1919import { escape } from "lodash" ;
20+ import { logger } from 'matrix-js-sdk/src/logger' ;
21+ import * as linkify from 'linkifyjs' ;
2022
2123const ALLOWED_HTML_TAGS = [ 'sub' , 'sup' , 'del' , 'u' ] ;
2224
@@ -29,6 +31,9 @@ interface CommonmarkHtmlRendererInternal extends commonmark.HtmlRenderer {
2931 link : ( node : commonmark . Node , entering : boolean ) => void ;
3032 html_inline : ( node : commonmark . Node ) => void ; // eslint-disable-line camelcase
3133 html_block : ( node : commonmark . Node ) => void ; // eslint-disable-line camelcase
34+ text : ( node : commonmark . Node ) => void ;
35+ out : ( text : string ) => void ;
36+ emph : ( node : commonmark . Node ) => void ;
3237}
3338
3439function isAllowedHtmlTag ( node : commonmark . Node ) : boolean {
@@ -61,6 +66,33 @@ function isMultiLine(node: commonmark.Node): boolean {
6166 return par . firstChild != par . lastChild ;
6267}
6368
69+ function getTextUntilEndOrLinebreak ( node : commonmark . Node ) {
70+ let currentNode = node ;
71+ let text = '' ;
72+ while ( currentNode !== null && currentNode . type !== 'softbreak' && currentNode . type !== 'linebreak' ) {
73+ const { literal, type } = currentNode ;
74+ if ( type === 'text' && literal ) {
75+ let n = 0 ;
76+ let char = literal [ n ] ;
77+ while ( char !== ' ' && char !== null && n <= literal . length ) {
78+ if ( char === ' ' ) {
79+ break ;
80+ }
81+ if ( char ) {
82+ text += char ;
83+ }
84+ n += 1 ;
85+ char = literal [ n ] ;
86+ }
87+ if ( char === ' ' ) {
88+ break ;
89+ }
90+ }
91+ currentNode = currentNode . next ;
92+ }
93+ return text ;
94+ }
95+
6496/**
6597 * Class that wraps commonmark, adding the ability to see whether
6698 * a given message actually uses any markdown syntax or whether
@@ -70,11 +102,87 @@ export default class Markdown {
70102 private input : string ;
71103 private parsed : commonmark . Node ;
72104
73- constructor ( input ) {
105+ constructor ( input : string ) {
74106 this . input = input ;
75107
76108 const parser = new commonmark . Parser ( ) ;
77109 this . parsed = parser . parse ( this . input ) ;
110+ this . parsed = this . escapeLinks ( this . parsed ) ;
111+ }
112+
113+ private escapeLinks ( parsed : commonmark . Node ) {
114+ const walker = parsed . walker ( ) ;
115+ let event : commonmark . NodeWalkingStep = null ;
116+ let text = '' ;
117+ let isInPara = false ;
118+ let previousNode : commonmark . Node | null = null ;
119+ while ( ( event = walker . next ( ) ) ) {
120+ const { node } = event ;
121+ if ( node . type === 'paragraph' ) {
122+ if ( event . entering ) {
123+ isInPara = true ;
124+ } else {
125+ isInPara = false ;
126+ }
127+ }
128+ if ( isInPara ) {
129+ // Clear saved string when line ends
130+ if (
131+ node . type === 'softbreak' ||
132+ node . type === 'linebreak' ||
133+ // Also start calculating the text from the beginning on any spaces
134+ ( node . type === 'text' && node . literal === ' ' )
135+ ) {
136+ text = '' ;
137+ }
138+ if ( node . type === 'text' ) {
139+ text += node . literal ;
140+ }
141+ // We should not do this if previous node was not a textnode, as we can't combine it then.
142+ if ( node . type === 'emph' && previousNode . type === 'text' ) {
143+ if ( event . entering ) {
144+ const foundLinks = linkify . find ( text ) ;
145+ for ( const { value } of foundLinks ) {
146+ if ( node . firstChild . literal ) {
147+ /**
148+ * NOTE: This technically should unlink the emph node and create LINK nodes instead, adding all the next elements as siblings
149+ * but this solution seems to work well and is hopefully slightly easier to understand too
150+ */
151+ const nonEmphasizedText = `_${ node . firstChild . literal } _` ;
152+ const f = getTextUntilEndOrLinebreak ( node ) ;
153+ const newText = value + nonEmphasizedText + f ;
154+ const newLinks = linkify . find ( newText ) ;
155+ // Should always find only one link here, if it finds more it means that the algorithm is broken
156+ if ( newLinks . length === 1 ) {
157+ const emphasisTextNode = new commonmark . Node ( 'text' ) ;
158+ emphasisTextNode . literal = nonEmphasizedText ;
159+ previousNode . insertAfter ( emphasisTextNode ) ;
160+ node . firstChild . literal = '' ;
161+ // Empty string is a sign that we should ignore it in HTML rendering
162+ node . literal = '' ;
163+ } else {
164+ logger . error (
165+ "Markdown links escaping found too many links for following text: " ,
166+ text ,
167+ ) ;
168+ logger . error (
169+ "Markdown links escaping found too many links for modified text: " ,
170+ newText ,
171+ ) ;
172+ }
173+ }
174+ }
175+ } else {
176+ node . firstChild . literal = '' ;
177+ // Empty string is a sign that we should ignore it in HTML rendering
178+ node . literal = '' ;
179+ // node.unlink();
180+ }
181+ }
182+ }
183+ previousNode = node ;
184+ }
185+ return parsed ;
78186 }
79187
80188 isPlainText ( ) : boolean {
@@ -120,9 +228,7 @@ export default class Markdown {
120228 // you can nest them.
121229 //
122230 // Let's try sending with <p/>s anyway for now, though.
123-
124231 const realParagraph = renderer . paragraph ;
125-
126232 renderer . paragraph = function ( node : commonmark . Node , entering : boolean ) {
127233 // If there is only one top level node, just return the
128234 // bare text: it's a single line of text and so should be
@@ -133,6 +239,9 @@ export default class Markdown {
133239 realParagraph . call ( this , node , entering ) ;
134240 }
135241 } ;
242+ renderer . text = ( node : commonmark . Node ) => {
243+ renderer . out ( node . literal ) ;
244+ } ;
136245
137246 renderer . link = function ( node , entering ) {
138247 const attrs = this . attrs ( node ) ;
@@ -165,7 +274,7 @@ export default class Markdown {
165274 renderer . html_block = function ( node : commonmark . Node ) {
166275 /*
167276 // as with `paragraph`, we only insert line breaks
168- // if there are multiple lines in the markdown .
277+ // if there are multiple lines in the markdownode .
169278 const isMultiLine = is_multi_line(node);
170279 if (isMultiLine) this.cr();
171280 */
@@ -175,6 +284,17 @@ export default class Markdown {
175284 */
176285 } ;
177286
287+ const realEmph = renderer . emph ;
288+
289+ renderer . emph = function ( node : commonmark . Node ) {
290+ // We're escaping links with emphasis in the middle of it to act like a real link
291+ // This empty string check here is to verify that we have modified the emphasis node properly
292+ if ( node . type === 'emph' && node . literal === "" ) {
293+ return ;
294+ }
295+ return realEmph ( node ) ;
296+ } ;
297+
178298 return renderer . render ( this . parsed ) ;
179299 }
180300
@@ -184,7 +304,7 @@ export default class Markdown {
184304 * markdown syntax
185305 * (to fix https://github.com/vector-im/element-web/issues/2870).
186306 *
187- * N .B. this does **NOT** render arbitrary MD to plain text - only MD
307+ * node .B. this does **NOT** render arbitrary MD to plain text - only MD
188308 * which has no formatting. Otherwise it emits HTML(!).
189309 */
190310 toPlaintext ( ) : string {
@@ -205,6 +325,16 @@ export default class Markdown {
205325 if ( isMultiLine ( node ) && node . next ) this . lit ( '\n\n' ) ;
206326 } ;
207327
328+ const realEmph = renderer . emph ;
329+ renderer . emph = function ( node : commonmark . Node ) {
330+ // We're escaping links with emphasis in the middle of it to act like a real link
331+ // This empty string check here is to verify that we have modified the emphasis node properly
332+ if ( node . type === 'emph' && node . literal === "" ) {
333+ return ;
334+ }
335+ return realEmph ( node ) ;
336+ } ;
337+
208338 return renderer . render ( this . parsed ) ;
209339 }
210340}
0 commit comments