2727#define CODE_INDENT 4
2828#define TAB_STOP 4
2929
30+ /**
31+ * Very deeply nested lists can cause quadratic performance issues.
32+ * This constant is used in open_new_blocks() to limit the nesting
33+ * depth. It is unlikely that a non-contrived markdown document will
34+ * be nested this deeply.
35+ */
36+ #define MAX_LIST_DEPTH 100
37+
#ifndef MIN
/*
 * Evaluates to the smaller of x and y.
 *
 * Every use of an argument is parenthesized so that arguments containing
 * operators of lower precedence than `<` (for example `a ? b : c`) are
 * compared as a whole instead of being re-associated by the expansion.
 *
 * Each argument may be evaluated twice, so do not pass expressions with
 * side effects (e.g. MIN(i++, j)).
 */
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
@@ -70,22 +78,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
7078static void S_process_line (cmark_parser * parser , const unsigned char * buffer ,
7179 bufsize_t bytes );
7280
73- static void subtract_open_block_counts (cmark_parser * parser , cmark_node * node ) {
74- do {
75- decr_open_block_count (parser , S_type (node ));
76- node -> flags &= ~CMARK_NODE__OPEN_BLOCK ;
77- node = node -> last_child ;
78- } while (node );
79- }
80-
81- static void add_open_block_counts (cmark_parser * parser , cmark_node * node ) {
82- do {
83- incr_open_block_count (parser , S_type (node ));
84- node -> flags |= CMARK_NODE__OPEN_BLOCK ;
85- node = node -> last_child ;
86- } while (node );
87- }
88-
8981static cmark_node * make_block (cmark_mem * mem , cmark_node_type tag ,
9082 int start_line , int start_column ) {
9183 cmark_node * e ;
@@ -145,7 +137,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
145137 parser -> refmap = cmark_reference_map_new (parser -> mem );
146138 parser -> root = document ;
147139 parser -> current = document ;
148- add_open_block_counts (parser , document );
149140
150141 parser -> syntax_extensions = saved_exts ;
151142 parser -> inline_syntax_extensions = saved_inline_exts ;
@@ -259,18 +250,15 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
259250// Check to see if a node ends with a blank line, descending
260251// if needed into lists and sublists.
261252static bool S_ends_with_blank_line (cmark_node * node ) {
262- while (true) {
263- if (S_last_line_checked (node )) {
264- return (S_last_line_blank (node ));
265- } else if ((S_type (node ) == CMARK_NODE_LIST ||
266- S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) {
267- S_set_last_line_checked (node );
268- node = node -> last_child ;
269- continue ;
270- } else {
271- S_set_last_line_checked (node );
272- return (S_last_line_blank (node ));
273- }
253+ if (S_last_line_checked (node )) {
254+ return (S_last_line_blank (node ));
255+ } else if ((S_type (node ) == CMARK_NODE_LIST ||
256+ S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) {
257+ S_set_last_line_checked (node );
258+ return (S_ends_with_blank_line (node -> last_child ));
259+ } else {
260+ S_set_last_line_checked (node );
261+ return (S_last_line_blank (node ));
274262 }
275263}
276264
@@ -330,12 +318,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
330318 has_content = resolve_reference_link_definitions (parser , b );
331319 if (!has_content ) {
332320 // remove blank node (former reference def)
333- if (b -> flags & CMARK_NODE__OPEN_BLOCK ) {
334- decr_open_block_count (parser , S_type (b ));
335- if (b -> prev ) {
336- add_open_block_counts (parser , b -> prev );
337- }
338- }
339321 cmark_node_free (b );
340322 }
341323 break ;
@@ -408,17 +390,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
408390 return parent ;
409391}
410392
411- // Recalculates the number of open blocks. Returns true if it matches what's currently stored
412- // in parser. (Used to check that the counts in parser, which are updated incrementally, are
413- // correct.)
414- bool check_open_block_counts (cmark_parser * parser ) {
415- cmark_parser tmp_parser = {0 }; // Only used for its open_block_counts and total_open_blocks fields.
416- add_open_block_counts (& tmp_parser , parser -> root );
417- return
418- tmp_parser .total_open_blocks == parser -> total_open_blocks &&
419- memcmp (tmp_parser .open_block_counts , parser -> open_block_counts , sizeof (parser -> open_block_counts )) == 0 ;
420- }
421-
422393// Add a node as child of another. Return pointer to child.
423394static cmark_node * add_child (cmark_parser * parser , cmark_node * parent ,
424395 cmark_node_type block_type , int start_column ) {
@@ -437,14 +408,11 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
437408 if (parent -> last_child ) {
438409 parent -> last_child -> next = child ;
439410 child -> prev = parent -> last_child ;
440- subtract_open_block_counts (parser , parent -> last_child );
441411 } else {
442412 parent -> first_child = child ;
443413 child -> prev = NULL ;
444414 }
445415 parent -> last_child = child ;
446- add_open_block_counts (parser , child );
447-
448416 return child ;
449417}
450418
@@ -1087,14 +1055,8 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
10871055 * all_matched = false;
10881056 cmark_node * container = parser -> root ;
10891057 cmark_node_type cont_type ;
1090- cmark_parser tmp_parser ; // Only used for its open_block_counts and total_open_blocks fields.
1091- memcpy (tmp_parser .open_block_counts , parser -> open_block_counts , sizeof (parser -> open_block_counts ));
1092- tmp_parser .total_open_blocks = parser -> total_open_blocks ;
1093-
1094- assert (check_open_block_counts (parser ));
10951058
10961059 while (S_last_child_is_open (container )) {
1097- decr_open_block_count (& tmp_parser , S_type (container ));
10981060 container = container -> last_child ;
10991061 cont_type = S_type (container );
11001062
@@ -1106,53 +1068,6 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
11061068 continue ;
11071069 }
11081070
1109- // This block of code is a workaround for the quadratic performance
1110- // issue described here (issue 2):
1111- //
1112- // https://github.com/github/cmark-gfm/security/advisories/GHSA-66g8-4hjf-77xh
1113- //
1114- // If the current line is empty then we might be able to skip directly
1115- // to the end of the list of open blocks. To determine whether this is
1116- // possible, we have been maintaining a count of the number of
1117- // different types of open blocks. The main criterium is that every
1118- // remaining block, except the last element of the list, is a LIST or
1119- // ITEM. The code below checks the conditions, and if they're ok, skips
1120- // forward to parser->current.
1121- if (parser -> blank && parser -> indent == 0 ) { // Current line is empty
1122- // Make sure that parser->current doesn't point to a closed block.
1123- if (parser -> current -> flags & CMARK_NODE__OPEN_BLOCK ) {
1124- if (parser -> current -> flags & CMARK_NODE__OPEN ) {
1125- const size_t n_list = read_open_block_count (& tmp_parser , CMARK_NODE_LIST );
1126- const size_t n_item = read_open_block_count (& tmp_parser , CMARK_NODE_ITEM );
1127- // At most one block can be something other than a LIST or ITEM.
1128- if (n_list + n_item + 1 >= tmp_parser .total_open_blocks ) {
1129- // Check that parser->current is suitable for jumping to.
1130- switch (S_type (parser -> current )) {
1131- case CMARK_NODE_LIST :
1132- case CMARK_NODE_ITEM :
1133- if (n_list + n_item != tmp_parser .total_open_blocks ) {
1134- if (parser -> current -> last_child == NULL ) {
1135- // There's another node type somewhere in the middle of
1136- // the list, so don't attempt the optimization.
1137- break ;
1138- }
1139- }
1140- // fall through
1141- case CMARK_NODE_CODE_BLOCK :
1142- case CMARK_NODE_PARAGRAPH :
1143- case CMARK_NODE_HTML_BLOCK :
1144- // Jump to parser->current
1145- container = parser -> current ;
1146- cont_type = S_type (container );
1147- break ;
1148- default :
1149- break ;
1150- }
1151- }
1152- }
1153- }
1154- }
1155-
11561071 switch (cont_type ) {
11571072 case CMARK_NODE_BLOCK_QUOTE :
11581073 if (!parse_block_quote_prefix (parser , input ))
@@ -1212,10 +1127,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
12121127 bool has_content ;
12131128 int save_offset ;
12141129 int save_column ;
1130+ size_t depth = 0 ;
12151131
12161132 while (cont_type != CMARK_NODE_CODE_BLOCK &&
12171133 cont_type != CMARK_NODE_HTML_BLOCK ) {
1218-
1134+ depth ++ ;
12191135 S_find_first_nonspace (parser , input );
12201136 indented = parser -> indent >= CODE_INDENT ;
12211137
@@ -1286,9 +1202,8 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
12861202 has_content = resolve_reference_link_definitions (parser , * container );
12871203
12881204 if (has_content ) {
1289- cmark_node_set_type (* container , CMARK_NODE_HEADING );
1290- decr_open_block_count (parser , CMARK_NODE_PARAGRAPH );
1291- incr_open_block_count (parser , CMARK_NODE_HEADING );
1205+
1206+ (* container )-> type = (uint16_t )CMARK_NODE_HEADING ;
12921207 (* container )-> as .heading .level = lev ;
12931208 (* container )-> as .heading .setext = true;
12941209 S_advance_offset (parser , input , input -> len - 1 - parser -> offset , false);
@@ -1318,6 +1233,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
13181233 (* container )-> internal_offset = matched ;
13191234 } else if ((!indented || cont_type == CMARK_NODE_LIST ) &&
13201235 parser -> indent < 4 &&
1236+ depth < MAX_LIST_DEPTH &&
13211237 (matched = parse_list_marker (
13221238 parser -> mem , input , parser -> first_nonspace ,
13231239 (* container )-> type == CMARK_NODE_PARAGRAPH , & data ))) {
@@ -1443,7 +1359,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
14431359 S_set_last_line_blank (container , last_line_blank );
14441360
14451361 tmp = container ;
1446- while (tmp -> parent && S_last_line_blank ( tmp -> parent ) ) {
1362+ while (tmp -> parent ) {
14471363 S_set_last_line_blank (tmp -> parent , false);
14481364 tmp = tmp -> parent ;
14491365 }
@@ -1572,7 +1488,6 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
15721488
15731489 parser -> line_number ++ ;
15741490
1575- assert (parser -> current -> next == NULL );
15761491 last_matched_container = check_open_blocks (parser , & input , & all_matched );
15771492
15781493 if (!last_matched_container )
0 commit comments