diff --git a/Makefile b/Makefile index b92c1c11..9e682c56 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,3 @@ ext/commonmarker/cmark-gfm_export.h: ext/commonmarker/cmark-upstream/build/src/c ext/commonmarker/cmark-gfm_version.h: ext/commonmarker/cmark-upstream/build/src/cmark-gfm_version.h cp $< $@ - -ext/commonmarker/cmark-gfm-extensions_export.h: ext/commonmarker/cmark-upstream/build/extensions/cmark-gfm-extensions_export.h - cp $< $@ diff --git a/ext/commonmarker/blocks.c b/ext/commonmarker/blocks.c index 94045aba..03a58748 100644 --- a/ext/commonmarker/blocks.c +++ b/ext/commonmarker/blocks.c @@ -27,6 +27,14 @@ #define CODE_INDENT 4 #define TAB_STOP 4 +/** + * Very deeply nested lists can cause quadratic performance issues. + * This constant is used in open_new_blocks() to limit the nesting + * depth. It is unlikely that a non-contrived markdown document will + * be nested this deeply. + */ +#define MAX_LIST_DEPTH 100 + #ifndef MIN #define MIN(x, y) ((x < y) ? x : y) #endif @@ -1119,10 +1127,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, bool has_content; int save_offset; int save_column; + size_t depth = 0; while (cont_type != CMARK_NODE_CODE_BLOCK && cont_type != CMARK_NODE_HTML_BLOCK) { - + depth++; S_find_first_nonspace(parser, input); indented = parser->indent >= CODE_INDENT; @@ -1224,6 +1233,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->internal_offset = matched; } else if ((!indented || cont_type == CMARK_NODE_LIST) && parser->indent < 4 && + depth < MAX_LIST_DEPTH && (matched = parse_list_marker( parser->mem, input, parser->first_nonspace, (*container)->type == CMARK_NODE_PARAGRAPH, &data))) { diff --git a/ext/commonmarker/cmark-gfm.h b/ext/commonmarker/cmark-gfm.h index 6b362a86..0544057a 100644 --- a/ext/commonmarker/cmark-gfm.h +++ b/ext/commonmarker/cmark-gfm.h @@ -413,6 +413,17 @@ CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node); */ CMARK_GFM_EXPORT 
int cmark_node_set_list_tight(cmark_node *node, int tight); +/** + * Returns item index of 'node'. This is only used when rendering output + * formats such as commonmark, which need to output the index. It is not + * required for formats such as html or latex. + */ +CMARK_GFM_EXPORT int cmark_node_get_item_index(cmark_node *node); + +/** Sets item index of 'node'. Returns 1 on success, 0 on failure. + */ +CMARK_GFM_EXPORT int cmark_node_set_item_index(cmark_node *node, int idx); + /** Returns the info string from a fenced code block. */ CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node); diff --git a/ext/commonmarker/cmark-gfm_version.h b/ext/commonmarker/cmark-gfm_version.h index e2bbfbbb..19e67d81 100644 --- a/ext/commonmarker/cmark-gfm_version.h +++ b/ext/commonmarker/cmark-gfm_version.h @@ -1,7 +1,7 @@ #ifndef CMARK_GFM_VERSION_H #define CMARK_GFM_VERSION_H -#define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 6) -#define CMARK_GFM_VERSION_STRING "0.29.0.gfm.6" +#define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 11) +#define CMARK_GFM_VERSION_STRING "0.29.0.gfm.11" #endif diff --git a/ext/commonmarker/cmark-upstream b/ext/commonmarker/cmark-upstream index c32ef78b..1e230827 160000 --- a/ext/commonmarker/cmark-upstream +++ b/ext/commonmarker/cmark-upstream @@ -1 +1 @@ -Subproject commit c32ef78bae851cb83b7ad52d0fbff880acdcd44a +Subproject commit 1e230827a584ebc9938c3eadc5059c55ef3c9abf diff --git a/ext/commonmarker/commonmark.c b/ext/commonmarker/commonmark.c index 2e071944..4815bfc3 100644 --- a/ext/commonmarker/commonmark.c +++ b/ext/commonmarker/commonmark.c @@ -153,23 +153,8 @@ static bool is_autolink(cmark_node *node) { link_text->as.literal.len) == 0); } -// if node is a block node, returns node. -// otherwise returns first block-level node that is an ancestor of node. -// if there is no block-level ancestor, returns NULL. 
-static cmark_node *get_containing_block(cmark_node *node) { - while (node) { - if (CMARK_NODE_BLOCK_P(node)) { - return node; - } else { - node = node->parent; - } - } - return NULL; -} - static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { - cmark_node *tmp; int list_number; cmark_delim_type list_delim; int numticks; @@ -189,14 +174,17 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, // Don't adjust tight list status til we've started the list. // Otherwise we loose the blank line between a paragraph and // a following list. - if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { - tmp = get_containing_block(node); - renderer->in_tight_list_item = - tmp && // tmp might be NULL if there is no containing block - ((tmp->type == CMARK_NODE_ITEM && - cmark_node_get_list_tight(tmp->parent)) || - (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && - cmark_node_get_list_tight(tmp->parent->parent))); + if (entering) { + if (node->parent && node->parent->type == CMARK_NODE_ITEM) { + renderer->in_tight_list_item = node->parent->parent->as.list.tight; + } + } else { + if (node->type == CMARK_NODE_LIST) { + renderer->in_tight_list_item = + node->parent && + node->parent->type == CMARK_NODE_ITEM && + node->parent->parent->as.list.tight; + } } if (node->extension && node->extension->commonmark_render_func) { @@ -234,13 +222,8 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { marker_width = 4; } else { - list_number = cmark_node_get_list_start(node->parent); + list_number = cmark_node_get_item_index(node); list_delim = cmark_node_get_list_delim(node->parent); - tmp = node; - while (tmp->prev) { - tmp = tmp->prev; - list_number += 1; - } // we ensure a width of at least 4 so // we get nice transition from single digits // to double @@ -405,10 +388,12 @@ static int 
S_render_node(cmark_renderer *renderer, cmark_node *node, break; case CMARK_NODE_STRONG: - if (entering) { - LIT("**"); - } else { - LIT("**"); + if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) { + if (entering) { + LIT("**"); + } else { + LIT("**"); + } } break; diff --git a/ext/commonmarker/html.c b/ext/commonmarker/html.c index 529e0ea3..22513c93 100644 --- a/ext/commonmarker/html.c +++ b/ext/commonmarker/html.c @@ -364,10 +364,12 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, break; case CMARK_NODE_STRONG: - if (entering) { - cmark_strbuf_puts(html, "<strong>"); - } else { - cmark_strbuf_puts(html, "</strong>"); + if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) { + if (entering) { + cmark_strbuf_puts(html, "<strong>"); + } else { + cmark_strbuf_puts(html, "</strong>"); + } } break; diff --git a/ext/commonmarker/latex.c b/ext/commonmarker/latex.c index 8be15b0d..1a6367a4 100644 --- a/ext/commonmarker/latex.c +++ b/ext/commonmarker/latex.c @@ -385,10 +385,12 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, break; case CMARK_NODE_STRONG: - if (entering) { - LIT("\\textbf{"); - } else { - LIT("}"); + if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) { + if (entering) { + LIT("\\textbf{"); + } else { + LIT("}"); + } } break; diff --git a/ext/commonmarker/man.c b/ext/commonmarker/man.c index 441a96e4..634fd9d0 100644 --- a/ext/commonmarker/man.c +++ b/ext/commonmarker/man.c @@ -74,7 +74,6 @@ static void S_outc(cmark_renderer *renderer, cmark_node *node, static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { - cmark_node *tmp; int list_number; bool entering = (ev_type == CMARK_EVENT_ENTER); bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); @@ -123,12 +122,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { LIT("\\[bu] 2"); } 
else { - list_number = cmark_node_get_list_start(node->parent); - tmp = node; - while (tmp->prev) { - tmp = tmp->prev; - list_number += 1; - } + list_number = cmark_node_get_item_index(node); char list_number_s[LIST_NUMBER_SIZE]; snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number); LIT(list_number_s); @@ -225,10 +219,12 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, break; case CMARK_NODE_STRONG: - if (entering) { - LIT("\\f[B]"); - } else { - LIT("\\f[]"); + if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) { + if (entering) { + LIT("\\f[B]"); + } else { + LIT("\\f[]"); + } } break; diff --git a/ext/commonmarker/node.c b/ext/commonmarker/node.c index 56daf0aa..67f657d8 100644 --- a/ext/commonmarker/node.c +++ b/ext/commonmarker/node.c @@ -5,6 +5,16 @@ #include "node.h" #include "syntax_extension.h" +/** + * Expensive safety checks are off by default, but can be enabled + * by calling cmark_enable_safety_checks(). + */ +static bool enable_safety_checks = false; + +void cmark_enable_safety_checks(bool enable) { + enable_safety_checks = enable; +} + static void S_node_unlink(cmark_node *node); #define NODE_MEM(node) cmark_node_mem(node) @@ -29,7 +39,7 @@ void cmark_register_node_flag(cmark_node_internal_flags *flags) { nextflag <<= 1; } -void cmark_init_standard_node_flags() {} +void cmark_init_standard_node_flags(void) {} bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) { if (child_type == CMARK_NODE_DOCUMENT) { @@ -70,8 +80,6 @@ bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) { } static bool S_can_contain(cmark_node *node, cmark_node *child) { - cmark_node *cur; - if (node == NULL || child == NULL) { return false; } @@ -79,14 +87,16 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) { return 0; } - // Verify that child is not an ancestor of node or equal to node. 
- cur = node; - do { - if (cur == child) { - return false; - } - cur = cur->parent; - } while (cur != NULL); + if (enable_safety_checks) { + // Verify that child is not an ancestor of node or equal to node. + cmark_node *cur = node; + do { + if (cur == child) { + return false; + } + cur = cur->parent; + } while (cur != NULL); + } return cmark_node_can_contain_type(node, (cmark_node_type) child->type); } @@ -554,6 +564,31 @@ int cmark_node_set_list_tight(cmark_node *node, int tight) { } } +int cmark_node_get_item_index(cmark_node *node) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_ITEM) { + return node->as.list.start; + } else { + return 0; + } +} + +int cmark_node_set_item_index(cmark_node *node, int idx) { + if (node == NULL || idx < 0) { + return 0; + } + + if (node->type == CMARK_NODE_ITEM) { + node->as.list.start = idx; + return 1; + } else { + return 0; + } +} + const char *cmark_node_get_fence_info(cmark_node *node) { if (node == NULL) { return NULL; diff --git a/ext/commonmarker/node.h b/ext/commonmarker/node.h index e025e25d..38ac4a6f 100644 --- a/ext/commonmarker/node.h +++ b/ext/commonmarker/node.h @@ -82,6 +82,14 @@ struct cmark_node { cmark_syntax_extension *extension; + /** + * Used during cmark_render() to cache the most recent non-NULL + * extension, if you go up the parent chain like this: + * + * node->parent->...parent->extension + */ + cmark_syntax_extension *ancestor_extension; + union { int ref_ix; int def_count; @@ -119,7 +127,7 @@ void cmark_register_node_flag(cmark_node_internal_flags *flags); * library. It is now a no-op. 
*/ CMARK_GFM_EXPORT -void cmark_init_standard_node_flags(); +void cmark_init_standard_node_flags(void); static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) { return node->content.mem; @@ -144,6 +152,13 @@ static CMARK_INLINE bool CMARK_NODE_INLINE_P(cmark_node *node) { CMARK_GFM_EXPORT bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type); +/** + * Enable (or disable) extra safety checks. These extra checks cause + * extra performance overhead (in some cases quadratic), so they are only + * intended to be used during testing. + */ +CMARK_GFM_EXPORT void cmark_enable_safety_checks(bool enable); + #ifdef __cplusplus } #endif diff --git a/ext/commonmarker/plaintext.c b/ext/commonmarker/plaintext.c index b25e4a39..0c7d257b 100644 --- a/ext/commonmarker/plaintext.c +++ b/ext/commonmarker/plaintext.c @@ -16,23 +16,8 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, cmark_render_code_point(renderer, c); } -// if node is a block node, returns node. -// otherwise returns first block-level node that is an ancestor of node. -// if there is no block-level ancestor, returns NULL. -static cmark_node *get_containing_block(cmark_node *node) { - while (node) { - if (CMARK_NODE_BLOCK_P(node)) { - return node; - } else { - node = node->parent; - } - } - return NULL; -} - static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { - cmark_node *tmp; int list_number; cmark_delim_type list_delim; int i; @@ -46,14 +31,17 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, // Don't adjust tight list status til we've started the list. // Otherwise we loose the blank line between a paragraph and // a following list. 
- if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { - tmp = get_containing_block(node); - renderer->in_tight_list_item = - tmp && // tmp might be NULL if there is no containing block - ((tmp->type == CMARK_NODE_ITEM && - cmark_node_get_list_tight(tmp->parent)) || - (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && - cmark_node_get_list_tight(tmp->parent->parent))); + if (entering) { + if (node->parent && node->parent->type == CMARK_NODE_ITEM) { + renderer->in_tight_list_item = node->parent->parent->as.list.tight; + } + } else { + if (node->type == CMARK_NODE_LIST) { + renderer->in_tight_list_item = + node->parent && + node->parent->type == CMARK_NODE_ITEM && + node->parent->parent->as.list.tight; + } } if (node->extension && node->extension->plaintext_render_func) { @@ -79,13 +67,8 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { marker_width = 4; } else { - list_number = cmark_node_get_list_start(node->parent); + list_number = cmark_node_get_item_index(node); list_delim = cmark_node_get_list_delim(node->parent); - tmp = node; - while (tmp->prev) { - tmp = tmp->prev; - list_number += 1; - } // we ensure a width of at least 4 so // we get nice transition from single digits // to double diff --git a/ext/commonmarker/render.c b/ext/commonmarker/render.c index 02e9e838..1a0d2ae8 100644 --- a/ext/commonmarker/render.c +++ b/ext/commonmarker/render.c @@ -31,13 +31,7 @@ static void S_out(cmark_renderer *renderer, cmark_node *node, cmark_chunk remainder = cmark_chunk_literal(""); int k = renderer->buffer->size - 1; - cmark_syntax_extension *ext = NULL; - cmark_node *n = node; - while (n && !ext) { - ext = n->extension; - if (!ext) - n = n->parent; - } + cmark_syntax_extension *ext = node->ancestor_extension; if (ext && !ext->commonmark_escape_func) ext = NULL; @@ -182,6 +176,20 @@ char *cmark_render(cmark_mem *mem, cmark_node *root, int options, 
int width, while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); + if (cur->extension) { + cur->ancestor_extension = cur->extension; + } else if (cur->parent) { + cur->ancestor_extension = cur->parent->ancestor_extension; + } + if (cur->type == CMARK_NODE_ITEM) { + // Calculate the list item's index, for the benefit of output formats + // like commonmark and plaintext. + if (cur->prev) { + cmark_node_set_item_index(cur, 1 + cmark_node_get_item_index(cur->prev)); + } else { + cmark_node_set_item_index(cur, cmark_node_get_list_start(cur->parent)); + } + } if (!render_node(&renderer, cur, ev_type, options)) { // a false value causes us to skip processing // the node's contents. this is used for diff --git a/ext/commonmarker/xml.c b/ext/commonmarker/xml.c index 2975bf96..5753e5ab 100644 --- a/ext/commonmarker/xml.c +++ b/ext/commonmarker/xml.c @@ -11,6 +11,7 @@ #include "syntax_extension.h" #define BUFFER_SIZE 100 +#define MAX_INDENT 40 // Functions to convert cmark_nodes to XML strings. 
@@ -26,7 +27,7 @@ struct render_state { static CMARK_INLINE void indent(struct render_state *state) { int i; - for (i = 0; i < state->indent; i++) { + for (i = 0; i < state->indent && i < MAX_INDENT; i++) { cmark_strbuf_putc(state->xml, ' '); } } diff --git a/lib/commonmarker/renderer/html_renderer.rb b/lib/commonmarker/renderer/html_renderer.rb index df10042d..2d8375ab 100644 --- a/lib/commonmarker/renderer/html_renderer.rb +++ b/lib/commonmarker/renderer/html_renderer.rb @@ -129,8 +129,12 @@ def emph(_) out("<em>", :children, "</em>") end - def strong(_) - out("<strong>", :children, "</strong>") + def strong(node) + if node.parent&.type == :strong + out(:children) + else + out("<strong>", :children, "</strong>") + end end def link(node) diff --git a/lib/commonmarker/version.rb b/lib/commonmarker/version.rb index 6405511f..77032fd8 100644 --- a/lib/commonmarker/version.rb +++ b/lib/commonmarker/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module CommonMarker - VERSION = "0.23.8" + VERSION = "0.23.9" end