From 07a62c6c751455da95a4ec9dfad2576b9dcd766a Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Tue, 3 Sep 2019 19:40:31 +0300 Subject: [PATCH] Move nested delimiter info to opening token instead of inline state --- lib/rules_inline/balance_pairs.js | 115 +++++++++++++----- lib/rules_inline/emphasis.js | 28 +++-- lib/rules_inline/state_inline.js | 30 ++++- lib/rules_inline/strikethrough.js | 26 +++- .../markdown-it/commonmark_extras.txt | 7 ++ test/fixtures/markdown-it/strikethrough.txt | 15 +++ 6 files changed, 170 insertions(+), 51 deletions(-) diff --git a/lib/rules_inline/balance_pairs.js b/lib/rules_inline/balance_pairs.js index 93d3100..8ee8695 100644 --- a/lib/rules_inline/balance_pairs.js +++ b/lib/rules_inline/balance_pairs.js @@ -3,53 +3,106 @@ 'use strict'; -module.exports = function link_pairs(state) { - var i, j, lastDelim, currDelim, - delimiters = state.delimiters, - max = state.delimiters.length; +function processDelimiters(state, delimiters) { + var closerIdx, openerIdx, closer, opener, minOpenerIdx, newMinOpenerIdx, + isOddMatch, lastJump, + openersBottom = {}, + max = delimiters.length; + + for (closerIdx = 0; closerIdx < max; closerIdx++) { + closer = delimiters[closerIdx]; + + // Length is only used for emphasis-specific "rule of 3", + // if it's not defined (in strikethrough or 3rd party plugins), + // we can default it to 0 to disable those checks. + // + closer.length = closer.length || 0; + + if (!closer.close) continue; - for (i = 0; i < max; i++) { - lastDelim = delimiters[i]; + // Previously calculated lower bounds (previous fails) + // for each marker and each delimiter length modulo 3. 
+ if (!openersBottom.hasOwnProperty(closer.marker)) { + openersBottom[closer.marker] = [ -1, -1, -1 ]; + } + + minOpenerIdx = openersBottom[closer.marker][closer.length % 3]; + newMinOpenerIdx = -1; - if (!lastDelim.close) { continue; } + openerIdx = closerIdx - closer.jump - 1; - j = i - lastDelim.jump - 1; + for (; openerIdx > minOpenerIdx; openerIdx -= opener.jump + 1) { + opener = delimiters[openerIdx]; - while (j >= 0) { - currDelim = delimiters[j]; + if (opener.marker !== closer.marker) continue; - if (currDelim.open && - currDelim.marker === lastDelim.marker && - currDelim.end < 0 && - currDelim.level === lastDelim.level) { + if (newMinOpenerIdx === -1) newMinOpenerIdx = openerIdx; - var odd_match = false; + if (opener.open && + opener.end < 0 && + opener.level === closer.level) { - // typeofs are for backward compatibility with plugins - if ((currDelim.close || lastDelim.open) && - typeof currDelim.length !== 'undefined' && - typeof lastDelim.length !== 'undefined') { + isOddMatch = false; - // from spec: - // sum of the lengths [...] must not be a multiple of 3 - // unless both lengths are multiples of 3 - if ((currDelim.length + lastDelim.length) % 3 === 0) { - if (currDelim.length % 3 !== 0 || lastDelim.length % 3 !== 0) { - odd_match = true; + // from spec: + // + // If one of the delimiters can both open and close emphasis, then the + // sum of the lengths of the delimiter runs containing the opening and + // closing delimiters must not be a multiple of 3 unless both lengths + // are multiples of 3. + // + if (opener.close || closer.open) { + if ((opener.length + closer.length) % 3 === 0) { + if (opener.length % 3 !== 0 || closer.length % 3 !== 0) { + isOddMatch = true; } } } - if (!odd_match) { - lastDelim.jump = i - j; - lastDelim.open = false; - currDelim.end = i; - currDelim.jump = 0; + if (!isOddMatch) { + // If previous delimiter cannot be an opener, we can safely skip + // the entire sequence in future checks. 
This is required to make + // sure algorithm has linear complexity (see *_*_*_*_*_... case). + // + lastJump = openerIdx > 0 && !delimiters[openerIdx - 1].open ? + delimiters[openerIdx - 1].jump + 1 : + 0; + + closer.jump = closerIdx - openerIdx + lastJump; + closer.open = false; + opener.end = closerIdx; + opener.jump = lastJump; + opener.close = false; + newMinOpenerIdx = -1; break; } } + } + + if (newMinOpenerIdx !== -1) { + // If match for this delimiter run failed, we want to set lower bound for + // future lookups. This is required to make sure algorithm has linear + // complexity. + // + // See details here: + // https://github.com/commonmark/cmark/issues/178#issuecomment-270417442 + // + openersBottom[closer.marker][(closer.length || 0) % 3] = newMinOpenerIdx; + } + } +} + + +module.exports = function link_pairs(state) { + var curr, + tokens_meta = state.tokens_meta, + max = state.tokens_meta.length; + + processDelimiters(state, state.delimiters); - j -= currDelim.jump + 1; + for (curr = 0; curr < max; curr++) { + if (tokens_meta[curr] && tokens_meta[curr].delimiters) { + processDelimiters(state, tokens_meta[curr].delimiters); } } }; diff --git a/lib/rules_inline/emphasis.js b/lib/rules_inline/emphasis.js index 54549e3..c140d2c 100644 --- a/lib/rules_inline/emphasis.js +++ b/lib/rules_inline/emphasis.js @@ -42,10 +42,6 @@ module.exports.tokenize = function emphasis(state, silent) { // token: state.tokens.length - 1, - // Token level. - // - level: state.level, - // If this delimiter is matched as a valid opener, `end` will be // equal to its position, otherwise it's `-1`. 
// @@ -65,17 +61,14 @@ module.exports.tokenize = function emphasis(state, silent) { }; -// Walk through delimiter list and replace text tokens with tags -// -module.exports.postProcess = function emphasis(state) { +function postProcess(state, delimiters) { var i, startDelim, endDelim, token, ch, isStrong, - delimiters = state.delimiters, - max = state.delimiters.length; + max = delimiters.length; for (i = max - 1; i >= 0; i--) { startDelim = delimiters[i]; @@ -124,4 +117,21 @@ module.exports.postProcess = function emphasis(state) { i--; } } +} + + +// Walk through delimiter list and replace text tokens with tags +// +module.exports.postProcess = function emphasis(state) { + var curr, + tokens_meta = state.tokens_meta, + max = state.tokens_meta.length; + + postProcess(state, state.delimiters); + + for (curr = 0; curr < max; curr++) { + if (tokens_meta[curr] && tokens_meta[curr].delimiters) { + postProcess(state, tokens_meta[curr].delimiters); + } + } }; diff --git a/lib/rules_inline/state_inline.js b/lib/rules_inline/state_inline.js index acf0ddc..8510504 100644 --- a/lib/rules_inline/state_inline.js +++ b/lib/rules_inline/state_inline.js @@ -14,6 +14,7 @@ function StateInline(src, md, env, outTokens) { this.env = env; this.md = md; this.tokens = outTokens; + this.tokens_meta = Array(outTokens.length); this.pos = 0; this.posMax = this.src.length; @@ -21,10 +22,15 @@ function StateInline(src, md, env, outTokens) { this.pending = ''; this.pendingLevel = 0; - this.cache = {}; // Stores { start: end } pairs. Useful for backtrack - // optimization of pairs parse (emphasis, strikes). + // Stores { start: end } pairs. Useful for backtrack + // optimization of pairs parse (emphasis, strikes). 
+ this.cache = {}; - this.delimiters = []; // Emphasis-like delimiters + // List of emphasis-like delimiters for current tag + this.delimiters = []; + + // Stack of delimiter lists for upper level tags + this._prev_delimiters = []; } @@ -49,13 +55,27 @@ StateInline.prototype.push = function (type, tag, nesting) { } var token = new Token(type, tag, nesting); + var token_meta = null; + + if (nesting < 0) { + // closing tag + this.level--; + this.delimiters = this._prev_delimiters.pop(); + } - if (nesting < 0) this.level--; // closing tag token.level = this.level; - if (nesting > 0) this.level++; // opening tag + + if (nesting > 0) { + // opening tag + this.level++; + this._prev_delimiters.push(this.delimiters); + this.delimiters = []; + token_meta = { delimiters: this.delimiters }; + } this.pendingLevel = this.level; this.tokens.push(token); + this.tokens_meta.push(token_meta); return token; }; diff --git a/lib/rules_inline/strikethrough.js b/lib/rules_inline/strikethrough.js index 0347241..f3afe4a 100644 --- a/lib/rules_inline/strikethrough.js +++ b/lib/rules_inline/strikethrough.js @@ -32,9 +32,9 @@ module.exports.tokenize = function strikethrough(state, silent) { state.delimiters.push({ marker: marker, + length: 0, // disable "rule of 3" length checks meant for emphasis jump: i, token: state.tokens.length - 1, - level: state.level, end: -1, open: scanned.can_open, close: scanned.can_close @@ -47,16 +47,13 @@ module.exports.tokenize = function strikethrough(state, silent) { }; -// Walk through delimiter list and replace text tokens with tags -// -module.exports.postProcess = function strikethrough(state) { +function postProcess(state, delimiters) { var i, j, startDelim, endDelim, token, loneMarkers = [], - delimiters = state.delimiters, - max = state.delimiters.length; + max = delimiters.length; for (i = 0; i < max; i++) { startDelim = delimiters[i]; @@ -114,4 +111,21 @@ module.exports.postProcess = function strikethrough(state) { state.tokens[i] = token; } } +} + 
+ +// Walk through delimiter list and replace text tokens with tags +// +module.exports.postProcess = function strikethrough(state) { + var curr, + tokens_meta = state.tokens_meta, + max = state.tokens_meta.length; + + postProcess(state, state.delimiters); + + for (curr = 0; curr < max; curr++) { + if (tokens_meta[curr] && tokens_meta[curr].delimiters) { + postProcess(state, tokens_meta[curr].delimiters); + } + } }; diff --git a/test/fixtures/markdown-it/commonmark_extras.txt b/test/fixtures/markdown-it/commonmark_extras.txt index 369977d..27b0cf0 100644 --- a/test/fixtures/markdown-it/commonmark_extras.txt +++ b/test/fixtures/markdown-it/commonmark_extras.txt @@ -103,6 +103,13 @@ _(hai)_.

<p><em>(hai)</em>.</p>

. +Regression test, should not match emphasis markers in different link tags: +. +[*b]() [c*]() +. +

<p><a href="">*b</a> <a href="">c*</a></p>

+. + Those are two separate blockquotes: . - > foo diff --git a/test/fixtures/markdown-it/strikethrough.txt b/test/fixtures/markdown-it/strikethrough.txt index fb1463f..ec2678b 100644 --- a/test/fixtures/markdown-it/strikethrough.txt +++ b/test/fixtures/markdown-it/strikethrough.txt @@ -84,6 +84,14 @@ foo ~~ bar ~~ baz . +Should parse strikethrough within link tags: +. +[~~foo~~]() +. +

<p><a href=""><s>foo</s></a></p>

+. + + Newline should be considered a whitespace: . ~~test @@ -112,3 +120,10 @@ a~~"foo"~~ .

<p>a~~“foo”~~</p>

. + +Coverage: single tilde +. +~a~ +. +

<p>~a~</p>

+.