From 378ac9074e393354253281fbe243860732d1f631 Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Fri, 25 Sep 2015 18:53:55 +0300 Subject: [PATCH] Change emphasis parsing algorithm Tokenize first, and replace tokens with emphasis tags on a second pass using an algorithm close to one used in CM. --- docs/4.0_migration.md | 10 +- docs/5.0_migration.md | 18 ++ lib/index.js | 7 + lib/parser_inline.js | 35 ++- lib/presets/commonmark.js | 5 + lib/presets/zero.js | 4 + lib/rules_inline/balance_pairs.js | 36 +++ lib/rules_inline/emphasis.js | 257 ++++++++------------ lib/rules_inline/image.js | 6 +- lib/rules_inline/state_inline.js | 72 +++++- lib/rules_inline/strikethrough.js | 199 +++++++-------- lib/rules_inline/text_collapse.js | 33 +++ test/fixtures/commonmark/bad.txt | 14 -- test/fixtures/commonmark/good.txt | 9 + test/fixtures/markdown-it/strikethrough.txt | 9 + test/misc.js | 5 - 16 files changed, 421 insertions(+), 298 deletions(-) create mode 100644 docs/5.0_migration.md create mode 100644 lib/rules_inline/balance_pairs.js create mode 100644 lib/rules_inline/text_collapse.js diff --git a/docs/4.0_migration.md b/docs/4.0_migration.md index 75a255d..f8abedf 100644 --- a/docs/4.0_migration.md +++ b/docs/4.0_migration.md @@ -6,14 +6,14 @@ authors should update their packages. ## For users -External API did not changed. +External API did not change. -- If you used `markdown-it` with plugins - make shure to update those. +- If you used `markdown-it` with plugins - make sure to update those. - If you modified renderer - see dev info below. -- If you did not used plugins and renderer modification - no chanches needed. +- If you did not use plugins and renderer modification - no changes needed. -## For Developpers +## For developers ### Tokens and renderer @@ -25,7 +25,7 @@ External API did not changed. repos in this org. - [Renderer](https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js) methods were unified. 
Number of custom renderer rules were significantly reduced. - Custom renderer functions need update due tokens format change. + Custom renderer functions need update due to tokens format change. ### Other changes diff --git a/docs/5.0_migration.md b/docs/5.0_migration.md new file mode 100644 index 0000000..3346210 --- /dev/null +++ b/docs/5.0_migration.md @@ -0,0 +1,18 @@ +Migration to v5 +=============== + +v5 has the same external API as v4, only internals were changed. Plugins that introduce block elements might need an update. + +## For users + +External API did not change. + +- If you use `markdown-it` with plugins, make sure to update them. + + +## For plugin developers + +- added `stateBlock.sCount` to calculate indents instead of `stateBlock.tShift`, it only differs if tabs are present: + - `stateBlock.tShift` is used to calculate a number of *characters* (tab is 1 character) + - `stateBlock.sCount` is used to calculate the block *offset* (tab is 1-4 characters depending on position) + diff --git a/lib/index.js b/lib/index.js index 6b301e8..d6ab79d 100644 --- a/lib/index.js +++ b/lib/index.js @@ -382,6 +382,9 @@ MarkdownIt.prototype.configure = function (presets) { if (presets.components[name].rules) { self[name].ruler.enableOnly(presets.components[name].rules); } + if (presets.components[name].rules2) { + self[name].ruler2.enableOnly(presets.components[name].rules2); + } }); } return this; @@ -414,6 +417,8 @@ MarkdownIt.prototype.enable = function (list, ignoreInvalid) { result = result.concat(this[chain].ruler.enable(list, true)); }, this); + result = result.concat(this.inline.ruler2.enable(list, true)); + var missed = list.filter(function (name) { return result.indexOf(name) < 0; }); if (missed.length && !ignoreInvalid) { @@ -440,6 +445,8 @@ MarkdownIt.prototype.disable = function (list, ignoreInvalid) { result = result.concat(this[chain].ruler.disable(list, true)); }, this); + result = result.concat(this.inline.ruler2.disable(list, true)); + var missed 
= list.filter(function (name) { return result.indexOf(name) < 0; }); if (missed.length && !ignoreInvalid) { diff --git a/lib/parser_inline.js b/lib/parser_inline.js index a19a5de..e89a71b 100644 --- a/lib/parser_inline.js +++ b/lib/parser_inline.js @@ -17,8 +17,8 @@ var _rules = [ [ 'newline', require('./rules_inline/newline') ], [ 'escape', require('./rules_inline/escape') ], [ 'backticks', require('./rules_inline/backticks') ], - [ 'strikethrough', require('./rules_inline/strikethrough') ], - [ 'emphasis', require('./rules_inline/emphasis') ], + [ 'strikethrough', require('./rules_inline/strikethrough').tokenize ], + [ 'emphasis', require('./rules_inline/emphasis').tokenize ], [ 'link', require('./rules_inline/link') ], [ 'image', require('./rules_inline/image') ], [ 'autolink', require('./rules_inline/autolink') ], @@ -26,11 +26,20 @@ var _rules = [ [ 'entity', require('./rules_inline/entity') ] ]; +var _rules2 = [ + [ 'balance_pairs', require('./rules_inline/balance_pairs') ], + [ 'strikethrough', require('./rules_inline/strikethrough').postProcess ], + [ 'emphasis', require('./rules_inline/emphasis').postProcess ], + [ 'text_collapse', require('./rules_inline/text_collapse') ] +]; + /** * new ParserInline() **/ function ParserInline() { + var i; + /** * ParserInline#ruler -> Ruler * @@ -38,9 +47,21 @@ function ParserInline() { **/ this.ruler = new Ruler(); - for (var i = 0; i < _rules.length; i++) { + for (i = 0; i < _rules.length; i++) { this.ruler.push(_rules[i][0], _rules[i][1]); } + + /** + * ParserInline#ruler2 -> Ruler + * + * [[Ruler]] instance. Second ruler used for post-processing + * (e.g. in emphasis-like rules). 
+ **/ + this.ruler2 = new Ruler(); + + for (i = 0; i < _rules2.length; i++) { + this.ruler2.push(_rules2[i][0], _rules2[i][1]); + } } @@ -119,9 +140,17 @@ ParserInline.prototype.tokenize = function (state) { * Process input string and push inline tokens into `outTokens` **/ ParserInline.prototype.parse = function (str, md, env, outTokens) { + var i, rules, len; var state = new this.State(str, md, env, outTokens); this.tokenize(state); + + rules = this.ruler2.getRules(''); + len = rules.length; + + for (i = 0; i < len; i++) { + rules[i](state); + } }; diff --git a/lib/presets/commonmark.js b/lib/presets/commonmark.js index 99e5b2b..b0aaabc 100644 --- a/lib/presets/commonmark.js +++ b/lib/presets/commonmark.js @@ -68,6 +68,11 @@ module.exports = { 'link', 'newline', 'text' + ], + rules2: [ + 'balance_pairs', + 'emphasis', + 'text_collapse' ] } } diff --git a/lib/presets/zero.js b/lib/presets/zero.js index eda6dba..fd0376f 100644 --- a/lib/presets/zero.js +++ b/lib/presets/zero.js @@ -51,6 +51,10 @@ module.exports = { inline: { rules: [ 'text' + ], + rules2: [ + 'balance_pairs', + 'text_collapse' ] } } diff --git a/lib/rules_inline/balance_pairs.js b/lib/rules_inline/balance_pairs.js new file mode 100644 index 0000000..4ec2845 --- /dev/null +++ b/lib/rules_inline/balance_pairs.js @@ -0,0 +1,36 @@ +// For each opening emphasis-like marker find a matching closing one +// +'use strict'; + + +module.exports = function link_pairs(state) { + var i, j, lastDelim, currDelim, + delimiters = state.delimiters, + max = state.delimiters.length; + + for (i = 0; i < max; i++) { + lastDelim = delimiters[i]; + + if (!lastDelim.close) { continue; } + + j = i - lastDelim.jump - 1; + + while (j >= 0) { + currDelim = delimiters[j]; + + if (currDelim.open && + currDelim.marker === lastDelim.marker && + currDelim.end < 0 && + currDelim.level === lastDelim.level) { + + lastDelim.jump = i - j; + lastDelim.open = false; + currDelim.end = i; + currDelim.jump = 0; + break; + } + + j -= 
currDelim.jump + 1; + } + } +}; diff --git a/lib/rules_inline/emphasis.js b/lib/rules_inline/emphasis.js index 051d2d0..fa26e61 100644 --- a/lib/rules_inline/emphasis.js +++ b/lib/rules_inline/emphasis.js @@ -3,172 +3,121 @@ 'use strict'; -var isWhiteSpace = require('../common/utils').isWhiteSpace; -var isPunctChar = require('../common/utils').isPunctChar; -var isMdAsciiPunct = require('../common/utils').isMdAsciiPunct; - - -// parse sequence of emphasis markers, -// "start" should point at a valid marker -function scanDelims(state, start) { - var pos = start, lastChar, nextChar, count, can_open, can_close, - isLastWhiteSpace, isLastPunctChar, - isNextWhiteSpace, isNextPunctChar, - left_flanking = true, - right_flanking = true, - max = state.posMax, +// Insert each marker as a separate text token, and add it to delimiter list +// +module.exports.tokenize = function emphasis(state, silent) { + var i, scanned, token, + start = state.pos, marker = state.src.charCodeAt(start); - // treat beginning of the line as a whitespace - lastChar = start > 0 ? state.src.charCodeAt(start - 1) : 0x20; - - while (pos < max && state.src.charCodeAt(pos) === marker) { pos++; } - - count = pos - start; - - // treat end of the line as a whitespace - nextChar = pos < max ? 
state.src.charCodeAt(pos) : 0x20; - - isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar)); - isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar)); - - isLastWhiteSpace = isWhiteSpace(lastChar); - isNextWhiteSpace = isWhiteSpace(nextChar); - - if (isNextWhiteSpace) { - left_flanking = false; - } else if (isNextPunctChar) { - if (!(isLastWhiteSpace || isLastPunctChar)) { - left_flanking = false; - } + if (silent) { return false; } + + if (marker !== 0x5F /* _ */ && marker !== 0x2A /* * */) { return false; } + + scanned = state.scanDelims(state.pos, marker === 0x2A); + + for (i = 0; i < scanned.length; i++) { + token = state.push('text', '', 0); + token.content = String.fromCharCode(marker); + + state.delimiters.push({ + // Char code of the starting marker (number). + // + marker: marker, + + // An amount of characters before this one that's equivalent to + // current one. In plain English: if this delimiter does not open + // an emphasis, neither do previous `jump` characters. + // + // Used to skip sequences like "*****" in one step, for 1st asterisk + // value will be 0, for 2nd it's 1 and so on. + // + jump: i, + + // A position of the token this delimiter corresponds to. + // + token: state.tokens.length - 1, + + // Token level. + // + level: state.level, + + // If this delimiter is matched as a valid opener, `end` will be + // equal to its position, otherwise it's `-1`. + // + end: -1, + + // Boolean flags that determine if this delimiter could open or close + // an emphasis. 
+ // + open: scanned.can_open, + close: scanned.can_close + }); } - if (isLastWhiteSpace) { - right_flanking = false; - } else if (isLastPunctChar) { - if (!(isNextWhiteSpace || isNextPunctChar)) { - right_flanking = false; - } - } + state.pos += scanned.length; - if (marker === 0x5F /* _ */) { - // "_" inside a word can neither open nor close an emphasis - can_open = left_flanking && (!right_flanking || isLastPunctChar); - can_close = right_flanking && (!left_flanking || isNextPunctChar); - } else { - can_open = left_flanking; - can_close = right_flanking; - } + return true; +}; - return { - can_open: can_open, - can_close: can_close, - delims: count - }; -} - -module.exports = function emphasis(state, silent) { - var startCount, - count, - found, - oldCount, - newCount, - stack, - res, - token, - max = state.posMax, - start = state.pos, - marker = state.src.charCodeAt(start); - if (marker !== 0x5F/* _ */ && marker !== 0x2A /* * */) { return false; } - if (silent) { return false; } // don't run any pairs in validation mode +// Walk through delimiter list and replace text tokens with tags +// +module.exports.postProcess = function emphasis(state) { + var i, + startDelim, + endDelim, + token, + ch, + isStrong, + delimiters = state.delimiters, + max = state.delimiters.length; - res = scanDelims(state, start); - startCount = res.delims; - if (!res.can_open) { - state.pos += startCount; - // Earlier we checked !silent, but this implementation does not need it - state.pending += state.src.slice(start, state.pos); - return true; - } + for (i = 0; i < max; i++) { + startDelim = delimiters[i]; - state.pos = start + startCount; - stack = [ startCount ]; - - while (state.pos < max) { - if (state.src.charCodeAt(state.pos) === marker) { - res = scanDelims(state, state.pos); - count = res.delims; - if (res.can_close) { - oldCount = stack.pop(); - newCount = count; - - while (oldCount !== newCount) { - if (newCount < oldCount) { - stack.push(oldCount - newCount); - break; - } - 
- // assert(newCount > oldCount) - newCount -= oldCount; - - if (stack.length === 0) { break; } - state.pos += oldCount; - oldCount = stack.pop(); - } - - if (stack.length === 0) { - startCount = oldCount; - found = true; - break; - } - state.pos += count; - continue; - } - - if (res.can_open) { stack.push(count); } - state.pos += count; + if (startDelim.marker !== 0x5F/* _ */ && startDelim.marker !== 0x2A/* * */) { continue; } - state.md.inline.skipToken(state); - } - - if (!found) { - // parser failed to find ending tag, so it's not valid emphasis - state.pos = start; - return false; - } - - // found! - state.posMax = state.pos; - state.pos = start + startCount; - - // Earlier we checked !silent, but this implementation does not need it - - // we have `startCount` starting and ending markers, - // now trying to serialize them into tokens - for (count = startCount; count > 1; count -= 2) { - token = state.push('strong_open', 'strong', 1); - token.markup = String.fromCharCode(marker) + String.fromCharCode(marker); - } - if (count % 2) { - token = state.push('em_open', 'em', 1); - token.markup = String.fromCharCode(marker); - } - - state.md.inline.tokenize(state); + // Process only opening markers + if (startDelim.end === -1) { + continue; + } - if (count % 2) { - token = state.push('em_close', 'em', -1); - token.markup = String.fromCharCode(marker); - } - for (count = startCount; count > 1; count -= 2) { - token = state.push('strong_close', 'strong', -1); - token.markup = String.fromCharCode(marker) + String.fromCharCode(marker); + endDelim = delimiters[startDelim.end]; + + // If the next delimiter has the same marker and is adjacent to this one, + // merge those into one strong delimiter. 
+ // + // `<em><em>whatever</em></em>` -> `<strong>whatever</strong>` + // + isStrong = i + 1 < max && + delimiters[i + 1].end === startDelim.end - 1 && + delimiters[i + 1].token === startDelim.token + 1 && + delimiters[startDelim.end - 1].token === endDelim.token - 1 && + delimiters[i + 1].marker === startDelim.marker; + + ch = String.fromCharCode(startDelim.marker); + + token = state.tokens[startDelim.token]; + token.type = isStrong ? 'strong_open' : 'em_open'; + token.tag = isStrong ? 'strong' : 'em'; + token.nesting = 1; + token.markup = isStrong ? ch + ch : ch; + token.content = ''; + + token = state.tokens[endDelim.token]; + token.type = isStrong ? 'strong_close' : 'em_close'; + token.tag = isStrong ? 'strong' : 'em'; + token.nesting = -1; + token.markup = isStrong ? ch + ch : ch; + token.content = ''; + + if (isStrong) { + state.tokens[delimiters[i + 1].token].content = ''; + state.tokens[delimiters[startDelim.end - 1].token].content = ''; + i++; + } } - - state.pos = state.posMax + startCount; - state.posMax = max; - return true; }; diff --git a/lib/rules_inline/image.js b/lib/rules_inline/image.js index 32355e0..167b861 100644 --- a/lib/rules_inline/image.js +++ b/lib/rules_inline/image.js @@ -136,16 +136,12 @@ module.exports = function image(state, silent) { // so all that's left to do is to call tokenizer. 
// if (!silent) { - state.pos = labelStart; - state.posMax = labelEnd; - - var newState = new state.md.inline.State( + state.md.inline.parse( state.src.slice(labelStart, labelEnd), state.md, state.env, tokens = [] ); - newState.md.inline.tokenize(newState); token = state.push('image', 'img', 0); token.attrs = attrs = [ [ 'src', href ], [ 'alt', '' ] ]; diff --git a/lib/rules_inline/state_inline.js b/lib/rules_inline/state_inline.js index 8b3a5fc..2847496 100644 --- a/lib/rules_inline/state_inline.js +++ b/lib/rules_inline/state_inline.js @@ -3,7 +3,11 @@ 'use strict'; -var Token = require('../token'); +var Token = require('../token'); +var isWhiteSpace = require('../common/utils').isWhiteSpace; +var isPunctChar = require('../common/utils').isPunctChar; +var isMdAsciiPunct = require('../common/utils').isMdAsciiPunct; + function StateInline(src, md, env, outTokens) { this.src = src; @@ -19,6 +23,8 @@ function StateInline(src, md, env, outTokens) { this.cache = {}; // Stores { start: end } pairs. Useful for backtrack // optimization of pairs parse (emphasis, strikes). + + this.delimiters = []; // Emphasis-like delimiters } @@ -53,6 +59,70 @@ StateInline.prototype.push = function (type, tag, nesting) { return token; }; + +// Scan a sequence of emphasis-like markers, and determine whether +// it can start an emphasis sequence or end an emphasis sequence. +// +// - start - position to scan from (it should point at a valid marker); +// - canSplitWord - determine if these markers can be found inside a word +// +StateInline.prototype.scanDelims = function (start, canSplitWord) { + var pos = start, lastChar, nextChar, count, can_open, can_close, + isLastWhiteSpace, isLastPunctChar, + isNextWhiteSpace, isNextPunctChar, + left_flanking = true, + right_flanking = true, + max = this.posMax, + marker = this.src.charCodeAt(start); + + // treat beginning of the line as a whitespace + lastChar = start > 0 ? 
this.src.charCodeAt(start - 1) : 0x20; + + while (pos < max && this.src.charCodeAt(pos) === marker) { pos++; } + + count = pos - start; + + // treat end of the line as a whitespace + nextChar = pos < max ? this.src.charCodeAt(pos) : 0x20; + + isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar)); + isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar)); + + isLastWhiteSpace = isWhiteSpace(lastChar); + isNextWhiteSpace = isWhiteSpace(nextChar); + + if (isNextWhiteSpace) { + left_flanking = false; + } else if (isNextPunctChar) { + if (!(isLastWhiteSpace || isLastPunctChar)) { + left_flanking = false; + } + } + + if (isLastWhiteSpace) { + right_flanking = false; + } else if (isLastPunctChar) { + if (!(isNextWhiteSpace || isNextPunctChar)) { + right_flanking = false; + } + } + + if (!canSplitWord) { + can_open = left_flanking && (!right_flanking || isLastPunctChar); + can_close = right_flanking && (!left_flanking || isNextPunctChar); + } else { + can_open = left_flanking; + can_close = right_flanking; + } + + return { + can_open: can_open, + can_close: can_close, + length: count + }; +}; + + // re-export Token class to use in block rules StateInline.prototype.Token = Token; diff --git a/lib/rules_inline/strikethrough.js b/lib/rules_inline/strikethrough.js index 5787687..0347241 100644 --- a/lib/rules_inline/strikethrough.js +++ b/lib/rules_inline/strikethrough.js @@ -3,138 +3,115 @@ 'use strict'; -var isWhiteSpace = require('../common/utils').isWhiteSpace; -var isPunctChar = require('../common/utils').isPunctChar; -var isMdAsciiPunct = require('../common/utils').isMdAsciiPunct; - - -// parse sequence of markers, -// "start" should point at a valid marker -function scanDelims(state, start) { - var pos = start, lastChar, nextChar, count, - isLastWhiteSpace, isLastPunctChar, - isNextWhiteSpace, isNextPunctChar, - can_open = true, - can_close = true, - max = state.posMax, +// Insert each marker as a 
separate text token, and add it to delimiter list +// +module.exports.tokenize = function strikethrough(state, silent) { + var i, scanned, token, len, ch, + start = state.pos, marker = state.src.charCodeAt(start); - // treat beginning of the line as a whitespace - lastChar = start > 0 ? state.src.charCodeAt(start - 1) : 0x20; + if (silent) { return false; } - while (pos < max && state.src.charCodeAt(pos) === marker) { pos++; } + if (marker !== 0x7E/* ~ */) { return false; } - if (pos >= max) { - can_open = false; - } + scanned = state.scanDelims(state.pos, true); + len = scanned.length; + ch = String.fromCharCode(marker); - count = pos - start; + if (len < 2) { return false; } - // treat end of the line as a whitespace - nextChar = pos < max ? state.src.charCodeAt(pos) : 0x20; + if (len % 2) { + token = state.push('text', '', 0); + token.content = ch; + len--; + } - isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar)); - isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar)); + for (i = 0; i < len; i += 2) { + token = state.push('text', '', 0); + token.content = ch + ch; + + state.delimiters.push({ + marker: marker, + jump: i, + token: state.tokens.length - 1, + level: state.level, + end: -1, + open: scanned.can_open, + close: scanned.can_close + }); + } - isLastWhiteSpace = isWhiteSpace(lastChar); - isNextWhiteSpace = isWhiteSpace(nextChar); + state.pos += scanned.length; - if (isNextWhiteSpace) { - can_open = false; - } else if (isNextPunctChar) { - if (!(isLastWhiteSpace || isLastPunctChar)) { - can_open = false; - } - } + return true; +}; - if (isLastWhiteSpace) { - can_close = false; - } else if (isLastPunctChar) { - if (!(isNextWhiteSpace || isNextPunctChar)) { - can_close = false; - } - } - return { - can_open: can_open, - can_close: can_close, - delims: count - }; -} - - -module.exports = function strikethrough(state, silent) { - var startCount, - count, - tagCount, - found, - stack, - 
res, +// Walk through delimiter list and replace text tokens with tags +// +module.exports.postProcess = function strikethrough(state) { + var i, j, + startDelim, + endDelim, token, - max = state.posMax, - start = state.pos, - marker = state.src.charCodeAt(start); + loneMarkers = [], + delimiters = state.delimiters, + max = state.delimiters.length; - if (marker !== 0x7E/* ~ */) { return false; } - if (silent) { return false; } // don't run any pairs in validation mode - - res = scanDelims(state, start); - startCount = res.delims; - if (!res.can_open) { - state.pos += startCount; - // Earlier we checked !silent, but this implementation does not need it - state.pending += state.src.slice(start, state.pos); - return true; - } + for (i = 0; i < max; i++) { + startDelim = delimiters[i]; - stack = Math.floor(startCount / 2); - if (stack <= 0) { return false; } - state.pos = start + startCount; - - while (state.pos < max) { - if (state.src.charCodeAt(state.pos) === marker) { - res = scanDelims(state, state.pos); - count = res.delims; - tagCount = Math.floor(count / 2); - if (res.can_close) { - if (tagCount >= stack) { - state.pos += count - 2; - found = true; - break; - } - stack -= tagCount; - state.pos += count; - continue; - } - - if (res.can_open) { stack += tagCount; } - state.pos += count; + if (startDelim.marker !== 0x7E/* ~ */) { continue; } - state.md.inline.skipToken(state); - } + if (startDelim.end === -1) { + continue; + } - if (!found) { - // parser failed to find ending tag, so it's not valid emphasis - state.pos = start; - return false; - } + endDelim = delimiters[startDelim.end]; + + token = state.tokens[startDelim.token]; + token.type = 's_open'; + token.tag = 's'; + token.nesting = 1; + token.markup = '~~'; + token.content = ''; - // found! 
- state.posMax = state.pos; - state.pos = start + 2; + token = state.tokens[endDelim.token]; + token.type = 's_close'; + token.tag = 's'; + token.nesting = -1; + token.markup = '~~'; + token.content = ''; - // Earlier we checked !silent, but this implementation does not need it - token = state.push('s_open', 's', 1); - token.markup = '~~'; + if (state.tokens[endDelim.token - 1].type === 'text' && + state.tokens[endDelim.token - 1].content === '~') { - state.md.inline.tokenize(state); + loneMarkers.push(endDelim.token - 1); + } + } - token = state.push('s_close', 's', -1); - token.markup = '~~'; + // If a marker sequence has an odd number of characters, it's split + // like this: `~~~~~` -> `~` + `~~` + `~~`, leaving one marker at the + // start of the sequence. + // + // So, we have to move all those markers after subsequent s_close tags. + // + while (loneMarkers.length) { + i = loneMarkers.pop(); + j = i + 1; + + while (j < state.tokens.length && state.tokens[j].type === 's_close') { + j++; + } - state.pos = state.posMax + 2; - state.posMax = max; - return true; + j--; + + if (i !== j) { + token = state.tokens[j]; + state.tokens[j] = state.tokens[i]; + state.tokens[i] = token; + } + } }; diff --git a/lib/rules_inline/text_collapse.js b/lib/rules_inline/text_collapse.js new file mode 100644 index 0000000..3104c0c --- /dev/null +++ b/lib/rules_inline/text_collapse.js @@ -0,0 +1,33 @@ +// Merge adjacent text nodes into one, and re-calculate all token levels +// +'use strict'; + + +module.exports = function text_collapse(state) { + var curr, last, + level = 0, + tokens = state.tokens, + max = state.tokens.length; + + for (curr = last = 0; curr < max; curr++) { + // re-calculate levels + level += tokens[curr].nesting; + tokens[curr].level = level; + + if (tokens[curr].type === 'text' && + curr + 1 < max && + tokens[curr + 1].type === 'text') { + + // collapse two adjacent text nodes + tokens[curr + 1].content = tokens[curr].content + tokens[curr + 1].content; + } 
else { + if (curr !== last) { tokens[last] = tokens[curr]; } + + last++; + } + } + + if (curr !== last) { + tokens.length = last; + } +}; diff --git a/test/fixtures/commonmark/bad.txt b/test/fixtures/commonmark/bad.txt index 9a799f9..e69de29 100644 --- a/test/fixtures/commonmark/bad.txt +++ b/test/fixtures/commonmark/bad.txt @@ -1,14 +0,0 @@ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -src line: 6241 - -. -*foo __bar *baz bim__ bam* -. -

<p><em>foo <strong>bar *baz bim</strong> bam</em></p>

-. - -error: - -

*foo bar *baz bim bam*

- - diff --git a/test/fixtures/commonmark/good.txt b/test/fixtures/commonmark/good.txt index bdb86d3..5df0d9b 100644 --- a/test/fixtures/commonmark/good.txt +++ b/test/fixtures/commonmark/good.txt @@ -5444,6 +5444,15 @@ src line: 6235

foobar*

. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src line: 6241 + +. +*foo __bar *baz bim__ bam* +. +

<p><em>foo <strong>bar *baz bim</strong> bam</em></p>

+. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ src line: 6249 diff --git a/test/fixtures/markdown-it/strikethrough.txt b/test/fixtures/markdown-it/strikethrough.txt index 299dc3a..fb1463f 100644 --- a/test/fixtures/markdown-it/strikethrough.txt +++ b/test/fixtures/markdown-it/strikethrough.txt @@ -22,6 +22,15 @@ x ~~~~foo~~~~

x foo

. +. +x ~~a ~~foo~~~~~~~~~~~bar~~ b~~ + +x ~~a ~~foo~~~~~~~~~~~~bar~~ b~~ +. +

x a foo~~~bar b

+

x a foo~~~~bar b

+. + Strikeouts have the same priority as emphases: . diff --git a/test/misc.js b/test/misc.js index 82e2381..e35b3de 100644 --- a/test/misc.js +++ b/test/misc.js @@ -261,11 +261,6 @@ describe('Links validation', function () { describe('maxNesting', function () { - it('Inline parser should not nest above limit', function () { - var md = markdownit({ maxNesting: 2 }); - assert.strictEqual(md.render('*foo *bar *baz* bar* foo*'), '

foo bar *baz* bar foo

\n'); - }); - it('Block parser should not nest above limit', function () { var md = markdownit({ maxNesting: 2 }); assert.strictEqual(