From 8cb29935a388d96aa23976c3420038f5216bd0f1 Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Sat, 3 Jan 2015 11:15:21 +0300 Subject: [PATCH] Move reference rule to the block chain --- lib/helpers/parse_link_destination.js | 49 +++--- lib/helpers/parse_link_title.js | 44 +++-- lib/parser_block.js | 21 +-- lib/parser_core.js | 1 - lib/presets/commonmark.js | 2 +- lib/presets/default.js | 3 +- lib/rules_block/reference.js | 151 ++++++++++++++++++ lib/rules_core/references.js | 107 ------------- lib/rules_inline/image.js | 15 +- lib/rules_inline/link.js | 15 +- lib/rules_inline/state_inline.js | 2 - test/fixtures/markdown-it/abbr.txt | 15 ++ .../markdown-it/commonmark_extras.txt | 19 +++ 13 files changed, 269 insertions(+), 175 deletions(-) create mode 100644 lib/rules_block/reference.js delete mode 100644 lib/rules_core/references.js diff --git a/lib/helpers/parse_link_destination.js b/lib/helpers/parse_link_destination.js index 01549ee..57a75fb 100644 --- a/lib/helpers/parse_link_destination.js +++ b/lib/helpers/parse_link_destination.js @@ -1,8 +1,5 @@ // Parse link destination // -// on success it returns a string and updates state.pos; -// on failure it returns null -// 'use strict'; @@ -10,22 +7,27 @@ var normalizeLink = require('../common/utils').normalizeLink; var unescapeMd = require('../common/utils').unescapeMd; -module.exports = function parseLinkDestination(state, pos) { - var code, level, link, +module.exports = function parseLinkDestination(str, pos, max) { + var code, level, + lines = 0, start = pos, - max = state.posMax; - - if (state.src.charCodeAt(pos) === 0x3C /* < */) { + result = { + ok: false, + pos: 0, + lines: 0, + str: '' + }; + + if (str.charCodeAt(pos) === 0x3C /* < */) { pos++; while (pos < max) { - code = state.src.charCodeAt(pos); - if (code === 0x0A /* \n */) { return false; } + code = str.charCodeAt(pos); + if (code === 0x0A /* \n */) { return result; } if (code === 0x3E /* > */) { - link = normalizeLink(unescapeMd(state.src.slice(start + 1, pos))); - if (!state.md.inline.validateLink(link)) { return false; } - state.pos = pos + 1; - state.linkContent = link; - return true; + result.pos = pos + 1; + result.str = normalizeLink(unescapeMd(str.slice(start + 1, pos))); + result.ok = true; + return result; } if (code === 0x5C /* \ */ && pos + 1 < max) { pos += 2; @@ -36,14 +38,14 @@ module.exports = function parseLinkDestination(state, pos) { } // no closing '>' - return false; + return result; } // this should be ... } else { ... branch level = 0; while (pos < max) { - code = state.src.charCodeAt(pos); + code = str.charCodeAt(pos); if (code === 0x20) { break; } @@ -68,12 +70,11 @@ module.exports = function parseLinkDestination(state, pos) { pos++; } - if (start === pos) { return false; } - - link = normalizeLink(unescapeMd(state.src.slice(start, pos))); - if (!state.md.inline.validateLink(link)) { return false; } + if (start === pos) { return result; } - state.linkContent = link; - state.pos = pos; - return true; + result.str = normalizeLink(unescapeMd(str.slice(start, pos))); + result.lines = lines; + result.pos = pos; + result.ok = true; + return result; }; diff --git a/lib/helpers/parse_link_title.js b/lib/helpers/parse_link_title.js index 956ecd9..8ca82a3 100644 --- a/lib/helpers/parse_link_title.js +++ b/lib/helpers/parse_link_title.js @@ -1,21 +1,28 @@ // Parse link title // -// on success it returns a string and updates state.pos; -// on failure it returns null -// 'use strict'; var unescapeMd = require('../common/utils').unescapeMd; -module.exports = function parseLinkTitle(state, pos) { +module.exports = function parseLinkTitle(str, pos, max) { var code, + marker, + lines = 0, start = pos, - max = state.posMax, - marker = state.src.charCodeAt(pos); + result = { + ok: false, + pos: 0, + lines: 0, + str: '' + }; + + if (pos >= max) { return result; } - if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return false; } + marker = str.charCodeAt(pos); + + if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return result; } pos++; @@ -23,19 +30,24 @@ module.exports = function parseLinkTitle(state, pos) { if (marker === 0x28) { marker = 0x29; } while (pos < max) { - code = state.src.charCodeAt(pos); + code = str.charCodeAt(pos); if (code === marker) { - state.pos = pos + 1; - state.linkContent = unescapeMd(state.src.slice(start + 1, pos)); - return true; - } - if (code === 0x5C /* \ */ && pos + 1 < max) { - pos += 2; - continue; + result.pos = pos + 1; + result.lines = lines; + result.str = unescapeMd(str.slice(start + 1, pos)); + result.ok = true; + return result; + } else if (code === 0x0A) { + lines++; + } else if (code === 0x5C /* \ */ && pos + 1 < max) { + pos++; + if (str.charCodeAt(pos) === 0x0A) { + lines++; + } } pos++; } - return false; + return result; }; diff --git a/lib/parser_block.js b/lib/parser_block.js index 8b2b26a..f21b10e 100644 --- a/lib/parser_block.js +++ b/lib/parser_block.js @@ -11,17 +11,18 @@ var Ruler = require('./ruler'); var _rules = [ [ 'code', require('./rules_block/code') ], - [ 'fences', require('./rules_block/fences'), [ 'paragraph', 'blockquote', 'list' ] ], - [ 'blockquote', require('./rules_block/blockquote'), [ 'paragraph', 'blockquote', 'list' ] ], - [ 'hr', require('./rules_block/hr'), [ 'paragraph', 'blockquote', 'list' ] ], - [ 'list', require('./rules_block/list'), [ 'paragraph', 'blockquote' ] ], - [ 'abbr', require('./rules_block/abbr'), [ 'paragraph' ] ], - [ 'footnote', require('./rules_block/footnote'), [ 'paragraph' ] ], - [ 'heading', require('./rules_block/heading'), [ 'paragraph', 'blockquote' ] ], + [ 'fences', require('./rules_block/fences'), [ 'paragraph', 'reference', 'blockquote', 'list' ] ], + [ 'blockquote', require('./rules_block/blockquote'), [ 'paragraph', 'reference', 'blockquote', 'list' ] ], + [ 'hr', require('./rules_block/hr'), [ 'paragraph', 'reference', 'blockquote', 'list' ] ], + [ 'list', require('./rules_block/list'), [ 'paragraph', 'reference', 'blockquote' ] ], + [ 'abbr', require('./rules_block/abbr'), [ 'paragraph', 'reference' ] ], + [ 'footnote', require('./rules_block/footnote'), [ 'paragraph', 'reference' ] ], + [ 'reference', require('./rules_block/reference'), [ 'reference' ] ], + [ 'heading', require('./rules_block/heading'), [ 'paragraph', 'reference', 'blockquote' ] ], [ 'lheading', require('./rules_block/lheading') ], - [ 'htmlblock', require('./rules_block/htmlblock'), [ 'paragraph', 'blockquote' ] ], - [ 'table', require('./rules_block/table'), [ 'paragraph' ] ], - [ 'deflist', require('./rules_block/deflist'), [ 'paragraph' ] ], + [ 'htmlblock', require('./rules_block/htmlblock'), [ 'paragraph', 'reference', 'blockquote' ] ], + [ 'table', require('./rules_block/table'), [ 'paragraph', 'reference' ] ], + [ 'deflist', require('./rules_block/deflist'), [ 'paragraph', 'reference' ] ], [ 'paragraph', require('./rules_block/paragraph') ] ]; diff --git a/lib/parser_core.js b/lib/parser_core.js index 816f080..d3072f8 100644 --- a/lib/parser_core.js +++ b/lib/parser_core.js @@ -12,7 +12,6 @@ var Ruler = require('./ruler'); var _rules = [ [ 'block', require('./rules_core/block') ], - [ 'references', require('./rules_core/references') ], [ 'inline', require('./rules_core/inline') ], [ 'footnote_tail', require('./rules_core/footnote_tail') ], [ 'abbr2', require('./rules_core/abbr2') ], diff --git a/lib/presets/commonmark.js b/lib/presets/commonmark.js index ee98206..2cb61fe 100644 --- a/lib/presets/commonmark.js +++ b/lib/presets/commonmark.js @@ -34,7 +34,6 @@ module.exports = { rules: [ 'block', 'inline', - 'references', 'abbr2' ] }, @@ -49,6 +48,7 @@ module.exports = { 'htmlblock', 'lheading', 'list', + 'reference', 'paragraph' ] }, diff --git a/lib/presets/default.js b/lib/presets/default.js index c47e6b4..b8c234d 100644 --- a/lib/presets/default.js +++ b/lib/presets/default.js @@ -34,11 +34,9 @@ module.exports = { rules: [ 'block', 'inline', - 'references', 'replacements', 'linkify', 'smartquotes', - 'references', 'abbr2', 'footnote_tail' ] @@ -55,6 +53,7 @@ module.exports = { 'lheading', 'list', 'paragraph', + 'reference', 'table' ] }, diff --git a/lib/rules_block/reference.js b/lib/rules_block/reference.js new file mode 100644 index 0000000..77a30f1 --- /dev/null +++ b/lib/rules_block/reference.js @@ -0,0 +1,151 @@ +'use strict'; + + +var parseLinkDestination = require('../helpers/parse_link_destination'); +var parseLinkTitle = require('../helpers/parse_link_title'); +var normalizeReference = require('../helpers/normalize_reference'); + + +module.exports = function reference(state, startLine, _endLine, silent) { + var ch, + destEndPos, + destEndLineNo, + endLine, + href, + i, + l, + label, + labelEnd, + res, + start, + str, + terminate, + terminatorRules, + title, + lines = 0, + pos = state.bMarks[startLine] + state.tShift[startLine], + max = state.eMarks[startLine], + nextLine = startLine + 1; + + if (pos >= max) { return false; } + if (state.src.charCodeAt(pos) !== 0x5B/* [ */) { return false; } + + endLine = state.lineMax; + + // jump line-by-line until empty one or EOF + if (nextLine < endLine && !state.isEmpty(nextLine)) { + terminatorRules = state.md.block.ruler.getRules('references'); + + for (; nextLine < endLine && !state.isEmpty(nextLine); nextLine++) { + // this would be a code block normally, but after paragraph + // it's considered a lazy continuation regardless of what's there + if (state.tShift[nextLine] - state.blkIndent > 3) { continue; } + + // Some tags can terminate paragraph without empty line. + terminate = false; + for (i = 0, l = terminatorRules.length; i < l; i++) { + if (terminatorRules[i](state, nextLine, endLine, true)) { + terminate = true; + break; + } + } + if (terminate) { break; } + } + } + + str = state.getLines(startLine, nextLine, state.blkIndent, false).trim(); + max = str.length; + + for (pos = 1; pos < max; pos++) { + ch = str.charCodeAt(pos); + if (ch === 0x5B /* [ */) { + return false; + } else if (ch === 0x5D /* ] */) { + labelEnd = pos; + break; + } else if (ch === 0x0A /* \n */) { + lines++; + } else if (ch === 0x5C /* \ */) { + pos++; + if (pos < max && str.charCodeAt(pos) === 0x0A) { + lines++; + } + } + } + + if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return false; } + + // [label]: destination 'title' + // ^^^ skip optional whitespace here + for (pos = labelEnd + 2; pos < max; pos++) { + ch = str.charCodeAt(pos); + if (ch === 0x0A) { + lines++; + } else if (ch === 0x20) { + /*eslint no-empty:0*/ + } else { + break; + } + } + + // [label]: destination 'title' + // ^^^^^^^^^^^ parse this + res = parseLinkDestination(str, pos, max); + if (!res.ok) { return false; } + if (!state.md.inline.validateLink(res.str)) { return false; } + href = res.str; + pos = res.pos; + lines += res.lines; + + // save cursor state, we could require to rollback later + destEndPos = pos; + destEndLineNo = lines; + + // [label]: destination 'title' + // ^^^ skipping those spaces + start = pos; + for (; pos < max; pos++) { + ch = str.charCodeAt(pos); + if (ch === 0x0A) { + lines++; + } else if (ch === 0x20) { + /*eslint no-empty:0*/ + } else { + break; + } + } + + // [label]: destination 'title' + // ^^^^^^^ parse this + res = parseLinkTitle(str, pos, max); + if (pos < max && start !== pos && res.ok) { + title = res.str; + pos = res.pos; + lines += res.lines; + } else { + title = ''; + pos = destEndPos; + lines = destEndLineNo; + } + + // skip trailing spaces until the rest of the line + while (pos < max && str.charCodeAt(pos) === 0x20/* space */) { pos++; } + + if (pos < max && str.charCodeAt(pos) !== 0x0A) { + // garbage at the end of the line + return false; + } + + if (silent) { return true; } + + label = normalizeReference(str.slice(1, labelEnd)); + if (typeof state.env.references === 'undefined') { + state.env.references = {}; + } + if (typeof state.env.references[label] === 'undefined') { + state.env.references[label] = { title: title, href: href }; + } + + state.line = startLine + lines + 1; + return true; +}; diff --git a/lib/rules_core/references.js b/lib/rules_core/references.js deleted file mode 100644 index 2558c8e..0000000 --- a/lib/rules_core/references.js +++ /dev/null @@ -1,107 +0,0 @@ -'use strict'; - - -var parseLinkDestination = require('../helpers/parse_link_destination'); -var parseLinkTitle = require('../helpers/parse_link_title'); -var normalizeReference = require('../helpers/normalize_reference'); - - -function parseReference(str, md, env) { - var state, pos, code, start, href, title, label, ch, max, - labelEnd = -1; - - if (str.charCodeAt(0) !== 0x5B/* [ */) { return -1; } - - if (str.indexOf(']:') === -1) { return -1; } - - state = new md.inline.State(str, md, env, []); - max = state.posMax; - - for (pos = 1; pos < max; pos++) { - ch = str.charCodeAt(pos); - if (ch === 0x5B /* [ */) { - return -1; - } else if (ch === 0x5D /* ] */) { - labelEnd = pos; - break; - } else if (ch === 0x5C /* \ */) { - pos++; - } - } - - if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return -1; } - - // [label]: destination 'title' - // ^^^ skip optional whitespace here - for (pos = labelEnd + 2; pos < max; pos++) { - code = state.src.charCodeAt(pos); - if (code !== 0x20 && code !== 0x0A) { break; } - } - - // [label]: destination 'title' - // ^^^^^^^^^^^ parse this - if (!parseLinkDestination(state, pos)) { return -1; } - href = state.linkContent; - pos = state.pos; - - // [label]: destination 'title' - // ^^^ skipping those spaces - start = pos; - for (pos = pos + 1; pos < max; pos++) { - code = state.src.charCodeAt(pos); - if (code !== 0x20 && code !== 0x0A) { break; } - } - - // [label]: destination 'title' - // ^^^^^^^ parse this - if (pos < max && start !== pos && parseLinkTitle(state, pos)) { - title = state.linkContent; - pos = state.pos; - } else { - title = ''; - pos = start; - } - - // ensure that the end of the line is empty - while (pos < max && state.src.charCodeAt(pos) === 0x20/* space */) { pos++; } - if (pos < max && state.src.charCodeAt(pos) !== 0x0A) { return -1; } - - label = normalizeReference(str.slice(1, labelEnd)); - if (typeof env.references[label] === 'undefined') { - env.references[label] = { title: title, href: href }; - } - - return pos; -} - - -module.exports = function references(state) { - var tokens = state.tokens, i, l, content, pos; - - state.env.references = state.env.references || {}; - - if (state.inlineMode) { - return; - } - - // Scan definitions in paragraph inlines - for (i = 1, l = tokens.length - 1; i < l; i++) { - if (tokens[i].type === 'inline' && - tokens[i - 1].type === 'paragraph_open' && - tokens[i + 1].type === 'paragraph_close') { - - content = tokens[i].content; - while (content.length) { - pos = parseReference(content, state.md, state.env); - if (pos < 0) { break; } - content = content.slice(pos).trim(); - } - - tokens[i].content = content; - if (!content.length) { - tokens[i - 1].tight = true; - tokens[i + 1].tight = true; - } - } - } -}; diff --git a/lib/rules_inline/image.js b/lib/rules_inline/image.js index 45ae9f7..0a6a9bc 100644 --- a/lib/rules_inline/image.js +++ b/lib/rules_inline/image.js @@ -16,6 +16,7 @@ module.exports = function image(state, silent) { labelStart, pos, ref, + res, title, tokens, start, @@ -49,9 +50,10 @@ module.exports = function image(state, silent) { // [link]( "title" ) // ^^^^^^ parsing link destination start = pos; - if (parseLinkDestination(state, pos)) { - href = state.linkContent; - pos = state.pos; + res = parseLinkDestination(state.src, pos, state.posMax); + if (res.ok && state.md.inline.validateLink(res.str)) { + href = res.str; + pos = res.pos; } else { href = ''; } @@ -66,9 +68,10 @@ module.exports = function image(state, silent) { // [link]( "title" ) // ^^^^^^^ parsing link title - if (pos < max && start !== pos && parseLinkTitle(state, pos)) { - title = state.linkContent; - pos = state.pos; + res = parseLinkTitle(state.src, pos, state.posMax); + if (pos < max && start !== pos && res.ok) { + title = res.str; + pos = res.pos; // [link]( "title" ) // ^^ skipping these spaces diff --git a/lib/rules_inline/link.js b/lib/rules_inline/link.js index 2b9a6eb..524d5cf 100644 --- a/lib/rules_inline/link.js +++ b/lib/rules_inline/link.js @@ -15,6 +15,7 @@ module.exports = function link(state, silent) { labelEnd, labelStart, pos, + res, ref, title, oldPos = state.pos, @@ -47,9 +48,10 @@ module.exports = function link(state, silent) { // [link]( "title" ) // ^^^^^^ parsing link destination start = pos; - if (parseLinkDestination(state, pos)) { - href = state.linkContent; - pos = state.pos; + res = parseLinkDestination(state.src, pos, state.posMax); + if (res.ok && state.md.inline.validateLink(res.str)) { + href = res.str; + pos = res.pos; } else { href = ''; } @@ -64,9 +66,10 @@ module.exports = function link(state, silent) { // [link]( "title" ) // ^^^^^^^ parsing link title - if (pos < max && start !== pos && parseLinkTitle(state, pos)) { - title = state.linkContent; - pos = state.pos; + res = parseLinkTitle(state.src, pos, state.posMax); + if (pos < max && start !== pos && res.ok) { + title = res.str; + pos = res.pos; // [link]( "title" ) // ^^ skipping these spaces diff --git a/lib/rules_inline/state_inline.js b/lib/rules_inline/state_inline.js index 666d5f1..ed75e41 100644 --- a/lib/rules_inline/state_inline.js +++ b/lib/rules_inline/state_inline.js @@ -20,8 +20,6 @@ function StateInline(src, md, env, outTokens) { // Link parser state vars - this.linkContent = ''; // Temporary storage for link url - this.labelUnmatchedScopes = 0; // Track unpaired `[` for link labels // (backtrack optimization) } diff --git a/test/fixtures/markdown-it/abbr.txt b/test/fixtures/markdown-it/abbr.txt index b4abde9..009ebc3 100644 --- a/test/fixtures/markdown-it/abbr.txt +++ b/test/fixtures/markdown-it/abbr.txt @@ -55,6 +55,21 @@ JS HTTP is a collection of low-level javascript HTTP-related modules

JS HTTP is a collection of low-level javascript HTTP-related modules

. +Mixing up abbreviations and references: + +. +*[foo]: 123 +[bar]: 456 +*[baz]: 789 +[quux]: 012 +and a paragraph continuation + +foo [bar] baz [quux] +. +

and a paragraph continuation

+

foo bar baz quux

+. + Don't match the middle of the string: . diff --git a/test/fixtures/markdown-it/commonmark_extras.txt b/test/fixtures/markdown-it/commonmark_extras.txt index 98a8e2a..904191c 100644 --- a/test/fixtures/markdown-it/commonmark_extras.txt +++ b/test/fixtures/markdown-it/commonmark_extras.txt @@ -106,6 +106,25 @@ Should not throw exception on mailformed URI . +Multiline title in definitions: + +. +[foo]: test ' +1 +2 +3 +' + +[foo] +. +

foo

+. + + Coverage. Directive can terminate paragraph. . a