Move reference rule to the block chain

11 years ago · 8cb29935a3
13 changed files with 269 additions and 175 deletions
--- a/lib/helpers/parse_link_destination.js
+++ b/lib/helpers/parse_link_destination.js
@ -1,8 +1,5 @@
 // Parse link destination
 //
-// on success it returns a string and updates state.pos;
-// on failure it returns null
-//
 'use strict';


@ -10,22 +7,27 @@ var normalizeLink = require('../common/utils').normalizeLink;
 var unescapeMd    = require('../common/utils').unescapeMd;


-module.exports = function parseLinkDestination(state, pos) {
-  var code, level, link,
+module.exports = function parseLinkDestination(str, pos, max) {
+  var code, level,
+      lines = 0,
      start = pos,
-      max = state.posMax;
-
-  if (state.src.charCodeAt(pos) === 0x3C /* < */) {
+      result = {
+        ok: false,
+        pos: 0,
+        lines: 0,
+        str: ''
+      };
+
+  if (str.charCodeAt(pos) === 0x3C /* < */) {
    pos++;
    while (pos < max) {
-      code = state.src.charCodeAt(pos);
-      if (code === 0x0A /* \n */) { return false; }
+      code = str.charCodeAt(pos);
+      if (code === 0x0A /* \n */) { return result; }
      if (code === 0x3E /* > */) {
-        link = normalizeLink(unescapeMd(state.src.slice(start + 1, pos)));
-        if (!state.md.inline.validateLink(link)) { return false; }
-        state.pos = pos + 1;
-        state.linkContent = link;
-        return true;
+        result.pos = pos + 1;
+        result.str = normalizeLink(unescapeMd(str.slice(start + 1, pos)));
+        result.ok = true;
+        return result;
      }
      if (code === 0x5C /* \ */ && pos + 1 < max) {
        pos += 2;
@ -36,14 +38,14 @@ module.exports = function parseLinkDestination(state, pos) {
    }

    // no closing '>'
-    return false;
+    return result;
  }

  // this should be ... } else { ... branch

  level = 0;
  while (pos < max) {
-    code = state.src.charCodeAt(pos);
+    code = str.charCodeAt(pos);

    if (code === 0x20) { break; }

@ -68,12 +70,11 @@ module.exports = function parseLinkDestination(state, pos) {
    pos++;
  }

-  if (start === pos) { return false; }
-
-  link = normalizeLink(unescapeMd(state.src.slice(start, pos)));
-  if (!state.md.inline.validateLink(link)) { return false; }
+  if (start === pos) { return result; }

-  state.linkContent = link;
-  state.pos = pos;
-  return true;
+  result.str = normalizeLink(unescapeMd(str.slice(start, pos)));
+  result.lines = lines;
+  result.pos = pos;
+  result.ok = true;
+  return result;
 };
--- a/lib/helpers/parse_link_title.js
+++ b/lib/helpers/parse_link_title.js
@ -1,21 +1,28 @@
 // Parse link title
 //
-// on success it returns a string and updates state.pos;
-// on failure it returns null
-//
 'use strict';


 var unescapeMd = require('../common/utils').unescapeMd;


-module.exports = function parseLinkTitle(state, pos) {
+module.exports = function parseLinkTitle(str, pos, max) {
  var code,
+      marker,
+      lines = 0,
      start = pos,
-      max = state.posMax,
-      marker = state.src.charCodeAt(pos);
+      result = {
+        ok: false,
+        pos: 0,
+        lines: 0,
+        str: ''
+      };
+
+  if (pos >= max) { return result; }

-  if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return false; }
+  marker = str.charCodeAt(pos);
+
+  if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return result; }

  pos++;

@ -23,19 +30,24 @@ module.exports = function parseLinkTitle(state, pos) {
  if (marker === 0x28) { marker = 0x29; }

  while (pos < max) {
-    code = state.src.charCodeAt(pos);
+    code = str.charCodeAt(pos);
    if (code === marker) {
-      state.pos = pos + 1;
-      state.linkContent = unescapeMd(state.src.slice(start + 1, pos));
-      return true;
-    }
-    if (code === 0x5C /* \ */ && pos + 1 < max) {
-      pos += 2;
-      continue;
+      result.pos = pos + 1;
+      result.lines = lines;
+      result.str = unescapeMd(str.slice(start + 1, pos));
+      result.ok = true;
+      return result;
+    } else if (code === 0x0A) {
+      lines++;
+    } else if (code === 0x5C /* \ */ && pos + 1 < max) {
+      pos++;
+      if (str.charCodeAt(pos) === 0x0A) {
+        lines++;
+      }
    }

    pos++;
  }

-  return false;
+  return result;
 };
--- a/lib/parser_block.js
+++ b/lib/parser_block.js
@ -11,17 +11,18 @@ var Ruler           = require('./ruler');

 var _rules = [
  [ 'code',       require('./rules_block/code') ],
-  [ 'fences',     require('./rules_block/fences'),     [ 'paragraph', 'blockquote', 'list' ] ],
-  [ 'blockquote', require('./rules_block/blockquote'), [ 'paragraph', 'blockquote', 'list' ] ],
-  [ 'hr',         require('./rules_block/hr'),         [ 'paragraph', 'blockquote', 'list' ] ],
-  [ 'list',       require('./rules_block/list'),       [ 'paragraph', 'blockquote' ] ],
-  [ 'abbr',       require('./rules_block/abbr'),       [ 'paragraph' ] ],
-  [ 'footnote',   require('./rules_block/footnote'),   [ 'paragraph' ] ],
-  [ 'heading',    require('./rules_block/heading'),    [ 'paragraph', 'blockquote' ] ],
+  [ 'fences',     require('./rules_block/fences'),     [ 'paragraph', 'reference', 'blockquote', 'list' ] ],
+  [ 'blockquote', require('./rules_block/blockquote'), [ 'paragraph', 'reference', 'blockquote', 'list' ] ],
+  [ 'hr',         require('./rules_block/hr'),         [ 'paragraph', 'reference', 'blockquote', 'list' ] ],
+  [ 'list',       require('./rules_block/list'),       [ 'paragraph', 'reference', 'blockquote' ] ],
+  [ 'abbr',       require('./rules_block/abbr'),       [ 'paragraph', 'reference' ] ],
+  [ 'footnote',   require('./rules_block/footnote'),   [ 'paragraph', 'reference' ] ],
+  [ 'reference',  require('./rules_block/reference'),  [ 'reference' ] ],
+  [ 'heading',    require('./rules_block/heading'),    [ 'paragraph', 'reference', 'blockquote' ] ],
  [ 'lheading',   require('./rules_block/lheading') ],
-  [ 'htmlblock',  require('./rules_block/htmlblock'),  [ 'paragraph', 'blockquote' ] ],
-  [ 'table',      require('./rules_block/table'),      [ 'paragraph' ] ],
-  [ 'deflist',    require('./rules_block/deflist'),    [ 'paragraph' ] ],
+  [ 'htmlblock',  require('./rules_block/htmlblock'),  [ 'paragraph', 'reference', 'blockquote' ] ],
+  [ 'table',      require('./rules_block/table'),      [ 'paragraph', 'reference' ] ],
+  [ 'deflist',    require('./rules_block/deflist'),    [ 'paragraph', 'reference' ] ],
  [ 'paragraph',  require('./rules_block/paragraph') ]
 ];

--- a/lib/parser_core.js
+++ b/lib/parser_core.js
@ -12,7 +12,6 @@ var Ruler  = require('./ruler');

 var _rules = [
  [ 'block',          require('./rules_core/block')          ],
-  [ 'references',     require('./rules_core/references')     ],
  [ 'inline',         require('./rules_core/inline')         ],
  [ 'footnote_tail',  require('./rules_core/footnote_tail')  ],
  [ 'abbr2',          require('./rules_core/abbr2')          ],
--- a/lib/presets/commonmark.js
+++ b/lib/presets/commonmark.js
@ -34,7 +34,6 @@ module.exports = {
      rules: [
        'block',
        'inline',
-        'references',
        'abbr2'
      ]
    },
@ -49,6 +48,7 @@ module.exports = {
        'htmlblock',
        'lheading',
        'list',
+        'reference',
        'paragraph'
      ]
    },
--- a/lib/presets/default.js
+++ b/lib/presets/default.js
@ -34,11 +34,9 @@ module.exports = {
      rules: [
        'block',
        'inline',
-        'references',
        'replacements',
        'linkify',
        'smartquotes',
-        'references',
        'abbr2',
        'footnote_tail'
      ]
@ -55,6 +53,7 @@ module.exports = {
        'lheading',
        'list',
        'paragraph',
+        'reference',
        'table'
      ]
    },
--- a/lib/rules_block/reference.js
+++ b/lib/rules_block/reference.js
@ -0,0 +1,151 @@
+'use strict';
+
+
+var parseLinkDestination = require('../helpers/parse_link_destination');
+var parseLinkTitle       = require('../helpers/parse_link_title');
+var normalizeReference   = require('../helpers/normalize_reference');
+
+
+// Block rule: link reference definition, `[label]: destination 'title'`.
+// Gathers the lines from `startLine` up to the first empty line (or a line
+// accepted by a terminator rule), parses one definition from the start of
+// that text and stores it in `state.env.references` unless the label is
+// already defined. Returns false if no definition starts here, else true
+// after advancing `state.line`; in `silent` mode it only validates.
+module.exports = function reference(state, startLine, _endLine, silent) {
+  var ch, destEndPos, destEndLineNo, endLine, href, i, l, label, res, start,
+      str, terminate, terminatorRules, title,
+      labelEnd = -1, // stays negative until the label's closing `]` is found
+      lines = 0,
+      pos = state.bMarks[startLine] + state.tShift[startLine],
+      max = state.eMarks[startLine],
+      nextLine = startLine + 1;
+
+  if (pos >= max) { return false; }
+  if (state.src.charCodeAt(pos) !== 0x5B/* [ */) { return false; }
+
+  endLine = state.lineMax;
+
+  // jump line-by-line until empty one or EOF
+  if (nextLine < endLine && !state.isEmpty(nextLine)) {
+    // NOTE(review): parser_block.js names this chain 'reference'; confirm.
+    terminatorRules = state.md.block.ruler.getRules('references');
+
+    for (; nextLine < endLine && !state.isEmpty(nextLine); nextLine++) {
+      // this would be a code block normally, but after paragraph
+      // it's considered a lazy continuation regardless of what's there
+      if (state.tShift[nextLine] - state.blkIndent > 3) { continue; }
+
+      // Some tags can terminate paragraph without empty line.
+      terminate = false;
+      for (i = 0, l = terminatorRules.length; i < l; i++) {
+        if (terminatorRules[i](state, nextLine, endLine, true)) {
+          terminate = true;
+          break;
+        }
+      }
+      if (terminate) { break; }
+    }
+  }
+
+  str = state.getLines(startLine, nextLine, state.blkIndent, false).trim();
+  max = str.length;
+
+  for (pos = 1; pos < max; pos++) {
+    ch = str.charCodeAt(pos);
+    if (ch === 0x5B /* [ */) {
+      return false;
+    } else if (ch === 0x5D /* ] */) {
+      labelEnd = pos;
+      break;
+    } else if (ch === 0x0A /* \n */) {
+      lines++;
+    } else if (ch === 0x5C /* \ */) {
+      pos++;
+      if (pos < max && str.charCodeAt(pos) === 0x0A) {
+        lines++;
+      }
+    }
+  }
+
+  if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return false; }
+
+  // [label]:   destination   'title'
+  //         ^^^ skip optional whitespace here
+  for (pos = labelEnd + 2; pos < max; pos++) {
+    ch = str.charCodeAt(pos);
+    if (ch === 0x0A) {
+      lines++;
+    } else if (ch !== 0x20) {
+      break;
+    }
+  }
+
+  // [label]:   destination   'title'
+  //            ^^^^^^^^^^^ parse this
+  res = parseLinkDestination(str, pos, max);
+  if (!res.ok) { return false; }
+  if (!state.md.inline.validateLink(res.str)) { return false; }
+  href = res.str;
+  pos = res.pos;
+  lines += res.lines;
+
+  // remember the cursor: a title that does not work out is rolled back
+  destEndPos = pos;
+  destEndLineNo = lines;
+
+  // [label]:   destination   'title'
+  //                       ^^^ skipping those spaces
+  start = pos;
+  for (; pos < max; pos++) {
+    ch = str.charCodeAt(pos);
+    if (ch === 0x0A) {
+      lines++;
+    } else if (ch !== 0x20) {
+      break;
+    }
+  }
+
+  // [label]:   destination   'title'
+  //                          ^^^^^^^ parse this
+  res = parseLinkTitle(str, pos, max);
+  if (pos < max && start !== pos && res.ok) {
+    title = res.str;
+    pos = res.pos;
+    lines += res.lines;
+  } else {
+    title = '';
+    pos = destEndPos;
+    lines = destEndLineNo;
+  }
+
+  // skip trailing spaces until the rest of the line
+  while (pos < max && str.charCodeAt(pos) === 0x20/* space */) { pos++; }
+
+  if (pos < max && str.charCodeAt(pos) !== 0x0A && lines > destEndLineNo) {
+    // garbage after a title that reached onto a later line: the destination
+    // alone may still form a valid definition, so retry without the title
+    title = '';
+    pos = destEndPos;
+    lines = destEndLineNo;
+    while (pos < max && str.charCodeAt(pos) === 0x20/* space */) { pos++; }
+  }
+
+  if (pos < max && str.charCodeAt(pos) !== 0x0A) {
+    // garbage at the end of the line
+    return false;
+  }
+
+  if (silent) { return true; }
+
+  label = normalizeReference(str.slice(1, labelEnd));
+  if (typeof state.env.references === 'undefined') {
+    state.env.references = {};
+  }
+  if (typeof state.env.references[label] === 'undefined') {
+    state.env.references[label] = { title: title, href: href };
+  }
+
+  state.line = startLine + lines + 1;
+  return true;
+};
--- a/lib/rules_core/references.js
+++ b/lib/rules_core/references.js
@ -1,107 +0,0 @@
-'use strict';
-
-
-var parseLinkDestination = require('../helpers/parse_link_destination');
-var parseLinkTitle       = require('../helpers/parse_link_title');
-var normalizeReference   = require('../helpers/normalize_reference');
-
-
-function parseReference(str, md, env) {
-  var state, pos, code, start, href, title, label, ch, max,
-      labelEnd = -1;
-
-  if (str.charCodeAt(0) !== 0x5B/* [ */) { return -1; }
-
-  if (str.indexOf(']:') === -1) { return -1; }
-
-  state = new md.inline.State(str, md, env, []);
-  max = state.posMax;
-
-  for (pos = 1; pos < max; pos++) {
-    ch = str.charCodeAt(pos);
-    if (ch === 0x5B /* [ */) {
-      return -1;
-    } else if (ch === 0x5D /* ] */) {
-      labelEnd = pos;
-      break;
-    } else if (ch === 0x5C /* \ */) {
-      pos++;
-    }
-  }
-
-  if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return -1; }
-
-  // [label]:   destination   'title'
-  //         ^^^ skip optional whitespace here
-  for (pos = labelEnd + 2; pos < max; pos++) {
-    code = state.src.charCodeAt(pos);
-    if (code !== 0x20 && code !== 0x0A) { break; }
-  }
-
-  // [label]:   destination   'title'
-  //            ^^^^^^^^^^^ parse this
-  if (!parseLinkDestination(state, pos)) { return -1; }
-  href = state.linkContent;
-  pos = state.pos;
-
-  // [label]:   destination   'title'
-  //                       ^^^ skipping those spaces
-  start = pos;
-  for (pos = pos + 1; pos < max; pos++) {
-    code = state.src.charCodeAt(pos);
-    if (code !== 0x20 && code !== 0x0A) { break; }
-  }
-
-  // [label]:   destination   'title'
-  //                          ^^^^^^^ parse this
-  if (pos < max && start !== pos && parseLinkTitle(state, pos)) {
-    title = state.linkContent;
-    pos = state.pos;
-  } else {
-    title = '';
-    pos = start;
-  }
-
-  // ensure that the end of the line is empty
-  while (pos < max && state.src.charCodeAt(pos) === 0x20/* space */) { pos++; }
-  if (pos < max && state.src.charCodeAt(pos) !== 0x0A) { return -1; }
-
-  label = normalizeReference(str.slice(1, labelEnd));
-  if (typeof env.references[label] === 'undefined') {
-    env.references[label] = { title: title, href: href };
-  }
-
-  return pos;
-}
-
-
-module.exports = function references(state) {
-  var tokens = state.tokens, i, l, content, pos;
-
-  state.env.references = state.env.references || {};
-
-  if (state.inlineMode) {
-    return;
-  }
-
-  // Scan definitions in paragraph inlines
-  for (i = 1, l = tokens.length - 1; i < l; i++) {
-    if (tokens[i].type === 'inline' &&
-        tokens[i - 1].type === 'paragraph_open' &&
-        tokens[i + 1].type === 'paragraph_close') {
-
-      content = tokens[i].content;
-      while (content.length) {
-        pos = parseReference(content, state.md, state.env);
-        if (pos < 0) { break; }
-        content = content.slice(pos).trim();
-      }
-
-      tokens[i].content = content;
-      if (!content.length) {
-        tokens[i - 1].tight = true;
-        tokens[i + 1].tight = true;
-      }
-    }
-  }
-};
--- a/lib/rules_inline/image.js
+++ b/lib/rules_inline/image.js
@ -16,6 +16,7 @@ module.exports = function image(state, silent) {
      labelStart,
      pos,
      ref,
+      res,
      title,
      tokens,
      start,
@ -49,9 +50,10 @@ module.exports = function image(state, silent) {
    // [link](  <href>  "title"  )
    //          ^^^^^^ parsing link destination
    start = pos;
-    if (parseLinkDestination(state, pos)) {
-      href = state.linkContent;
-      pos = state.pos;
+    res = parseLinkDestination(state.src, pos, state.posMax);
+    if (res.ok && state.md.inline.validateLink(res.str)) {
+      href = res.str;
+      pos = res.pos;
    } else {
      href = '';
    }
@ -66,9 +68,10 @@ module.exports = function image(state, silent) {

    // [link](  <href>  "title"  )
    //                  ^^^^^^^ parsing link title
-    if (pos < max && start !== pos && parseLinkTitle(state, pos)) {
-      title = state.linkContent;
-      pos = state.pos;
+    res = parseLinkTitle(state.src, pos, state.posMax);
+    if (pos < max && start !== pos && res.ok) {
+      title = res.str;
+      pos = res.pos;

      // [link](  <href>  "title"  )
      //                         ^^ skipping these spaces
--- a/lib/rules_inline/link.js
+++ b/lib/rules_inline/link.js
@ -15,6 +15,7 @@ module.exports = function link(state, silent) {
      labelEnd,
      labelStart,
      pos,
+      res,
      ref,
      title,
      oldPos = state.pos,
@ -47,9 +48,10 @@ module.exports = function link(state, silent) {
    // [link](  <href>  "title"  )
    //          ^^^^^^ parsing link destination
    start = pos;
-    if (parseLinkDestination(state, pos)) {
-      href = state.linkContent;
-      pos = state.pos;
+    res = parseLinkDestination(state.src, pos, state.posMax);
+    if (res.ok && state.md.inline.validateLink(res.str)) {
+      href = res.str;
+      pos = res.pos;
    } else {
      href = '';
    }
@ -64,9 +66,10 @@ module.exports = function link(state, silent) {

    // [link](  <href>  "title"  )
    //                  ^^^^^^^ parsing link title
-    if (pos < max && start !== pos && parseLinkTitle(state, pos)) {
-      title = state.linkContent;
-      pos = state.pos;
+    res = parseLinkTitle(state.src, pos, state.posMax);
+    if (pos < max && start !== pos && res.ok) {
+      title = res.str;
+      pos = res.pos;

      // [link](  <href>  "title"  )
      //                         ^^ skipping these spaces
--- a/lib/rules_inline/state_inline.js
+++ b/lib/rules_inline/state_inline.js
@ -20,8 +20,6 @@ function StateInline(src, md, env, outTokens) {

  // Link parser state vars

-  this.linkContent = '';  // Temporary storage for link url
-
  this.labelUnmatchedScopes = 0; // Track unpaired `[` for link labels
                                 // (backtrack optimization)
 }
--- a/test/fixtures/markdown-it/abbr.txt
+++ b/test/fixtures/markdown-it/abbr.txt
@ -55,6 +55,21 @@ JS HTTP is a collection of low-level javascript HTTP-related modules
 <p><abbr title="is awesome">JS HTTP</abbr> is a collection of low-level javascript <abbr title="hyper text blah blah">HTTP</abbr>-related modules</p>
 .

+Mixing up abbreviations and references:
+
+.
+*[foo]: 123
+[bar]: 456
+*[baz]: 789
+[quux]: 012
+and a paragraph continuation
+
+foo [bar] baz [quux]
+.
+<p>and a paragraph continuation</p>
+<p><abbr title="123">foo</abbr> <a href="456">bar</a> <abbr title="789">baz</abbr> <a href="012">quux</a></p>
+.
+
 Don't match the middle of the string:

 .
--- a/test/fixtures/markdown-it/commonmark_extras.txt
+++ b/test/fixtures/markdown-it/commonmark_extras.txt
@ -106,6 +106,25 @@ Should not throw exception on mailformed URI
 .


+Multiline title in definitions:
+
+.
+[foo]: test '
+1
+2
+3
+'
+
+[foo]
+.
+<p><a href="test" title="
+1
+2
+3
+">foo</a></p>
+.
+
+
 Coverage. Directive can terminate paragraph.
 .
 a