Blockquote parsing improved

12 years ago · e3a7ddf4a8
8 changed files with 104 additions and 32 deletions
--- a/lib/helpers.js
+++ b/lib/helpers.js
@ -70,7 +70,8 @@ function getLines(state, begin, end, indent, keepLastLF) {
    first = state.bMarks[line] + Math.min(state.tShift[line], indent);

    if (line + 1 < end || keepLastLF) {
-      last = state.bMarks[line + 1];
+      // TODO: boundary check?
+      last = state.eMarks[line] + 1;
    } else {
      last = state.eMarks[line];
    }
--- a/lib/lexer_block.js
+++ b/lib/lexer_block.js
@ -124,6 +124,7 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine, stopOnTwoNe
    if (line >= endLine) { break; }

    if (state.tShift[line] < state.blkIndent) { break; }
+    if (state.bqMarks[line] < state.bqLevel) { break; }

    state.tight = !hasEmptyLines;

@ -149,7 +150,7 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine, stopOnTwoNe

    if (line < endLine && isEmpty(state, line)) {
      hasEmptyLines = true;
-      line++;
+      state.line = line = line + 1;

      // two empty lines should stop the parser
      if (line < endLine && stopOnTwoNewlines && isEmpty(state, line)) { break; }
--- a/lib/lexer_block/blockquote.js
+++ b/lib/lexer_block/blockquote.js
@ -3,12 +3,12 @@
 'use strict';


-var getLines = require('../helpers').getLines;
-var isEmpty  = require('../helpers').isEmpty;
+var skipSpaces      = require('../helpers').skipSpaces;


 module.exports = function blockquote(state, startLine, endLine, silent) {
-  var nextLine, subState, insideLines, lineMax,
+  var nextLine, lastLineEmpty, oldTShift, oldBMarks, i,
+      rules_named = state.lexerBlock.rules_named,
      pos = state.bMarks[startLine] + state.tShift[startLine],
      max = state.eMarks[startLine];

@ -21,37 +21,97 @@ module.exports = function blockquote(state, startLine, endLine, silent) {
  // so no point trying to find the end of it in silent mode
  if (silent) { return true; }

-  lineMax = state.lineMax;
-  insideLines = 1;
-  state.tokens.push({ type: 'blockquote_open' });
-  nextLine = startLine + 1;
-  for (;;) {
-    if (nextLine < lineMax) {
-      pos = state.bMarks[nextLine] + state.tShift[nextLine];
-      max = state.eMarks[nextLine];
-
-      if (pos < max && state.src.charCodeAt(pos++) === 0x3E/* > */) {
-        if (nextLine >= endLine) { break; }
-        nextLine++;
-        insideLines++;
-        continue;
-      }
+  // skip one optional space after '>'
+  if (state.src.charCodeAt(pos) === 0x20) { pos++; }
+
+  state.bqMarks[startLine]++;
+  state.bqLevel++;
+
+  oldBMarks = [ state.bMarks[startLine] ];
+  state.bMarks[startLine] = pos;
+
+  // check if we have an empty blockquote
+  pos = pos < max ? skipSpaces(state, pos) : pos;
+  lastLineEmpty = pos >= max;
+
+  oldTShift = [ state.tShift[startLine] ];
+  state.tShift[startLine] = pos - state.bMarks[startLine];
+
+  // Search for the end of the block
+  //
+  // Block ends with either:
+  //  1. an empty line outside:
+  //     ```
+  //     > test
+  //
+  //     ```
+  //  2. an empty line inside:
+  //     ```
+  //     >
+  //     test
+  //     ```
+  //  3. another tag
+  //     ```
+  //     > test
+  //      - - -
+  //     ```
+  for (nextLine = startLine + 1; nextLine < endLine; nextLine++) {
+    pos = state.bMarks[nextLine] + state.tShift[nextLine];
+    max = state.eMarks[nextLine];
+
+    if (pos >= max) {
+      // Case 1: line is not inside the blockquote, and this line is empty.
+      break;
    }

-    if (insideLines === 0) { break; }
+    if (state.src.charCodeAt(pos++) === 0x3E/* > */) {
+      state.bqMarks[nextLine]++;
+      // This line is inside the blockquote.
+
+      // skip one optional space after '>'
+      if (state.src.charCodeAt(pos) === 0x20) { pos++; }
+
+      oldBMarks.push(state.bMarks[nextLine]);
+      state.bMarks[nextLine] = pos;

-    while (nextLine < lineMax) {
-      if (isEmpty(state, nextLine)) { break; }
-      nextLine++;
+      pos = pos < max ? skipSpaces(state, pos) : pos;
+      lastLineEmpty = pos >= max;
+
+      oldTShift.push(state.tShift[nextLine]);
+      state.tShift[nextLine] = pos - state.bMarks[nextLine];
+      continue;
    }
-    subState = state.clone(getLines(state, startLine, nextLine, 0, true)
-                                  .replace(/^ {0,3}> ?/mg, ''));
-    state.lexerBlock.tokenize(subState, 0, insideLines);
-    nextLine = startLine = subState.line + startLine;
-    insideLines = 0;
+
+    // Case 2: line is not inside the blockquote, and the last line was empty.
+    if (lastLineEmpty) { break; }
+
+    // Case 3: another tag found.
+    if (rules_named.fences(state, nextLine, endLine, true)) { break; }
+    if (rules_named.hr(state, nextLine, endLine, true)) { break; }
+    if (rules_named.list(state, nextLine, endLine, true)) { break; }
+    if (rules_named.heading(state, nextLine, endLine, true)) { break; }
+    // setext header can't interrupt paragraph
+    // if (rules_named.lheading(state, nextLine, endLine, true)) { break; }
+    if (rules_named.blockquote(state, nextLine, endLine, true)) { break; }
+    if (rules_named.table(state, nextLine, endLine, true)) { break; }
+    //if (rules_named.tag(state, nextLine, endLine, true)) { break; }
+    //if (rules_named.def(state, nextLine, endLine, true)) { break; }
+
+    oldBMarks.push(state.bMarks[nextLine]);
+    oldTShift.push(state.tShift[nextLine]);
  }
+
+  state.tokens.push({ type: 'blockquote_open' });
+  state.lexerBlock.tokenize(state, startLine, nextLine);
  state.tokens.push({ type: 'blockquote_close' });

-  state.line = nextLine;
+  // Restore original bMarks and tShift; this might not be necessary since the
+  // parser has already been here, but we do it anyway to be safe.
+  for (i = 0; i < oldTShift.length; i++) {
+    state.bMarks[i + startLine] = oldBMarks[i];
+    state.tShift[i + startLine] = oldTShift[i];
+  }
+  state.bqLevel--;
+
  return true;
 };
--- a/lib/lexer_block/code.js
+++ b/lib/lexer_block/code.js
@ -15,6 +15,7 @@ module.exports = function code(state, startLine, endLine, silent) {
  last = nextLine = startLine + 1;

  while (nextLine < endLine) {
+    if (state.bqMarks[nextLine] < state.bqLevel) { break; }
    if (isEmpty(state, nextLine)) {
      nextLine++;
      if (state.options.pedantic) {
--- a/lib/lexer_block/fences.js
+++ b/lib/lexer_block/fences.js
@ -61,6 +61,7 @@ module.exports = function fences(state, startLine, endLine, silent) {
      //  test
      break;
    }
+    if (pos < max && state.bqMarks[nextLine] < state.bqLevel) { break; }

    if (state.src.charCodeAt(pos) !== marker) { continue; }

--- a/lib/lexer_block/lheading.js
+++ b/lib/lexer_block/lheading.js
@ -14,6 +14,7 @@ module.exports = function lheading(state, startLine, endLine, silent) {

  if (next >= endLine) { return false; }
  if (state.tShift[next] < state.blkIndent) { return false; }
+  if (state.bqMarks[next] < state.bqLevel) { return false; }

  // Scan next line
  if (state.tShift[next] - state.blkIndent > 3) { return false; }
--- a/lib/lexer_block/list.js
+++ b/lib/lexer_block/list.js
@ -133,6 +133,8 @@ module.exports = function list(state, startLine, endLine, silent) {
  prevEmptyEnd = false;

  while (nextLine < endLine) {
+    if (state.bqMarks[nextLine] < state.bqLevel) { break; }
+
    contentStart = skipSpaces(state, posAfterMarker);
    max = state.eMarks[nextLine];

@ -215,8 +217,6 @@ module.exports = function list(state, startLine, endLine, silent) {
    }

    if (markerCharCode !== state.src.charCodeAt(posAfterMarker - 1)) { break; }
-
-
  }

  // Finilize list
--- a/lib/state.js
+++ b/lib/state.js
@ -86,6 +86,13 @@ function State(src, lexerBlock, lexerInline, renderer, tokens, options) {
  this.lineMax    = this.bMarks.length - 1; // don't count last fake line
  this.tight      = false; // loose/tight mode for lists

+  // Stuff for blockquotes
+  this.bqLevel    = 0;
+  this.bqMarks    = [];
+  for (start = 0; start < this.bMarks.length; start++) {
+    this.bqMarks.push(0);
+  }
+
  // renderer
  this.result = '';
 }