Better algorithm for parsing lists

11 years ago · 60f36941fa
3 changed files with 85 additions and 96 deletions
--- a/lib/lexer_block.js
+++ b/lib/lexer_block.js
@ -5,6 +5,7 @@
 var skipEmptyLines  = require('./helpers').skipEmptyLines;
 var isEmpty         = require('./helpers').isEmpty;
 var rules = [];
@ -110,7 +111,7 @@ LexerBlock.prototype.after = function (name, fn) {
 // Generate tokens for input range
 //
-LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
+LexerBlock.prototype.tokenize = function (state, startLine, endLine, stopOnTwoNewlines) {
  var ok, i,
      rules = this.rules,
      len = this.rules.length,
@ -118,8 +119,7 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
      hasEmptyLines = false;
  while (line < endLine) {
-    line = skipEmptyLines(state, line, endLine);
+    state.line = line = skipEmptyLines(state, line, endLine);
    state.line = line;
    if (line >= endLine) { break; }
    // Try all possible rules.
@ -139,7 +139,16 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
    if (line === state.line) {
      throw new Error('None of rules updated state.line');
    }
    line = state.line;
    if (line < endLine && isEmpty(state, line)) {
      hasEmptyLines = true;
      line++;
      // two empty lines should stop the parser
      if (line < endLine && stopOnTwoNewlines && isEmpty(state, line)) { break; }
    }
  }
  state.tight = !hasEmptyLines;
--- a/lib/lexer_block/blockquote.js
+++ b/lib/lexer_block/blockquote.js
@ -27,23 +27,21 @@ module.exports = function blockquote(state, startLine, endLine, silent) {
  lineMax = state.lineMax;
  insideLines = 1;
  state.tokens.push({ type: 'blockquote_open' });
-  for (nextLine = startLine + 1; nextLine < lineMax; ) {
+  nextLine = startLine + 1;
  for (;;) {
    if (nextLine < lineMax) {
      pos = state.bMarks[nextLine] + state.tShift[nextLine];
      max = state.eMarks[nextLine];
      if (pos < max && state.src.charCodeAt(pos++) === 0x3E/* > */) {
-      if (nextLine < endLine) {
+        if (nextLine >= endLine) { break; }
        nextLine++;
        insideLines++;
        continue;
      } else {
        break;
      }
    }
-    if (insideLines === 0) {
+    if (insideLines === 0) { break; }
      break;
    }
    while (nextLine < lineMax) {
      if (isEmpty(state, nextLine)) { break; }
--- a/lib/lexer_block/list.js
+++ b/lib/lexer_block/list.js
@ -77,8 +77,7 @@ function skipOrderedListMarker(state, startLine) {
 module.exports = function list(state, startLine, endLine, silent) {
-  var line,
+  var nextLine,
      nextLine,
      indent,
      start,
      posAfterMarker,
@ -86,13 +85,14 @@ module.exports = function list(state, startLine, endLine, silent) {
      indentAfterMarker,
      markerValue,
      isOrdered,
-      lastNonEmptyLine,
+      lastLine,
      hasNextItem,
      subState,
-      posNext,
+      subString,
      contentStart,
      listTokIdx,
-      rules_named = state.lexerBlock.rules_named;
+      lineMax,
      endOfList;
      //rules_named = state.lexerBlock.rules_named;
  // Detect list type and position after marker
  if ((posAfterMarker = skipOrderedListMarker(state, startLine)) >= 0) {
@ -130,13 +130,21 @@ module.exports = function list(state, startLine, endLine, silent) {
  // Iterate list items
  //
-  line = startLine;
+  nextLine = startLine;
-  nextLine = line + 1;
+  lineMax = state.lineMax;
  endOfList = false;
-  while (line < endLine) {
+  while (nextLine < endLine && !endOfList) {
    if (isOrdered) {
      posAfterMarker = skipOrderedListMarker(state, nextLine);
      if (posAfterMarker < 0) { break; }
    } else {
      posAfterMarker = skipBulletListMarker(state, nextLine);
      if (posAfterMarker < 0) { break; }
    }
    contentStart = skipSpaces(state, posAfterMarker);
-    max = state.eMarks[line];
+    max = state.eMarks[nextLine];
    if (contentStart >= max) {
      // trimming space in "-    \n  3" case, indent is 1 here
@ -155,96 +163,70 @@ module.exports = function list(state, startLine, endLine, silent) {
    // "  -  test"
    //  ^^^^^ - calculating total length of this thing
-    indent = (posAfterMarker - state.bMarks[line]) + indentAfterMarker;
+    indent = (posAfterMarker - state.bMarks[nextLine]) + indentAfterMarker;
    //
    // Scan lines inside list items
    //
-    lastNonEmptyLine = line;
+    lastLine = startLine;
    hasNextItem = false;
-    for (; nextLine < endLine; nextLine++) {
+    // Run sublexer & write tokens
-      if (isEmpty(state, nextLine)) {
+    state.tokens.push({ type: 'list_item_open' });
        // TODO: check right fenced code block
        // Problem - can be in nested list, should detect indent right
        // two successive newlines end the list
        if (lastNonEmptyLine < nextLine - 1) { break; }
        continue;
      }
    nextLine++;
    for (;;) {
      // if this line is indented by at least N spaces,
      // it's a new paragraph of the same list item
-      if (state.tShift[nextLine] >= indent) {
+      if (nextLine < lineMax) {
-        lastNonEmptyLine = nextLine;
+        if (isEmpty(state, nextLine)) {
          nextLine++;
          continue;
        }
-
+        if (state.tShift[nextLine] >= indent) {
-      // paragraph after linebreak - not a continuation
+          if (nextLine < endLine) { lastLine = nextLine; }
-      if (lastNonEmptyLine < nextLine - 1) { break; }
+          nextLine++;
-
+          continue;
      //
      // if we are here, then next line is not empty and not last.
      //
      // Check that list is not terminated with another block type
      if (rules_named.fences(state, nextLine, endLine, true)) { break; }
      if (rules_named.blockquote(state, nextLine, endLine, true)) { break; }
      if (rules_named.hr(state, nextLine, endLine, true)) { break; }
      //////////////////////////////////////////////////////////////////////////
      // In other block types this check (block of the same type) is skipped.
      // check if next item of the same type exists,
      // and remember the new position after marker
      if (isOrdered) {
        posNext = skipOrderedListMarker(state, nextLine);
      } else {
        posNext = skipBulletListMarker(state, nextLine);
        }
      if (posNext >= 0) {
        hasNextItem = true;
        break;
      }
      // Another type of list item - need to terminate this list.
      if (rules_named.list(state, nextLine, endLine, true)) { break; }
      //////////////////////////////////////////////////////////////////////////
      if (lastLine < 0) { break; }
-      if (rules_named.heading(state, nextLine, endLine, true)) { break; }
+      subString = state.src.slice(contentStart, state.eMarks[lastLine])
-      if (rules_named.lheading(state, nextLine, endLine, true)) { break; }
+                    .replace(RegExp('^ {' + indent + '}', 'mg'), '');
-      if (rules_named.table(state, nextLine, endLine, true)) { break; }
+      if (lastLine < lineMax) {
-      //if (rules_named.tag(state, nextLine, endLine, true)) { break; }
+        // TODO: we should slice up to next empty line, not up to the end of the document
-      //if (rules_named.def(state, nextLine, endLine, true)) { break; }
+        // (or even better - up to the next valid token)
-
+        //
-      lastNonEmptyLine = nextLine;
+        // This has no impact on the algorithm except for performance
        subString += state.src.slice(state.eMarks[lastLine]);
      }
-    // Run sublexer & write tokens
+      subState = state.clone(subString);
-    state.tokens.push({ type: 'list_item_open' });
+      state.lexerBlock.tokenize(subState, 0, lastLine - startLine + 1, true);
      nextLine = startLine = subState.line + startLine;
      lastLine = -1;
      contentStart = state.eMarks[startLine];
      // TODO: need to detect loose type.
      // Problem: blocks separated by empty lines can be members of sublists.
    subState = state.clone(state.src.slice(
                                      contentStart,
                                      state.eMarks[lastNonEmptyLine])
                                    .replace(RegExp('^ {1,' + indent + '}', 'mg'), ''));
    state.lexerBlock.tokenize(subState, 0, subState.lineMax);
      // If any of list item is loose, mark list as loose
      if (!subState.tight) {
        state.tokens[listTokIdx].tight = false;
      }
-    state.tokens.push({ type: 'list_item_close' });
+      if (nextLine >= endLine) { break; }
    if (!hasNextItem) { break; }
-    posAfterMarker = posNext;
+      if (isEmpty(state, nextLine)) {
    line = nextLine;
        nextLine++;
        if (nextLine >= endLine || isEmpty(state, nextLine)) {
          // two newlines end the list
          break;
        }
      }
    }
    state.tokens.push({ type: 'list_item_close' });
  }
  // Finalize list