Browse Source

Better algorithm for parsing lists

pull/14/head
Alex Kocharin 10 years ago
committed by Vitaly Puzrin
parent
commit
60f36941fa
  1. 15
      lib/lexer_block.js
  2. 20
      lib/lexer_block/blockquote.js
  3. 146
      lib/lexer_block/list.js

15
lib/lexer_block.js

@ -5,6 +5,7 @@
var skipEmptyLines = require('./helpers').skipEmptyLines;
var isEmpty = require('./helpers').isEmpty;
var rules = [];
@ -110,7 +111,7 @@ LexerBlock.prototype.after = function (name, fn) {
// Generate tokens for input range
//
LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
LexerBlock.prototype.tokenize = function (state, startLine, endLine, stopOnTwoNewlines) {
var ok, i,
rules = this.rules,
len = this.rules.length,
@ -118,8 +119,7 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
hasEmptyLines = false;
while (line < endLine) {
line = skipEmptyLines(state, line, endLine);
state.line = line;
state.line = line = skipEmptyLines(state, line, endLine);
if (line >= endLine) { break; }
// Try all possible rules.
@ -139,7 +139,16 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
if (line === state.line) {
throw new Error('None of rules updated state.line');
}
line = state.line;
if (line < endLine && isEmpty(state, line)) {
hasEmptyLines = true;
line++;
// two empty lines should stop the parser
if (line < endLine && stopOnTwoNewlines && isEmpty(state, line)) { break; }
}
}
state.tight = !hasEmptyLines;

20
lib/lexer_block/blockquote.js

@ -27,23 +27,21 @@ module.exports = function blockquote(state, startLine, endLine, silent) {
lineMax = state.lineMax;
insideLines = 1;
state.tokens.push({ type: 'blockquote_open' });
for (nextLine = startLine + 1; nextLine < lineMax; ) {
pos = state.bMarks[nextLine] + state.tShift[nextLine];
max = state.eMarks[nextLine];
if (pos < max && state.src.charCodeAt(pos++) === 0x3E/* > */) {
if (nextLine < endLine) {
nextLine = startLine + 1;
for (;;) {
if (nextLine < lineMax) {
pos = state.bMarks[nextLine] + state.tShift[nextLine];
max = state.eMarks[nextLine];
if (pos < max && state.src.charCodeAt(pos++) === 0x3E/* > */) {
if (nextLine >= endLine) { break; }
nextLine++;
insideLines++;
continue;
} else {
break;
}
}
if (insideLines === 0) {
break;
}
if (insideLines === 0) { break; }
while (nextLine < lineMax) {
if (isEmpty(state, nextLine)) { break; }

146
lib/lexer_block/list.js

@ -3,8 +3,8 @@
'use strict';
var isEmpty = require('../helpers').isEmpty;
var skipSpaces = require('../helpers').skipSpaces;
var isEmpty = require('../helpers').isEmpty;
var skipSpaces = require('../helpers').skipSpaces;
// Search `[-+*][\n ]`, returns next pos after marker on success
@ -77,8 +77,7 @@ function skipOrderedListMarker(state, startLine) {
module.exports = function list(state, startLine, endLine, silent) {
var line,
nextLine,
var nextLine,
indent,
start,
posAfterMarker,
@ -86,13 +85,14 @@ module.exports = function list(state, startLine, endLine, silent) {
indentAfterMarker,
markerValue,
isOrdered,
lastNonEmptyLine,
hasNextItem,
lastLine,
subState,
posNext,
subString,
contentStart,
listTokIdx,
rules_named = state.lexerBlock.rules_named;
lineMax,
endOfList;
//rules_named = state.lexerBlock.rules_named;
// Detect list type and position after marker
if ((posAfterMarker = skipOrderedListMarker(state, startLine)) >= 0) {
@ -130,13 +130,21 @@ module.exports = function list(state, startLine, endLine, silent) {
// Iterate list items
//
line = startLine;
nextLine = line + 1;
nextLine = startLine;
lineMax = state.lineMax;
endOfList = false;
while (line < endLine) {
while (nextLine < endLine && !endOfList) {
if (isOrdered) {
posAfterMarker = skipOrderedListMarker(state, nextLine);
if (posAfterMarker < 0) { break; }
} else {
posAfterMarker = skipBulletListMarker(state, nextLine);
if (posAfterMarker < 0) { break; }
}
contentStart = skipSpaces(state, posAfterMarker);
max = state.eMarks[line];
max = state.eMarks[nextLine];
if (contentStart >= max) {
// trimming space in "- \n 3" case, indent is 1 here
@ -155,96 +163,70 @@ module.exports = function list(state, startLine, endLine, silent) {
// " - test"
// ^^^^^ - calculating total length of this thing
indent = (posAfterMarker - state.bMarks[line]) + indentAfterMarker;
indent = (posAfterMarker - state.bMarks[nextLine]) + indentAfterMarker;
//
// Scan lines inside list items
//
lastNonEmptyLine = line;
hasNextItem = false;
for (; nextLine < endLine; nextLine++) {
if (isEmpty(state, nextLine)) {
// TODO: check right fenced code block
// Problem - can be in nested list, should detect indent right
lastLine = startLine;
// two successive newlines end the list
if (lastNonEmptyLine < nextLine - 1) { break; }
continue;
}
// Run sublexer & write tokens
state.tokens.push({ type: 'list_item_open' });
nextLine++;
for (;;) {
// if this line is indented by at least N spaces,
// it's a new paragraph of the same list item
if (state.tShift[nextLine] >= indent) {
lastNonEmptyLine = nextLine;
continue;
if (nextLine < lineMax) {
if (isEmpty(state, nextLine)) {
nextLine++;
continue;
}
if (state.tShift[nextLine] >= indent) {
if (nextLine < endLine) { lastLine = nextLine; }
nextLine++;
continue;
}
}
// paragraph after linebreak - not a continuation
if (lastNonEmptyLine < nextLine - 1) { break; }
//
// if we are here, then next line is not empty and not last.
//
if (lastLine < 0) { break; }
// Check that list is not terminated with another block type
if (rules_named.fences(state, nextLine, endLine, true)) { break; }
if (rules_named.blockquote(state, nextLine, endLine, true)) { break; }
if (rules_named.hr(state, nextLine, endLine, true)) { break; }
//////////////////////////////////////////////////////////////////////////
// In other block types this check (block of the same type) is skipped.
// check if next item of the same type exists,
// and remember the new position after marker
if (isOrdered) {
posNext = skipOrderedListMarker(state, nextLine);
} else {
posNext = skipBulletListMarker(state, nextLine);
}
if (posNext >= 0) {
hasNextItem = true;
break;
subString = state.src.slice(contentStart, state.eMarks[lastLine])
.replace(RegExp('^ {' + indent + '}', 'mg'), '');
if (lastLine < lineMax) {
// TODO: we should slice up to next empty line, not up to the end of the document
// (or even better - up to the next valid token)
//
// This has no impact on the algorithm except for performance
subString += state.src.slice(state.eMarks[lastLine]);
}
// Another type of list item - need to terminate this list.
if (rules_named.list(state, nextLine, endLine, true)) { break; }
//////////////////////////////////////////////////////////////////////////
subState = state.clone(subString);
state.lexerBlock.tokenize(subState, 0, lastLine - startLine + 1, true);
nextLine = startLine = subState.line + startLine;
lastLine = -1;
contentStart = state.eMarks[startLine];
// TODO: need to detect loose type.
// Problem: blocks separated by empty lines can be members of sublists.
if (rules_named.heading(state, nextLine, endLine, true)) { break; }
if (rules_named.lheading(state, nextLine, endLine, true)) { break; }
if (rules_named.table(state, nextLine, endLine, true)) { break; }
//if (rules_named.tag(state, nextLine, endLine, true)) { break; }
//if (rules_named.def(state, nextLine, endLine, true)) { break; }
lastNonEmptyLine = nextLine;
}
// Run sublexer & write tokens
state.tokens.push({ type: 'list_item_open' });
// TODO: need to detect loose type.
// Problem: blocks separated by empty lines can be members of sublists.
// If any list item is loose, mark the list as loose
if (!subState.tight) {
state.tokens[listTokIdx].tight = false;
}
subState = state.clone(state.src.slice(
contentStart,
state.eMarks[lastNonEmptyLine])
.replace(RegExp('^ {1,' + indent + '}', 'mg'), ''));
state.lexerBlock.tokenize(subState, 0, subState.lineMax);
if (nextLine >= endLine) { break; }
// If any list item is loose, mark the list as loose
if (!subState.tight) {
state.tokens[listTokIdx].tight = false;
if (isEmpty(state, nextLine)) {
nextLine++;
if (nextLine >= endLine || isEmpty(state, nextLine)) {
// two newlines end the list
break;
}
}
}
state.tokens.push({ type: 'list_item_close' });
if (!hasNextItem) { break; }
posAfterMarker = posNext;
line = nextLine;
nextLine++;
}
// Finalize list

Loading…
Cancel
Save