Browse Source

Better algorithm for parsing lists

pull/14/head
Alex Kocharin 11 years ago
committed by Vitaly Puzrin
parent
commit
60f36941fa
  1. 15
      lib/lexer_block.js
  2. 12
      lib/lexer_block/blockquote.js
  3. 128
      lib/lexer_block/list.js

15
lib/lexer_block.js

@ -5,6 +5,7 @@
var skipEmptyLines = require('./helpers').skipEmptyLines; var skipEmptyLines = require('./helpers').skipEmptyLines;
var isEmpty = require('./helpers').isEmpty;
var rules = []; var rules = [];
@ -110,7 +111,7 @@ LexerBlock.prototype.after = function (name, fn) {
// Generate tokens for input range // Generate tokens for input range
// //
LexerBlock.prototype.tokenize = function (state, startLine, endLine) { LexerBlock.prototype.tokenize = function (state, startLine, endLine, stopOnTwoNewlines) {
var ok, i, var ok, i,
rules = this.rules, rules = this.rules,
len = this.rules.length, len = this.rules.length,
@ -118,8 +119,7 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
hasEmptyLines = false; hasEmptyLines = false;
while (line < endLine) { while (line < endLine) {
line = skipEmptyLines(state, line, endLine); state.line = line = skipEmptyLines(state, line, endLine);
state.line = line;
if (line >= endLine) { break; } if (line >= endLine) { break; }
// Try all possible rules. // Try all possible rules.
@ -139,7 +139,16 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
if (line === state.line) { if (line === state.line) {
throw new Error('None of rules updated state.line'); throw new Error('None of rules updated state.line');
} }
line = state.line; line = state.line;
if (line < endLine && isEmpty(state, line)) {
hasEmptyLines = true;
line++;
// two empty lines should stop the parser
if (line < endLine && stopOnTwoNewlines && isEmpty(state, line)) { break; }
}
} }
state.tight = !hasEmptyLines; state.tight = !hasEmptyLines;

12
lib/lexer_block/blockquote.js

@ -27,23 +27,21 @@ module.exports = function blockquote(state, startLine, endLine, silent) {
lineMax = state.lineMax; lineMax = state.lineMax;
insideLines = 1; insideLines = 1;
state.tokens.push({ type: 'blockquote_open' }); state.tokens.push({ type: 'blockquote_open' });
for (nextLine = startLine + 1; nextLine < lineMax; ) { nextLine = startLine + 1;
for (;;) {
if (nextLine < lineMax) {
pos = state.bMarks[nextLine] + state.tShift[nextLine]; pos = state.bMarks[nextLine] + state.tShift[nextLine];
max = state.eMarks[nextLine]; max = state.eMarks[nextLine];
if (pos < max && state.src.charCodeAt(pos++) === 0x3E/* > */) { if (pos < max && state.src.charCodeAt(pos++) === 0x3E/* > */) {
if (nextLine < endLine) { if (nextLine >= endLine) { break; }
nextLine++; nextLine++;
insideLines++; insideLines++;
continue; continue;
} else {
break;
} }
} }
if (insideLines === 0) { if (insideLines === 0) { break; }
break;
}
while (nextLine < lineMax) { while (nextLine < lineMax) {
if (isEmpty(state, nextLine)) { break; } if (isEmpty(state, nextLine)) { break; }

128
lib/lexer_block/list.js

@ -77,8 +77,7 @@ function skipOrderedListMarker(state, startLine) {
module.exports = function list(state, startLine, endLine, silent) { module.exports = function list(state, startLine, endLine, silent) {
var line, var nextLine,
nextLine,
indent, indent,
start, start,
posAfterMarker, posAfterMarker,
@ -86,13 +85,14 @@ module.exports = function list(state, startLine, endLine, silent) {
indentAfterMarker, indentAfterMarker,
markerValue, markerValue,
isOrdered, isOrdered,
lastNonEmptyLine, lastLine,
hasNextItem,
subState, subState,
posNext, subString,
contentStart, contentStart,
listTokIdx, listTokIdx,
rules_named = state.lexerBlock.rules_named; lineMax,
endOfList;
//rules_named = state.lexerBlock.rules_named;
// Detect list type and position after marker // Detect list type and position after marker
if ((posAfterMarker = skipOrderedListMarker(state, startLine)) >= 0) { if ((posAfterMarker = skipOrderedListMarker(state, startLine)) >= 0) {
@ -130,13 +130,21 @@ module.exports = function list(state, startLine, endLine, silent) {
// Iterate list items // Iterate list items
// //
line = startLine; nextLine = startLine;
nextLine = line + 1; lineMax = state.lineMax;
endOfList = false;
while (line < endLine) { while (nextLine < endLine && !endOfList) {
if (isOrdered) {
posAfterMarker = skipOrderedListMarker(state, nextLine);
if (posAfterMarker < 0) { break; }
} else {
posAfterMarker = skipBulletListMarker(state, nextLine);
if (posAfterMarker < 0) { break; }
}
contentStart = skipSpaces(state, posAfterMarker); contentStart = skipSpaces(state, posAfterMarker);
max = state.eMarks[line]; max = state.eMarks[nextLine];
if (contentStart >= max) { if (contentStart >= max) {
// trimming space in "- \n 3" case, indent is 1 here // trimming space in "- \n 3" case, indent is 1 here
@ -155,96 +163,70 @@ module.exports = function list(state, startLine, endLine, silent) {
// " - test" // " - test"
// ^^^^^ - calculating total length of this thing // ^^^^^ - calculating total length of this thing
indent = (posAfterMarker - state.bMarks[line]) + indentAfterMarker; indent = (posAfterMarker - state.bMarks[nextLine]) + indentAfterMarker;
// //
// Scan lines inside list items // Scan lines inside list items
// //
lastNonEmptyLine = line; lastLine = startLine;
hasNextItem = false;
for (; nextLine < endLine; nextLine++) { // Run sublexer & write tokens
if (isEmpty(state, nextLine)) { state.tokens.push({ type: 'list_item_open' });
// TODO: check right fenced code block
// Problem - can be in nested list, should detect indent right
// two successive newlines end the list
if (lastNonEmptyLine < nextLine - 1) { break; }
continue;
}
nextLine++;
for (;;) {
// if this line is indented by at least N spaces, // if this line is indented by at least N spaces,
// it's the new paragraph of the same list item // it's the new paragraph of the same list item
if (state.tShift[nextLine] >= indent) { if (nextLine < lineMax) {
lastNonEmptyLine = nextLine; if (isEmpty(state, nextLine)) {
nextLine++;
continue; continue;
} }
if (state.tShift[nextLine] >= indent) {
// paragraph after linebreak - not a continuation if (nextLine < endLine) { lastLine = nextLine; }
if (lastNonEmptyLine < nextLine - 1) { break; } nextLine++;
continue;
//
// if we are here, then next line is not empty and not last.
//
// Check that list is not terminated with another block type
if (rules_named.fences(state, nextLine, endLine, true)) { break; }
if (rules_named.blockquote(state, nextLine, endLine, true)) { break; }
if (rules_named.hr(state, nextLine, endLine, true)) { break; }
//////////////////////////////////////////////////////////////////////////
// In other block types this check (block of the same type) is skipped.
// check if next item of the same type exists,
// and remember the new position after marker
if (isOrdered) {
posNext = skipOrderedListMarker(state, nextLine);
} else {
posNext = skipBulletListMarker(state, nextLine);
} }
if (posNext >= 0) {
hasNextItem = true;
break;
} }
// Another type of list item - need to terminate this list.
if (rules_named.list(state, nextLine, endLine, true)) { break; }
//////////////////////////////////////////////////////////////////////////
if (lastLine < 0) { break; }
if (rules_named.heading(state, nextLine, endLine, true)) { break; } subString = state.src.slice(contentStart, state.eMarks[lastLine])
if (rules_named.lheading(state, nextLine, endLine, true)) { break; } .replace(RegExp('^ {' + indent + '}', 'mg'), '');
if (rules_named.table(state, nextLine, endLine, true)) { break; } if (lastLine < lineMax) {
//if (rules_named.tag(state, nextLine, endLine, true)) { break; } // TODO: we should slice up to next empty line, not up to the end of the document
//if (rules_named.def(state, nextLine, endLine, true)) { break; } // (or even better - up to the next valid token)
//
lastNonEmptyLine = nextLine; // This has no impact on the algorithm except for performance
subString += state.src.slice(state.eMarks[lastLine]);
} }
// Run sublexer & write tokens subState = state.clone(subString);
state.tokens.push({ type: 'list_item_open' }); state.lexerBlock.tokenize(subState, 0, lastLine - startLine + 1, true);
nextLine = startLine = subState.line + startLine;
lastLine = -1;
contentStart = state.eMarks[startLine];
// TODO: need to detect loose type. // TODO: need to detect loose type.
// Problem: blocks separated by empty lines can be members of sublists. // Problem: blocks separated by empty lines can be members of sublists.
subState = state.clone(state.src.slice(
contentStart,
state.eMarks[lastNonEmptyLine])
.replace(RegExp('^ {1,' + indent + '}', 'mg'), ''));
state.lexerBlock.tokenize(subState, 0, subState.lineMax);
// If any of list item is loose, mark list as loose // If any of list item is loose, mark list as loose
if (!subState.tight) { if (!subState.tight) {
state.tokens[listTokIdx].tight = false; state.tokens[listTokIdx].tight = false;
} }
state.tokens.push({ type: 'list_item_close' }); if (nextLine >= endLine) { break; }
if (!hasNextItem) { break; }
posAfterMarker = posNext; if (isEmpty(state, nextLine)) {
line = nextLine;
nextLine++; nextLine++;
if (nextLine >= endLine || isEmpty(state, nextLine)) {
// two newlines end the list
break;
}
}
}
state.tokens.push({ type: 'list_item_close' });
} }
// Finalize list // Finalize list

Loading…
Cancel
Save