Browse Source

lhead, fences, indent cache

pull/14/head
Vitaly Puzrin 10 years ago
parent
commit
9c99ff47a1
  1. 26
      index.js
  2. 274
      lib/lexer_block.js
  3. 25
      lib/renderer.js

26
index.js

@ -9,7 +9,7 @@ var LexerInline = require('./lib/lexer_inline');
// Parser state class
//
function State(src, lexerBlock, lexerInline, renderer, options) {
var ch, s, start, pos, len;
var ch, s, start, pos, len, indent, indent_found;
// TODO: Temporary solution. Check if a more efficient approach is possible,
// without changing the string
@ -37,17 +37,33 @@ function State(src, lexerBlock, lexerInline, renderer, options) {
this.tokens = [];
this.bMarks = []; // lines begin/end markers for fast jumps
this.eMarks = []; //
this.bMarks = []; // line begin offsets for fast jumps
this.eMarks = []; // line end offsets for fast jumps
this.tShift = []; // indent for each line
// Generate markers.
s = this.src;
for(start = pos = 0, len = s.length; pos < len; pos++) {
indent = 0;
indent_found = false;
for(start = pos = indent = 0, len = s.length; pos < len; pos++) {
ch = s.charCodeAt(pos);
// TODO: check other spaces and tabs too or keep existing regexp replace ??
if (!indent_found && ch === 0x20/* space */) {
indent++;
}
if (!indent_found && ch !== 0x20/* space */) {
this.tShift.push(indent);
indent_found = true;
}
if (ch === 0x0D || ch === 0x0A) {
this.bMarks.push(start);
this.eMarks.push(pos);
indent_found = false;
indent = 0;
start = pos + 1;
}
if (ch === 0x0D && pos < len && s.charCodeAt(pos) === 0x0A) {
@ -58,6 +74,7 @@ function State(src, lexerBlock, lexerInline, renderer, options) {
if (ch !== 0x0D || ch !== 0x0A) {
this.bMarks.push(start);
this.eMarks.push(len);
this.tShift.push(indent);
}
// inline lexer variables
@ -71,7 +88,6 @@ function State(src, lexerBlock, lexerInline, renderer, options) {
// renderer
this.result = '';
}

274
lib/lexer_block.js

@ -16,22 +16,12 @@ function isWhiteSpace(ch) {
// Check if line from `pos` is empty or contains spaces only
function isEmpty(state, line) {
var ch, pos = state.bMarks[line], max = state.src.length;
while (pos < max) {
ch = state.src.charCodeAt(pos++);
if (ch === 0x0A || ch === 0x0D) { return true; }
if (!isWhiteSpace(ch)) { return false; }
}
return true; // EOL reached
return state.bMarks[line] + state.tShift[line] >= state.eMarks[line];
}
// Return the absolute position of the char at the requested indent on the given line,
// or -1 if the line does not have the requested indent
function getIndent(state, line, indent) {
/*function getIndent(state, line, indent) {
var ch, pos, max;
if (line >= state.lineMax) { return -1; }
@ -41,7 +31,6 @@ function getIndent(state, line, indent) {
while (pos < max && indent > 0) {
ch = state.src.charCodeAt(pos++);
if (ch === 0x09) { indent -= 4; continue; }
if (isWhiteSpace(ch)) { indent--; continue; }
return -1;
}
@ -49,18 +38,38 @@ function getIndent(state, line, indent) {
if (indent > 0) { return -1; }
return pos;
}
}*/
// Skip empty lines, starting from `state.line`
// Seek the first non-empty line, starting from the given one, and return its number
function skipEmptyLines(state, from) {
while (from < state.lineMax) {
if (!isEmpty(state, from)) {
state.line = from;
return;
}
from++;
for (var max = state.lineMax; from < max; from++) {
if (!isEmpty(state, from)) { break; }
}
return from;
}
// Advance over consecutive whitespace characters.
//
// - state: parser state; only `state.src` (the source string) is read
// - pos:   offset in `state.src` to start scanning from
//
// Returns the offset of the first character for which `isWhiteSpace` is
// false, or the unchanged/end offset when the scan reaches the end of the
// source.
function skipSpaces(state, pos) {
  var text = state.src,
      limit = text.length;

  while (pos < limit && isWhiteSpace(text.charCodeAt(pos))) {
    pos++;
  }

  return pos;
}
// Skip char codes from given position
function skipChars(state, pos, code) {
for (var max = state.src.length; pos < max; pos++) {
if (code !== state.src.charCodeAt(pos)) { break; }
}
state.line = from;
return pos;
}
// Walk backwards over a run of identical characters.
//
// - state: parser state; only `state.src` (the source string) is read
// - pos:   offset to start from (inclusive)
// - code:  char code to skip
// - min:   lowest offset that may be examined (inclusive)
//
// Returns the offset of the first character (going backwards) that differs
// from `code`, or `min - 1` when every character down to `min` matched.
function skipCharsBack(state, pos, code, min) {
  while (pos >= min && state.src.charCodeAt(pos) === code) {
    pos--;
  }

  return pos;
}
@ -70,11 +79,11 @@ function skipEmptyLines(state, from) {
var rules = [];
// code
// code (4 spaced padded)
rules.push(function code(state, startLine, endLine, silent) {
var nextLine, last;
if (getIndent(state, startLine, 4) === -1) { return false; }
if (state.tShift[startLine] < 4) { return false; }
last = nextLine = startLine + 1;
@ -86,7 +95,7 @@ rules.push(function code(state, startLine, endLine, silent) {
}
continue;
}
if (getIndent(state, nextLine, 4) !== -1) {
if (state.tShift[nextLine] >= 4) {
nextLine++;
last = nextLine;
continue;
@ -107,18 +116,92 @@ rules.push(function code(state, startLine, endLine, silent) {
});
// heading
// fences (``` lang, ~~~ lang)
//
// Block rule for fenced code. The opening line must start (after its cached
// indent, `state.tShift`) with at least three identical `~` or backtick
// characters followed by non-empty params. The block is closed by a later
// line that consists of exactly three marker characters after its indent.
//
// - state:     parser state (reads src, bMarks, eMarks, tShift; writes tokens, line)
// - startLine: index of the line to test
// - endLine:   upper bound of the scan; treated as exclusive, the same way the
//              paragraph rule loops with `nextLine < endLine`
// - silent:    when truthy, only report whether a fence is present; do not
//              emit tokens or move `state.line`
//
// Returns true if a complete fence was recognised, false otherwise.
rules.push(function fences(state, startLine, endLine, silent) {
  var marker, len, params, nextLine,
      pos = state.bMarks[startLine] + state.tShift[startLine],
      max = state.eMarks[startLine];

  // not enough room for three marker chars
  if (pos + 3 > max) { return false; }

  marker = state.src.charCodeAt(pos);

  if (marker !== 0x7E/* ~ */ && marker !== 0x60 /* ` */) {
    return false;
  }

  // scan marker length
  len = 1;
  while (state.src.charCodeAt(++pos) === marker) {
    len++;
  }

  if (len < 3) { return false; }

  // everything after the markers is the params string (e.g. the language)
  params = state.src.slice(pos, max).trim();

  // an opening fence without params is not accepted by this rule
  if (!/\S/.test(params)) { return false; }

  // search end of block
  nextLine = startLine;

  do {
    nextLine++;

    // Ran out of lines without finding a closing fence. `>=` keeps the scan
    // from indexing bMarks/eMarks/tShift at `endLine` itself.
    if (nextLine >= endLine) { return false; }

    pos = state.bMarks[nextLine] + state.tShift[nextLine];
    max = state.eMarks[nextLine];

    if (pos + 3 > max) { continue; }

    // check markers: ALL three leading chars must be the marker, so the line
    // is skipped as soon as ANY of them differs
    if (state.src.charCodeAt(pos) !== marker ||
        state.src.charCodeAt(pos + 1) !== marker ||
        state.src.charCodeAt(pos + 2) !== marker) {
      continue;
    }

    pos += 3;

    // make sure tail has spaces only
    //pos = pos < max ? skipSpaces(state, pos) : pos;

    // stmd allows any combination of markers and spaces in the tail; for now
    // anything after the three markers disqualifies the line as a closer
    if (pos < max) { continue; }

    // found!
    break;
  } while (true);

  if (silent) { return true; }

  state.tokens.push({
    type: 'fence',
    params: params.split(/\s+/g),
    // first line after the opening fence
    startLine: startLine + 1,
    // index of the closing fence line
    endLine: nextLine
  });

  state.line = skipEmptyLines(state, nextLine + 1);
  return true;
});
// heading (#, ##, ...)
rules.push(function heading(state, startLine, endLine, silent) {
var ch, level,
pos = state.bMarks[startLine],
max = state.eMarks[startLine];
max = state.eMarks[startLine],
start = pos;
ch = state.src.charCodeAt(pos);
pos += state.tShift[startLine];
// skip leading spaces
while (isWhiteSpace(ch) && pos < max) {
ch = state.src.charCodeAt(++pos);
}
if (pos >= max) { return false; }
ch = state.src.charCodeAt(pos);
if (ch !== 0x23/* # */ || pos >= max) { return false; }
@ -130,15 +213,10 @@ rules.push(function heading(state, startLine, endLine, silent) {
ch = state.src.charCodeAt(++pos);
}
if (!isWhiteSpace(ch) || pos >= max || level > 6) { return false; }
if (level > 6 || (pos < max && !isWhiteSpace(ch))) { return false; }
// skip spaces before heading text
ch = state.src.charCodeAt(++pos);
while (isWhiteSpace(ch) && pos < max) {
ch = state.src.charCodeAt(++pos);
}
if (pos >= max) { return false; }
pos = pos < max ? skipSpaces(state, pos) : pos;
// Now pos contains the offset of the first header char
// Let's cut tails like ' ### ' from the end of string
@ -146,15 +224,15 @@ rules.push(function heading(state, startLine, endLine, silent) {
max--;
ch = state.src.charCodeAt(max);
while (isWhiteSpace(ch) && max > pos) {
while (max > start && isWhiteSpace(ch)) {
ch = state.src.charCodeAt(--max);
}
if (ch === 0x23/* # */) {
while (ch === 0x23/* # */ && max > pos) {
while (max > start && ch === 0x23/* # */) {
ch = state.src.charCodeAt(--max);
}
if (isWhiteSpace(ch)) {
while (isWhiteSpace(ch) && max > pos) {
while (max > start && isWhiteSpace(ch)) {
ch = state.src.charCodeAt(--max);
}
} else if (ch === 0x5C/* \ */) {
@ -165,81 +243,92 @@ rules.push(function heading(state, startLine, endLine, silent) {
if (silent) { return true; }
if (silent) {
return true;
}
state.tokens.push({ type: 'heading_open', level: level });
state.lexerInline.tokenize(state, pos, max);
// only if header is not empty
if (pos < max) {
state.lexerInline.tokenize(state, pos, max);
}
state.tokens.push({ type: 'heading_close', level: level });
skipEmptyLines(state, ++startLine);
state.line = skipEmptyLines(state, ++startLine);
return true;
});
// lheading (---, ===)
//
// Block rule for underlined headings: the text is on `startLine` and the
// following line, after its cached indent, is a run of at least three `=`
// (level 1) or `-` (level 2) characters, optionally followed by whitespace.
//
// - state:     parser state (reads src, bMarks, eMarks, tShift, lineMax;
//              writes tokens, line)
// - startLine: index of the line holding the heading text
// - endLine:   unused by this rule
// - silent:    when truthy, only report whether the pattern matches; do not
//              emit tokens or move `state.line`
//
// Returns true if the two lines form an underlined heading, false otherwise.
rules.push(function lheading(state, startLine, endLine, silent) {
  var marker, pos, mem, max, level,
      next = startLine + 1;

  if (next >= state.lineMax) { return false; }

  // Scan next line
  pos = state.bMarks[next] + state.tShift[next];
  max = state.eMarks[next];

  // not enough room for three marker chars
  if (pos + 3 > max) { return false; }

  marker = state.src.charCodeAt(pos);

  if (marker !== 0x2D/* - */ && marker !== 0x3D/* = */) { return false; }

  mem = pos;
  pos = skipChars(state, pos, marker);

  if (pos - mem < 3) { return false; }

  // only whitespace may follow the marker run
  pos = skipSpaces(state, pos);

  if (pos < max) { return false; }

  // Probe mode (used by the paragraph rule to detect a terminating block):
  // report the match without touching tokens or the current line.
  if (silent) { return true; }

  level = marker === 0x3D/* = */ ? 1 : 2;

  state.tokens.push({ type: 'heading_open', level: level });
  state.lexerInline.tokenize(state, state.bMarks[startLine], state.eMarks[startLine]);
  state.tokens.push({ type: 'heading_close', level: level });

  state.line = skipEmptyLines(state, ++next);
  return true;
});
// Horizontal rule
rules.push(function hr(state, startLine, endLine, silent) {
var ch, marker,
var marker, cnt, ch,
pos = state.bMarks[startLine],
space_max = pos + 3,
max = state.eMarks[startLine];
ch = state.src.charCodeAt(pos);
// should not have > 3 leading spaces
if (state.tShift[startLine] > 3) { return false; }
// quick test first char
if (!isWhiteSpace(ch) &&
ch !== 0x2A/* * */ &&
ch !== 0x2D/* - */ &&
ch !== 0x5F/* _ */) {
return false;
}
pos += state.tShift[startLine];
// skip up to 3 leading spaces
while (isWhiteSpace(ch) && pos < max && pos < space_max) {
pos++;
ch = state.src.charCodeAt(pos);
}
if (pos > max) { return false; }
marker = state.src.charCodeAt(pos++);
// Check hr marker
if (ch !== 0x2A/* * */ &&
ch !== 0x2D/* - */ &&
ch !== 0x5F/* _ */) {
if (marker !== 0x2A/* * */ &&
marker !== 0x2D/* - */ &&
marker !== 0x5F/* _ */) {
return false;
}
// remember marker type
marker = ch;
if (pos + 2 < max &&
state.src.charCodeAt(pos + 1) === marker &&
state.src.charCodeAt(pos + 2) === marker) {
// Style 1: ***, ---, ___
pos += 3;
} else if (pos + 4 < max &&
isWhiteSpace(state.src.charCodeAt(pos + 1)) &&
state.src.charCodeAt(pos + 2) === marker &&
isWhiteSpace(state.src.charCodeAt(pos + 3)) &&
state.src.charCodeAt(pos + 4) === marker) {
// Style 2: * * *, - - -, _ _ _
pos += 5;
} else {
return false;
}
// markers can be mixed with spaces, but there should be at least 3 of them
// check that line tail has spaces only
while(pos < max) {
cnt = 1;
while (pos < max) {
ch = state.src.charCodeAt(pos++);
if (isWhiteSpace(ch)) {
return false;
}
if (ch !== marker && !isWhiteSpace(ch)) { return false; }
if (ch === marker) { cnt++; }
}
if (cnt < 3) { return false; }
if (silent) { return true; }
state.tokens.push({ type: 'hr' });
skipEmptyLines(state, ++startLine);
state.line = skipEmptyLines(state, ++startLine);
return true;
});
@ -252,9 +341,10 @@ rules.push(function paragraph(state, startLine, endLine) {
// jump line-by-line until empty one or EOF
while (nextLine < endLine && !isEmpty(state, nextLine)) {
// Force paragraph termination if next tag found
if (rules_named.fences(state, nextLine, endLine, true)) { break; }
if (rules_named.hr(state, nextLine, endLine, true)) { break; }
if (rules_named.heading(state, nextLine, endLine, true)) { break; }
//if (rules_named.lheading(state, nextLine, endLine, true)) { break; }
if (rules_named.lheading(state, nextLine, endLine, true)) { break; }
//if (rules_named.blockquote(state, nextLine, endLine, true)) { break; }
//if (rules_named.tag(state, nextLine, endLine, true)) { break; }
//if (rules_named.def(state, nextLine, endLine, true)) { break; }
@ -269,7 +359,7 @@ rules.push(function paragraph(state, startLine, endLine) {
);
state.tokens.push({ type: 'paragraph_close' });
skipEmptyLines(state, nextLine);
state.line = skipEmptyLines(state, nextLine);
return true;
});

25
lib/renderer.js

@ -5,6 +5,12 @@ function escapeHTML(str) {
return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}
// Matches a backslash followed by one ASCII punctuation character; the
// punctuation character is captured in group 1.
var MD_UNESCAPE_RE = /\\([!"#$%&\'()*+,.\/:;<=>?@[\\\]^_`{|}~-])/g;

// Remove markdown backslash escapes: every `\<punctuation>` pair in `str` is
// replaced by the bare punctuation character. Backslashes followed by any
// other character are left untouched. Returns the resulting string.
function unescapeMD(str) {
  return str.replace(MD_UNESCAPE_RE, function (match, escaped) {
    return escaped;
  });
}
function joinLines(state, begin, end) {
return state.src.slice(
state.bMarks[begin],
@ -16,10 +22,21 @@ var rules = {};
rules.code = function (state, token) {
// TODO: check if we need variable indent cut
var lines = joinLines(state, token.startLine, token.endLine).replace(/^ {4}/gm, '');
var content = joinLines(state, token.startLine, token.endLine).replace(/^ {4}/gm, '');
state.result += '<pre><code>' + escapeHTML(content) + '</code></pre>\n';
};
rules.fence = function (state, token) {
var content = joinLines(state, token.startLine, token.endLine);
var langMark = '';
if (token.params.length) {
langMark = ' class="language-' + escapeHTML(token.params[0]) + '"';
}
state.result += '<pre><code>' + escapeHTML(lines) + '</code></pre>\n';
state.result += '<pre><code' + langMark + '>' + escapeHTML(content) + '</code></pre>\n';
};
@ -45,7 +62,7 @@ rules.paragraph_close = function (state, token) {
rules.text = function (state, token) {
state.result += escapeHTML(state.src.slice(token.begin, token.end));
state.result += escapeHTML(unescapeMD(state.src.slice(token.begin, token.end)));
};

Loading…
Cancel
Save