From 52b4e44de4c39836dbab3ca6903b76e76708e8b1 Mon Sep 17 00:00:00 2001 From: Vitaly Puzrin Date: Sat, 6 Sep 2014 17:39:01 +0400 Subject: [PATCH] Splitted block lexer & index to files --- index.js | 137 +------------ lib/helpers.js | 54 ++++++ lib/lexer_block.js | 365 +---------------------------------- lib/lexer_block/code.js | 42 ++++ lib/lexer_block/fences.js | 79 ++++++++ lib/lexer_block/heading.js | 72 +++++++ lib/lexer_block/hr.js | 48 +++++ lib/lexer_block/lheading.js | 42 ++++ lib/lexer_block/paragraph.js | 38 ++++ lib/parser.js | 58 ++++++ lib/state.js | 89 +++++++++ 11 files changed, 529 insertions(+), 495 deletions(-) create mode 100644 lib/helpers.js create mode 100644 lib/lexer_block/code.js create mode 100644 lib/lexer_block/fences.js create mode 100644 lib/lexer_block/heading.js create mode 100644 lib/lexer_block/hr.js create mode 100644 lib/lexer_block/lheading.js create mode 100644 lib/lexer_block/paragraph.js create mode 100644 lib/parser.js create mode 100644 lib/state.js diff --git a/index.js b/index.js index 34ba3c0..4c412a6 100644 --- a/index.js +++ b/index.js @@ -1,139 +1,4 @@ 'use strict'; -var Renderer = require('./lib/renderer'); -var LexerBlock = require('./lib/lexer_block'); -var LexerInline = require('./lib/lexer_inline'); - - -// Parser state class -// -function State(src, lexerBlock, lexerInline, renderer, options) { - var ch, s, start, pos, len, indent, indent_found; - - // TODO: Temporary solution. Check if more effective possible, - // withous str change - // - // - replace tabs with spaces - // - remove `\r` to simplify newlines check (???) - - this.src = src - .replace(/\t/g, ' ') - .replace(/\r/g, '') - .replace(/\u00a0/g, ' ') - .replace(/\u2424/g, '\n'); - - // Shortcuts to simplify nested calls - this.lexerBlock = lexerBlock; - this.lexerInline = lexerInline; - this.renderer = renderer; - - // TODO: (?) set directly for faster access. 
- this.options = options; - - // - // Internal state vartiables - // - - this.tokens = []; - - this.bMarks = []; // line begin offsets for fast jumps - this.eMarks = []; // line end offsets for fast jumps - this.tShift = []; // indent for each line - - // Generate markers. - s = this.src; - indent = 0; - indent_found = false; - - for(start = pos = indent = 0, len = s.length; pos < len; pos++) { - ch = s.charCodeAt(pos); - - // TODO: check other spaces and tabs too or keep existing regexp replace ?? - if (!indent_found && ch === 0x20/* space */) { - indent++; - } - if (!indent_found && ch !== 0x20/* space */) { - this.tShift.push(indent); - indent_found = true; - } - - - if (ch === 0x0D || ch === 0x0A) { - this.bMarks.push(start); - this.eMarks.push(pos); - indent_found = false; - indent = 0; - start = pos + 1; - } - if (ch === 0x0D && pos < len && s.charCodeAt(pos) === 0x0A) { - pos++; - start++; - } - } - if (ch !== 0x0D || ch !== 0x0A) { - this.bMarks.push(start); - this.eMarks.push(len); - this.tShift.push(indent); - } - - // inline lexer variables - this.pos = 0; // char index in src - - // block lexer variables - this.blkLevel = 0; - this.blkIndent = 0; - this.line = 0; // line index in src - this.lineMax = this.bMarks.length; - - // renderer - this.result = ''; -} - - -// Main class -// -function Remarkable(options) { - this.options = {}; - this.state = null; - - this.lexerInline = new LexerInline(); - this.lexerBlock = new LexerBlock(); - this.renderer = new Renderer(); - - if (options) { this.set(options); } -} - - -Remarkable.prototype.set = function (options) { - Object.keys(options).forEach(function (key) { - this.options[key] = options[key]; - }, this); -}; - - -Remarkable.prototype.render = function (src) { - - if (!src) { return ''; } - - var state = new State( - src, - this.lexerBlock, - this.lexerInline, - this.renderer, - this.options - ); - - // TODO: skip leading empty lines - - state.lexerBlock.tokenize(state, state.line, state.lineMax); - - // 
TODO: ??? eat empty paragraphs from tail - - //console.log(state.tokens) - - return this.renderer.render(state); -}; - - -module.exports = Remarkable; +module.exports = require('./lib/parser'); diff --git a/lib/helpers.js b/lib/helpers.js new file mode 100644 index 0000000..b09c958 --- /dev/null +++ b/lib/helpers.js @@ -0,0 +1,54 @@ +// Common functions for lexers + +'use strict'; + + +function isWhiteSpace(ch) { + return ch === 0x20; +} + +// Check if line has zero length or contains spaces only +function isEmpty(state, line) { + return state.bMarks[line] + state.tShift[line] >= state.eMarks[line]; +} + +// Scan lines from given one and return first not empty +function skipEmptyLines(state, from) { + for (var max = state.lineMax; from < max; from++) { + if (state.bMarks[from] + state.tShift[from] < state.eMarks[from]) { + break; + } + } + return from; +} + +// Skip spaces from given position. +function skipSpaces(state, pos) { + for (var max = state.src.length; pos < max; pos++) { + if (!isWhiteSpace(state.src.charCodeAt(pos))) { break; } + } + return pos; +} + +// Skip char codes from given position +function skipChars(state, pos, code) { + for (var max = state.src.length; pos < max; pos++) { + if (state.src.charCodeAt(pos) !== code) { break; } + } + return pos; +} + +// Skip char codes reverse from given position +/*function skipCharsBack(state, pos, code, min) { + for (; pos >= min; pos--) { + if (code !== state.src.charCodeAt(pos)) { break; } + } + return pos; +}*/ + + +exports.isWhiteSpace = isWhiteSpace; +exports.isEmpty = isEmpty; +exports.skipEmptyLines = skipEmptyLines; +exports.skipSpaces = skipSpaces; +exports.skipChars = skipChars; \ No newline at end of file diff --git a/lib/lexer_block.js b/lib/lexer_block.js index f61368f..f3f4982 100644 --- a/lib/lexer_block.js +++ b/lib/lexer_block.js @@ -4,368 +4,15 @@ 'use strict'; -//////////////////////////////////////////////////////////////////////////////// -// Helpers - - -// Check if character is white 
space -function isWhiteSpace(ch) { - // TODO: check other spaces and tabs - return ch === 0x20; -} - -// Check if line from `pos` is empty or contains spaces only -function isEmpty(state, line) { - return state.bMarks[line] + state.tShift[line] >= state.eMarks[line]; -} - -// Return absolute position of char with default indent an given line, -// or -1 if no requested indent -/*function getIndent(state, line, indent) { - var ch, pos, max; - - if (line >= state.lineMax) { return -1; } - - pos = state.bMarks[line]; - max = state.eMarks[line]; - - while (pos < max && indent > 0) { - ch = state.src.charCodeAt(pos++); - if (isWhiteSpace(ch)) { indent--; continue; } - return -1; - } - - if (indent > 0) { return -1; } - - return pos; -}*/ - -// Seek first non empty line from given one and return it's number -function skipEmptyLines(state, from) { - for (var max = state.lineMax; from < max; from++) { - if (!isEmpty(state, from)) { break; } - } - return from; -} - -// Skip spaces from given position. 
Returns new position -function skipSpaces(state, pos) { - for (var max = state.src.length; pos < max; pos++) { - if (!isWhiteSpace(state.src.charCodeAt(pos))) { break; } - } - return pos; -} - -// Skip char codes from given position -function skipChars(state, pos, code) { - for (var max = state.src.length; pos < max; pos++) { - if (code !== state.src.charCodeAt(pos)) { break; } - } - return pos; -} - -// Skip char codes reverse from given position -function skipCharsBack(state, pos, code, min) { - for (; pos >= min; pos--) { - if (code !== state.src.charCodeAt(pos)) { break; } - } - return pos; -} - - -//////////////////////////////////////////////////////////////////////////////// -// Lexer rules - var rules = []; +rules.push(require('./lexer_block/code')); +rules.push(require('./lexer_block/fences')); +rules.push(require('./lexer_block/heading')); +rules.push(require('./lexer_block/lheading')); +rules.push(require('./lexer_block/hr')); +rules.push(require('./lexer_block/paragraph')); -// code (4 spaced padded) -rules.push(function code(state, startLine, endLine, silent) { - var nextLine, last; - - if (state.tShift[startLine] < 4) { return false; } - - last = nextLine = startLine + 1; - - while (nextLine < endLine) { - if (isEmpty(state, nextLine)) { - nextLine++; - if (state.options.pedantic) { - last = nextLine; - } - continue; - } - if (state.tShift[nextLine] >= 4) { - nextLine++; - last = nextLine; - continue; - } - break; - } - - if (silent) { return true; } - - state.tokens.push({ - type: 'code', - startLine: startLine, - endLine: last - }); - - state.line = nextLine; - return true; -}); - - -// fences (``` lang, ~~~ lang) -rules.push(function fences(state, startLine, endLine, silent) { - var marker, len, params, nextLine, - pos = state.bMarks[startLine] + state.tShift[startLine], - max = state.eMarks[startLine]; - - if (pos + 3 > max) { return false; } - - marker = state.src.charCodeAt(pos); - - if (marker !== 0x7E/* ~ */ && marker !== 0x60 /* ` */) { - 
return false; - } - - // scan marker length - len = 1; - while (state.src.charCodeAt(++pos) === marker) { - len++; - } - - if (len < 3) { return false; } - - params = state.src.slice(pos, max).trim(); - - if (!/\S/.test(params)) { return false; } - - // search end of block - nextLine = startLine; - - do { - nextLine++; - - if (nextLine > endLine) { return false; } - - pos = state.bMarks[nextLine] + state.tShift[nextLine]; - max = state.eMarks[nextLine]; - - if (pos + 3 > max) { continue; } - - // check markers - if (state.src.charCodeAt(pos) !== marker && - state.src.charCodeAt(pos + 1) !== marker && - state.src.charCodeAt(pos + 2) !== marker) { - continue; - } - - pos += 3; - - // make sure tail has spaces only - //pos = pos < max ? skipSpaces(state, pos) : pos; - - // stmd allow any combonation of markers and spaces in tail - - if (pos < max) { continue; } - - // found! - break; - - } while (true); - - if (silent) { return true; } - - state.tokens.push({ - type: 'fence', - params: params.split(/\s+/g), - startLine: startLine + 1, - endLine: nextLine - }); - - state.line = skipEmptyLines(state, nextLine + 1); - return true; -}); - - -// heading (#, ##, ...) -rules.push(function heading(state, startLine, endLine, silent) { - var ch, level, - pos = state.bMarks[startLine], - max = state.eMarks[startLine], - start = pos; - - pos += state.tShift[startLine]; - - if (pos >= max) { return false; } - - ch = state.src.charCodeAt(pos); - - if (ch !== 0x23/* # */ || pos >= max) { return false; } - - // count heading level - level = 1; - ch = state.src.charCodeAt(++pos); - while (ch === 0x23/* # */ && pos < max && level <= 6) { - level++; - ch = state.src.charCodeAt(++pos); - } - - if (level > 6 || (pos < max && !isWhiteSpace(ch))) { return false; } - - // skip spaces before heading text - pos = pos < max ? 
skipSpaces(state, pos) : pos; - - // Now pos contains offset of first heared char - // Let's cut tails like ' ### ' from the end of string - - max--; - ch = state.src.charCodeAt(max); - - while (max > start && isWhiteSpace(ch)) { - ch = state.src.charCodeAt(--max); - } - if (ch === 0x23/* # */) { - while (max > start && ch === 0x23/* # */) { - ch = state.src.charCodeAt(--max); - } - if (isWhiteSpace(ch)) { - while (max > start && isWhiteSpace(ch)) { - ch = state.src.charCodeAt(--max); - } - } else if (ch === 0x5C/* \ */) { - max++; - } - } - max++; - - if (silent) { return true; } - - state.tokens.push({ type: 'heading_open', level: level }); - // only if header is not empty - if (pos < max) { - state.lexerInline.tokenize(state, pos, max); - } - state.tokens.push({ type: 'heading_close', level: level }); - - state.line = skipEmptyLines(state, ++startLine); - return true; -}); - - - -// lheading (---, ===) -rules.push(function lheading(state, startLine, endLine, silent) { - var marker, pos, mem, max, - next = startLine + 1; - - if (next >= state.lineMax) { return false; } - - // Scan next line - pos = state.bMarks[next] + state.tShift[next]; - max = state.eMarks[next]; - - if (pos + 3 > max) { return false; } - - marker = state.src.charCodeAt(pos); - - if (marker !== 0x2D/* - */ && marker !== 0x3D/* = */) { return false; } - - mem = pos; - pos = skipChars(state, pos, marker); - - if (pos - mem < 3) { return false; } - - pos = skipSpaces(state, pos); - - if (pos < max) { return false; } - - state.tokens.push({ type: 'heading_open', level: marker === 0x3D/* = */ ? 1 : 2 }); - state.lexerInline.tokenize(state, state.bMarks[startLine], state.eMarks[startLine]); - state.tokens.push({ type: 'heading_close', level: marker === 0x3D/* = */ ? 
1 : 2 }); - - state.line = skipEmptyLines(state, ++next); - return true; -}); - - -// Horizontal rule -rules.push(function hr(state, startLine, endLine, silent) { - var marker, cnt, ch, - pos = state.bMarks[startLine], - max = state.eMarks[startLine]; - - // should not have > 3 leading spaces - if (state.tShift[startLine] > 3) { return false; } - - pos += state.tShift[startLine]; - - if (pos > max) { return false; } - - marker = state.src.charCodeAt(pos++); - - // Check hr marker - if (marker !== 0x2A/* * */ && - marker !== 0x2D/* - */ && - marker !== 0x5F/* _ */) { - return false; - } - - // markers can be mixed with spaces, but there should be at least 3 one - - cnt = 1; - while (pos < max) { - ch = state.src.charCodeAt(pos++); - if (ch !== marker && !isWhiteSpace(ch)) { return false; } - if (ch === marker) { cnt++; } - } - - if (cnt < 3) { return false; } - - if (silent) { return true; } - - state.tokens.push({ type: 'hr' }); - - state.line = skipEmptyLines(state, ++startLine); - return true; -}); - - -// Paragraph -rules.push(function paragraph(state, startLine, endLine) { - var nextLine = startLine + 1, - rules_named = state.lexerBlock.rules_named; - - // jump line-by-line until empty one or EOF - while (nextLine < endLine && !isEmpty(state, nextLine)) { - // Force paragraph termination of next tag found - if (rules_named.fences(state, nextLine, endLine, true)) { break; } - if (rules_named.hr(state, nextLine, endLine, true)) { break; } - if (rules_named.heading(state, nextLine, endLine, true)) { break; } - if (rules_named.lheading(state, nextLine, endLine, true)) { break; } - //if (rules_named.blockquote(state, nextLine, endLine, true)) { break; } - //if (rules_named.tag(state, nextLine, endLine, true)) { break; } - //if (rules_named.def(state, nextLine, endLine, true)) { break; } - nextLine++; - } - - state.tokens.push({ type: 'paragraph_open' }); - state.lexerInline.tokenize( - state, - state.bMarks[startLine], - state.eMarks[nextLine - 1] - ); - 
state.tokens.push({ type: 'paragraph_close' }); - - state.line = skipEmptyLines(state, nextLine); - return true; -}); - - -//////////////////////////////////////////////////////////////////////////////// -// Lexer class function functionName(fn) { var ret = fn.toString(); diff --git a/lib/lexer_block/code.js b/lib/lexer_block/code.js new file mode 100644 index 0000000..bd01948 --- /dev/null +++ b/lib/lexer_block/code.js @@ -0,0 +1,42 @@ +// Code block (4 spaces padded) + +'use strict'; + + +var isEmpty = require('../helpers').isEmpty; + + +module.exports = function code(state, startLine, endLine, silent) { + var nextLine, last; + + if (state.tShift[startLine] < 4) { return false; } + + last = nextLine = startLine + 1; + + while (nextLine < endLine) { + if (isEmpty(state, nextLine)) { + nextLine++; + if (state.options.pedantic) { + last = nextLine; + } + continue; + } + if (state.tShift[nextLine] >= 4) { + nextLine++; + last = nextLine; + continue; + } + break; + } + + if (silent) { return true; } + + state.tokens.push({ + type: 'code', + startLine: startLine, + endLine: last + }); + + state.line = nextLine; + return true; +}; \ No newline at end of file diff --git a/lib/lexer_block/fences.js b/lib/lexer_block/fences.js new file mode 100644 index 0000000..8bd246a --- /dev/null +++ b/lib/lexer_block/fences.js @@ -0,0 +1,79 @@ +// fences (``` lang, ~~~ lang) + +'use strict'; + + +var skipEmptyLines = require('../helpers').skipEmptyLines; + + +module.exports =function fences(state, startLine, endLine, silent) { + var marker, len, params, nextLine, + pos = state.bMarks[startLine] + state.tShift[startLine], + max = state.eMarks[startLine]; + + if (pos + 3 > max) { return false; } + + marker = state.src.charCodeAt(pos); + + if (marker !== 0x7E/* ~ */ && marker !== 0x60 /* ` */) { + return false; + } + + // scan marker length + len = 1; + while (state.src.charCodeAt(++pos) === marker) { + len++; + } + + if (len < 3) { return false; } + + params = state.src.slice(pos, 
max).trim(); + + if (!/\S/.test(params)) { return false; } + + // search end of block + nextLine = startLine; + + do { + nextLine++; + + if (nextLine > endLine) { return false; } + + pos = state.bMarks[nextLine] + state.tShift[nextLine]; + max = state.eMarks[nextLine]; + + if (pos + 3 > max) { continue; } + + // check markers + if (state.src.charCodeAt(pos) !== marker && + state.src.charCodeAt(pos + 1) !== marker && + state.src.charCodeAt(pos + 2) !== marker) { + continue; + } + + pos += 3; + + // make sure tail has spaces only + //pos = pos < max ? skipSpaces(state, pos) : pos; + + // stmd allows any combination of markers and spaces in tail + + if (pos < max) { continue; } + + // found! + break; + + } while (true); + + if (silent) { return true; } + + state.tokens.push({ + type: 'fence', + params: params.split(/\s+/g), + startLine: startLine + 1, + endLine: nextLine + }); + + state.line = skipEmptyLines(state, nextLine + 1); + return true; +}; \ No newline at end of file diff --git a/lib/lexer_block/heading.js b/lib/lexer_block/heading.js new file mode 100644 index 0000000..8bd8078 --- /dev/null +++ b/lib/lexer_block/heading.js @@ -0,0 +1,72 @@ +// heading (#, ##, ...) 
+ +'use strict'; + + +var isWhiteSpace = require('../helpers').isWhiteSpace; +var skipEmptyLines = require('../helpers').skipEmptyLines; +var skipSpaces = require('../helpers').skipSpaces; + + +module.exports = function heading(state, startLine, endLine, silent) { + var ch, level, + pos = state.bMarks[startLine], + max = state.eMarks[startLine], + start = pos; + + pos += state.tShift[startLine]; + + if (pos >= max) { return false; } + + ch = state.src.charCodeAt(pos); + + if (ch !== 0x23/* # */ || pos >= max) { return false; } + + // count heading level + level = 1; + ch = state.src.charCodeAt(++pos); + while (ch === 0x23/* # */ && pos < max && level <= 6) { + level++; + ch = state.src.charCodeAt(++pos); + } + + if (level > 6 || (pos < max && !isWhiteSpace(ch))) { return false; } + + // skip spaces before heading text + pos = pos < max ? skipSpaces(state, pos) : pos; + + // Now pos contains offset of first header char + // Let's cut tails like ' ### ' from the end of string + + max--; + ch = state.src.charCodeAt(max); + + while (max > start && isWhiteSpace(ch)) { + ch = state.src.charCodeAt(--max); + } + if (ch === 0x23/* # */) { + while (max > start && ch === 0x23/* # */) { + ch = state.src.charCodeAt(--max); + } + if (isWhiteSpace(ch)) { + while (max > start && isWhiteSpace(ch)) { + ch = state.src.charCodeAt(--max); + } + } else if (ch === 0x5C/* \ */) { + max++; + } + } + max++; + + if (silent) { return true; } + + state.tokens.push({ type: 'heading_open', level: level }); + // only if header is not empty + if (pos < max) { + state.lexerInline.tokenize(state, pos, max); + } + state.tokens.push({ type: 'heading_close', level: level }); + + state.line = skipEmptyLines(state, ++startLine); + return true; +}; \ No newline at end of file diff --git a/lib/lexer_block/hr.js b/lib/lexer_block/hr.js new file mode 100644 index 0000000..c31dfd0 --- /dev/null +++ b/lib/lexer_block/hr.js @@ -0,0 +1,48 @@ +// Horizontal rule + +'use strict'; + + +var isWhiteSpace = 
require('../helpers').isWhiteSpace; +var skipEmptyLines = require('../helpers').skipEmptyLines; + + +module.exports = function hr(state, startLine, endLine, silent) { + var marker, cnt, ch, + pos = state.bMarks[startLine], + max = state.eMarks[startLine]; + + // should not have > 3 leading spaces + if (state.tShift[startLine] > 3) { return false; } + + pos += state.tShift[startLine]; + + if (pos > max) { return false; } + + marker = state.src.charCodeAt(pos++); + + // Check hr marker + if (marker !== 0x2A/* * */ && + marker !== 0x2D/* - */ && + marker !== 0x5F/* _ */) { + return false; + } + + // markers can be mixed with spaces, but there should be at least 3 one + + cnt = 1; + while (pos < max) { + ch = state.src.charCodeAt(pos++); + if (ch !== marker && !isWhiteSpace(ch)) { return false; } + if (ch === marker) { cnt++; } + } + + if (cnt < 3) { return false; } + + if (silent) { return true; } + + state.tokens.push({ type: 'hr' }); + + state.line = skipEmptyLines(state, ++startLine); + return true; +}; diff --git a/lib/lexer_block/lheading.js b/lib/lexer_block/lheading.js new file mode 100644 index 0000000..10ab968 --- /dev/null +++ b/lib/lexer_block/lheading.js @@ -0,0 +1,42 @@ +// lheading (---, ===) + +'use strict'; + + +var skipEmptyLines = require('../helpers').skipEmptyLines; +var skipSpaces = require('../helpers').skipSpaces; +var skipChars = require('../helpers').skipChars; + + +module.exports = function lheading(state, startLine, endLine, silent) { + var marker, pos, mem, max, + next = startLine + 1; + + if (next >= state.lineMax) { return false; } + + // Scan next line + pos = state.bMarks[next] + state.tShift[next]; + max = state.eMarks[next]; + + if (pos + 3 > max) { return false; } + + marker = state.src.charCodeAt(pos); + + if (marker !== 0x2D/* - */ && marker !== 0x3D/* = */) { return false; } + + mem = pos; + pos = skipChars(state, pos, marker); + + if (pos - mem < 3) { return false; } + + pos = skipSpaces(state, pos); + + if (pos < max) { return 
false; } + + state.tokens.push({ type: 'heading_open', level: marker === 0x3D/* = */ ? 1 : 2 }); + state.lexerInline.tokenize(state, state.bMarks[startLine], state.eMarks[startLine]); + state.tokens.push({ type: 'heading_close', level: marker === 0x3D/* = */ ? 1 : 2 }); + + state.line = skipEmptyLines(state, ++next); + return true; +}; diff --git a/lib/lexer_block/paragraph.js b/lib/lexer_block/paragraph.js new file mode 100644 index 0000000..875d001 --- /dev/null +++ b/lib/lexer_block/paragraph.js @@ -0,0 +1,38 @@ +// Paragraph + +'use strict'; + + +var isEmpty = require('../helpers').isEmpty; +var skipEmptyLines = require('../helpers').skipEmptyLines; + + +module.exports = function paragraph(state, startLine, endLine) { + var nextLine = startLine + 1, + rules_named = state.lexerBlock.rules_named; + + // jump line-by-line until empty one or EOF + while (nextLine < endLine && !isEmpty(state, nextLine)) { + // Some tags can terminate paragraph without empty line. + // Try those tags in validation mode (without token generation) + if (rules_named.fences(state, nextLine, endLine, true)) { break; } + if (rules_named.hr(state, nextLine, endLine, true)) { break; } + if (rules_named.heading(state, nextLine, endLine, true)) { break; } + if (rules_named.lheading(state, nextLine, endLine, true)) { break; } + //if (rules_named.blockquote(state, nextLine, endLine, true)) { break; } + //if (rules_named.tag(state, nextLine, endLine, true)) { break; } + //if (rules_named.def(state, nextLine, endLine, true)) { break; } + nextLine++; + } + + state.tokens.push({ type: 'paragraph_open' }); + state.lexerInline.tokenize( + state, + state.bMarks[startLine], + state.eMarks[nextLine - 1] + ); + state.tokens.push({ type: 'paragraph_close' }); + + state.line = skipEmptyLines(state, nextLine); + return true; +}; diff --git a/lib/parser.js b/lib/parser.js new file mode 100644 index 0000000..8496343 --- /dev/null +++ b/lib/parser.js @@ -0,0 +1,58 @@ +// Main parser class + +'use strict'; + + 
+var State = require('./state'); +var Renderer = require('./renderer'); +var LexerBlock = require('./lexer_block'); +var LexerInline = require('./lexer_inline'); + + +// Main class +// +function Parser(options) { + this.options = {}; + this.state = null; + + this.lexerInline = new LexerInline(); + this.lexerBlock = new LexerBlock(); + this.renderer = new Renderer(); + + if (options) { this.set(options); } +} + + +Parser.prototype.set = function (options) { + Object.keys(options).forEach(function (key) { + this.options[key] = options[key]; + }, this); +}; + + +Parser.prototype.render = function (src) { + var state; + + if (!src) { return ''; } + + state = new State( + src, + this.lexerBlock, + this.lexerInline, + this.renderer, + this.options + ); + + // TODO: skip leading empty lines + + state.lexerBlock.tokenize(state, state.line, state.lineMax); + + // TODO: ??? eat empty paragraphs from tail + + //console.log(state.tokens) + + return this.renderer.render(state); +}; + + +module.exports = Parser; diff --git a/lib/state.js b/lib/state.js new file mode 100644 index 0000000..3f460a3 --- /dev/null +++ b/lib/state.js @@ -0,0 +1,89 @@ +// Parser state class + +'use strict'; + + +function State(src, lexerBlock, lexerInline, renderer, options) { + var ch, s, start, pos, len, indent, indent_found; + + // TODO: Temporary solution. Check if a more efficient approach is possible, + // without changing the string + // + // - replace tabs with spaces + // - remove `\r` to simplify newlines check (???) + + this.src = src + .replace(/\t/g, ' ') + .replace(/\r/g, '') + .replace(/\u00a0/g, ' ') + .replace(/\u2424/g, '\n'); + + // Shortcuts to simplify nested calls + this.lexerBlock = lexerBlock; + this.lexerInline = lexerInline; + this.renderer = renderer; + + // TODO: (?) set directly for faster access. 
+ this.options = options; + + // + // Internal state variables + // + + this.tokens = []; + + this.bMarks = []; // line begin offsets for fast jumps + this.eMarks = []; // line end offsets for fast jumps + this.tShift = []; // indent for each line + + // Generate markers. + s = this.src; + indent = 0; + indent_found = false; + + for(start = pos = indent = 0, len = s.length; pos < len; pos++) { + ch = s.charCodeAt(pos); + + // TODO: check other spaces and tabs too or keep existing regexp replace ?? + if (!indent_found && ch === 0x20/* space */) { + indent++; + } + if (!indent_found && ch !== 0x20/* space */) { + this.tShift.push(indent); + indent_found = true; + } + + + if (ch === 0x0D || ch === 0x0A) { + this.bMarks.push(start); + this.eMarks.push(pos); + indent_found = false; + indent = 0; + start = pos + 1; + } + if (ch === 0x0D && pos < len && s.charCodeAt(pos) === 0x0A) { + pos++; + start++; + } + } + if (ch !== 0x0D || ch !== 0x0A) { + this.bMarks.push(start); + this.eMarks.push(len); + this.tShift.push(indent); + } + + // inline lexer variables + this.pos = 0; // char index in src + + // block lexer variables + this.blkLevel = 0; + this.blkIndent = 0; + this.line = 0; // line index in src + this.lineMax = this.bMarks.length; + + // renderer + this.result = ''; +} + + +module.exports = State; \ No newline at end of file