Split block lexer & index into separate files

10 years ago · 52b4e44de4
11 changed files with 529 additions and 495 deletions
--- a/index.js
+++ b/index.js
@ -1,139 +1,4 @@
 'use strict';


-var Renderer = require('./lib/renderer');
-var LexerBlock = require('./lib/lexer_block');
-var LexerInline = require('./lib/lexer_inline');
-
-
-// Parser state class
-//
-function State(src, lexerBlock, lexerInline, renderer, options) {
-  var ch, s, start, pos, len, indent, indent_found;
-
-  // TODO: Temporary solution. Check if more effective possible,
-  // withous str change
-  //
-  // - replace tabs with spaces
-  // - remove `\r` to simplify newlines check (???)
-
-  this.src = src
-              .replace(/\t/g, '    ')
-              .replace(/\r/g, '')
-              .replace(/\u00a0/g, ' ')
-              .replace(/\u2424/g, '\n');
-
-  // Shortcuts to simplify nested calls
-  this.lexerBlock  = lexerBlock;
-  this.lexerInline = lexerInline;
-  this.renderer    = renderer;
-
-  // TODO: (?) set directly for faster access.
-  this.options = options;
-
-  //
-  // Internal state vartiables
-  //
-
-  this.tokens = [];
-
-  this.bMarks = []; // line begin offsets for fast jumps
-  this.eMarks = []; // line end offsets for fast jumps
-  this.tShift = []; // indent for each line
-
-  // Generate markers.
-  s = this.src;
-  indent = 0;
-  indent_found = false;
-
-  for(start = pos = indent = 0, len = s.length; pos < len; pos++) {
-    ch = s.charCodeAt(pos);
-
-    // TODO: check other spaces and tabs too or keep existing regexp replace ??
-    if (!indent_found && ch === 0x20/* space */) {
-      indent++;
-    }
-    if (!indent_found && ch !== 0x20/* space */) {
-      this.tShift.push(indent);
-      indent_found = true;
-    }
-
-
-    if (ch === 0x0D || ch === 0x0A) {
-      this.bMarks.push(start);
-      this.eMarks.push(pos);
-      indent_found = false;
-      indent = 0;
-      start = pos + 1;
-    }
-    if (ch === 0x0D && pos < len && s.charCodeAt(pos) === 0x0A) {
-      pos++;
-      start++;
-    }
-  }
-  if (ch !== 0x0D || ch !== 0x0A) {
-    this.bMarks.push(start);
-    this.eMarks.push(len);
-    this.tShift.push(indent);
-  }
-
-  // inline lexer variables
-  this.pos        = 0; // char index in src
-
-  // block lexer variables
-  this.blkLevel   = 0;
-  this.blkIndent  = 0;
-  this.line       = 0; // line index in src
-  this.lineMax = this.bMarks.length;
-
-  // renderer
-  this.result = '';
-}
-
-
-// Main class
-//
-function Remarkable(options) {
-  this.options = {};
-  this.state = null;
-
-  this.lexerInline  = new LexerInline();
-  this.lexerBlock   = new LexerBlock();
-  this.renderer     = new Renderer();
-
-  if (options) { this.set(options); }
-}
-
-
-Remarkable.prototype.set = function (options) {
-  Object.keys(options).forEach(function (key) {
-    this.options[key] = options[key];
-  }, this);
-};
-
-
-Remarkable.prototype.render = function (src) {
-
-  if (!src) { return ''; }
-
-  var state = new State(
-    src,
-    this.lexerBlock,
-    this.lexerInline,
-    this.renderer,
-    this.options
-  );
-
-  // TODO: skip leading empty lines
-
-  state.lexerBlock.tokenize(state, state.line, state.lineMax);
-
-  // TODO: ??? eat empty paragraphs from tail
-
-  //console.log(state.tokens)
-
-  return this.renderer.render(state);
-};
-
-
-module.exports = Remarkable;
+module.exports = require('./lib/parser');
--- a/lib/helpers.js
+++ b/lib/helpers.js
@ -0,0 +1,54 @@
+// Common functions for lexers
+
+'use strict';
+
+
+function isWhiteSpace(ch) {
+  return ch === 0x20;
+}
+
+// Check if line has zero length or contains spaces only
+function isEmpty(state, line) {
+  return state.bMarks[line] + state.tShift[line] >= state.eMarks[line];
+}
+
+// Scan lines from given one and return first not empty
+function skipEmptyLines(state, from) {
+  for (var max = state.lineMax; from < max; from++) {
+    if (state.bMarks[from] + state.tShift[from] < state.eMarks[from]) {
+      break;
+    }
+  }
+  return from;
+}
+
+// Skip spaces from given position.
+function skipSpaces(state, pos) {
+  for (var max = state.src.length; pos < max; pos++) {
+    if (!isWhiteSpace(state.src.charCodeAt(pos))) { break; }
+  }
+  return pos;
+}
+
+// Skip char codes from given position
+function skipChars(state, pos, code) {
+  for (var max = state.src.length; pos < max; pos++) {
+    if (state.src.charCodeAt(pos) !== code) { break; }
+  }
+  return pos;
+}
+
+// Skip char codes reverse from given position
+/*function skipCharsBack(state, pos, code, min) {
+  for (; pos >= min; pos--) {
+    if (code !== state.src.charCodeAt(pos)) { break; }
+  }
+  return pos;
+}*/
+
+
+exports.isWhiteSpace = isWhiteSpace;
+exports.isEmpty = isEmpty;
+exports.skipEmptyLines = skipEmptyLines;
+exports.skipSpaces = skipSpaces;
+exports.skipChars = skipChars;
--- a/lib/lexer_block.js
+++ b/lib/lexer_block.js
@ -4,368 +4,15 @@
 'use strict';


-////////////////////////////////////////////////////////////////////////////////
-// Helpers
-
-
-// Check if character is white space
-function isWhiteSpace(ch) {
-  // TODO: check other spaces and tabs
-  return ch === 0x20;
-}
-
-// Check if line from `pos` is empty or contains spaces only
-function isEmpty(state, line) {
-  return state.bMarks[line] + state.tShift[line] >= state.eMarks[line];
-}
-
-// Return absolute position of char with default indent an given line,
-// or -1 if no requested indent
-/*function getIndent(state, line, indent) {
-  var ch, pos, max;
-
-  if (line >= state.lineMax) { return -1; }
-
-  pos = state.bMarks[line];
-  max = state.eMarks[line];
-
-  while (pos < max && indent > 0) {
-    ch = state.src.charCodeAt(pos++);
-    if (isWhiteSpace(ch)) { indent--; continue; }
-    return -1;
-  }
-
-  if (indent > 0) { return -1; }
-
-  return pos;
-}*/
-
-// Seek first non empty line from given one and return it's number
-function skipEmptyLines(state, from) {
-  for (var max = state.lineMax; from < max; from++) {
-    if (!isEmpty(state, from)) { break; }
-  }
-  return from;
-}
-
-// Skip spaces from given position. Returns new position
-function skipSpaces(state, pos) {
-  for (var max = state.src.length; pos < max; pos++) {
-    if (!isWhiteSpace(state.src.charCodeAt(pos))) { break; }
-  }
-  return pos;
-}
-
-// Skip char codes from given position
-function skipChars(state, pos, code) {
-  for (var max = state.src.length; pos < max; pos++) {
-    if (code !== state.src.charCodeAt(pos)) { break; }
-  }
-  return pos;
-}
-
-// Skip char codes reverse from given position
-function skipCharsBack(state, pos, code, min) {
-  for (; pos >= min; pos--) {
-    if (code !== state.src.charCodeAt(pos)) { break; }
-  }
-  return pos;
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-// Lexer rules
-
 var rules = [];

+rules.push(require('./lexer_block/code'));
+rules.push(require('./lexer_block/fences'));
+rules.push(require('./lexer_block/heading'));
+rules.push(require('./lexer_block/lheading'));
+rules.push(require('./lexer_block/hr'));
+rules.push(require('./lexer_block/paragraph'));

-// code (4 spaced padded)
-rules.push(function code(state, startLine, endLine, silent) {
-  var nextLine, last;
-
-  if (state.tShift[startLine] < 4) { return false; }
-
-  last = nextLine = startLine + 1;
-
-  while (nextLine < endLine) {
-    if (isEmpty(state, nextLine)) {
-      nextLine++;
-      if (state.options.pedantic) {
-        last = nextLine;
-      }
-      continue;
-    }
-    if (state.tShift[nextLine] >= 4) {
-      nextLine++;
-      last = nextLine;
-      continue;
-    }
-    break;
-  }
-
-  if (silent) { return true; }
-
-  state.tokens.push({
-    type: 'code',
-    startLine: startLine,
-    endLine: last
-  });
-
-  state.line = nextLine;
-  return true;
-});
-
-
-// fences (``` lang, ~~~ lang)
-rules.push(function fences(state, startLine, endLine, silent) {
-  var marker, len, params, nextLine,
-      pos = state.bMarks[startLine] + state.tShift[startLine],
-      max = state.eMarks[startLine];
-
-  if (pos + 3 > max) { return false; }
-
-  marker = state.src.charCodeAt(pos);
-
-  if (marker !== 0x7E/* ~ */ && marker !== 0x60 /* ` */) {
-    return false;
-  }
-
-  // scan marker length
-  len = 1;
-  while (state.src.charCodeAt(++pos) === marker) {
-    len++;
-  }
-
-  if (len < 3) { return false; }
-
-  params = state.src.slice(pos, max).trim();
-
-  if (!/\S/.test(params)) { return false; }
-
-  // search end of block
-  nextLine = startLine;
-
-  do {
-    nextLine++;
-
-    if (nextLine > endLine) { return false; }
-
-    pos = state.bMarks[nextLine] + state.tShift[nextLine];
-    max = state.eMarks[nextLine];
-
-    if (pos + 3 > max) { continue; }
-
-    // check markers
-    if (state.src.charCodeAt(pos) !== marker &&
-        state.src.charCodeAt(pos + 1) !== marker &&
-        state.src.charCodeAt(pos + 2) !== marker) {
-      continue;
-    }
-
-    pos += 3;
-
-    // make sure tail has spaces only
-    //pos = pos < max ? skipSpaces(state, pos) : pos;
-
-    // stmd allow any combonation of markers and spaces in tail
-
-    if (pos < max) { continue; }
-
-    // found!
-    break;
-
-  } while (true);
-
-  if (silent) { return true; }
-
-  state.tokens.push({
-    type: 'fence',
-    params: params.split(/\s+/g),
-    startLine: startLine + 1,
-    endLine: nextLine
-  });
-
-  state.line = skipEmptyLines(state, nextLine + 1);
-  return true;
-});
-
-
-// heading (#, ##, ...)
-rules.push(function heading(state, startLine, endLine, silent) {
-  var ch, level,
-      pos = state.bMarks[startLine],
-      max = state.eMarks[startLine],
-      start = pos;
-
-  pos += state.tShift[startLine];
-
-  if (pos >= max) { return false; }
-
-  ch  = state.src.charCodeAt(pos);
-
-  if (ch !== 0x23/* # */ || pos >= max) { return false; }
-
-  // count heading level
-  level = 1;
-  ch = state.src.charCodeAt(++pos);
-  while (ch === 0x23/* # */ && pos < max && level <= 6) {
-    level++;
-    ch = state.src.charCodeAt(++pos);
-  }
-
-  if (level > 6 || (pos < max && !isWhiteSpace(ch))) { return false; }
-
-  // skip spaces before heading text
-  pos = pos < max ? skipSpaces(state, pos) : pos;
-
-  // Now pos contains offset of first heared char
-  // Let's cut tails like '    ###  ' from the end of string
-
-  max--;
-  ch = state.src.charCodeAt(max);
-
-  while (max > start && isWhiteSpace(ch)) {
-    ch = state.src.charCodeAt(--max);
-  }
-  if (ch === 0x23/* # */) {
-    while (max > start && ch === 0x23/* # */) {
-      ch = state.src.charCodeAt(--max);
-    }
-    if (isWhiteSpace(ch)) {
-      while (max > start && isWhiteSpace(ch)) {
-        ch = state.src.charCodeAt(--max);
-      }
-    } else if (ch === 0x5C/* \ */) {
-      max++;
-    }
-  }
-  max++;
-
-  if (silent) { return true; }
-
-  state.tokens.push({ type: 'heading_open', level: level });
-  // only if header is not empty
-  if (pos < max) {
-    state.lexerInline.tokenize(state, pos, max);
-  }
-  state.tokens.push({ type: 'heading_close', level: level });
-
-  state.line = skipEmptyLines(state, ++startLine);
-  return true;
-});
-
-
-
-// lheading (---, ===)
-rules.push(function lheading(state, startLine, endLine, silent) {
-  var marker, pos, mem, max,
-      next = startLine + 1;
-
-  if (next >= state.lineMax) { return false; }
-
-  // Scan next line
-  pos = state.bMarks[next] + state.tShift[next];
-  max = state.eMarks[next];
-
-  if (pos + 3 > max) { return false; }
-
-  marker = state.src.charCodeAt(pos);
-
-  if (marker !== 0x2D/* - */ && marker !== 0x3D/* = */) { return false; }
-
-  mem = pos;
-  pos = skipChars(state, pos, marker);
-
-  if (pos - mem < 3) { return false; }
-
-  pos = skipSpaces(state, pos);
-
-  if (pos < max) { return false; }
-
-  state.tokens.push({ type: 'heading_open', level: marker === 0x3D/* = */ ? 1 : 2 });
-  state.lexerInline.tokenize(state, state.bMarks[startLine], state.eMarks[startLine]);
-  state.tokens.push({ type: 'heading_close', level: marker === 0x3D/* = */ ? 1 : 2 });
-
-  state.line = skipEmptyLines(state, ++next);
-  return true;
-});
-
-
-// Horizontal rule
-rules.push(function hr(state, startLine, endLine, silent) {
-  var marker, cnt, ch,
-      pos = state.bMarks[startLine],
-      max = state.eMarks[startLine];
-
-  // should not have > 3 leading spaces
-  if (state.tShift[startLine] > 3) { return false; }
-
-  pos += state.tShift[startLine];
-
-  if (pos > max) { return false; }
-
-  marker = state.src.charCodeAt(pos++);
-
-  // Check hr marker
-  if (marker !== 0x2A/* * */ &&
-      marker !== 0x2D/* - */ &&
-      marker !== 0x5F/* _ */) {
-    return false;
-  }
-
-  // markers can be mixed with spaces, but there should be at least 3 one
-
-  cnt = 1;
-  while (pos < max) {
-    ch = state.src.charCodeAt(pos++);
-    if (ch !== marker && !isWhiteSpace(ch)) { return false; }
-    if (ch === marker) { cnt++; }
-  }
-
-  if (cnt < 3) { return false; }
-
-  if (silent) { return true; }
-
-  state.tokens.push({ type: 'hr' });
-
-  state.line = skipEmptyLines(state, ++startLine);
-  return true;
-});
-
-
-// Paragraph
-rules.push(function paragraph(state, startLine, endLine) {
-  var nextLine = startLine + 1,
-      rules_named = state.lexerBlock.rules_named;
-
-  // jump line-by-line until empty one or EOF
-  while (nextLine < endLine && !isEmpty(state, nextLine)) {
-    // Force paragraph termination of next tag found
-    if (rules_named.fences(state, nextLine, endLine, true)) { break; }
-    if (rules_named.hr(state, nextLine, endLine, true)) { break; }
-    if (rules_named.heading(state, nextLine, endLine, true)) { break; }
-    if (rules_named.lheading(state, nextLine, endLine, true)) { break; }
-    //if (rules_named.blockquote(state, nextLine, endLine, true)) { break; }
-    //if (rules_named.tag(state, nextLine, endLine, true)) { break; }
-    //if (rules_named.def(state, nextLine, endLine, true)) { break; }
-    nextLine++;
-  }
-
-  state.tokens.push({ type: 'paragraph_open' });
-  state.lexerInline.tokenize(
-    state,
-    state.bMarks[startLine],
-    state.eMarks[nextLine - 1]
-  );
-  state.tokens.push({ type: 'paragraph_close' });
-
-  state.line = skipEmptyLines(state, nextLine);
-  return true;
-});
-
-
-////////////////////////////////////////////////////////////////////////////////
-// Lexer class

 function functionName(fn) {
  var ret = fn.toString();
--- a/lib/lexer_block/code.js
+++ b/lib/lexer_block/code.js
@ -0,0 +1,42 @@
+// Code block (4 spaces padded)
+
+'use strict';
+
+
+var isEmpty = require('../helpers').isEmpty;
+
+
+module.exports = function code(state, startLine, endLine, silent) {
+  var nextLine, last;
+
+  if (state.tShift[startLine] < 4) { return false; }
+
+  last = nextLine = startLine + 1;
+
+  while (nextLine < endLine) {
+    if (isEmpty(state, nextLine)) {
+      nextLine++;
+      if (state.options.pedantic) {
+        last = nextLine;
+      }
+      continue;
+    }
+    if (state.tShift[nextLine] >= 4) {
+      nextLine++;
+      last = nextLine;
+      continue;
+    }
+    break;
+  }
+
+  if (silent) { return true; }
+
+  state.tokens.push({
+    type: 'code',
+    startLine: startLine,
+    endLine: last
+  });
+
+  state.line = nextLine;
+  return true;
+};
--- a/lib/lexer_block/fences.js
+++ b/lib/lexer_block/fences.js
@ -0,0 +1,79 @@
+// fences (``` lang, ~~~ lang)
+
+'use strict';
+
+
+var skipEmptyLines = require('../helpers').skipEmptyLines;
+
+
+module.exports =function fences(state, startLine, endLine, silent) {
+  var marker, len, params, nextLine,
+      pos = state.bMarks[startLine] + state.tShift[startLine],
+      max = state.eMarks[startLine];
+
+  if (pos + 3 > max) { return false; }
+
+  marker = state.src.charCodeAt(pos);
+
+  if (marker !== 0x7E/* ~ */ && marker !== 0x60 /* ` */) {
+    return false;
+  }
+
+  // scan marker length
+  len = 1;
+  while (state.src.charCodeAt(++pos) === marker) {
+    len++;
+  }
+
+  if (len < 3) { return false; }
+
+  params = state.src.slice(pos, max).trim();
+
+  if (!/\S/.test(params)) { return false; }
+
+  // search end of block
+  nextLine = startLine;
+
+  do {
+    nextLine++;
+
+    if (nextLine > endLine) { return false; }
+
+    pos = state.bMarks[nextLine] + state.tShift[nextLine];
+    max = state.eMarks[nextLine];
+
+    if (pos + 3 > max) { continue; }
+
+    // check markers
+    if (state.src.charCodeAt(pos) !== marker &&
+        state.src.charCodeAt(pos + 1) !== marker &&
+        state.src.charCodeAt(pos + 2) !== marker) {
+      continue;
+    }
+
+    pos += 3;
+
+    // make sure tail has spaces only
+    //pos = pos < max ? skipSpaces(state, pos) : pos;
+
+    // stmd allows any combination of markers and spaces in the tail
+
+    if (pos < max) { continue; }
+
+    // found!
+    break;
+
+  } while (true);
+
+  if (silent) { return true; }
+
+  state.tokens.push({
+    type: 'fence',
+    params: params.split(/\s+/g),
+    startLine: startLine + 1,
+    endLine: nextLine
+  });
+
+  state.line = skipEmptyLines(state, nextLine + 1);
+  return true;
+};
--- a/lib/lexer_block/heading.js
+++ b/lib/lexer_block/heading.js
@ -0,0 +1,72 @@
+// heading (#, ##, ...)
+
+'use strict';
+
+
+var isWhiteSpace    = require('../helpers').isWhiteSpace;
+var skipEmptyLines  = require('../helpers').skipEmptyLines;
+var skipSpaces      = require('../helpers').skipSpaces;
+
+
+module.exports = function heading(state, startLine, endLine, silent) {
+  var ch, level,
+      pos = state.bMarks[startLine],
+      max = state.eMarks[startLine],
+      start = pos;
+
+  pos += state.tShift[startLine];
+
+  if (pos >= max) { return false; }
+
+  ch  = state.src.charCodeAt(pos);
+
+  if (ch !== 0x23/* # */ || pos >= max) { return false; }
+
+  // count heading level
+  level = 1;
+  ch = state.src.charCodeAt(++pos);
+  while (ch === 0x23/* # */ && pos < max && level <= 6) {
+    level++;
+    ch = state.src.charCodeAt(++pos);
+  }
+
+  if (level > 6 || (pos < max && !isWhiteSpace(ch))) { return false; }
+
+  // skip spaces before heading text
+  pos = pos < max ? skipSpaces(state, pos) : pos;
+
+  // Now pos contains the offset of the first header char
+  // Let's cut tails like '    ###  ' from the end of string
+
+  max--;
+  ch = state.src.charCodeAt(max);
+
+  while (max > start && isWhiteSpace(ch)) {
+    ch = state.src.charCodeAt(--max);
+  }
+  if (ch === 0x23/* # */) {
+    while (max > start && ch === 0x23/* # */) {
+      ch = state.src.charCodeAt(--max);
+    }
+    if (isWhiteSpace(ch)) {
+      while (max > start && isWhiteSpace(ch)) {
+        ch = state.src.charCodeAt(--max);
+      }
+    } else if (ch === 0x5C/* \ */) {
+      max++;
+    }
+  }
+  max++;
+
+  if (silent) { return true; }
+
+  state.tokens.push({ type: 'heading_open', level: level });
+  // only if header is not empty
+  if (pos < max) {
+    state.lexerInline.tokenize(state, pos, max);
+  }
+  state.tokens.push({ type: 'heading_close', level: level });
+
+  state.line = skipEmptyLines(state, ++startLine);
+  return true;
+};
--- a/lib/lexer_block/hr.js
+++ b/lib/lexer_block/hr.js
@ -0,0 +1,48 @@
+// Horizontal rule
+
+'use strict';
+
+
+var isWhiteSpace    = require('../helpers').isWhiteSpace;
+var skipEmptyLines  = require('../helpers').skipEmptyLines;
+
+
+module.exports = function hr(state, startLine, endLine, silent) {
+  var marker, cnt, ch,
+      pos = state.bMarks[startLine],
+      max = state.eMarks[startLine];
+
+  // should not have > 3 leading spaces
+  if (state.tShift[startLine] > 3) { return false; }
+
+  pos += state.tShift[startLine];
+
+  if (pos > max) { return false; }
+
+  marker = state.src.charCodeAt(pos++);
+
+  // Check hr marker
+  if (marker !== 0x2A/* * */ &&
+      marker !== 0x2D/* - */ &&
+      marker !== 0x5F/* _ */) {
+    return false;
+  }
+
+  // markers can be mixed with spaces, but there should be at least 3 of them
+
+  cnt = 1;
+  while (pos < max) {
+    ch = state.src.charCodeAt(pos++);
+    if (ch !== marker && !isWhiteSpace(ch)) { return false; }
+    if (ch === marker) { cnt++; }
+  }
+
+  if (cnt < 3) { return false; }
+
+  if (silent) { return true; }
+
+  state.tokens.push({ type: 'hr' });
+
+  state.line = skipEmptyLines(state, ++startLine);
+  return true;
+};
--- a/lib/lexer_block/lheading.js
+++ b/lib/lexer_block/lheading.js
@ -0,0 +1,42 @@
+// lheading (---, ===)
+
+'use strict';
+
+
+var skipEmptyLines  = require('../helpers').skipEmptyLines;
+var skipSpaces      = require('../helpers').skipSpaces;
+var skipChars       = require('../helpers').skipChars;
+
+
+module.exports = function lheading(state, startLine, endLine, silent) {
+  var marker, pos, mem, max,
+      next = startLine + 1;
+
+  if (next >= state.lineMax) { return false; }
+
+  // Scan next line
+  pos = state.bMarks[next] + state.tShift[next];
+  max = state.eMarks[next];
+
+  if (pos + 3 > max) { return false; }
+
+  marker = state.src.charCodeAt(pos);
+
+  if (marker !== 0x2D/* - */ && marker !== 0x3D/* = */) { return false; }
+
+  mem = pos;
+  pos = skipChars(state, pos, marker);
+
+  if (pos - mem < 3) { return false; }
+
+  pos = skipSpaces(state, pos);
+
+  if (pos < max) { return false; }
+
+  state.tokens.push({ type: 'heading_open', level: marker === 0x3D/* = */ ? 1 : 2 });
+  state.lexerInline.tokenize(state, state.bMarks[startLine], state.eMarks[startLine]);
+  state.tokens.push({ type: 'heading_close', level: marker === 0x3D/* = */ ? 1 : 2 });
+
+  state.line = skipEmptyLines(state, ++next);
+  return true;
+};
--- a/lib/lexer_block/paragraph.js
+++ b/lib/lexer_block/paragraph.js
@ -0,0 +1,38 @@
+// Paragraph
+
+'use strict';
+
+
+var isEmpty = require('../helpers').isEmpty;
+var skipEmptyLines  = require('../helpers').skipEmptyLines;
+
+
+module.exports = function paragraph(state, startLine, endLine) {
+  var nextLine = startLine + 1,
+      rules_named = state.lexerBlock.rules_named;
+
+  // jump line-by-line until empty one or EOF
+  while (nextLine < endLine && !isEmpty(state, nextLine)) {
+    // Some tags can terminate paragraph without empty line.
+    // Try those tags in validation mode (without token generation)
+    if (rules_named.fences(state, nextLine, endLine, true)) { break; }
+    if (rules_named.hr(state, nextLine, endLine, true)) { break; }
+    if (rules_named.heading(state, nextLine, endLine, true)) { break; }
+    if (rules_named.lheading(state, nextLine, endLine, true)) { break; }
+    //if (rules_named.blockquote(state, nextLine, endLine, true)) { break; }
+    //if (rules_named.tag(state, nextLine, endLine, true)) { break; }
+    //if (rules_named.def(state, nextLine, endLine, true)) { break; }
+    nextLine++;
+  }
+
+  state.tokens.push({ type: 'paragraph_open' });
+  state.lexerInline.tokenize(
+    state,
+    state.bMarks[startLine],
+    state.eMarks[nextLine - 1]
+  );
+  state.tokens.push({ type: 'paragraph_close' });
+
+  state.line = skipEmptyLines(state, nextLine);
+  return true;
+};
--- a/lib/parser.js
+++ b/lib/parser.js
@ -0,0 +1,58 @@
+// Main parser class
+
+'use strict';
+
+
+var State       = require('./state');
+var Renderer    = require('./renderer');
+var LexerBlock  = require('./lexer_block');
+var LexerInline = require('./lexer_inline');
+
+
+// Main class
+//
+function Parser(options) {
+  this.options = {};
+  this.state = null;
+
+  this.lexerInline  = new LexerInline();
+  this.lexerBlock   = new LexerBlock();
+  this.renderer     = new Renderer();
+
+  if (options) { this.set(options); }
+}
+
+
+Parser.prototype.set = function (options) {
+  Object.keys(options).forEach(function (key) {
+    this.options[key] = options[key];
+  }, this);
+};
+
+
+Parser.prototype.render = function (src) {
+  var state;
+
+  if (!src) { return ''; }
+
+  state = new State(
+    src,
+    this.lexerBlock,
+    this.lexerInline,
+    this.renderer,
+    this.options
+  );
+
+  // TODO: skip leading empty lines
+
+  state.lexerBlock.tokenize(state, state.line, state.lineMax);
+
+  // TODO: ??? eat empty paragraphs from tail
+
+  //console.log(state.tokens)
+
+  return this.renderer.render(state);
+};
+
+
+module.exports = Parser;
--- a/lib/state.js
+++ b/lib/state.js
@ -0,0 +1,89 @@
+// Parser state class
+
+'use strict';
+
+
+function State(src, lexerBlock, lexerInline, renderer, options) {
+  var ch, s, start, pos, len, indent, indent_found;
+
+  // TODO: Temporary solution. Check whether a more efficient approach is
+  // possible without changing the string
+  //
+  // - replace tabs with spaces
+  // - remove `\r` to simplify newlines check (???)
+
+  this.src = src
+              .replace(/\t/g, '    ')
+              .replace(/\r/g, '')
+              .replace(/\u00a0/g, ' ')
+              .replace(/\u2424/g, '\n');
+
+  // Shortcuts to simplify nested calls
+  this.lexerBlock  = lexerBlock;
+  this.lexerInline = lexerInline;
+  this.renderer    = renderer;
+
+  // TODO: (?) set directly for faster access.
+  this.options = options;
+
+  //
+  // Internal state variables
+  //
+
+  this.tokens = [];
+
+  this.bMarks = []; // line begin offsets for fast jumps
+  this.eMarks = []; // line end offsets for fast jumps
+  this.tShift = []; // indent for each line
+
+  // Generate markers.
+  s = this.src;
+  indent = 0;
+  indent_found = false;
+
+  for(start = pos = indent = 0, len = s.length; pos < len; pos++) {
+    ch = s.charCodeAt(pos);
+
+    // TODO: check other spaces and tabs too or keep existing regexp replace ??
+    if (!indent_found && ch === 0x20/* space */) {
+      indent++;
+    }
+    if (!indent_found && ch !== 0x20/* space */) {
+      this.tShift.push(indent);
+      indent_found = true;
+    }
+
+
+    if (ch === 0x0D || ch === 0x0A) {
+      this.bMarks.push(start);
+      this.eMarks.push(pos);
+      indent_found = false;
+      indent = 0;
+      start = pos + 1;
+    }
+    if (ch === 0x0D && pos < len && s.charCodeAt(pos) === 0x0A) {
+      pos++;
+      start++;
+    }
+  }
+  if (ch !== 0x0D || ch !== 0x0A) {
+    this.bMarks.push(start);
+    this.eMarks.push(len);
+    this.tShift.push(indent);
+  }
+
+  // inline lexer variables
+  this.pos        = 0; // char index in src
+
+  // block lexer variables
+  this.blkLevel   = 0;
+  this.blkIndent  = 0;
+  this.line       = 0; // line index in src
+  this.lineMax = this.bMarks.length;
+
+  // renderer
+  this.result = '';
+}
+
+
+module.exports = State;