From c72bba2090f2a13ec83c2f3825cefbda5e57e821 Mon Sep 17 00:00:00 2001 From: Vitaly Puzrin Date: Fri, 5 Sep 2014 08:24:53 +0400 Subject: [PATCH] Added lexer/renderer stubs --- Makefile | 2 +- benchmark/implementations/current/index.js | 2 +- benchmark/profile.js | 12 + bin/remarkable.js | 89 ++++++ index.js | 112 ++++++- lib/lexer_block.js | 301 ++++++++++++++++++ lib/lexer_inline.js | 134 ++++++++ lib/renderer.js | 65 ++++ package.json | 3 + test/defaults.js | 16 + test/fixtures/defaults/code/code_blocks.html | 12 + test/fixtures/defaults/code/code_blocks.md | 14 + .../defaults/hr/horizontal_rules.html | 39 +++ test/fixtures/defaults/hr/horizontal_rules.md | 67 ++++ test/utils.js | 38 +++ 15 files changed, 902 insertions(+), 4 deletions(-) create mode 100644 benchmark/profile.js create mode 100755 bin/remarkable.js create mode 100644 lib/lexer_block.js create mode 100644 lib/lexer_inline.js create mode 100644 lib/renderer.js create mode 100644 test/defaults.js create mode 100644 test/fixtures/defaults/code/code_blocks.html create mode 100644 test/fixtures/defaults/code/code_blocks.md create mode 100644 test/fixtures/defaults/hr/horizontal_rules.html create mode 100644 test/fixtures/defaults/hr/horizontal_rules.md create mode 100644 test/utils.js diff --git a/Makefile b/Makefile index 2d8f5f8..7c94ba7 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ lint: test: lint - # NODE_ENV=test mocha -R spec + NODE_ENV=test mocha -R spec gh-pages: diff --git a/benchmark/implementations/current/index.js b/benchmark/implementations/current/index.js index 898ca50..b0e9faf 100644 --- a/benchmark/implementations/current/index.js +++ b/benchmark/implementations/current/index.js @@ -1,6 +1,6 @@ 'use strict' -var Remarkable = new require('../../../'); +var Remarkable = require('../../../'); var md = new Remarkable(); exports.run = function(data) { diff --git a/benchmark/profile.js b/benchmark/profile.js new file mode 100644 index 0000000..8c6578f --- /dev/null +++ 
// Read the whole input as a string and hand it to `callback(error, text)`.
//
// - filename - path of the file to read; the special name '-' means stdin
// - encoding - character encoding used to decode the bytes (e.g. 'utf8')
// - callback - node-style callback, called once with (error, text)
//
// The stdin decision is made on the `filename` argument itself rather than
// on the parsed CLI options, so the function honours whatever the caller
// passes in.
function readFile(filename, encoding, callback) {
  if (filename === '-') {
    // read from stdin: collect all chunks, decode once at the end
    var chunks = [];

    process.stdin.on('data', function (chunk) {
      chunks.push(chunk);
    });

    process.stdin.on('end', function () {
      return callback(null, Buffer.concat(chunks).toString(encoding));
    });
  } else {
    fs.readFile(filename, encoding, callback);
  }
}
// Parser state class
//
// Carries everything a single `render()` call works with: the normalised
// source text, the per-line begin/end offset tables, the token list and the
// output accumulator.
//
// - src         - markdown source string
// - lexerBlock  - stored as `state.lexerBlock`
// - lexerInline - stored as `state.lexerInline`
// - renderer    - stored as `state.renderer`
// - options     - stored as `state.options`
//
function State(src, lexerBlock, lexerInline, renderer, options) {
  var ch, s, start, pos, len;

  // TODO: Temporary solution. Check if a more effective way is possible,
  // without changing the string
  //
  // - replace tabs with spaces
  // - remove `\r` to simplify newlines check (???)
  //
  // NOTE(review): the tab replacement is assumed to be four spaces (a tab
  // is counted as four columns of indent elsewhere in the parser) - confirm
  // against the upstream file.
  this.src = src
    .replace(/\t/g, '    ')
    .replace(/\r/g, '')
    .replace(/\u00a0/g, ' ')
    .replace(/\u2424/g, '\n');

  // Shortcuts to simplify nested calls
  this.lexerBlock = lexerBlock;
  this.lexerInline = lexerInline;
  this.renderer = renderer;

  // TODO: (?) set directly for faster access.
  this.options = options;

  //
  // Internal state variables
  //

  this.tokens = [];

  this.bMarks = [];   // offset of the first char of every line
  this.eMarks = [];   // offset of the line break (or EOF) ending every line

  // Generate markers.
  s = this.src;
  for (start = pos = 0, len = s.length; pos < len; pos++) {
    ch = s.charCodeAt(pos);

    if (ch === 0x0D || ch === 0x0A) {
      this.bMarks.push(start);
      this.eMarks.push(pos);
      start = pos + 1;

      // CR LF is a single line break: look at the NEXT char and step over it
      if (ch === 0x0D && pos + 1 < len && s.charCodeAt(pos + 1) === 0x0A) {
        pos++;
        start++;
      }
    }
  }

  // The last line needs an explicit entry only when the source does not end
  // with a line break (an empty source still gets one empty line).
  if (ch !== 0x0D && ch !== 0x0A) {
    this.bMarks.push(start);
    this.eMarks.push(len);
  }

  // inline lexer variables
  this.pos = 0;           // char index in src

  // block lexer variables
  this.blkLevel = 0;
  this.blkIndent = 0;
  this.line = 0;          // line index in src
  this.lineMax = this.bMarks.length;

  // renderer
  this.result = '';
}


// Main class
//
// - options - optional hash of settings, copied into `this.options`
//
function Remarkable(options) {
  this.options = {};
  this.state = null;

  this.lexerInline = new LexerInline();
  this.lexerBlock = new LexerBlock();
  this.renderer = new Renderer();

  if (options) { this.set(options); }
}


// Copy every own enumerable key of `options` into the instance options.
//
Remarkable.prototype.set = function (options) {
  Object.keys(options).forEach(function (key) {
    this.options[key] = options[key];
  }, this);
};


// Convert markdown source to the rendered string.
// Returns '' for an empty / missing source.
//
Remarkable.prototype.render = function (src) {

  if (!src) { return ''; }

  var state = new State(
    src,
    this.lexerBlock,
    this.lexerInline,
    this.renderer,
    this.options
  );

  // TODO: skip leading empty lines

  state.lexerBlock.tokenize(state, state.line, state.lineMax);

  // TODO: ??? eat empty paragraphs from tail

  return this.renderer.render(state);
};
// Block lexer

////////////////////////////////////////////////////////////////////////////////
// Helpers


// Check if character is white space
function isWhiteSpace(ch) {
  // TODO: check other spaces and tabs
  return ch === 0x20;
}

// Check if `line` is empty or contains spaces only
function isEmpty(state, line) {
  var ch, pos = state.bMarks[line], max = state.src.length;

  while (pos < max) {
    ch = state.src.charCodeAt(pos++);

    if (ch === 0x0A || ch === 0x0D) { return true; }

    if (!isWhiteSpace(ch)) { return false; }
  }

  return true; // end of source reached
}

// Return absolute position of the first char after `indent` columns of
// leading white space on the given line, or -1 if the line is not indented
// that much (or does not exist). A tab counts as 4 columns.
function getIndent(state, line, indent) {
  var ch, pos, max;

  if (line >= state.lineMax) { return -1; }

  pos = state.bMarks[line];
  max = state.eMarks[line];

  while (pos < max && indent > 0) {
    ch = state.src.charCodeAt(pos++);
    if (ch === 0x09) { indent -= 4; continue; }
    if (isWhiteSpace(ch)) { indent--; continue; }
    return -1;
  }

  if (indent > 0) { return -1; }

  return pos;
}

// Skip empty lines starting from `from`; stores the index of the first
// non-empty line (or `state.lineMax`) in `state.line`.
function skipEmptyLines(state, from) {
  while (from < state.lineMax) {
    if (!isEmpty(state, from)) {
      state.line = from;
      return;
    }
    from++;
  }
  state.line = from;
}


////////////////////////////////////////////////////////////////////////////////
// Lexer rules
//
// Every rule has the signature `rule(state, startLine, endLine)`. On a match
// it pushes its token(s) to `state.tokens`, sets `state.line` to the first
// unconsumed line and returns true; otherwise it returns false.

var rules = [];


// code - block of lines indented by 4+ spaces
rules.push(function code(state, startLine, endLine) {
  var nextLine, last;

  if (getIndent(state, startLine, 4) === -1) { return false; }

  // `last` is the (exclusive) end of the emitted block, `nextLine` is how far
  // we have scanned; they differ when trailing empty lines are dropped.
  last = nextLine = startLine + 1;

  while (nextLine < endLine) {
    if (isEmpty(state, nextLine)) {
      nextLine++;
      if (state.options.pedantic) {
        last = nextLine;
      }
      continue;
    }
    if (getIndent(state, nextLine, 4) !== -1) {
      nextLine++;
      last = nextLine;
      continue;
    }
    break;
  }

  state.tokens.push({
    type: 'code',
    startLine: startLine,
    endLine: last
  });

  state.line = nextLine;
  return true;
});


// Horizontal rule - a line with up to 3 leading spaces followed by three or
// more identical markers (`*`, `-` or `_`), optionally separated by spaces,
// and nothing else.
rules.push(function hr(state, startLine, endLine) {
  var ch, marker, count,
      pos = state.bMarks[startLine],
      space_max = pos + 3,
      max = state.eMarks[startLine];

  ch = state.src.charCodeAt(pos);

  // skip up to 3 leading spaces
  while (isWhiteSpace(ch) && pos < max && pos < space_max) {
    pos++;
    ch = state.src.charCodeAt(pos);
  }

  // Check hr marker
  if (ch !== 0x2A/* * */ &&
      ch !== 0x2D/* - */ &&
      ch !== 0x5F/* _ */) {
    return false;
  }

  // remember marker type
  marker = ch;

  // the rest of the line may contain only the marker and spaces
  count = 0;
  while (pos < max) {
    ch = state.src.charCodeAt(pos++);
    if (ch === marker) { count++; continue; }
    if (!isWhiteSpace(ch)) { return false; }
  }

  if (count < 3) { return false; }

  state.tokens.push({ type: 'hr' });

  skipEmptyLines(state, startLine + 1);
  return true;
});


// Paragraph - catch-all rule, consumes lines until an empty one or the end
rules.push(function paragraph(state, startLine, endLine) {
  var nextLine = startLine + 1;

  // jump line-by-line until empty one or EOF
  while (nextLine < endLine && !isEmpty(state, nextLine)) {
    nextLine++;
  }

  state.tokens.push({ type: 'paragraph_open' });
  state.lexerInline.tokenize(
    state,
    state.bMarks[startLine],
    state.eMarks[nextLine - 1]
  );
  state.tokens.push({ type: 'paragraph_close' });

  skipEmptyLines(state, nextLine);
  return true;
});


////////////////////////////////////////////////////////////////////////////////
// Lexer class


// Index of the rule whose function name is `name` in `self.rules`, or -1
function findByName(self, name) {
  for (var i = 0; i < self.rules.length; i++) {
    if (self.rules[i].name === name) {
      return i;
    }
  }
  return -1;
}


// Block Lexer class
//
function LexerBlock() {
  this.rules = [];

  for (var i = 0; i < rules.length; i++) {
    this.after(null, rules[i]);
  }
}


// Replace lexer function `name` with `fn`, or delete it if `fn` is not given.
// Throws if there is no rule with such name.
//
LexerBlock.prototype.at = function (name, fn) {
  var index = findByName(this, name);
  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  if (fn) {
    this.rules[index] = fn;
  } else {
    this.rules = this.rules.slice(0, index).concat(this.rules.slice(index + 1));
  }
};


// Add function to lexer chain before one with given name.
// Or add to start, if name not defined
//
LexerBlock.prototype.before = function (name, fn) {
  if (!name) {
    this.rules.unshift(fn);
    return;
  }

  var index = findByName(this, name);
  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  this.rules.splice(index, 0, fn);
};


// Add function to lexer chain after one with given name.
// Or add to end, if name not defined
//
LexerBlock.prototype.after = function (name, fn) {
  if (!name) {
    this.rules.push(fn);
    return;
  }

  var index = findByName(this, name);
  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  this.rules.splice(index + 1, 0, fn);
};


// Generate tokens for input range of lines [startLine, endLine)
//
// Throws if no rule accepts a line or if the accepting rule does not move
// `state.line` forward - either would otherwise loop forever.
//
LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
  var ok, i,
      rules = this.rules,
      len = this.rules.length,
      line = startLine;

  while (line < endLine) {

    // Try all possible rules.
    // On success, rule should:
    //
    // - update `state.line`
    // - update `state.tokens`
    // - return true

    ok = false;

    for (i = 0; i < len; i++) {
      ok = rules[i](state, line, endLine);
      if (ok) { break; }
    }

    if (!ok) {
      throw new Error('Lexer error: no block rule matched line ' + line);
    }
    if (state.line <= line) {
      throw new Error('Lexer error: block rule did not advance past line ' + line);
    }

    line = state.line;
  }
};
// Inline lexer

////////////////////////////////////////////////////////////////////////////////
// Lexer rules
//
// Every rule has the signature `rule(state, begin, end)`. On a match it
// pushes its token(s) to `state.tokens`, sets `state.pos` to the first
// unconsumed char and returns true; otherwise it returns false.

var rules = [];


// Pure text - consumes the whole remaining range as one token
rules.push(function text(state, begin, end) {
  state.tokens.push({
    type: 'text',
    begin: begin,
    end: end
  });

  state.pos = end;
  return true;
});


////////////////////////////////////////////////////////////////////////////////
// Lexer class


// Index of the rule whose function name is `name` in `self.rules`, or -1
function findByName(self, name) {
  for (var i = 0; i < self.rules.length; i++) {
    if (self.rules[i].name === name) {
      return i;
    }
  }
  return -1;
}


// Inline Lexer class
//
function LexerInline() {
  this.rules = [];

  for (var i = 0; i < rules.length; i++) {
    this.after(null, rules[i]);
  }
}


// Replace lexer function `name` with `fn`, or delete it if `fn` is not given.
// Throws if there is no rule with such name.
//
LexerInline.prototype.at = function (name, fn) {
  var index = findByName(this, name);
  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  if (fn) {
    this.rules[index] = fn;
  } else {
    this.rules = this.rules.slice(0, index).concat(this.rules.slice(index + 1));
  }
};


// Add function to lexer chain before one with given name.
// Or add to start, if name not defined
//
LexerInline.prototype.before = function (name, fn) {
  if (!name) {
    this.rules.unshift(fn);
    return;
  }

  var index = findByName(this, name);
  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  this.rules.splice(index, 0, fn);
};


// Add function to lexer chain after one with given name.
// Or add to end, if name not defined
//
LexerInline.prototype.after = function (name, fn) {
  if (!name) {
    this.rules.push(fn);
    return;
  }

  var index = findByName(this, name);
  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  this.rules.splice(index + 1, 0, fn);
};


// Generate tokens for input range of chars [begin, end)
//
// Throws if no rule accepts a position or if the accepting rule does not move
// `state.pos` forward - either would otherwise loop forever.
//
LexerInline.prototype.tokenize = function (state, begin, end) {
  var ok, i,
      rules = this.rules,
      len = this.rules.length,
      pos = begin;

  while (pos < end) {

    // Try all possible rules.
    // On success, rule should:
    //
    // - update `state.pos`
    // - update `state.tokens`
    // - return true

    ok = false;

    for (i = 0; i < len; i++) {
      ok = rules[i](state, pos, end);
      if (ok) { break; }
    }

    if (!ok) {
      throw new Error('Lexer error: no inline rule matched at ' + pos);
    }
    if (state.pos <= pos) {
      throw new Error('Lexer error: inline rule did not advance past ' + pos);
    }

    pos = state.pos;
  }

  state.pos = end;
};
// Renderer

// Escape the three characters that are significant in HTML text content.
// `&` is replaced first so the entities produced for `<` and `>` are not
// escaped a second time.
function escapeHTML(str) {
  return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

// Return source text of lines [begin, end), line breaks included.
// When `end` is past the last line the slice runs to the end of the source.
function joinLines(state, begin, end) {
  return state.src.slice(
    state.bMarks[begin],
    end < state.lineMax ? state.bMarks[end] : state.src.length
  );
}

// Token type -> function(state, token) appending output to `state.result`.
//
// NOTE(review): the tag literals below were reconstructed - the visible
// source had its markup stripped. Confirm against the upstream file and the
// expected fixture output (in particular `<hr>` vs `<hr />`).
var rules = {};


rules.code = function (state, token) {
  // TODO: check if we need variable indent cut
  var lines = joinLines(state, token.startLine, token.endLine).replace(/^ {4}/gm, '');

  state.result += '<pre><code>' + escapeHTML(lines) + '</code></pre>\n';
};

rules.hr = function (state, token) {
  state.result += '<hr>\n';
};


rules.paragraph_open = function (state, token) {
  state.result += '<p>';
};
rules.paragraph_close = function (state, token) {
  state.result += '</p>\n';
};


rules.text = function (state, token) {
  state.result += escapeHTML(state.src.slice(token.begin, token.end));
};


// TODO: Stub. Do extendable.
function Renderer() {
}

// Run the rule of every token in `state.tokens`, in order, and return the
// accumulated `state.result`. Throws on a token type without a rule.
Renderer.prototype.render = function (state) {
  var i, len, rule,
      tokens = state.tokens;

  for (i = 0, len = tokens.length; i < len; i++) {
    rule = rules[tokens[i].type];

    // TODO: temporary check
    if (!rule) {
      throw new Error('Renderer error: unknown token ' + tokens[i].type);
    }

    rule(state, tokens[i]);
  }

  return state.result;
};
\n'; +}; + + +rules.text = function (state, token) { + state.result += escapeHTML(state.src.slice(token.begin, token.end)); +}; + + +// TODO: Stub. Do extendable. +function Renderer() { +} + +Renderer.prototype.render = function (state) { + var i, len, rule, + tokens = state.tokens; + + for (i = 0, len = tokens.length; i < len; i++) { + rule = rules[tokens[i].type]; + + // TODO: temporary check + if (!rule) { + throw Error('Renderer error: unknown token ' + tokens[i].type); + } + + rule(state, tokens[i]); + } + + return state.result; +}; + +module.exports = Renderer; \ No newline at end of file diff --git a/package.json b/package.json index 1afad83..56f899f 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,9 @@ } ], "main": "index.js", + "dependencies": { + "argparse": "~ 0.1.15" + }, "devDependencies": { "ansi": "^0.3.0", "benchmark": "^1.0.0", diff --git a/test/defaults.js b/test/defaults.js new file mode 100644 index 0000000..0853118 --- /dev/null +++ b/test/defaults.js @@ -0,0 +1,16 @@ +/*global describe*/ +'use strict'; + + +var path = require('path'); + + +var utils = require('./utils'); +var Remarked = require('../'); + + +describe('Default', function () { + var md = new Remarked(); + + utils.addTests(path.join(__dirname, 'fixtures/defaults'), md); +}); \ No newline at end of file diff --git a/test/fixtures/defaults/code/code_blocks.html b/test/fixtures/defaults/code/code_blocks.html new file mode 100644 index 0000000..7d89615 --- /dev/null +++ b/test/fixtures/defaults/code/code_blocks.html @@ -0,0 +1,12 @@ +
code block on the first line
+
+

Regular text.

+
code block indented by spaces
+
+

Regular text.

+
the lines in this block  
+all contain trailing spaces  
+
+

Regular Text.

+
code block on the last line
+
diff --git a/test/fixtures/defaults/code/code_blocks.md b/test/fixtures/defaults/code/code_blocks.md new file mode 100644 index 0000000..01f9a73 --- /dev/null +++ b/test/fixtures/defaults/code/code_blocks.md @@ -0,0 +1,14 @@ + code block on the first line + +Regular text. + + code block indented by spaces + +Regular text. + + the lines in this block + all contain trailing spaces + +Regular Text. + + code block on the last line diff --git a/test/fixtures/defaults/hr/horizontal_rules.html b/test/fixtures/defaults/hr/horizontal_rules.html new file mode 100644 index 0000000..d9bd793 --- /dev/null +++ b/test/fixtures/defaults/hr/horizontal_rules.html @@ -0,0 +1,39 @@ +

Dashes:

+
+
+
+
+
---
+
+
+
+
+
+
- - -
+
+

Asterisks:

+
+
+
+
+
***
+
+
+
+
+
+
* * *
+
+

Underscores:

+
+
+
+
+
___
+
+
+
+
+
+
_ _ _
+
// Register one test per `*.md` fixture found under `dir`.
//
// - dir - directory to scan; sub-directories are scanned recursively and
//         wrapped in a `describe` block named after the directory
// - md  - object with a `render(src)` method producing the output to check
//
// For every `name.md` the expected output is read from the sibling
// `name.html`; the registered test asserts that `md.render()` of the source
// is strictly equal to it. Entries that are neither directories nor regular
// files, and files with other extensions, are ignored.
function addTests(dir, md) {
  var files = fs.readdirSync(dir);

  files.forEach(function (name) {
    var filePath = path.join(dir, name);
    var stat = fs.statSync(filePath);

    if (stat.isDirectory()) {
      describe(name, function () {
        addTests(filePath, md);
      });
      return;
    }

    // `isFile` is a method - it has to be called to get the answer
    if (!stat.isFile()) { return; }
    if (path.extname(filePath) !== '.md') { return; }

    var mustBe = fs.readFileSync(path.join(dir, path.basename(name, '.md') + '.html'), 'utf8');
    var src = fs.readFileSync(filePath, 'utf8');

    it(name, function () {
      // (actual, expected) - keeps assertion failure messages the right way round
      assert.strictEqual(md.render(src), mustBe);
    });
  });
}