Browse Source

Added lexer/renderer stubs

pull/14/head
Vitaly Puzrin 10 years ago
parent
commit
c72bba2090
  1. 2
      Makefile
  2. 2
      benchmark/implementations/current/index.js
  3. 12
      benchmark/profile.js
  4. 89
      bin/remarkable.js
  5. 112
      index.js
  6. 301
      lib/lexer_block.js
  7. 134
      lib/lexer_inline.js
  8. 65
      lib/renderer.js
  9. 3
      package.json
  10. 16
      test/defaults.js
  11. 12
      test/fixtures/defaults/code/code_blocks.html
  12. 14
      test/fixtures/defaults/code/code_blocks.md
  13. 39
      test/fixtures/defaults/hr/horizontal_rules.html
  14. 67
      test/fixtures/defaults/hr/horizontal_rules.md
  15. 38
      test/utils.js

2
Makefile

@ -23,7 +23,7 @@ lint:
test: lint
# NODE_ENV=test mocha -R spec
NODE_ENV=test mocha -R spec
gh-pages:

2
benchmark/implementations/current/index.js

@ -1,6 +1,6 @@
'use strict'
var Remarkable = new require('../../../');
var Remarkable = require('../../../');
var md = new Remarkable();
exports.run = function(data) {

12
benchmark/profile.js

@ -0,0 +1,12 @@
'use strict';

// Profiling driver: renders one sample document over and over, so that the
// hot paths of the parser dominate the profile.

var fs = require('fs');
var Remarkable = require('../');

var ITERATIONS = 20000;

var md = new Remarkable();
var sample = fs.readFileSync(__dirname +'/samples/lorem1.txt', 'utf8');
var round = 0;

while (round < ITERATIONS) {
  md.render(sample);
  round++;
}

89
bin/remarkable.js

@ -0,0 +1,89 @@
#!/usr/bin/env node
'use strict';
var fs = require('fs');
var argparse = require('argparse');
var Remarkable = require('..');
////////////////////////////////////////////////////////////////////////////////
// Command line definition.
// `prog` is the program name shown in usage/help output, so it has to be
// this tool's own name.
var cli = new argparse.ArgumentParser({
  prog: 'remarkable',
  version: require('../package.json').version,
  addHelp: true
});

// Optional positional argument with the input file; defaults to `-`.
cli.addArgument(['file'], {
  help: 'File to read',
  nargs: '?',
  defaultValue: '-'
});

// Flag read later (as `options.trace`) to decide how errors are printed.
cli.addArgument(['-t', '--trace'], {
  help: 'Show stack trace on error',
  action: 'storeTrue'
});

var options = cli.parseArgs();
// Read the whole input as text and pass it to `callback(error, text)`.
//
// - filename: path of the file to read, or `-` to read from stdin
// - encoding: encoding used to decode the content (e.g. 'utf8')
// - callback: node-style callback, receives `(error, text)`
//
function readFile(filename, encoding, callback) {
  var chunks;

  // Decide by the argument that was passed in, not by the global CLI
  // options, so the function does what its signature says.
  if (filename !== '-') {
    fs.readFile(filename, encoding, callback);
    return;
  }

  // read from stdin
  chunks = [];

  process.stdin.on('data', function (chunk) {
    chunks.push(chunk);
  });

  // Report stream failures through the callback instead of letting them
  // surface as an unhandled 'error' event.
  process.stdin.on('error', callback);

  process.stdin.on('end', function () {
    callback(null, Buffer.concat(chunks).toString(encoding));
  });
}
////////////////////////////////////////////////////////////////////////////////
// Print an error and terminate with exit code `code`.
// The stack trace is shown only when `--trace` was given and a stack exists;
// otherwise the message (or the stringified error) is printed.
function fail(error, code) {
  console.error(
    options.trace && error.stack ||
    error.message ||
    String(error));
  process.exit(code);
}

// Entry point: read the input, render it and write the result to stdout.
// Exit codes: 0 - success, 1 - read or render error, 2 - input file missing.
readFile(options.file, 'utf8', function (error, input) {
  var output, md;

  if (error) {
    if ('ENOENT' === error.code) {
      console.error('File not found: ' + options.file);
      process.exit(2);
    }
    fail(error, 1);
  }

  md = new Remarkable();

  try {
    output = md.render(input);
  } catch (e) {
    fail(e, 1);
  }

  // Exit from the write callback: `process.exit()` does not wait for pending
  // asynchronous stdout writes, so exiting right after `write()` can cut the
  // output short.
  process.stdout.write(output, function () {
    process.exit(0);
  });
});

112
index.js

@ -1,15 +1,123 @@
'use strict';
var Renderer = require('./lib/renderer');
var LexerBlock = require('./lib/lexer_block');
var LexerInline = require('./lib/lexer_inline');
// Parser state class
//
// Keeps the normalized source and everything the lexers and the renderer
// share while one document is processed.
//
// - src: source text
// - lexerBlock, lexerInline, renderer: stored as shortcuts for nested calls
// - options: options object, stored as is
//
function State(src, lexerBlock, lexerInline, renderer, options) {
  var ch, s, start, pos, len;

  // TODO: Temporary solution. Check if more effective possible,
  // without str change
  //
  // - replace tabs with spaces
  // - remove `\r` to simplify newlines check (???)
  this.src = src
    .replace(/\t/g, ' ')
    .replace(/\r/g, '')
    .replace(/\u00a0/g, ' ')
    .replace(/\u2424/g, '\n');

  // Shortcuts to simplify nested calls
  this.lexerBlock = lexerBlock;
  this.lexerInline = lexerInline;
  this.renderer = renderer;

  // TODO: (?) set directly for faster access.
  this.options = options;

  //
  // Internal state variables
  //

  this.tokens = [];

  this.bMarks = []; // lines begin/end markers for fast jumps
  this.eMarks = []; //

  // Generate markers.
  s = this.src;
  for (start = pos = 0, len = s.length; pos < len; pos++) {
    ch = s.charCodeAt(pos);

    if (ch === 0x0D || ch === 0x0A) {
      this.bMarks.push(start);
      this.eMarks.push(pos);
      start = pos + 1;
    }

    // CR LF is a single line break: peek at the NEXT char and swallow the LF.
    // (`\r` is stripped above, so this only matters if that changes.)
    if (ch === 0x0D && pos + 1 < len && s.charCodeAt(pos + 1) === 0x0A) {
      pos++;
      start++;
    }
  }

  // Close the last line only when the source does not end with a line break
  // (or is empty). Both conditions must hold - with `||` the test is always
  // true and a phantom empty line is appended after a trailing newline.
  if (ch !== 0x0D && ch !== 0x0A) {
    this.bMarks.push(start);
    this.eMarks.push(len);
  }

  // inline lexer variables
  this.pos = 0; // char index in src

  // block lexer variables
  this.blkLevel = 0;
  this.blkIndent = 0;
  this.line = 0; // line index in src
  this.lineMax = this.bMarks.length;

  // renderer
  this.result = '';
}
// Main class
//
// Builds a parser instance with its own inline lexer, block lexer and
// renderer. `options` is optional; when given, it is applied via `set()`.
//
function Remarkable(options) {
  this.options = {};
  this.state = null;

  this.lexerInline = new LexerInline();
  this.lexerBlock = new LexerBlock();
  this.renderer = new Renderer();

  if (!options) { return; }

  this.set(options);
}
// Merge `options` into the instance options.
// Every own enumerable key of `options` is copied, replacing a value that
// was stored under the same key before.
Remarkable.prototype.set = function (options) {
  Object.keys(options).forEach(function (key) {
    this.options[key] = options[key];
  }, this);
};
// Render `src` and return the renderer's result.
// Empty or missing input yields an empty string without any parsing.
Remarkable.prototype.render = function (src) {
  if (!src) { return ''; }

  // Fresh state per call: normalized source, line markers, token list.
  var state = new State(
    src,
    this.lexerBlock,
    this.lexerInline,
    this.renderer,
    this.options
  );

  // TODO: skip leading empty lines
  state.lexerBlock.tokenize(state, state.line, state.lineMax);

  // TODO: ??? eat empty paragraphs from tail

  return this.renderer.render(state);
};
module.exports = Remarkable;

301
lib/lexer_block.js

@ -0,0 +1,301 @@
// Block lexer
'use strict';
////////////////////////////////////////////////////////////////////////////////
// Helpers
// Tell whether a char code is white space.
// For now only the plain space (0x20) counts.
function isWhiteSpace(ch) {
  // TODO: check other spaces and tabs
  var SPACE = 0x20;

  return ch === SPACE;
}
// Check if the given line is empty or contains white space only.
// Scans from the line's begin marker up to the first line break (or the end
// of the source); any other non-space char makes the line non-empty.
function isEmpty(state, line) {
  var src = state.src,
      total = src.length,
      idx, code;

  for (idx = state.bMarks[line]; idx < total; idx++) {
    code = src.charCodeAt(idx);

    if (code === 0x0A || code === 0x0D) { break; }
    if (!isWhiteSpace(code)) { return false; }
  }

  return true; // line break or end of source reached
}
// Return the absolute position of the first char after `indent` columns of
// leading white space on the given line, or -1 when the line is out of
// range or is not indented that far.
// A tab (0x09) counts as 4 columns, a white space char as 1.
function getIndent(state, line, indent) {
  var code, cursor, lineEnd,
      remaining = indent;

  if (line >= state.lineMax) { return -1; }

  cursor = state.bMarks[line];
  lineEnd = state.eMarks[line];

  for (; cursor < lineEnd && remaining > 0; cursor++) {
    code = state.src.charCodeAt(cursor);

    if (code === 0x09) {
      remaining -= 4;
    } else if (isWhiteSpace(code)) {
      remaining -= 1;
    } else {
      return -1;
    }
  }

  return remaining > 0 ? -1 : cursor;
}
// Move `state.line` to the first non-empty line at or after `from`.
// When only empty lines remain, `state.line` ends up at `state.lineMax`
// (or stays at `from` if that is already past the end).
function skipEmptyLines(state, from) {
  var line = from;

  while (line < state.lineMax && isEmpty(state, line)) {
    line++;
  }

  state.line = line;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer rules
var rules = [];
// code
//
// Matches a run of lines indented by 4 columns. Empty lines inside the run
// are consumed; they extend the emitted range only in pedantic mode or when
// an indented line follows them.
rules.push(function code(state, startLine, endLine) {
  var cursor, blockEnd, blank;

  if (getIndent(state, startLine, 4) === -1) { return false; }

  blockEnd = startLine + 1;

  for (cursor = blockEnd; cursor < endLine; cursor++) {
    blank = isEmpty(state, cursor);

    // first non-empty line without the indent terminates the block
    if (!blank && getIndent(state, cursor, 4) === -1) { break; }

    if (!blank || state.options.pedantic) {
      blockEnd = cursor + 1;
    }
  }

  state.tokens.push({
    type: 'code',
    startLine: startLine,
    endLine: blockEnd
  });

  state.line = cursor;
  return true;
});
// Horizontal rule
//
// Matches a line that consists of up to 3 leading spaces followed by three
// or more identical markers (`*`, `-` or `_`), optionally separated and
// followed by spaces: `***`, `- - -`, `_____`, `--- `.
//
// On success pushes an `hr` token, moves `state.line` to the next non-empty
// line and returns true. Otherwise returns false and leaves state untouched.
rules.push(function hr(state, startLine, endLine) {
  var ch, marker, cnt,
      pos = state.bMarks[startLine],
      space_max = pos + 3,
      max = state.eMarks[startLine];

  ch = state.src.charCodeAt(pos);

  // quick test first char
  if (!isWhiteSpace(ch) &&
      ch !== 0x2A/* * */ &&
      ch !== 0x2D/* - */ &&
      ch !== 0x5F/* _ */) {
    return false;
  }

  // skip up to 3 leading spaces
  while (isWhiteSpace(ch) && pos < max && pos < space_max) {
    pos++;
    ch = state.src.charCodeAt(pos);
  }

  // Check hr marker (`pos >= max` - the line had nothing but spaces)
  if (pos >= max ||
      (ch !== 0x2A/* * */ &&
       ch !== 0x2D/* - */ &&
       ch !== 0x5F/* _ */)) {
    return false;
  }

  // remember marker type
  marker = ch;

  // The rest of the line may hold only markers of the same type and spaces.
  // Anything else (including trailing text) disqualifies the line.
  cnt = 0;
  while (pos < max) {
    ch = state.src.charCodeAt(pos++);
    if (ch === marker) {
      cnt++;
    } else if (!isWhiteSpace(ch)) {
      return false;
    }
  }

  if (cnt < 3) { return false; }

  state.tokens.push({ type: 'hr' });

  skipEmptyLines(state, startLine + 1);
  return true;
});
// Paragraph
//
// Fallback rule, always succeeds. Takes `startLine` plus all following lines
// up to the first empty one (or `endLine`), runs the inline lexer over that
// range between `paragraph_open` / `paragraph_close` tokens, then skips the
// empty lines after the paragraph.
rules.push(function paragraph(state, startLine, endLine) {
  var stop;

  // find the first empty line after `startLine`, or hit the end of range
  for (stop = startLine + 1; stop < endLine; stop++) {
    if (isEmpty(state, stop)) { break; }
  }

  state.tokens.push({ type: 'paragraph_open' });
  state.lexerInline.tokenize(
    state,
    state.bMarks[startLine],
    state.eMarks[stop - 1]
  );
  state.tokens.push({ type: 'paragraph_close' });

  skipEmptyLines(state, stop);
  return true;
});
////////////////////////////////////////////////////////////////////////////////
// Lexer class
// Find a rule by the name of its function.
// Returns the index of the first match in `self.rules`, or -1 if none.
function findByName(self, name) {
  var idx = 0;

  while (idx < self.rules.length) {
    if (self.rules[idx].name === name) { return idx; }
    idx++;
  }

  return -1;
}
// Block Lexer class
//
// A new instance starts with the built-in rules, appended one by one in
// declaration order through `after()`.
//
function LexerBlock() {
  this.rules = [];

  rules.forEach(function (rule) {
    this.after(null, rule);
  }, this);
}
// Replace/delete lexer function
//
// - name: name of the rule to replace or delete
// - fn: replacement rule; when omitted, the rule is deleted
//
// Throws an Error when no rule with that name exists.
//
LexerBlock.prototype.at = function (name, fn) {
  // `findByName` takes the lexer as its first argument
  var index = findByName(this, name);

  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  if (fn) {
    this.rules[index] = fn;
  } else {
    // build a new array instead of mutating the current one in place
    this.rules = this.rules.slice(0, index).concat(this.rules.slice(index + 1));
  }
};
// Add function to lexer chain before one with given name.
// Or add to start, if name not defined
//
// Throws an Error when `name` is given but no such rule exists.
//
LexerBlock.prototype.before = function (name, fn) {
  if (!name) {
    this.rules.unshift(fn);
    return;
  }

  // `findByName` takes the lexer as its first argument
  var index = findByName(this, name);

  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  this.rules.splice(index, 0, fn);
};
// Add function to lexer chain after one with given name.
// Or add to end, if name not defined
//
// Throws an Error when `name` is given but no such rule exists.
//
LexerBlock.prototype.after = function (name, fn) {
  if (!name) {
    this.rules.push(fn);
    return;
  }

  // `findByName` takes the lexer as its first argument
  var index = findByName(this, name);

  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  this.rules.splice(index + 1, 0, fn);
};
// Generate tokens for input range
//
// Walks lines `startLine` .. `endLine - 1`, offering each position to the
// rules in chain order; the first rule that returns true wins.
//
LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
  var ok, i,
      rules = this.rules,
      len = this.rules.length,
      line = startLine;

  while (line < endLine) {

    // Try all possible rules.
    // On success, rule should:
    //
    // - update `state.line`
    // - update `state.tokens`
    // - return true
    ok = false;

    for (i = 0; i < len; i++) {
      ok = rules[i](state, line, endLine);
      if (ok) { break; }
    }

    if (ok && state.line > line) {
      line = state.line;
      continue;
    }

    // No rule matched (e.g. the fallback rule was removed), or a rule
    // reported success without moving forward. Skip the line - otherwise
    // the loop would never terminate.
    line++;
    state.line = line;
  }
};
module.exports = LexerBlock;

134
lib/lexer_inline.js

@ -0,0 +1,134 @@
// Inline lexer
'use strict';
////////////////////////////////////////////////////////////////////////////////
// Lexer rules
var rules = [];
// Pure text
//
// Always succeeds: emits one `text` token for the whole `begin`..`end` range
// and moves `state.pos` to `end`.
rules.push(function text(state, begin, end) {
  var token = {
    type: 'text',
    begin: begin,
    end: end
  };

  state.tokens.push(token);
  state.pos = end;

  return true;
});
////////////////////////////////////////////////////////////////////////////////
// Lexer class
// Find a rule by the name of its function.
// Returns the index of the first match in `self.rules`, or -1 if none.
function findByName(self, name) {
  var idx = 0;

  while (idx < self.rules.length) {
    if (self.rules[idx].name === name) { return idx; }
    idx++;
  }

  return -1;
}
// Inline Lexer class
//
// A new instance starts with the built-in rules, appended one by one in
// declaration order through `after()`.
//
function LexerInline() {
  this.rules = [];

  rules.forEach(function (rule) {
    this.after(null, rule);
  }, this);
}
// Replace/delete lexer function
//
// - name: name of the rule to replace or delete
// - fn: replacement rule; when omitted, the rule is deleted
//
// Throws an Error when no rule with that name exists.
//
LexerInline.prototype.at = function (name, fn) {
  // `findByName` takes the lexer as its first argument
  var index = findByName(this, name);

  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  if (fn) {
    this.rules[index] = fn;
  } else {
    // build a new array instead of mutating the current one in place
    this.rules = this.rules.slice(0, index).concat(this.rules.slice(index + 1));
  }
};
// Add function to lexer chain before one with given name.
// Or add to start, if name not defined
//
// Throws an Error when `name` is given but no such rule exists.
//
LexerInline.prototype.before = function (name, fn) {
  if (!name) {
    this.rules.unshift(fn);
    return;
  }

  // `findByName` takes the lexer as its first argument
  var index = findByName(this, name);

  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  this.rules.splice(index, 0, fn);
};
// Add function to lexer chain after one with given name.
// Or add to end, if name not defined
//
// Throws an Error when `name` is given but no such rule exists.
//
LexerInline.prototype.after = function (name, fn) {
  if (!name) {
    this.rules.push(fn);
    return;
  }

  // `findByName` takes the lexer as its first argument
  var index = findByName(this, name);

  if (index === -1) {
    throw new Error('Lexer rule not found: ' + name);
  }

  this.rules.splice(index + 1, 0, fn);
};
// Generate tokens for input range
//
// Walks chars `begin` .. `end - 1`, offering each position to the rules in
// chain order; the first rule that returns true wins. `state.pos` is set to
// `end` when the range is done.
//
LexerInline.prototype.tokenize = function (state, begin, end) {
  var ok, i,
      rules = this.rules,
      len = this.rules.length,
      pos = begin;

  while (pos < end) {

    // Try all possible rules.
    // On success, rule should:
    //
    // - update `state.pos`
    // - update `state.tokens`
    // - return true
    ok = false;

    for (i = 0; i < len; i++) {
      ok = rules[i](state, pos, end);
      if (ok) { break; }
    }

    if (ok && state.pos > pos) {
      pos = state.pos;
      continue;
    }

    // No rule matched (e.g. the fallback rule was removed), or a rule
    // reported success without moving forward. Skip the char - otherwise
    // the loop would never terminate.
    pos++;
  }

  state.pos = end;
};
module.exports = LexerInline;

65
lib/renderer.js

@ -0,0 +1,65 @@
'use strict';
// Escape the chars that are special in HTML text: `&`, `<` and `>`.
function escapeHTML(str) {
  var entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };

  return str.replace(/[&<>]/g, function (ch) {
    return entities[ch];
  });
}
// Return the source text of lines `begin` .. `end - 1`, line breaks included.
// When `end` is not a valid line index the text runs to the end of source.
function joinLines(state, begin, end) {
  var from = state.bMarks[begin],
      to;

  if (end < state.lineMax) {
    to = state.bMarks[end];
  } else {
    to = state.src.length;
  }

  return state.src.slice(from, to);
}
var rules = {};
// Render a `code` token: take the token's line range, cut 4 leading spaces
// from every line, escape the text and wrap it into `<pre><code>`.
rules.code = function (state, token) {
  // TODO: check if we need variable indent cut
  var raw = joinLines(state, token.startLine, token.endLine);
  var dedented = raw.replace(/^ {4}/gm, '');

  state.result += '<pre><code>' + escapeHTML(dedented) + '</code></pre>\n';
};
// Render an `hr` token.
rules.hr = function (state, token) {
  state.result = state.result + '<hr>\n';
};

// Paragraph markup is split in two tokens; the inline content between them
// is rendered by the rules of the enclosed tokens.
rules.paragraph_open = function (state, token) {
  state.result = state.result + '<p>';
};

rules.paragraph_close = function (state, token) {
  state.result = state.result + '</p>\n';
};

// Render a `text` token: the escaped source slice `token.begin`..`token.end`.
rules.text = function (state, token) {
  var raw = state.src.slice(token.begin, token.end);

  state.result += escapeHTML(raw);
};
// TODO: Stub. Do extendable.
function Renderer() {
}

// Run the render rule of every token in `state.tokens`, in order, and return
// the accumulated `state.result`.
// Throws an Error for a token type that has no rule.
Renderer.prototype.render = function (state) {
  var queue = state.tokens,
      total = queue.length,
      idx = 0,
      token, handler;

  while (idx < total) {
    token = queue[idx++];
    handler = rules[token.type];

    // TODO: temporary check
    if (!handler) {
      throw new Error('Renderer error: unknown token ' + token.type);
    }

    handler(state, token);
  }

  return state.result;
};
module.exports = Renderer;

3
package.json

@ -16,6 +16,9 @@
}
],
"main": "index.js",
"dependencies": {
"argparse": "~ 0.1.15"
},
"devDependencies": {
"ansi": "^0.3.0",
"benchmark": "^1.0.0",

16
test/defaults.js

@ -0,0 +1,16 @@
/*global describe*/
'use strict';
var path = require('path');
var utils = require('./utils');
var Remarked = require('../');
// Run every fixture under `fixtures/defaults` against a parser created with
// default options.
describe('Default', function () {
  var md = new Remarked();
  var fixturesDir = path.join(__dirname, 'fixtures/defaults');

  utils.addTests(fixturesDir, md);
});

12
test/fixtures/defaults/code/code_blocks.html

@ -0,0 +1,12 @@
<pre><code>code block on the first line
</code></pre>
<p>Regular text.</p>
<pre><code>code block indented by spaces
</code></pre>
<p>Regular text.</p>
<pre><code>the lines in this block
all contain trailing spaces
</code></pre>
<p>Regular Text.</p>
<pre><code>code block on the last line
</code></pre>

14
test/fixtures/defaults/code/code_blocks.md

@ -0,0 +1,14 @@
code block on the first line
Regular text.
code block indented by spaces
Regular text.
the lines in this block
all contain trailing spaces
Regular Text.
code block on the last line

39
test/fixtures/defaults/hr/horizontal_rules.html

@ -0,0 +1,39 @@
<p>Dashes:</p>
<hr>
<hr>
<hr>
<hr>
<pre><code>---
</code></pre>
<hr>
<hr>
<hr>
<hr>
<pre><code>- - -
</code></pre>
<p>Asterisks:</p>
<hr>
<hr>
<hr>
<hr>
<pre><code>***
</code></pre>
<hr>
<hr>
<hr>
<hr>
<pre><code>* * *
</code></pre>
<p>Underscores:</p>
<hr>
<hr>
<hr>
<hr>
<pre><code>___
</code></pre>
<hr>
<hr>
<hr>
<hr>
<pre><code>_ _ _
</code></pre>

67
test/fixtures/defaults/hr/horizontal_rules.md

@ -0,0 +1,67 @@
Dashes:
---
---
---
---
---
- - -
- - -
- - -
- - -
- - -
Asterisks:
***
***
***
***
***
* * *
* * *
* * *
* * *
* * *
Underscores:
___
___
___
___
___
_ _ _
_ _ _
_ _ _
_ _ _
_ _ _

38
test/utils.js

@ -0,0 +1,38 @@
/*global describe, it*/
'use strict';
var fs = require('fs');
var path = require('path');
var assert = require('assert');
// Register one test per `*.md` fixture found in `dir`. Subdirectories are
// scanned recursively, each one wrapped in its own `describe` block.
//
// - dir: directory to scan
// - md: object with a `render(src)` method
//
// For `name.md` the expected output is read from `name.html` in the same
// directory; fixtures are loaded at registration time.
//
function addTests(dir, md) {
  fs.readdirSync(dir).forEach(function (name) {
    var filePath = path.join(dir, name);
    var stat = fs.statSync(filePath);
    var expected, src;

    if (stat.isDirectory()) {
      describe(name, function () {
        addTests(filePath, md);
      });
      return;
    }

    // `isFile` is a method: without the call the check is always truthy.
    if (!stat.isFile() || path.extname(filePath) !== '.md') { return; }

    expected = fs.readFileSync(path.join(dir, path.basename(name, '.md') + '.html'), 'utf8');
    src = fs.readFileSync(filePath, 'utf8');

    it(name, function () {
      // (actual, expected) - keeps failure reports the right way round
      assert.strictEqual(md.render(src), expected);
    });
  });
}
module.exports.addTests = addTests;
Loading…
Cancel
Save