From 584e55861c4e362f05b33e129a65293aa3deec27 Mon Sep 17 00:00:00 2001 From: Vitaly Puzrin Date: Sun, 4 Jan 2015 11:03:26 +0300 Subject: [PATCH] Moved input normalization to core chain --- lib/parser_block.js | 26 +---------------------- lib/parser_core.js | 1 + lib/presets/commonmark.js | 1 + lib/presets/zero.js | 1 + lib/rules_core/block.js | 2 +- lib/rules_core/normalize.js | 41 ++++++++++++++++++++++++++++++++++++ lib/rules_core/state_core.js | 1 + 7 files changed, 47 insertions(+), 26 deletions(-) create mode 100644 lib/rules_core/normalize.js diff --git a/lib/parser_block.js b/lib/parser_block.js index 7b11b71..20ce566 100644 --- a/lib/parser_block.js +++ b/lib/parser_block.js @@ -100,9 +100,6 @@ ParserBlock.prototype.tokenize = function (state, startLine, endLine) { } }; -var TABS_SCAN_RE = /[\n\t]/g; -var NEWLINES_RE = /\r[\n\u0085]|[\u2424\u2028\u0085]/g; -var NULL_RE = /\u0000/g; /** * ParserBlock.parse(str, md, env, outTokens) @@ -110,31 +107,10 @@ var NULL_RE = /\u0000/g; * Process input string and push block tokens into `outTokens` **/ ParserBlock.prototype.parse = function (src, md, env, outTokens) { - var state, lineStart = 0, lastTabPos = 0; + var state; if (!src) { return []; } - // Normalize newlines - src = src.replace(NEWLINES_RE, '\n'); - - // Strin NULL characters - src = src.replace(NULL_RE, '\uFFFD'); - - // Replace tabs with proper number of spaces (1..4) - if (src.indexOf('\t') >= 0) { - src = src.replace(TABS_SCAN_RE, function (match, offset) { - var result; - if (src.charCodeAt(offset) === 0x0A) { - lineStart = offset + 1; - lastTabPos = 0; - return match; - } - result = ' '.slice((offset - lineStart - lastTabPos) % 4); - lastTabPos = offset - lineStart + 1; - return result; - }); - } - state = new this.State(src, md, env, outTokens); this.tokenize(state, state.line, state.lineMax); diff --git a/lib/parser_core.js b/lib/parser_core.js index 7f85893..795ac67 100644 --- a/lib/parser_core.js +++ b/lib/parser_core.js @@ -11,6 +11,7 @@ var 
Ruler = require('./ruler'); var _rules = [ + [ 'normalize', require('./rules_core/normalize') ], [ 'block', require('./rules_core/block') ], [ 'inline', require('./rules_core/inline') ], [ 'replacements', require('./rules_core/replacements') ], diff --git a/lib/presets/commonmark.js b/lib/presets/commonmark.js index 8c8ca3b..ae1a68a 100644 --- a/lib/presets/commonmark.js +++ b/lib/presets/commonmark.js @@ -32,6 +32,7 @@ module.exports = { core: { rules: [ + 'normalize', 'block', 'inline' ] diff --git a/lib/presets/zero.js b/lib/presets/zero.js index 07bfe54..6863195 100644 --- a/lib/presets/zero.js +++ b/lib/presets/zero.js @@ -33,6 +33,7 @@ module.exports = { core: { rules: [ + 'normalize', 'block', 'inline' ] diff --git a/lib/rules_core/block.js b/lib/rules_core/block.js index 39b2d5e..0b9ce12 100644 --- a/lib/rules_core/block.js +++ b/lib/rules_core/block.js @@ -5,7 +5,7 @@ module.exports = function block(state) { if (state.inlineMode) { state.tokens.push({ type: 'inline', - content: state.src.replace(/\n/g, ' ').trim(), + content: state.src, level: 0, lines: [ 0, 1 ], children: [] diff --git a/lib/rules_core/normalize.js b/lib/rules_core/normalize.js new file mode 100644 index 0000000..7d22711 --- /dev/null +++ b/lib/rules_core/normalize.js @@ -0,0 +1,41 @@ +// Normalize input string: unify line breaks, replace NULLs, expand tabs + +'use strict'; + + +var TABS_SCAN_RE = /[\n\t]/g; +var NEWLINES_RE = /\r[\n\u0085]|[\u2424\u2028\u0085]/g; +var NULL_RE = /\u0000/g; + + +module.exports = function normalize(state) { + var str, lineStart, lastTabPos; + + if (!state.normalizeInput) { return; } + + // Normalize newlines + str = state.src.replace(NEWLINES_RE, '\n'); + + // Replace NULL characters with U+FFFD + str = str.replace(NULL_RE, '\uFFFD'); + + // Replace tabs with proper number of spaces (1..4) + if (str.indexOf('\t') >= 0) { + lineStart = 0; + lastTabPos = 0; + + str = str.replace(TABS_SCAN_RE, function (match, offset) { + var result; + if (str.charCodeAt(offset) === 0x0A) { + lineStart = offset + 1; + lastTabPos = 0; + 
return match; + } + result = '    '.slice((offset - lineStart - lastTabPos) % 4); + lastTabPos = offset - lineStart + 1; + return result; + }); + } + + state.src = str; +}; diff --git a/lib/rules_core/state_core.js b/lib/rules_core/state_core.js index 86a5231..704e546 100644 --- a/lib/rules_core/state_core.js +++ b/lib/rules_core/state_core.js @@ -7,5 +7,6 @@ module.exports = function StateCore(src, md, env) { this.env = env; this.tokens = []; this.inlineMode = false; + this.normalizeInput = true; this.md = md; // link to parser instance };