Browse Source

Tweak emphasis parsing algorithm

In the case of intersecting tags (e.g. "*foo _bar* baz_"), the last
complete tag should take priority.
pull/14/head
Alex Kocharin 10 years ago
parent
commit
f74bbf2515
  1. 17
      lib/parser_inline.js
  2. 111
      lib/rules_inline/emphasis.js
  3. 2
      lib/rules_inline/state_inline.js
  4. 23
      lib/rules_inline/strikethrough.js
  5. 9
      test/fixtures/remarkable/strikeout.txt

17
lib/parser_inline.js

@ -61,16 +61,23 @@ ParserInline.prototype.rulesUpdate = function () {
// returns `true` if any rule reported success // returns `true` if any rule reported success
// //
ParserInline.prototype.skipToken = function (state) { ParserInline.prototype.skipToken = function (state) {
var ok, i, var i, pos = state.pos,
rules = this._rules, rules = this._rules,
len = this._rules.length; len = this._rules.length;
if (state.memo[pos] !== undefined) {
state.pos = state.memo[pos];
return true;
}
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
ok = rules[i](state, true); if (rules[i](state, true)) {
if (ok) { break; } state.memo[pos] = state.pos;
return true;
}
} }
return ok; return false;
}; };
@ -92,7 +99,7 @@ ParserInline.prototype.tokenize = function (state) {
// - return true // - return true
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
ok = rules[i](state); ok = rules[i](state, false);
if (ok) { break; } if (ok) { break; }
} }

111
lib/rules_inline/emphasis.js

@ -9,97 +9,54 @@ function isAlphaNum(code) {
(code >= 0x61 /* a */ && code <= 0x7A /* z */); (code >= 0x61 /* a */ && code <= 0x7A /* z */);
} }
// returns the amount of markers (1, 2, 3, 4+), or -1 on failure; // parse sequence of emphasis markers,
// "start" should point at a valid marker // "start" should point at a valid marker
// function scanDelims(state, start) {
// note: in case if 4+ markers it is still not a valid emphasis,
// should be treated as a special case
function parseStart(state, start) {
var pos = start, lastChar, nextChar, count, var pos = start, lastChar, nextChar, count,
can_open = true,
can_close = true,
max = state.posMax, max = state.posMax,
marker = state.src.charCodeAt(start); marker = state.src.charCodeAt(start);
lastChar = start > 0 ? state.src.charCodeAt(start - 1) : -1; lastChar = start > 0 ? state.src.charCodeAt(start - 1) : -1;
while (pos < max && state.src.charCodeAt(pos) === marker) { pos++; } while (pos < max && state.src.charCodeAt(pos) === marker) { pos++; }
if (pos >= max) { return -1; } if (pos >= max) { can_open = false; }
count = pos - start; count = pos - start;
// Quoting spec:
//
// Character can open emphasis iff
// 1. it is not part of a sequence of four or more unescaped markers,
// 2. it is not followed by whitespace,
// 3. it is "_" and it is not preceded by an ASCII alphanumeric character, and
// 4. either it is not followed by a marker or it is followed immediately by strong emphasis.
if (count >= 4) { if (count >= 4) {
// check condition 1 // sequence of four or more unescaped markers can't start/end an emphasis
// sequence of four or more unescaped markers can't start an emphasis can_open = can_close = false;
return count; } else {
} nextChar = pos < max ? state.src.charCodeAt(pos) : -1;
// check condition 2, marker followed by whitespace // check whitespace conditions
nextChar = state.src.charCodeAt(pos); if (nextChar === 0x20 || nextChar === 0x0A) { can_open = false; }
if (nextChar === 0x20 || nextChar === 0x0A) { return -1; } if (lastChar === 0x20 || lastChar === 0x0A) { can_close = false; }
if (marker === 0x5F /* _ */) { if (marker === 0x5F /* _ */) {
// check condition 3, if it's the beginning of the word // check if we aren't inside the word
// we need to look back for this if (isAlphaNum(lastChar)) { can_open = false; }
if (isAlphaNum(lastChar)) { return -1; } if (isAlphaNum(nextChar)) { can_close = false; }
} }
return count;
} }
// returns the amount of markers (1, 2, 3, 4+), or -1 on failure; return {
// "start" should point at a valid marker can_open: can_open,
// can_close: can_close,
// note: in case if 4+ markers it is still not a valid emphasis, delims: count
// should be treated as a special case };
function parseEnd(state, start) {
var pos = start, lastChar, count,
max = state.posMax,
marker = state.src.charCodeAt(start);
lastChar = start > 0 ? state.src.charCodeAt(start - 1) : -1;
while (pos < max && state.src.charCodeAt(pos) === marker) { pos++; }
count = pos - start;
// Quoting spec:
//
// Character can close emphasis iff
// 1. it is not part of a sequence of four or more unescaped markers,
// 2. it is not preceded by whitespace,
// 3. it is not "_" or it is not followed by an ASCII alphanumeric character
if (count >= 4) {
// check condition 1
// sequence of four or more unescaped markers can't start an emphasis
return count;
}
// check condition 2, marker preceded by whitespace
if (lastChar === 0x20 || lastChar === 0x0A) { return -1; }
if (marker === 0x5F) {
// check condition 3, if it's the end of the word
if (pos < max && isAlphaNum(state.src.charCodeAt(pos))) { return -1; }
}
return count;
} }
module.exports = function emphasis(state, silent) { module.exports = function emphasis(state, silent) {
var startCount, var startCount,
count, count,
oldFlag,
found, found,
ok, ok,
oldCount, oldCount,
newCount, newCount,
stack, stack,
res,
max = state.posMax, max = state.posMax,
start = state.pos, start = state.pos,
haveLiteralAsterisk, haveLiteralAsterisk,
@ -110,28 +67,26 @@ module.exports = function emphasis(state, silent) {
// skip emphasis in links because it has lower priority, compare: // skip emphasis in links because it has lower priority, compare:
// [foo *bar]()* // [foo *bar]()*
// [foo `bar]()` // [foo `bar]()`
if (state.validateInsideEm || state.validateInsideLink) { return false; } if (state.validateInsideLink) { return false; }
startCount = parseStart(state, start); res = scanDelims(state, start);
if (startCount < 0) { return false; } startCount = res.delims;
if (startCount >= 4) { if (!res.can_open) {
state.pos += startCount; state.pos += startCount;
state.pending += state.src.slice(start, startCount); if (!silent) { state.pending += state.src.slice(start, state.pos); }
return true; return true;
} }
if (state.level >= state.options.maxNesting) { return false; } if (state.level >= state.options.maxNesting) { return false; }
oldFlag = state.validateInsideEm;
state.pos = start + startCount; state.pos = start + startCount;
stack = [ startCount ]; stack = [ startCount ];
state.validateInsideEm = true;
while (state.pos < max) { while (state.pos < max) {
if (state.src.charCodeAt(state.pos) === marker && !haveLiteralAsterisk) { if (state.src.charCodeAt(state.pos) === marker && !haveLiteralAsterisk) {
count = parseEnd(state, state.pos); res = scanDelims(state, state.pos);
if (count >= 1 && count < 4) { count = res.delims;
if (res.can_close) {
oldCount = stack.pop(); oldCount = stack.pop();
newCount = count; newCount = count;
@ -158,8 +113,7 @@ module.exports = function emphasis(state, silent) {
continue; continue;
} }
count = parseStart(state, state.pos); if (res.can_open) {
if (count >= 1 && count < 4) {
stack.push(count); stack.push(count);
state.pos += count; state.pos += count;
continue; continue;
@ -176,9 +130,6 @@ module.exports = function emphasis(state, silent) {
} }
} }
// restore old state
state.validateInsideEm = oldFlag;
if (!found) { if (!found) {
// parser failed to find ending tag, so it's not valid emphasis // parser failed to find ending tag, so it's not valid emphasis
state.pos = start; state.pos = start;
@ -186,10 +137,10 @@ module.exports = function emphasis(state, silent) {
} }
// found! // found!
if (!silent) {
state.posMax = state.pos; state.posMax = state.pos;
state.pos = start + startCount; state.pos = start + startCount;
if (!silent) {
if (startCount === 2 || startCount === 3) { if (startCount === 2 || startCount === 3) {
state.push({ type: 'strong_open', level: state.level++ }); state.push({ type: 'strong_open', level: state.level++ });
} }

2
lib/rules_inline/state_inline.js

@ -15,11 +15,11 @@ function StateInline(src, parser, options, env) {
this.pending = ''; this.pending = '';
this.pendingLevel = 0; this.pendingLevel = 0;
this.validateInsideEm = false;
this.validateInsideLink = false; this.validateInsideLink = false;
this.linkLevel = 0; this.linkLevel = 0;
this.link_content = ''; this.link_content = '';
this.label_nest_level = 0; // for stmd-like backtrack optimization this.label_nest_level = 0; // for stmd-like backtrack optimization
this.memo = {};
} }

23
lib/rules_inline/strikethrough.js

@ -3,11 +3,9 @@
'use strict'; 'use strict';
module.exports = function strikethrough(state, silent) { module.exports = function strikethrough(state, silent) {
var oldFlag, var found,
found,
ok, ok,
pos, pos,
stack,
max = state.posMax, max = state.posMax,
start = state.pos, start = state.pos,
lastChar, lastChar,
@ -19,7 +17,7 @@ module.exports = function strikethrough(state, silent) {
// make del lower a priority tag with respect to links, same as <em>; // make del lower a priority tag with respect to links, same as <em>;
// this code also prevents recursion // this code also prevents recursion
if (state.validateInsideEm || state.validateInsideLink) { return false; } if (state.validateInsideLink) { return false; }
if (state.level >= state.options.maxNesting) { return false; } if (state.level >= state.options.maxNesting) { return false; }
@ -39,11 +37,7 @@ module.exports = function strikethrough(state, silent) {
return true; return true;
} }
oldFlag = state.validateInsideEm;
state.pos = start + 2; state.pos = start + 2;
state.validateInsideEm = true;
stack = 1;
while (state.pos + 1 < max) { while (state.pos + 1 < max) {
if (state.src.charCodeAt(state.pos) === 0x7E/* ~ */) { if (state.src.charCodeAt(state.pos) === 0x7E/* ~ */) {
@ -53,14 +47,6 @@ module.exports = function strikethrough(state, silent) {
if (nextChar !== 0x7E/* ~ */ && lastChar !== 0x7E/* ~ */) { if (nextChar !== 0x7E/* ~ */ && lastChar !== 0x7E/* ~ */) {
if (lastChar !== 0x20 && lastChar !== 0x0A) { if (lastChar !== 0x20 && lastChar !== 0x0A) {
// closing '~~' // closing '~~'
stack--;
} else if (nextChar !== 0x20 && nextChar !== 0x0A) {
// opening '~~'
stack++;
} // else {
// // standalone ' ~~ ' indented with spaces
//}
if (stack <= 0) {
found = true; found = true;
break; break;
} }
@ -75,20 +61,17 @@ module.exports = function strikethrough(state, silent) {
} }
} }
// restore old state
state.validateInsideEm = oldFlag;
if (!found) { if (!found) {
// parser failed to find ending tag, so it's not valid emphasis // parser failed to find ending tag, so it's not valid emphasis
state.pos = start; state.pos = start;
return false; return false;
} }
if (!silent) {
// found! // found!
state.posMax = state.pos; state.posMax = state.pos;
state.pos = start + 2; state.pos = start + 2;
if (!silent) {
state.push({ type: 'del_open', level: state.level++ }); state.push({ type: 'del_open', level: state.level++ });
state.parser.tokenize(state); state.parser.tokenize(state);
state.push({ type: 'del_close', level: --state.level }); state.push({ type: 'del_close', level: --state.level });

9
test/fixtures/remarkable/strikeout.txt

@ -25,8 +25,8 @@ Strikeouts have the same priority as emphases:
~~**test~~** ~~**test~~**
. .
<p><strong>~~test</strong>~~</p> <p>**<del>test**</del></p>
<p><del>**test</del>**</p> <p>~~<strong>test~~</strong></p>
. .
Strikeouts have the same priority as emphases with respect to links: Strikeouts have the same priority as emphases with respect to links:
@ -52,10 +52,13 @@ Strikeouts have the same priority as emphases with respect to backticks:
Nested strikeouts: Nested strikeouts:
. .
~~foo ~~bar~~ baz~~ ~~foo ~~bar~~ baz~~
.
<p><del>foo <del>bar</del> baz</del></p>
.
.
~~f **o ~~o b~~ a** r~~ ~~f **o ~~o b~~ a** r~~
. .
<p><del>foo <del>bar</del> baz</del></p>
<p><del>f <strong>o <del>o b</del> a</strong> r</del></p> <p><del>f <strong>o <del>o b</del> a</strong> r</del></p>
. .

Loading…
Cancel
Save