Browse Source

Tweak emphasis parsing algorithm

In the case of intersecting tags (e.g. "*foo _bar* baz_"), the last
complete tag should take priority.
pull/14/head
Alex Kocharin 10 years ago
parent
commit
f74bbf2515
  1. 17
      lib/parser_inline.js
  2. 123
      lib/rules_inline/emphasis.js
  3. 2
      lib/rules_inline/state_inline.js
  4. 29
      lib/rules_inline/strikethrough.js
  5. 9
      test/fixtures/remarkable/strikeout.txt

17
lib/parser_inline.js

@ -61,16 +61,23 @@ ParserInline.prototype.rulesUpdate = function () {
// returns `true` if any rule reported success
//
ParserInline.prototype.skipToken = function (state) {
var ok, i,
var i, pos = state.pos,
rules = this._rules,
len = this._rules.length;
if (state.memo[pos] !== undefined) {
state.pos = state.memo[pos];
return true;
}
for (i = 0; i < len; i++) {
ok = rules[i](state, true);
if (ok) { break; }
if (rules[i](state, true)) {
state.memo[pos] = state.pos;
return true;
}
}
return ok;
return false;
};
@ -92,7 +99,7 @@ ParserInline.prototype.tokenize = function (state) {
// - return true
for (i = 0; i < len; i++) {
ok = rules[i](state);
ok = rules[i](state, false);
if (ok) { break; }
}

123
lib/rules_inline/emphasis.js

@ -9,97 +9,54 @@ function isAlphaNum(code) {
(code >= 0x61 /* a */ && code <= 0x7A /* z */);
}
// returns the amount of markers (1, 2, 3, 4+), or -1 on failure;
// parse sequence of emphasis markers,
// "start" should point at a valid marker
//
// note: a run of 4+ markers is still not valid emphasis;
// it should be treated as a special case
function parseStart(state, start) {
function scanDelims(state, start) {
var pos = start, lastChar, nextChar, count,
can_open = true,
can_close = true,
max = state.posMax,
marker = state.src.charCodeAt(start);
lastChar = start > 0 ? state.src.charCodeAt(start - 1) : -1;
while (pos < max && state.src.charCodeAt(pos) === marker) { pos++; }
if (pos >= max) { return -1; }
if (pos >= max) { can_open = false; }
count = pos - start;
// Quoting spec:
//
// Character can open emphasis iff
// 1. it is not part of a sequence of four or more unescaped markers,
// 2. it is not followed by whitespace,
// 3. it is "_" and it is not preceded by an ASCII alphanumeric character, and
// 4. either it is not followed by a marker or it is followed immediately by strong emphasis.
if (count >= 4) {
// check condition 1
// sequence of four or more unescaped markers can't start an emphasis
return count;
}
// check condition 2, marker followed by whitespace
nextChar = state.src.charCodeAt(pos);
if (nextChar === 0x20 || nextChar === 0x0A) { return -1; }
if (marker === 0x5F /* _ */) {
// check condition 3, if it's the beginning of the word
// we need to look back for this
if (isAlphaNum(lastChar)) { return -1; }
}
return count;
}
// returns the amount of markers (1, 2, 3, 4+), or -1 on failure;
// "start" should point at a valid marker
//
// note: a run of 4+ markers is still not valid emphasis;
// it should be treated as a special case
function parseEnd(state, start) {
var pos = start, lastChar, count,
max = state.posMax,
marker = state.src.charCodeAt(start);
lastChar = start > 0 ? state.src.charCodeAt(start - 1) : -1;
while (pos < max && state.src.charCodeAt(pos) === marker) { pos++; }
count = pos - start;
// Quoting spec:
//
// Character can close emphasis iff
// 1. it is not part of a sequence of four or more unescaped markers,
// 2. it is not preceded by whitespace,
// 3. it is not "_" or it is not followed by an ASCII alphanumeric character
if (count >= 4) {
// check condition 1
// sequence of four or more unescaped markers can't end an emphasis
return count;
}
// check condition 2, marker preceded by whitespace
if (lastChar === 0x20 || lastChar === 0x0A) { return -1; }
if (marker === 0x5F) {
// check condition 3, if it's the end of the word
if (pos < max && isAlphaNum(state.src.charCodeAt(pos))) { return -1; }
// sequence of four or more unescaped markers can't start/end an emphasis
can_open = can_close = false;
} else {
nextChar = pos < max ? state.src.charCodeAt(pos) : -1;
// check whitespace conditions
if (nextChar === 0x20 || nextChar === 0x0A) { can_open = false; }
if (lastChar === 0x20 || lastChar === 0x0A) { can_close = false; }
if (marker === 0x5F /* _ */) {
// check if we aren't inside the word
if (isAlphaNum(lastChar)) { can_open = false; }
if (isAlphaNum(nextChar)) { can_close = false; }
}
}
return count;
return {
can_open: can_open,
can_close: can_close,
delims: count
};
}
module.exports = function emphasis(state, silent) {
var startCount,
count,
oldFlag,
found,
ok,
oldCount,
newCount,
stack,
res,
max = state.posMax,
start = state.pos,
haveLiteralAsterisk,
@ -110,28 +67,26 @@ module.exports = function emphasis(state, silent) {
// skip emphasis in links because it has lower priority, compare:
// [foo *bar]()*
// [foo `bar]()`
if (state.validateInsideEm || state.validateInsideLink) { return false; }
if (state.validateInsideLink) { return false; }
startCount = parseStart(state, start);
if (startCount < 0) { return false; }
if (startCount >= 4) {
res = scanDelims(state, start);
startCount = res.delims;
if (!res.can_open) {
state.pos += startCount;
state.pending += state.src.slice(start, startCount);
if (!silent) { state.pending += state.src.slice(start, state.pos); }
return true;
}
if (state.level >= state.options.maxNesting) { return false; }
oldFlag = state.validateInsideEm;
state.pos = start + startCount;
stack = [ startCount ];
state.validateInsideEm = true;
while (state.pos < max) {
if (state.src.charCodeAt(state.pos) === marker && !haveLiteralAsterisk) {
count = parseEnd(state, state.pos);
if (count >= 1 && count < 4) {
res = scanDelims(state, state.pos);
count = res.delims;
if (res.can_close) {
oldCount = stack.pop();
newCount = count;
@ -158,8 +113,7 @@ module.exports = function emphasis(state, silent) {
continue;
}
count = parseStart(state, state.pos);
if (count >= 1 && count < 4) {
if (res.can_open) {
stack.push(count);
state.pos += count;
continue;
@ -176,9 +130,6 @@ module.exports = function emphasis(state, silent) {
}
}
// restore old state
state.validateInsideEm = oldFlag;
if (!found) {
// parser failed to find ending tag, so it's not valid emphasis
state.pos = start;
@ -186,10 +137,10 @@ module.exports = function emphasis(state, silent) {
}
// found!
if (!silent) {
state.posMax = state.pos;
state.pos = start + startCount;
state.posMax = state.pos;
state.pos = start + startCount;
if (!silent) {
if (startCount === 2 || startCount === 3) {
state.push({ type: 'strong_open', level: state.level++ });
}

2
lib/rules_inline/state_inline.js

@ -15,11 +15,11 @@ function StateInline(src, parser, options, env) {
this.pending = '';
this.pendingLevel = 0;
this.validateInsideEm = false;
this.validateInsideLink = false;
this.linkLevel = 0;
this.link_content = '';
this.label_nest_level = 0; // for stmd-like backtrack optimization
this.memo = {};
}

29
lib/rules_inline/strikethrough.js

@ -3,11 +3,9 @@
'use strict';
module.exports = function strikethrough(state, silent) {
var oldFlag,
found,
var found,
ok,
pos,
stack,
max = state.posMax,
start = state.pos,
lastChar,
@ -19,7 +17,7 @@ module.exports = function strikethrough(state, silent) {
// make del a lower-priority tag with respect to links, same as <em>;
// this code also prevents recursion
if (state.validateInsideEm || state.validateInsideLink) { return false; }
if (state.validateInsideLink) { return false; }
if (state.level >= state.options.maxNesting) { return false; }
@ -39,11 +37,7 @@ module.exports = function strikethrough(state, silent) {
return true;
}
oldFlag = state.validateInsideEm;
state.pos = start + 2;
state.validateInsideEm = true;
stack = 1;
while (state.pos + 1 < max) {
if (state.src.charCodeAt(state.pos) === 0x7E/* ~ */) {
@ -53,14 +47,6 @@ module.exports = function strikethrough(state, silent) {
if (nextChar !== 0x7E/* ~ */ && lastChar !== 0x7E/* ~ */) {
if (lastChar !== 0x20 && lastChar !== 0x0A) {
// closing '~~'
stack--;
} else if (nextChar !== 0x20 && nextChar !== 0x0A) {
// opening '~~'
stack++;
} // else {
// // standalone ' ~~ ' indented with spaces
//}
if (stack <= 0) {
found = true;
break;
}
@ -75,20 +61,17 @@ module.exports = function strikethrough(state, silent) {
}
}
// restore old state
state.validateInsideEm = oldFlag;
if (!found) {
// parser failed to find ending tag, so it's not valid emphasis
state.pos = start;
return false;
}
if (!silent) {
// found!
state.posMax = state.pos;
state.pos = start + 2;
// found!
state.posMax = state.pos;
state.pos = start + 2;
if (!silent) {
state.push({ type: 'del_open', level: state.level++ });
state.parser.tokenize(state);
state.push({ type: 'del_close', level: --state.level });

9
test/fixtures/remarkable/strikeout.txt

@ -25,8 +25,8 @@ Strikeouts have the same priority as emphases:
~~**test~~**
.
<p><strong>~~test</strong>~~</p>
<p><del>**test</del>**</p>
<p>**<del>test**</del></p>
<p>~~<strong>test~~</strong></p>
.
Strikeouts have the same priority as emphases with respect to links:
@ -52,10 +52,13 @@ Strikeouts have the same priority as emphases with respect to backticks:
Nested strikeouts:
.
~~foo ~~bar~~ baz~~
.
<p><del>foo <del>bar</del> baz</del></p>
.
.
~~f **o ~~o b~~ a** r~~
.
<p><del>foo <del>bar</del> baz</del></p>
<p><del>f <strong>o <del>o b</del> a</strong> r</del></p>
.

Loading…
Cancel
Save