Browse Source

Implement emphasis parsing

pull/14/head
Alex Kocharin 10 years ago
parent
commit
74a84f8ecd
  1. 3
      lib/lexer_inline.js
  2. 204
      lib/lexer_inline/emphasis.js
  3. 14
      lib/renderer.js

3
lib/lexer_inline.js

@ -18,6 +18,7 @@ rules.push(require('./lexer_inline/escape'));
rules.push(require('./lexer_inline/backticks')); rules.push(require('./lexer_inline/backticks'));
// //
// //
rules.push(require('./lexer_inline/emphasis'));
rules.push(require('./lexer_inline/autolink')); rules.push(require('./lexer_inline/autolink'));
rules.push(require('./lexer_inline/htmltag')); rules.push(require('./lexer_inline/htmltag'));
rules.push(require('./lexer_inline/entity')); rules.push(require('./lexer_inline/entity'));
@ -119,7 +120,7 @@ LexerInline.prototype.tokenize = function (state) {
var ok, i, var ok, i,
rules = this.rules, rules = this.rules,
len = this.rules.length, len = this.rules.length,
end = state.src.length; end = state.posMax;
while (state.pos < end) { while (state.pos < end) {

204
lib/lexer_inline/emphasis.js

@ -0,0 +1,204 @@
// Process *this* and _that_
'use strict';
// Tells whether a character code is an ASCII digit or an ASCII letter
// (0-9, A-Z, a-z). Any other value, including -1 and NaN, yields false.
function isAlphaNum(code) {
  if (code < 0x30 /* 0 */) { return false; }
  if (code <= 0x39 /* 9 */) { return true; }
  if (code < 0x41 /* A */) { return false; }
  if (code <= 0x5A /* Z */) { return true; }
  return code >= 0x61 /* a */ && code <= 0x7A /* z */;
}
// Decides whether the run of markers beginning at "start" may open emphasis.
//
// state - lexer state; reads state.src, state.posMax and state.pending
// start - index into state.src, expected to point at a marker ("*" or "_")
//
// Returns the length of the run (1, 2 or 3), or -1 when it cannot open.
//
// Rules applied (paraphrasing the spec), a marker can open emphasis iff
//   1. it is not part of a sequence of four or more unescaped markers,
//   2. it is not followed by whitespace,
//   3. it is "_" and it is not preceded by an ASCII alphanumeric character,
//   4. either it is not followed by a marker or it is followed immediately
//      by strong emphasis.
function parseStart(state, start) {
  var text = state.src,
      buffered = state.pending,
      marker = text.charCodeAt(start),
      // look at no more than four characters, and never beyond posMax
      limit = Math.min(state.posMax, start + 4),
      before = -1,
      cursor = start;

  // the character before the run is the last one buffered as plain text
  if (buffered.length !== 0) {
    before = buffered.charCodeAt(buffered.length - 1);
  }

  // a marker right after the same marker belongs to an earlier run
  if (before === marker) { return -1; }

  for (; cursor < limit; cursor++) {
    if (text.charCodeAt(cursor) !== marker) { break; }
  }

  // Hitting the limit means the run is four markers long (condition 1) or
  // reaches posMax, leaving nothing to emphasise. Past this point the run
  // is therefore at most three markers long.
  if (cursor >= limit) { return -1; }

  // condition 2: only a plain space counts as whitespace here
  if (text.charCodeAt(cursor) === 0x20) { return -1; }

  // condition 3: "_" must not open in the middle of a word
  if (marker === 0x5F /* _ */ && isAlphaNum(before)) { return -1; }

  return cursor - start;
}
// Decides whether the run of markers beginning at "start" may close emphasis.
//
// state - lexer state; reads state.src, state.posMax and state.pending
// start - index into state.src, expected to point at a marker ("*" or "_")
//
// Returns the length of the run (1, 2 or 3), or -1 when it cannot close.
//
// Rules applied (paraphrasing the spec), a marker can close emphasis iff
//   1. it is not part of a sequence of four or more unescaped markers,
//   2. it is not preceded by whitespace,
//   3. it is not "_" or it is not followed by an ASCII alphanumeric character.
function parseEnd(state, start) {
  var text = state.src,
      buffered = state.pending,
      marker = text.charCodeAt(start),
      // look at no more than four characters, and never beyond posMax
      stop = Math.min(state.posMax, start + 4),
      // the character before the run is the last one buffered as plain text
      before = buffered.length === 0 ? -1 : buffered.charCodeAt(buffered.length - 1),
      cursor = start,
      runLength;

  // A marker right after the same marker belongs to an earlier run, and
  // condition 2 rejects a run preceded by a plain space.
  if (before === marker || before === 0x20) { return -1; }

  while (cursor < stop && text.charCodeAt(cursor) === marker) { cursor++; }
  runLength = cursor - start;

  // condition 1: four or more markers in a row cannot close emphasis
  if (runLength >= 4) { return -1; }

  // condition 3: "_" must not close in the middle of a word
  if (marker === 0x5F /* _ */ && cursor < stop && isAlphaNum(text.charCodeAt(cursor))) {
    return -1;
  }

  return runLength;
}
module.exports = function emphasis(state/*, silent*/) {
var startCount,
count,
oldLength,
oldPending,
found,
ok,
i,
oldCount,
newCount,
len,
rules,
stack,
breakOutOfOuterLoop,
max = state.posMax,
start = state.pos,
marker = state.src.charCodeAt(start);
if (marker !== 0x5F/* _ */ && marker !== 0x2A /* * */) { return false; }
startCount = parseStart(state, start);
if (startCount < 0) { return false; }
oldLength = state.tokens.length;
oldPending = state.pending;
state.pos = start + startCount;
stack = [ startCount ];
rules = state.lexer.rules;
len = rules.length;
while (state.pos < max) {
if (state.src.charCodeAt(state.pos) === marker) {
count = parseEnd(state, state.pos);
if (count >= 1) {
oldCount = stack.pop();
newCount = count;
while (oldCount !== newCount) {
if (oldCount === 3) {
// e.g. `***foo*`
stack.push(3 - newCount);
break;
}
if (newCount < oldCount) {
// assert(oldCount == 2 && newCount == 1)
// i.e. `**foo* bar*`
// not valid for now, but might be in the future
// eslint is misconfigured, so it doesn't accept "break MAIN;"
// here is a crappy workaround
breakOutOfOuterLoop = true;
break;
}
// assert(newCount > oldCount)
newCount -= oldCount;
if (stack.length === 0) { break; }
state.pos += oldCount;
oldCount = stack.pop();
}
if (breakOutOfOuterLoop) { break; }
if (stack.length === 0) {
startCount = oldCount;
found = true;
break;
}
state.pos += count;
continue;
}
count = parseStart(state, state.pos);
if (count >= 1) {
stack.push(count);
state.pos += count;
continue;
}
}
for (i = 0; i < len; i++) {
if (rules[i] !== emphasis) { ok = rules[i](state); }
if (ok) { break; }
}
if (!ok) { state.pending += state.src[state.pos++]; }
}
// restore old state
state.tokens.length = oldLength;
state.pending = oldPending;
if (!found) {
// parser failed to find ending tag, so it's not valid emphasis
state.pos = start;
return false;
}
// found!
state.posMax = state.pos;
state.pos = start + startCount;
if (state.pending) { state.pushPending(); }
if (startCount === 2 || startCount === 3) { state.push({ type: 'strong_open' }); }
if (startCount === 1 || startCount === 3) { state.push({ type: 'em_open' }); }
state.lexer.tokenize(state);
if (startCount === 1 || startCount === 3) { state.push({ type: 'em_close' }); }
if (startCount === 2 || startCount === 3) { state.push({ type: 'strong_close' }); }
state.pos = state.posMax + startCount;
state.posMax = max;
return true;
};

14
lib/renderer.js

@ -139,6 +139,20 @@ rules.td_close = function (/*tokens, idx, options*/) {
}; };
// Builds a renderer rule that ignores its arguments and always returns the
// given piece of HTML.
function fixedTagRule(html) {
  return function (/*tokens, idx, options*/) {
    return html;
  };
}

// Emphasis tokens render as bare tags without attributes.
rules.strong_open  = fixedTagRule('<strong>');
rules.strong_close = fixedTagRule('</strong>');
rules.em_open      = fixedTagRule('<em>');
rules.em_close     = fixedTagRule('</em>');
rules.hardbreak = function (tokens, idx, options) { rules.hardbreak = function (tokens, idx, options) {
return (options.xhtml ? '<br />' : '<br>') + '\n'; return (options.xhtml ? '<br />' : '<br>') + '\n';
}; };

Loading…
Cancel
Save