diff --git a/lib/lexer_inline.js b/lib/lexer_inline.js
index 1dc68cb..6345e7b 100644
--- a/lib/lexer_inline.js
+++ b/lib/lexer_inline.js
@@ -18,6 +18,7 @@ rules.push(require('./lexer_inline/escape'));
 rules.push(require('./lexer_inline/backticks'));
 //
 //
+rules.push(require('./lexer_inline/emphasis'));
 rules.push(require('./lexer_inline/autolink'));
 rules.push(require('./lexer_inline/htmltag'));
 rules.push(require('./lexer_inline/entity'));
@@ -119,7 +120,7 @@ LexerInline.prototype.tokenize = function (state) {
   var ok, i,
       rules = this.rules,
       len = this.rules.length,
-      end = state.src.length;
+      end = state.posMax;
 
   while (state.pos < end) {
 
diff --git a/lib/lexer_inline/emphasis.js b/lib/lexer_inline/emphasis.js
new file mode 100644
index 0000000..1d6d134
--- /dev/null
+++ b/lib/lexer_inline/emphasis.js
@@ -0,0 +1,234 @@
+// Process *this* and _that_
+
+'use strict';
+
+
+// Returns true when "code" is the char code of an ASCII letter or digit.
+function isAlphaNum(code) {
+  return (code >= 0x30 /* 0 */ && code <= 0x39 /* 9 */) ||
+         (code >= 0x41 /* A */ && code <= 0x5A /* Z */) ||
+         (code >= 0x61 /* a */ && code <= 0x7A /* z */);
+}
+
+// Returns true when "code" is the char code of a space or a line feed.
+function isWhiteSpace(code) {
+  return code === 0x20 /* space */ || code === 0x0A /* \n */;
+}
+
+// Checks whether the run of markers at "start" can open an emphasis.
+//
+// returns the amount of markers (1, 2, 3), or -1 on failure;
+// "start" should point at a valid marker
+function parseStart(state, start) {
+  var pos = start, lastChar, count,
+      max = Math.min(state.posMax, pos + 4),
+      marker = state.src.charCodeAt(start);
+
+  // last char of the text collected so far, -1 when there is none
+  lastChar = state.pending.length !== 0 ? state.pending.charCodeAt(state.pending.length - 1) : -1;
+
+  // the collected text already ends with the same marker char
+  if (lastChar === marker) { return -1; }
+
+  while (pos < max && state.src.charCodeAt(pos) === marker) { pos++; }
+  // the run reaches the scan limit: 4 markers, or no input left after it
+  if (pos >= max) { return -1; }
+  count = pos - start;
+
+  // Quoting spec:
+  //
+  // Character can open emphasis iff
+  //  1. it is not part of a sequence of four or more unescaped markers,
+  //  2. it is not followed by whitespace,
+  //  3. it is "_" and it is not preceded by an ASCII alphanumeric character, and
+  //  4. either it is not followed by a marker or it is followed immediately by strong emphasis.
+
+  if (count >= 4) {
+    // check condition 1
+    // sequence of four or more unescaped markers can't start an emphasis
+    return -1;
+  }
+
+  // check condition 2, marker followed by whitespace
+  if (isWhiteSpace(state.src.charCodeAt(pos))) { return -1; }
+
+  if (marker === 0x5F /* _ */) {
+    // check condition 3, if it's the beginning of the word
+    // we need to look back for this
+    if (isAlphaNum(lastChar)) { return -1; }
+  }
+
+  return count;
+}
+
+// Checks whether the run of markers at "start" can close an emphasis.
+//
+// returns the amount of markers (1, 2, 3), or -1 on failure;
+// "start" should point at a valid marker
+function parseEnd(state, start) {
+  var pos = start, lastChar, count,
+      max = Math.min(state.posMax, pos + 4),
+      marker = state.src.charCodeAt(start);
+
+  // last char of the text collected so far, -1 when there is none
+  lastChar = state.pending.length !== 0 ? state.pending.charCodeAt(state.pending.length - 1) : -1;
+
+  // the collected text already ends with the same marker char
+  if (lastChar === marker) { return -1; }
+
+  while (pos < max && state.src.charCodeAt(pos) === marker) { pos++; }
+  count = pos - start;
+
+  // Quoting spec:
+  //
+  // Character can close emphasis iff
+  //  1. it is not part of a sequence of four or more unescaped markers,
+  //  2. it is not preceded by whitespace,
+  //  3. it is not "_" or it is not followed by an ASCII alphanumeric character
+
+  if (count >= 4) {
+    // check condition 1
+    // sequence of four or more unescaped markers can't close an emphasis
+    return -1;
+  }
+
+  // check condition 2, marker preceded by whitespace
+  if (isWhiteSpace(lastChar)) { return -1; }
+
+  if (marker === 0x5F /* _ */) {
+    // check condition 3, if it's the end of the word
+    if (pos < max && isAlphaNum(state.src.charCodeAt(pos))) { return -1; }
+  }
+
+  return count;
+}
+
+// Inline rule: tries to parse an emphasis starting at state.pos.
+//
+// On success pushes the open tokens, runs state.lexer.tokenize on the text
+// between the markers, pushes the close tokens, moves state.pos past the
+// closing markers and returns true. On failure leaves state.pos,
+// state.pending and state.tokens as they were and returns false.
+module.exports = function emphasis(state/*, silent*/) {
+  var startCount,
+      count,
+      oldLength,
+      oldPending,
+      found,
+      ok,
+      i,
+      oldCount,
+      newCount,
+      closerPos,
+      len,
+      rules,
+      stack,
+      breakOutOfOuterLoop,
+      max = state.posMax,
+      start = state.pos,
+      marker = state.src.charCodeAt(start);
+
+  if (marker !== 0x5F/* _ */ && marker !== 0x2A /* * */) { return false; }
+
+  startCount = parseStart(state, start);
+  if (startCount < 0) { return false; }
+
+  // the scan below runs the other rules and appends to state.pending,
+  // so remember what has to be rolled back afterwards
+  oldLength = state.tokens.length;
+  oldPending = state.pending;
+
+  state.pos = start + startCount;
+  // marker counts of the openers that are not closed yet, outermost first
+  stack = [ startCount ];
+  rules = state.lexer.rules;
+  len = rules.length;
+
+  while (state.pos < max) {
+    if (state.src.charCodeAt(state.pos) === marker) {
+      count = parseEnd(state, state.pos);
+      if (count >= 1) {
+        closerPos = state.pos;
+        oldCount = stack.pop();
+        newCount = count;
+
+        while (oldCount !== newCount) {
+          if (oldCount === 3) {
+            // e.g. `***foo*`
+            stack.push(3 - newCount);
+            break;
+          }
+
+          if (newCount < oldCount) {
+            // assert(oldCount == 2 && newCount == 1)
+            // i.e. `**foo* bar*`
+            // not valid for now, but might be in the future
+
+            // eslint is misconfigured, so it doesn't accept "break MAIN;"
+            // here is a crappy workaround
+            breakOutOfOuterLoop = true;
+            break;
+          }
+
+          // assert(newCount > oldCount)
+          newCount -= oldCount;
+
+          if (stack.length === 0) { break; }
+          state.pos += oldCount;
+          oldCount = stack.pop();
+        }
+
+        if (breakOutOfOuterLoop) { break; }
+
+        if (stack.length === 0) {
+          startCount = oldCount;
+          found = true;
+          break;
+        }
+        // skip exactly this run of markers; state.pos may already have been
+        // moved inside the loop above, so don't add "count" on top of it
+        state.pos = closerPos + count;
+        continue;
+      }
+
+      count = parseStart(state, state.pos);
+      if (count >= 1) {
+        stack.push(count);
+        state.pos += count;
+        continue;
+      }
+    }
+
+    // forget the result of the previous position
+    ok = false;
+    for (i = 0; i < len; i++) {
+      if (rules[i] !== emphasis) { ok = rules[i](state); }
+      if (ok) { break; }
+    }
+
+    if (!ok) { state.pending += state.src[state.pos++]; }
+  }
+
+  // restore old state
+  state.tokens.length = oldLength;
+  state.pending = oldPending;
+
+  if (!found) {
+    // parser failed to find ending tag, so it's not valid emphasis
+    state.pos = start;
+    return false;
+  }
+
+  // found!
+  // tokenize only the text between the opening and the closing markers
+  state.posMax = state.pos;
+  state.pos = start + startCount;
+  if (state.pending) { state.pushPending(); }
+  if (startCount === 2 || startCount === 3) { state.push({ type: 'strong_open' }); }
+  if (startCount === 1 || startCount === 3) { state.push({ type: 'em_open' }); }
+  state.lexer.tokenize(state);
+  if (startCount === 1 || startCount === 3) { state.push({ type: 'em_close' }); }
+  if (startCount === 2 || startCount === 3) { state.push({ type: 'strong_close' }); }
+  state.pos = state.posMax + startCount;
+  state.posMax = max;
+  return true;
+};
diff --git a/lib/renderer.js b/lib/renderer.js
index 3fe83b6..cf6c85b 100644
--- a/lib/renderer.js
+++ b/lib/renderer.js
@@ -139,6 +139,20 @@ rules.td_close = function (/*tokens, idx, options*/) {
 };
 
 
+rules.strong_open = function(/*tokens, idx, options*/) {
+  return '<strong>';
+};
+rules.strong_close = function(/*tokens, idx, options*/) {
+  return '</strong>';
+};
+rules.em_open = function(/*tokens, idx, options*/) {
+  return '<em>';
+};
+rules.em_close = function(/*tokens, idx, options*/) {
+  return '</em>';
+};
+
+
 rules.hardbreak = function (tokens, idx, options) {
   return (options.xhtml ? '<br />' : '<br>') + '\n';
 };