From b2aee1a9789c74e5eba16ac71d6a7b256fc61067 Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Wed, 22 Jul 2015 16:09:25 +0300 Subject: [PATCH] Expand tabs only when it's needed, as per CommonMark 0.21 --- lib/common/utils.js | 10 +++ lib/parser_block.js | 2 +- lib/rules_block/blockquote.js | 65 ++++++++++++--- lib/rules_block/code.js | 4 +- lib/rules_block/fence.js | 6 +- lib/rules_block/heading.js | 6 +- lib/rules_block/hr.js | 6 +- lib/rules_block/html_block.js | 2 +- lib/rules_block/lheading.js | 4 +- lib/rules_block/list.js | 62 +++++++++++--- lib/rules_block/paragraph.js | 4 +- lib/rules_block/reference.js | 21 +++-- lib/rules_block/state_block.js | 70 +++++++++++----- lib/rules_block/table.js | 4 +- lib/rules_core/normalize.js | 21 +---- lib/rules_inline/escape.js | 8 +- lib/rules_inline/image.js | 9 +- lib/rules_inline/link.js | 9 +- test/fixtures/commonmark/bad.txt | 83 ------------------- test/fixtures/commonmark/good.txt | 52 ++++++++++++ .../markdown-it/commonmark_extras.txt | 27 ++++++ 21 files changed, 298 insertions(+), 177 deletions(-) diff --git a/lib/common/utils.js b/lib/common/utils.js index 34ac689..9a8bcee 100644 --- a/lib/common/utils.js +++ b/lib/common/utils.js @@ -151,6 +151,15 @@ function escapeRE (str) { //////////////////////////////////////////////////////////////////////////////// +function isSpace(code) { + switch (code) { + case 0x09: + case 0x20: + return true; + } + return false; +} + // Zs (unicode class) || [\t\f\v\r\n] function isWhiteSpace(code) { if (code >= 0x2000 && code <= 0x200A) { return true; } @@ -258,6 +267,7 @@ exports.fromCodePoint = fromCodePoint; // exports.replaceEntities = replaceEntities; exports.escapeHtml = escapeHtml; exports.arrayReplaceAt = arrayReplaceAt; +exports.isSpace = isSpace; exports.isWhiteSpace = isWhiteSpace; exports.isMdAsciiPunct = isMdAsciiPunct; exports.isPunctChar = isPunctChar; diff --git a/lib/parser_block.js b/lib/parser_block.js index 921c851..84f1e2b 100644 --- a/lib/parser_block.js +++ b/lib/parser_block.js @@ -59,7 +59,7 @@ ParserBlock.prototype.tokenize = function (state, startLine, endLine) { // Termination condition for nested calls. // Nested calls currently used for blockquotes & lists - if (state.tShift[line] < state.blkIndent) { break; } + if (state.sCount[line] < state.blkIndent) { break; } // If nesting level exceeded - skip tail to the end. That's not ordinary // situation and we should not care about content. diff --git a/lib/rules_block/blockquote.js b/lib/rules_block/blockquote.js index 0e5a43b..30c3702 100644 --- a/lib/rules_block/blockquote.js +++ b/lib/rules_block/blockquote.js @@ -2,9 +2,11 @@ 'use strict'; +var isSpace = require('../common/utils').isSpace; + module.exports = function blockquote(state, startLine, endLine, silent) { - var nextLine, lastLineEmpty, oldTShift, oldBMarks, oldIndent, oldParentType, lines, + var nextLine, lastLineEmpty, oldTShift, oldSCount, oldBMarks, oldIndent, oldParentType, lines, initial, offset, ch, terminatorRules, token, i, l, terminate, pos = state.bMarks[startLine] + state.tShift[startLine], @@ -17,19 +19,39 @@ module.exports = function blockquote(state, startLine, endLine, silent) { // so no point trying to find the end of it in silent mode if (silent) { return true; } - // skip one optional space after '>' + // skip one optional space (but not tab, check cmark impl) after '>' if (state.src.charCodeAt(pos) === 0x20) { pos++; } oldIndent = state.blkIndent; state.blkIndent = 0; + // skip spaces after ">" and re-calculate offset + initial = offset = state.sCount[startLine] + pos - (state.bMarks[startLine] + state.tShift[startLine]); + oldBMarks = [ state.bMarks[startLine] ]; state.bMarks[startLine] = pos; - // check if we have an empty blockquote - pos = pos < max ? state.skipSpaces(pos) : pos; + while (pos < max) { + ch = state.src.charCodeAt(pos); + + if (isSpace(ch)) { + if (ch === 0x09) { + offset += 4 - offset % 4; + } else { + offset++; + } + } else { + break; + } + + pos++; + } + lastLineEmpty = pos >= max; + oldSCount = [ state.sCount[startLine] ]; + state.sCount[startLine] = offset - initial; + oldTShift = [ state.tShift[startLine] ]; state.tShift[startLine] = pos - state.bMarks[startLine]; @@ -54,7 +76,7 @@ module.exports = function blockquote(state, startLine, endLine, silent) { // - - - // ``` for (nextLine = startLine + 1; nextLine < endLine; nextLine++) { - if (state.tShift[nextLine] < oldIndent) { break; } + if (state.sCount[nextLine] < oldIndent) { break; } pos = state.bMarks[nextLine] + state.tShift[nextLine]; max = state.eMarks[nextLine]; @@ -67,15 +89,36 @@ module.exports = function blockquote(state, startLine, endLine, silent) { if (state.src.charCodeAt(pos++) === 0x3E/* > */) { // This line is inside the blockquote. - // skip one optional space after '>' + // skip one optional space (but not tab, check cmark impl) after '>' if (state.src.charCodeAt(pos) === 0x20) { pos++; } + // skip spaces after ">" and re-calculate offset + initial = offset = state.sCount[nextLine] + pos - (state.bMarks[nextLine] + state.tShift[nextLine]); + oldBMarks.push(state.bMarks[nextLine]); state.bMarks[nextLine] = pos; - pos = pos < max ? state.skipSpaces(pos) : pos; + while (pos < max) { + ch = state.src.charCodeAt(pos); + + if (isSpace(ch)) { + if (ch === 0x09) { + offset += 4 - offset % 4; + } else { + offset++; + } + } else { + break; + } + + pos++; + } + lastLineEmpty = pos >= max; + oldSCount.push(state.sCount[nextLine]); + state.sCount[nextLine] = offset - initial; + oldTShift.push(state.tShift[nextLine]); state.tShift[nextLine] = pos - state.bMarks[nextLine]; continue; @@ -96,12 +139,11 @@ module.exports = function blockquote(state, startLine, endLine, silent) { oldBMarks.push(state.bMarks[nextLine]); oldTShift.push(state.tShift[nextLine]); + oldSCount.push(state.sCount[nextLine]); - // A negative number means that this is a paragraph continuation; + // A negative indentation means that this is a paragraph continuation // - // Any negative number will do the job here, but it's better for it - // to be large enough to make any bugs obvious. - state.tShift[nextLine] = -1; + state.sCount[nextLine] = -1; } oldParentType = state.parentType; @@ -124,6 +166,7 @@ module.exports = function blockquote(state, startLine, endLine, silent) { for (i = 0; i < oldTShift.length; i++) { state.bMarks[i + startLine] = oldBMarks[i]; state.tShift[i + startLine] = oldTShift[i]; + state.sCount[i + startLine] = oldSCount[i]; } state.blkIndent = oldIndent; diff --git a/lib/rules_block/code.js b/lib/rules_block/code.js index dcf3336..74bc42a 100644 --- a/lib/rules_block/code.js +++ b/lib/rules_block/code.js @@ -6,7 +6,7 @@ module.exports = function code(state, startLine, endLine/*, silent*/) { var nextLine, last, token; - if (state.tShift[startLine] - state.blkIndent < 4) { return false; } + if (state.sCount[startLine] - state.blkIndent < 4) { return false; } last = nextLine = startLine + 1; @@ -15,7 +15,7 @@ module.exports = function code(state, startLine, endLine/*, silent*/) { nextLine++; continue; } - if (state.tShift[nextLine] - state.blkIndent >= 4) { + if (state.sCount[nextLine] - state.blkIndent >= 4) { nextLine++; last = nextLine; continue; diff --git a/lib/rules_block/fence.js b/lib/rules_block/fence.js index cf72e78..a02bd5f 100644 --- a/lib/rules_block/fence.js +++ b/lib/rules_block/fence.js @@ -47,7 +47,7 @@ module.exports = function fence(state, startLine, endLine, silent) { pos = mem = state.bMarks[nextLine] + state.tShift[nextLine]; max = state.eMarks[nextLine]; - if (pos < max && state.tShift[nextLine] < state.blkIndent) { + if (pos < max && state.sCount[nextLine] < state.blkIndent) { // non-empty line with negative indent should stop the list: // - ``` // test @@ -56,7 +56,7 @@ module.exports = function fence(state, startLine, endLine, silent) { if (state.src.charCodeAt(pos) !== marker) { continue; } - if (state.tShift[nextLine] - state.blkIndent >= 4) { + if (state.sCount[nextLine] - state.blkIndent >= 4) { // closing fence should be indented less than 4 spaces continue; } @@ -77,7 +77,7 @@ module.exports = function fence(state, startLine, endLine, silent) { } // If a fence has heading spaces, they should be removed from its inner block - len = state.tShift[startLine]; + len = state.sCount[startLine]; state.line = nextLine + (haveEndMarker ? 1 : 0); diff --git a/lib/rules_block/heading.js b/lib/rules_block/heading.js index 1cf4723..9b8eee4 100644 --- a/lib/rules_block/heading.js +++ b/lib/rules_block/heading.js @@ -2,6 +2,8 @@ 'use strict'; +var isSpace = require('../common/utils').isSpace; + module.exports = function heading(state, startLine, endLine, silent) { var ch, level, tmp, token, @@ -26,9 +28,9 @@ module.exports = function heading(state, startLine, endLine, silent) { // Let's cut tails like ' ### ' from the end of string - max = state.skipCharsBack(max, 0x20, pos); // space + max = state.skipSpacesBack(max, pos); tmp = state.skipCharsBack(max, 0x23, pos); // # - if (tmp > pos && state.src.charCodeAt(tmp - 1) === 0x20/* space */) { + if (tmp > pos && isSpace(state.src.charCodeAt(tmp - 1))) { max = tmp; } diff --git a/lib/rules_block/hr.js b/lib/rules_block/hr.js index 0abaf66..8638f04 100644 --- a/lib/rules_block/hr.js +++ b/lib/rules_block/hr.js @@ -2,6 +2,8 @@ 'use strict'; +var isSpace = require('../common/utils').isSpace; + module.exports = function hr(state, startLine, endLine, silent) { var marker, cnt, ch, token, @@ -17,12 +19,12 @@ module.exports = function hr(state, startLine, endLine, silent) { return false; } - // markers can be mixed with spaces, but there should be at least 3 one + // markers can be mixed with spaces, but there should be at least 3 of them cnt = 1; while (pos < max) { ch = state.src.charCodeAt(pos++); - if (ch !== marker && ch !== 0x20/* space */) { return false; } + if (ch !== marker && !isSpace(ch)) { return false; } if (ch === marker) { cnt++; } } diff --git a/lib/rules_block/html_block.js b/lib/rules_block/html_block.js index 0d9426b..abef36d 100644 --- a/lib/rules_block/html_block.js +++ b/lib/rules_block/html_block.js @@ -48,7 +48,7 @@ module.exports = function html_block(state, startLine, endLine, silent) { // Let's roll down till block end. if (!HTML_SEQUENCES[i][1].test(lineText)) { for (; nextLine < endLine; nextLine++) { - if (state.tShift[nextLine] < state.blkIndent) { break; } + if (state.sCount[nextLine] < state.blkIndent) { break; } pos = state.bMarks[nextLine] + state.tShift[nextLine]; max = state.eMarks[nextLine]; diff --git a/lib/rules_block/lheading.js b/lib/rules_block/lheading.js index 8668fd0..76343bf 100644 --- a/lib/rules_block/lheading.js +++ b/lib/rules_block/lheading.js @@ -8,11 +8,11 @@ module.exports = function lheading(state, startLine, endLine/*, silent*/) { next = startLine + 1; if (next >= endLine) { return false; } - if (state.tShift[next] < state.blkIndent) { return false; } + if (state.sCount[next] < state.blkIndent) { return false; } // Scan next line - if (state.tShift[next] - state.blkIndent > 3) { return false; } + if (state.sCount[next] - state.blkIndent > 3) { return false; } pos = state.bMarks[next] + state.tShift[next]; max = state.eMarks[next]; diff --git a/lib/rules_block/list.js b/lib/rules_block/list.js index 49b9a5b..bb28e6b 100644 --- a/lib/rules_block/list.js +++ b/lib/rules_block/list.js @@ -2,11 +2,13 @@ 'use strict'; +var isSpace = require('../common/utils').isSpace; + // Search `[-+*][\n ]`, returns next pos arter marker on success // or -1 on fail. function skipBulletListMarker(state, startLine) { - var marker, pos, max; + var marker, pos, max, ch; pos = state.bMarks[startLine] + state.tShift[startLine]; max = state.eMarks[startLine]; @@ -19,9 +21,13 @@ function skipBulletListMarker(state, startLine) { return -1; } - if (pos < max && state.src.charCodeAt(pos) !== 0x20) { - // " 1.test " - is not a list item - return -1; + if (pos < max) { + ch = state.src.charCodeAt(pos); + + if (!isSpace(ch)) { + // " -test " - is not a list item + return -1; + } } return pos; @@ -66,9 +72,13 @@ function skipOrderedListMarker(state, startLine) { } - if (pos < max && state.src.charCodeAt(pos) !== 0x20/* space */) { - // " 1.test " - is not a list item - return -1; + if (pos < max) { + ch = state.src.charCodeAt(pos); + + if (!isSpace(ch)) { + // " 1.test " - is not a list item + return -1; + } } return pos; } @@ -89,13 +99,18 @@ function markTightParagraphs(state, idx) { module.exports = function list(state, startLine, endLine, silent) { var nextLine, + initial, + offset, indent, oldTShift, oldIndent, + oldLIndent, oldTight, oldParentType, start, posAfterMarker, + ch, + pos, max, indentAfterMarker, markerValue, @@ -154,14 +169,34 @@ module.exports = function list(state, startLine, endLine, silent) { terminatorRules = state.md.block.ruler.getRules('list'); while (nextLine < endLine) { - contentStart = state.skipSpaces(posAfterMarker); + pos = posAfterMarker; max = state.eMarks[nextLine]; + initial = offset = state.sCount[nextLine] + posAfterMarker - (state.bMarks[startLine] + state.tShift[startLine]); + + while (pos < max) { + ch = state.src.charCodeAt(pos); + + if (isSpace(ch)) { + if (ch === 0x09) { + offset += 4 - offset % 4; + } else { + offset++; + } + } else { + break; + } + + pos++; + } + + contentStart = pos; + if (contentStart >= max) { // trimming space in "- \n 3" case, indent is 1 here indentAfterMarker = 1; } else { - indentAfterMarker = contentStart - posAfterMarker; + indentAfterMarker = offset - initial; } // If we have more than 4 spaces, the indent is 1 @@ -170,7 +205,7 @@ module.exports = function list(state, startLine, endLine, silent) { // " - test" // ^^^^^ - calculating total length of this thing - indent = (posAfterMarker - state.bMarks[nextLine]) + indentAfterMarker; + indent = initial + indentAfterMarker; // Run subparser & write tokens token = state.push('list_item_open', 'li', 1); @@ -180,11 +215,13 @@ module.exports = function list(state, startLine, endLine, silent) { oldIndent = state.blkIndent; oldTight = state.tight; oldTShift = state.tShift[startLine]; + oldLIndent = state.sCount[startLine]; oldParentType = state.parentType; - state.tShift[startLine] = contentStart - state.bMarks[startLine]; state.blkIndent = indent; state.tight = true; state.parentType = 'list'; + state.tShift[startLine] = contentStart - state.bMarks[startLine]; + state.sCount[startLine] = offset; state.md.block.tokenize(state, startLine, endLine, true); @@ -198,6 +235,7 @@ module.exports = function list(state, startLine, endLine, silent) { state.blkIndent = oldIndent; state.tShift[startLine] = oldTShift; + state.sCount[startLine] = oldLIndent; state.tight = oldTight; state.parentType = oldParentType; @@ -217,7 +255,7 @@ module.exports = function list(state, startLine, endLine, silent) { // // Try to check if list is terminated or continued. // - if (state.tShift[nextLine] < state.blkIndent) { break; } + if (state.sCount[nextLine] < state.blkIndent) { break; } // fail if terminating block found terminate = false; diff --git a/lib/rules_block/paragraph.js b/lib/rules_block/paragraph.js index d9dca25..18a860d 100644 --- a/lib/rules_block/paragraph.js +++ b/lib/rules_block/paragraph.js @@ -13,10 +13,10 @@ module.exports = function paragraph(state, startLine/*, endLine*/) { for (; nextLine < endLine && !state.isEmpty(nextLine); nextLine++) { // this would be a code block normally, but after paragraph // it's considered a lazy continuation regardless of what's there - if (state.tShift[nextLine] - state.blkIndent > 3) { continue; } + if (state.sCount[nextLine] - state.blkIndent > 3) { continue; } // quirk for blockquotes, this line should already be checked by that rule - if (state.tShift[nextLine] < 0) { continue; } + if (state.sCount[nextLine] < 0) { continue; } // Some tags can terminate paragraph without empty line. terminate = false; diff --git a/lib/rules_block/reference.js b/lib/rules_block/reference.js index 260726b..23f662b 100644 --- a/lib/rules_block/reference.js +++ b/lib/rules_block/reference.js @@ -4,6 +4,7 @@ var parseLinkDestination = require('../helpers/parse_link_destination'); var parseLinkTitle = require('../helpers/parse_link_title'); var normalizeReference = require('../common/utils').normalizeReference; +var isSpace = require('../common/utils').isSpace; module.exports = function reference(state, startLine, _endLine, silent) { @@ -48,10 +49,10 @@ module.exports = function reference(state, startLine, _endLine, silent) { for (; nextLine < endLine && !state.isEmpty(nextLine); nextLine++) { // this would be a code block normally, but after paragraph // it's considered a lazy continuation regardless of what's there - if (state.tShift[nextLine] - state.blkIndent > 3) { continue; } + if (state.sCount[nextLine] - state.blkIndent > 3) { continue; } // quirk for blockquotes, this line should already be checked by that rule - if (state.tShift[nextLine] < 0) { continue; } + if (state.sCount[nextLine] < 0) { continue; } // Some tags can terminate paragraph without empty line. terminate = false; @@ -92,7 +93,7 @@ module.exports = function reference(state, startLine, _endLine, silent) { ch = str.charCodeAt(pos); if (ch === 0x0A) { lines++; - } else if (ch === 0x20) { + } else if (isSpace(ch)) { /*eslint no-empty:0*/ } else { break; @@ -121,7 +122,7 @@ module.exports = function reference(state, startLine, _endLine, silent) { ch = str.charCodeAt(pos); if (ch === 0x0A) { lines++; - } else if (ch === 0x20) { + } else if (isSpace(ch)) { /*eslint no-empty:0*/ } else { break; @@ -142,7 +143,11 @@ module.exports = function reference(state, startLine, _endLine, silent) { } // skip trailing spaces until the rest of the line - while (pos < max && str.charCodeAt(pos) === 0x20/* space */) { pos++; } + while (pos < max) { + ch = str.charCodeAt(pos); + if (!isSpace(ch)) { break; } + pos++; + } if (pos < max && str.charCodeAt(pos) !== 0x0A) { if (title) { @@ -151,7 +156,11 @@ module.exports = function reference(state, startLine, _endLine, silent) { title = ''; pos = destEndPos; lines = destEndLineNo; - while (pos < max && str.charCodeAt(pos) === 0x20/* space */) { pos++; } + while (pos < max) { + ch = str.charCodeAt(pos); + if (!isSpace(ch)) { break; } + pos++; + } } } diff --git a/lib/rules_block/state_block.js b/lib/rules_block/state_block.js index 7f608b3..f08bdb2 100644 --- a/lib/rules_block/state_block.js +++ b/lib/rules_block/state_block.js @@ -3,10 +3,11 @@ 'use strict'; var Token = require('../token'); +var isSpace = require('../common/utils').isSpace; function StateBlock(src, md, env, tokens) { - var ch, s, start, pos, len, indent, indent_found; + var ch, s, start, pos, len, indent, offset, indent_found; this.src = src; @@ -23,7 +24,8 @@ function StateBlock(src, md, env, tokens) { this.bMarks = []; // line begin offsets for fast jumps this.eMarks = []; // line end offsets for fast jumps - this.tShift = []; // indent for each line + this.tShift = []; // offsets of the first non-space characters (tabs not expanded) + this.sCount = []; // indents for each line (tabs expanded) // block parser variables this.blkIndent = 0; // required block content indent @@ -42,15 +44,20 @@ function StateBlock(src, md, env, tokens) { // Create caches // Generate markers. s = this.src; - indent = 0; indent_found = false; - for (start = pos = indent = 0, len = s.length; pos < len; pos++) { + for (start = pos = indent = offset = 0, len = s.length; pos < len; pos++) { ch = s.charCodeAt(pos); if (!indent_found) { - if (ch === 0x20/* space */) { + if (isSpace(ch)) { indent++; + + if (ch === 0x09) { + offset += 4 - offset % 4; + } else { + offset++; + } continue; } else { indent_found = true; @@ -62,9 +69,11 @@ function StateBlock(src, md, env, tokens) { this.bMarks.push(start); this.eMarks.push(pos); this.tShift.push(indent); + this.sCount.push(offset); indent_found = false; indent = 0; + offset = 0; start = pos + 1; } } @@ -73,6 +82,7 @@ function StateBlock(src, md, env, tokens) { this.bMarks.push(s.length); this.eMarks.push(s.length); this.tShift.push(0); + this.sCount.push(0); this.lineMax = this.bMarks.length - 1; // don't count last fake line } @@ -106,8 +116,21 @@ StateBlock.prototype.skipEmptyLines = function skipEmptyLines(from) { // Skip spaces from given position. StateBlock.prototype.skipSpaces = function skipSpaces(pos) { + var ch; + for (var max = this.src.length; pos < max; pos++) { - if (this.src.charCodeAt(pos) !== 0x20/* space */) { break; } + ch = this.src.charCodeAt(pos); + if (!isSpace(ch)) { break; } + } + return pos; +}; + +// Skip spaces from given position in reverse. +StateBlock.prototype.skipSpacesBack = function skipSpacesBack(pos, min) { + if (pos <= min) { return pos; } + + while (pos > min) { + if (!isSpace(this.src.charCodeAt(--pos))) { return pos + 1; } } return pos; }; @@ -132,28 +155,18 @@ StateBlock.prototype.skipCharsBack = function skipCharsBack(pos, code, min) { // cut lines range from source. StateBlock.prototype.getLines = function getLines(begin, end, indent, keepLastLF) { - var i, first, last, queue, shift, + var i, lineIndent, ch, first, last, queue, lineStart, line = begin; if (begin >= end) { return ''; } - // Opt: don't use push queue for single line; - if (line + 1 === end) { - first = this.bMarks[line] + Math.min(this.tShift[line], indent); - last = this.eMarks[end - 1] + (keepLastLF ? 1 : 0); - return this.src.slice(first, last); - } - queue = new Array(end - begin); for (i = 0; line < end; line++, i++) { - shift = this.tShift[line]; - if (shift > indent) { shift = indent; } - if (shift < 0) { shift = 0; } - - first = this.bMarks[line] + shift; + lineIndent = 0; + lineStart = first = this.bMarks[line]; if (line + 1 < end || keepLastLF) { // No need for bounds check because we have fake entry on tail. @@ -162,6 +175,25 @@ StateBlock.prototype.getLines = function getLines(begin, end, indent, keepLastLF last = this.eMarks[line]; } + while (first < last && lineIndent < indent) { + ch = this.src.charCodeAt(first); + + if (isSpace(ch)) { + if (ch === 0x09) { + lineIndent += 4 - lineIndent % 4; + } else { + lineIndent++; + } + } else if (first - lineStart < this.tShift[line]) { + // patched tShift masked characters to look like spaces (blockquotes, list markers) + lineIndent++; + } else { + break; + } + + first++; + } + queue[i] = this.src.slice(first, last); } diff --git a/lib/rules_block/table.js b/lib/rules_block/table.js index 1e1c0e9..c45badb 100644 --- a/lib/rules_block/table.js +++ b/lib/rules_block/table.js @@ -62,7 +62,7 @@ module.exports = function table(state, startLine, endLine, silent) { nextLine = startLine + 1; - if (state.tShift[nextLine] < state.blkIndent) { return false; } + if (state.sCount[nextLine] < state.blkIndent) { return false; } // first character of the second line should be '|' or '-' @@ -137,7 +137,7 @@ module.exports = function table(state, startLine, endLine, silent) { token.map = tbodyLines = [ startLine + 2, 0 ]; for (nextLine = startLine + 2; nextLine < endLine; nextLine++) { - if (state.tShift[nextLine] < state.blkIndent) { break; } + if (state.sCount[nextLine] < state.blkIndent) { break; } lineText = getLine(state, nextLine).trim(); if (lineText.indexOf('|') === -1) { break; } diff --git a/lib/rules_core/normalize.js b/lib/rules_core/normalize.js index 4152095..bff5d51 100644 --- a/lib/rules_core/normalize.js +++ b/lib/rules_core/normalize.js @@ -3,13 +3,12 @@ 'use strict'; -var TABS_SCAN_RE = /[\n\t]/g; var NEWLINES_RE = /\r[\n\u0085]|[\u2424\u2028\u0085]/g; var NULL_RE = /\u0000/g; module.exports = function inline(state) { - var str, lineStart, lastTabPos; + var str; // Normalize newlines str = state.src.replace(NEWLINES_RE, '\n'); @@ -17,23 +16,5 @@ module.exports = function inline(state) { // Replace NULL characters str = str.replace(NULL_RE, '\uFFFD'); - // Replace tabs with proper number of spaces (1..4) - if (str.indexOf('\t') >= 0) { - lineStart = 0; - lastTabPos = 0; - - str = str.replace(TABS_SCAN_RE, function (match, offset) { - var result; - if (str.charCodeAt(offset) === 0x0A) { - lineStart = offset + 1; - lastTabPos = 0; - return match; - } - result = ' '.slice((offset - lineStart - lastTabPos) % 4); - lastTabPos = offset - lineStart + 1; - return result; - }); - } - state.src = str; }; diff --git a/lib/rules_inline/escape.js b/lib/rules_inline/escape.js index ff32832..e0996e0 100644 --- a/lib/rules_inline/escape.js +++ b/lib/rules_inline/escape.js @@ -2,6 +2,8 @@ 'use strict'; +var isSpace = require('../common/utils').isSpace; + var ESCAPED = []; for (var i = 0; i < 256; i++) { ESCAPED.push(0); } @@ -33,7 +35,11 @@ module.exports = function escape(state, silent) { pos++; // skip leading whitespaces from next line - while (pos < max && state.src.charCodeAt(pos) === 0x20) { pos++; } + while (pos < max) { + ch = state.src.charCodeAt(pos); + if (!isSpace(ch)) { break; } + pos++; + } state.pos = pos; return true; diff --git a/lib/rules_inline/image.js b/lib/rules_inline/image.js index a5e1630..32355e0 100644 --- a/lib/rules_inline/image.js +++ b/lib/rules_inline/image.js @@ -6,6 +6,7 @@ var parseLinkLabel = require('../helpers/parse_link_label'); var parseLinkDestination = require('../helpers/parse_link_destination'); var parseLinkTitle = require('../helpers/parse_link_title'); var normalizeReference = require('../common/utils').normalizeReference; +var isSpace = require('../common/utils').isSpace; module.exports = function image(state, silent) { @@ -45,7 +46,7 @@ module.exports = function image(state, silent) { pos++; for (; pos < max; pos++) { code = state.src.charCodeAt(pos); - if (code !== 0x20 && code !== 0x0A) { break; } + if (!isSpace(code) && code !== 0x0A) { break; } } if (pos >= max) { return false; } @@ -67,7 +68,7 @@ module.exports = function image(state, silent) { start = pos; for (; pos < max; pos++) { code = state.src.charCodeAt(pos); - if (code !== 0x20 && code !== 0x0A) { break; } + if (!isSpace(code) && code !== 0x0A) { break; } } // [link]( "title" ) @@ -81,7 +82,7 @@ module.exports = function image(state, silent) { // ^^ skipping these spaces for (; pos < max; pos++) { code = state.src.charCodeAt(pos); - if (code !== 0x20 && code !== 0x0A) { break; } + if (!isSpace(code) && code !== 0x0A) { break; } } } else { title = ''; @@ -102,7 +103,7 @@ module.exports = function image(state, silent) { // ^^ optional whitespace (can include newlines) for (; pos < max; pos++) { code = state.src.charCodeAt(pos); - if (code !== 0x20 && code !== 0x0A) { break; } + if (!isSpace(code) && code !== 0x0A) { break; } } if (pos < max && state.src.charCodeAt(pos) === 0x5B/* [ */) { diff --git a/lib/rules_inline/link.js b/lib/rules_inline/link.js index 8e9e54e..b4a8808 100644 --- a/lib/rules_inline/link.js +++ b/lib/rules_inline/link.js @@ -6,6 +6,7 @@ var parseLinkLabel = require('../helpers/parse_link_label'); var parseLinkDestination = require('../helpers/parse_link_destination'); var parseLinkTitle = require('../helpers/parse_link_title'); var normalizeReference = require('../common/utils').normalizeReference; +var isSpace = require('../common/utils').isSpace; module.exports = function link(state, silent) { @@ -43,7 +44,7 @@ module.exports = function link(state, silent) { pos++; for (; pos < max; pos++) { code = state.src.charCodeAt(pos); - if (code !== 0x20 && code !== 0x0A) { break; } + if (!isSpace(code) && code !== 0x0A) { break; } } if (pos >= max) { return false; } @@ -65,7 +66,7 @@ module.exports = function link(state, silent) { start = pos; for (; pos < max; pos++) { code = state.src.charCodeAt(pos); - if (code !== 0x20 && code !== 0x0A) { break; } + if (!isSpace(code) && code !== 0x0A) { break; } } // [link]( "title" ) @@ -79,7 +80,7 @@ module.exports = function link(state, silent) { // ^^ skipping these spaces for (; pos < max; pos++) { code = state.src.charCodeAt(pos); - if (code !== 0x20 && code !== 0x0A) { break; } + if (!isSpace(code) && code !== 0x0A) { break; } } } else { title = ''; @@ -100,7 +101,7 @@ module.exports = function link(state, silent) { // ^^ optional whitespace (can include newlines) for (; pos < max; pos++) { code = state.src.charCodeAt(pos); - if (code !== 0x20 && code !== 0x0A) { break; } + if (!isSpace(code) && code !== 0x0A) { break; } } if (pos < max && state.src.charCodeAt(pos) === 0x5B/* [ */) { diff --git a/test/fixtures/commonmark/bad.txt b/test/fixtures/commonmark/bad.txt index b5c78a7..9a799f9 100644 --- a/test/fixtures/commonmark/bad.txt +++ b/test/fixtures/commonmark/bad.txt @@ -1,86 +1,3 @@ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -src line: 259 - -. - foo baz bim -. -
foo	baz		bim
-
-. - -error: - -
foo baz     bim
-
- - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -src line: 266 - -. - foo baz bim -. -
foo	baz		bim
-
-. - -error: - -
foo baz     bim
-
- - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -src line: 273 - -. - a a - ὐ a -. -
a	a
-ὐ	a
-
-. - -error: - -
a   a
-ὐ   a
-
- - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -src line: 295 - -. -> foo bar -. -
-

foo bar

-
-. - -error: - -
-

foo bar

-
- - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -src line: 4737 - -. -\ \A\a\ \3\φ\« -. -

\ \A\a\ \3\φ\«

-. - -error: - -

\ \A\a\ \3\φ\«

- - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ src line: 6241 diff --git a/test/fixtures/commonmark/good.txt b/test/fixtures/commonmark/good.txt index 296c819..bdb86d3 100644 --- a/test/fixtures/commonmark/good.txt +++ b/test/fixtures/commonmark/good.txt @@ -1,3 +1,35 @@ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src line: 259 + +. + foo baz bim +. +
foo	baz		bim
+
+. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src line: 266 + +. + foo baz bim +. +
foo	baz		bim
+
+. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src line: 273 + +. + a a + ὐ a +. +
a	a
+ὐ	a
+
+. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ src line: 282 @@ -14,6 +46,17 @@ src line: 282 . +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src line: 295 + +. +> foo bar +. +
+

foo bar

+
+. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ src line: 324 @@ -3945,6 +3988,15 @@ src line: 4728

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src line: 4737 + +. +\ \A\a\ \3\φ\« +. +

\ \A\a\ \3\φ\«

+. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ src line: 4746 diff --git a/test/fixtures/markdown-it/commonmark_extras.txt b/test/fixtures/markdown-it/commonmark_extras.txt index fe302ae..e6b615b 100644 --- a/test/fixtures/markdown-it/commonmark_extras.txt +++ b/test/fixtures/markdown-it/commonmark_extras.txt @@ -245,3 +245,30 @@ test

foo

. + +Coverage. Tabs in blockquotes. +. + > foo + > bar +. +
+
 foo
+ bar
+
+
+. + +Coverage. Tabs in lists. +. +1. foo + + bar +. +
    +
  1. +

    foo

    +
     bar
    +
    +
  2. +
+.