From 552c130b21b2c731a597b00b17d4db045c1099ec Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Wed, 4 Mar 2015 23:34:47 +0300 Subject: [PATCH] Improve smartquotes handling Use implementation similar to the new emphasis one. --- lib/rules_core/smartquotes.js | 66 +++++++++++++++++------ test/fixtures/markdown-it/smartquotes.txt | 13 +++++ 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/lib/rules_core/smartquotes.js b/lib/rules_core/smartquotes.js index 30a7d21..c0a611b 100644 --- a/lib/rules_core/smartquotes.js +++ b/lib/rules_core/smartquotes.js @@ -3,18 +3,14 @@ 'use strict'; +var isWhiteSpace = require('../common/utils').isWhiteSpace; +var isPunctChar = require('../common/utils').isPunctChar; +var isMdAsciiPunct = require('../common/utils').isMdAsciiPunct; + var QUOTE_TEST_RE = /['"]/; var QUOTE_RE = /['"]/g; -var PUNCT_RE = /[-\s()\[\]]/; var APOSTROPHE = '\u2019'; /* ’ */ -// This function returns true if the character at `pos` -// could be inside a word. -function isLetter(str, pos) { - if (pos < 0 || pos >= str.length) { return false; } - return !PUNCT_RE.test(str[pos]); -} - function replaceAt(str, index, ch) { return str.substr(0, index) + ch + str.substr(index + 1); @@ -23,9 +19,9 @@ function replaceAt(str, index, ch) { module.exports = function smartquotes(state) { /*eslint max-depth:0*/ - var i, token, text, t, pos, max, thisLevel, lastSpace, nextSpace, item, - canOpen, canClose, j, isSingle, blkIdx, tokens, - stack; + var i, token, text, t, pos, max, thisLevel, item, lastChar, nextChar, + isLastPunctChar, isNextPunctChar, isLastWhiteSpace, isNextWhiteSpace, + canOpen, canClose, j, isSingle, blkIdx, tokens, stack; if (!state.md.options.typographer) { return; } @@ -61,12 +57,51 @@ module.exports = function smartquotes(state) { t = QUOTE_RE.exec(text); if (!t) { break; } - lastSpace = !isLetter(text, t.index - 1); + canOpen = canClose = true; pos = t.index + 1; isSingle = (t[0] === "'"); - nextSpace = !isLetter(text, pos); - if (!nextSpace && !lastSpace) { + lastChar = t.index - 1 >= 0 ? text.charCodeAt(t.index - 1) : -1; + nextChar = pos < max ? text.charCodeAt(pos) : -1; + + isLastPunctChar = lastChar >= 0 && + (isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar))); + isNextPunctChar = nextChar >= 0 && + (isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar))); + + // begin/end of the line counts as a whitespace too + isLastWhiteSpace = lastChar < 0 || isWhiteSpace(lastChar); + isNextWhiteSpace = nextChar < 0 || isWhiteSpace(nextChar); + + if (isNextWhiteSpace) { + canOpen = false; + } else if (isNextPunctChar) { + if (!(isLastWhiteSpace || isLastPunctChar)) { + canOpen = false; + } + } + + if (isLastWhiteSpace) { + canClose = false; + } else if (isLastPunctChar) { + if (!(isNextWhiteSpace || isNextPunctChar)) { + canClose = false; + } + } + + if (nextChar === 0x22 /* " */ && t[0] === '"') { + if (lastChar >= 0x30 /* 0 */ && lastChar <= 0x39 /* 9 */) { + // special case: 1"" - count first quote as an inch + canClose = canOpen = false; + } + } + + if (canOpen && canClose) { + // treat this as the middle of the word + canOpen = canClose = false; + } + + if (!canOpen && !canClose) { // middle of word if (isSingle) { token.content = replaceAt(token.content, t.index, APOSTROPHE); @@ -74,9 +109,6 @@ module.exports = function smartquotes(state) { continue; } - canOpen = !nextSpace; - canClose = !lastSpace; - if (canClose) { // this could be a closing quote, rewind the stack to get a match for (j = stack.length - 1; j >= 0; j--) { diff --git a/test/fixtures/markdown-it/smartquotes.txt b/test/fixtures/markdown-it/smartquotes.txt index f3c247f..a072570 100644 --- a/test/fixtures/markdown-it/smartquotes.txt +++ b/test/fixtures/markdown-it/smartquotes.txt @@ -31,6 +31,19 @@ Should match quotes on the same level: . +Should handle adjacent nested quotes: + +. +'"double in single"' + +"'single in double'" +. +

‘“double in single”’

+

“‘single in double’”

+. + + + Should not match quotes on different levels: .