markdown-it/lib/rules_core/smartquotes.js

// Convert straight quotation marks to typographic ones
//
'use strict';


var isWhiteSpace   = require('../common/utils').isWhiteSpace;
var isPunctChar    = require('../common/utils').isPunctChar;
var isMdAsciiPunct = require('../common/utils').isMdAsciiPunct;

// Non-global pattern: a stateless check for whether a string contains any
// straight quote at all (used with `.test()` before doing the real work).
var QUOTE_TEST_RE = /['"]/;
// Global pattern: used with `.exec()` and an explicitly assigned `lastIndex`
// to step through every straight quote in a string.
var QUOTE_RE = /['"]/g;
// Replacement for a single quote that is not treated as part of a quote pair.
var APOSTROPHE = '\u2019'; /* ’ */


// Return a copy of `str` in which the single character at `index` is replaced
// by `ch`.
//
// - str:   source string
// - index: position of the character to replace; callers in this file pass
//          regex match positions, i.e. non-negative in-range values
// - ch:    replacement string; may be empty or longer than one character
//          (the result then shrinks or grows accordingly)
//
// `slice` is used instead of the legacy `substr`; for non-negative `index`
// both produce the same result.
function replaceAt(str, index, ch) {
  return str.slice(0, index) + ch + str.slice(index + 1);
}

// Rewrite straight quotes in the `text` tokens of one inline token list.
//
// Walks `tokens` in order and, for every `'` or `"` found in a text token's
// content, decides from the neighbouring characters whether it may open
// and/or close a quotation. Matched pairs are replaced with the strings from
// `state.md.options.quotes` (indices 0/1 for double quotes, 2/3 for single
// quotes). Single quotes that are mid-word, or that can only close but have
// no matching opener, become APOSTROPHE.
//
// - tokens: array of tokens; `type`, `level` and `content` are read, and the
//           `content` of text tokens is modified in place
// - state:  only `state.md.options.quotes` is read here
//
// Returns nothing.
function process_inlines(tokens, state) {
  var i, token, text, t, pos, max, thisLevel, item, lastChar, nextChar,
      isLastPunctChar, isNextPunctChar, isLastWhiteSpace, isNextWhiteSpace,
      canOpen, canClose, j, isSingle, stack, openQuote, closeQuote;

  // Candidate opening quotes that have not been matched yet. Each entry
  // records the token index, the position inside that token, the quote kind
  // and the token level.
  stack = [];

  for (i = 0; i < tokens.length; i++) {
    token = tokens[i];

    thisLevel = tokens[i].level;

    // Discard pending openers at the top of the stack whose level is deeper
    // than the level of the current token.
    for (j = stack.length - 1; j >= 0; j--) {
      if (stack[j].level <= thisLevel) { break; }
    }
    stack.length = j + 1;

    // Only text tokens get their content rewritten.
    if (token.type !== 'text') { continue; }

    text = token.content;
    pos = 0;
    max = text.length;

    /*eslint no-labels:0,block-scoped-var:0*/
    OUTER:
    while (pos < max) {
      // QUOTE_RE is a global regex, so the search resumes from `pos`.
      QUOTE_RE.lastIndex = pos;
      t = QUOTE_RE.exec(text);
      if (!t) { break; }

      canOpen = canClose = true;
      // Next search starts right after this quote (adjusted further below
      // when a multi-character replacement is inserted).
      pos = t.index + 1;
      isSingle = (t[0] === "'");

      // Find previous character,
      // default to space if it's the beginning of the line
      //
      lastChar = 0x20;

      if (t.index - 1 >= 0) {
        lastChar = text.charCodeAt(t.index - 1);
      } else {
        // The quote is the first character of this token: look backwards
        // through the preceding tokens for the nearest one with content.
        for (j = i - 1; j >= 0; j--) {
          if (tokens[j].type === 'softbreak' || tokens[j].type === 'hardbreak') break; // lastChar defaults to 0x20
          if (!tokens[j].content) continue; // should skip all tokens except 'text', 'html_inline' or 'code_inline'

          lastChar = tokens[j].content.charCodeAt(tokens[j].content.length - 1);
          break;
        }
      }

      // Find next character,
      // default to space if it's the end of the line
      //
      nextChar = 0x20;

      if (pos < max) {
        nextChar = text.charCodeAt(pos);
      } else {
        // The quote is the last character of this token: look forwards
        // through the following tokens for the nearest one with content.
        for (j = i + 1; j < tokens.length; j++) {
          if (tokens[j].type === 'softbreak' || tokens[j].type === 'hardbreak') break; // nextChar defaults to 0x20
          if (!tokens[j].content) continue; // should skip all tokens except 'text', 'html_inline' or 'code_inline'

          nextChar = tokens[j].content.charCodeAt(0);
          break;
        }
      }

      isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar));
      isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar));

      isLastWhiteSpace = isWhiteSpace(lastChar);
      isNextWhiteSpace = isWhiteSpace(nextChar);

      // A quote followed by whitespace cannot open; a quote followed by
      // punctuation can open only when preceded by whitespace or punctuation.
      if (isNextWhiteSpace) {
        canOpen = false;
      } else if (isNextPunctChar) {
        if (!(isLastWhiteSpace || isLastPunctChar)) {
          canOpen = false;
        }
      }

      // Mirror rule for closing: a quote preceded by whitespace cannot close;
      // a quote preceded by punctuation can close only when followed by
      // whitespace or punctuation.
      if (isLastWhiteSpace) {
        canClose = false;
      } else if (isLastPunctChar) {
        if (!(isNextWhiteSpace || isNextPunctChar)) {
          canClose = false;
        }
      }

      if (nextChar === 0x22 /* " */ && t[0] === '"') {
        if (lastChar >= 0x30 /* 0 */ && lastChar <= 0x39 /* 9 */) {
          // special case: 1"" - count first quote as an inch
          canClose = canOpen = false;
        }
      }

      if (canOpen && canClose) {
        // Replace quotes in the middle of punctuation sequence, but not
        // in the middle of the words, i.e.:
        //
        // 1. foo " bar " baz - not replaced
        // 2. foo-"-bar-"-baz - replaced
        // 3. foo"bar"baz     - not replaced
        //
        // When both roles are still possible, keep "open" only if the
        // previous char is punctuation and "close" only if the next one is.
        canOpen = isLastPunctChar;
        canClose = isNextPunctChar;
      }

      if (!canOpen && !canClose) {
        // middle of word: a single quote becomes an apostrophe, a double
        // quote is left unchanged
        if (isSingle) {
          token.content = replaceAt(token.content, t.index, APOSTROPHE);
        }
        continue;
      }

      if (canClose) {
        // this could be a closing quote, rewind the stack to get a match
        for (j = stack.length - 1; j >= 0; j--) {
          item = stack[j];
          // Do not look past openers that belong to a shallower level.
          if (stack[j].level < thisLevel) { break; }
          // A match needs the same quote kind at the same level.
          if (item.single === isSingle && stack[j].level === thisLevel) {
            // (redundant: `item` already refers to stack[j])
            item = stack[j];

            if (isSingle) {
              openQuote = state.md.options.quotes[2];
              closeQuote = state.md.options.quotes[3];
            } else {
              openQuote = state.md.options.quotes[0];
              closeQuote = state.md.options.quotes[1];
            }

            // replace token.content *before* tokens[item.token].content,
            // because, if they are pointing at the same token, replaceAt
            // could mess up indices when quote length != 1
            token.content = replaceAt(token.content, t.index, closeQuote);
            tokens[item.token].content = replaceAt(
              tokens[item.token].content, item.pos, openQuote);

            // Shift the scan position by the extra length of the inserted
            // closing quote, and also by that of the opening quote when it
            // was inserted earlier in this same token.
            pos += closeQuote.length - 1;
            if (item.token === i) { pos += openQuote.length - 1; }

            // Re-sync the working copy with the modified token content.
            text = token.content;
            max = text.length;

            // Drop the matched opener and every opener pushed after it.
            stack.length = j;
            continue OUTER;
          }
        }
      }

      if (canOpen) {
        // Remember this quote as a candidate opener for a later closer.
        stack.push({
          token: i,
          pos: t.index,
          single: isSingle,
          level: thisLevel
        });
      } else if (canClose && isSingle) {
        // A single quote that can only close but found no matching opener
        // is turned into an apostrophe.
        token.content = replaceAt(token.content, t.index, APOSTROPHE);
      }
    }
  }
}


module.exports = function smartquotes(state) {
  /*eslint max-depth:0*/
  var blkIdx;

  if (!state.md.options.typographer) { return; }

  for (blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) {

    if (state.tokens[blkIdx].type !== 'inline' ||
        !QUOTE_TEST_RE.test(state.tokens[blkIdx].content)) {
      continue;
    }

    process_inlines(state.tokens[blkIdx].children, state);
  }
};
Add rule to replace quotes with typographic ones 10 years ago			`// Convert straight quotation marks to typographic ones`
			`//`
			`'use strict';`


Improve smartquotes handling Use implementation similar to the new emphasis one. 9 years ago			`var isWhiteSpace = require('../common/utils').isWhiteSpace;`
			`var isPunctChar = require('../common/utils').isPunctChar;`
			`var isMdAsciiPunct = require('../common/utils').isMdAsciiPunct;`

Perf: tweaked typorgapher checks 10 years ago			`var QUOTE_TEST_RE = /['"]/;`
			`var QUOTE_RE = /['"]/g;`
Do browserified output ascii-friendly 10 years ago			`var APOSTROPHE = '\u2019'; /* ’ */`
Add rule to replace quotes with typographic ones 10 years ago

Perf: smartquotes logic cleanup 10 years ago			`function replaceAt(str, index, ch) {`
			`return str.substr(0, index) + ch + str.substr(index + 1);`
Add rule to replace quotes with typographic ones 10 years ago			`}`

Splitted replacements & smartquotes to smaller functions 9 years ago			`function process_inlines(tokens, state) {`
Improve smartquotes handling Use implementation similar to the new emphasis one. 9 years ago			`var i, token, text, t, pos, max, thisLevel, item, lastChar, nextChar,`
			`isLastPunctChar, isNextPunctChar, isLastWhiteSpace, isNextWhiteSpace,`
Add multichar replacements in smartquotes fix #115 9 years ago			`canOpen, canClose, j, isSingle, stack, openQuote, closeQuote;`
Nuked typographer class, separated core chain class 10 years ago
			`stack = [];`
Add rule to replace quotes with typographic ones 10 years ago
Splitted replacements & smartquotes to smaller functions 9 years ago			`for (i = 0; i < tokens.length; i++) {`
			`token = tokens[i];`
Moved scans from typorgapher directly to rules 10 years ago
Splitted replacements & smartquotes to smaller functions 9 years ago			`thisLevel = tokens[i].level;`
Moved scans from typorgapher directly to rules 10 years ago
Splitted replacements & smartquotes to smaller functions 9 years ago			`for (j = stack.length - 1; j >= 0; j--) {`
			`if (stack[j].level <= thisLevel) { break; }`
			`}`
			`stack.length = j + 1;`

Smartquote rule cleanup 9 years ago			`if (token.type !== 'text') { continue; }`

Splitted replacements & smartquotes to smaller functions 9 years ago			`text = token.content;`
			`pos = 0;`
			`max = text.length;`

			`/*eslint no-labels:0,block-scoped-var:0*/`
			`OUTER:`
			`while (pos < max) {`
			`QUOTE_RE.lastIndex = pos;`
			`t = QUOTE_RE.exec(text);`
			`if (!t) { break; }`

			`canOpen = canClose = true;`
			`pos = t.index + 1;`
			`isSingle = (t[0] === "'");`

Take into account adjacent tokens in smartquotes close https://github.com/markdown-it/markdown-it/issues/181 9 years ago			`// Find previous character,`
			`// default to space if it's the beginning of the line`
			`//`
			`lastChar = 0x20;`

			`if (t.index - 1 >= 0) {`
			`lastChar = text.charCodeAt(t.index - 1);`
			`} else {`
			`for (j = i - 1; j >= 0; j--) {`
Fix smartquotes around softbreaks close https://github.com/markdown-it/markdown-it/issues/430 7 years ago			`if (tokens[j].type === 'softbreak' || tokens[j].type === 'hardbreak') break; // lastChar defaults to 0x20`
Fix smartquotes adjacent to code block close https://github.com/markdown-it/markdown-it/issues/677 4 years ago			`if (!tokens[j].content) continue; // should skip all tokens except 'text', 'html_inline' or 'code_inline'`
Take into account adjacent tokens in smartquotes close https://github.com/markdown-it/markdown-it/issues/181 9 years ago
			`lastChar = tokens[j].content.charCodeAt(tokens[j].content.length - 1);`
			`break;`
			`}`
			`}`

			`// Find next character,`
			`// default to space if it's the end of the line`
			`//`
			`nextChar = 0x20;`

			`if (pos < max) {`
			`nextChar = text.charCodeAt(pos);`
			`} else {`
			`for (j = i + 1; j < tokens.length; j++) {`
Fix smartquotes around softbreaks close https://github.com/markdown-it/markdown-it/issues/430 7 years ago			`if (tokens[j].type === 'softbreak' || tokens[j].type === 'hardbreak') break; // nextChar defaults to 0x20`
Fix smartquotes adjacent to code block close https://github.com/markdown-it/markdown-it/issues/677 4 years ago			`if (!tokens[j].content) continue; // should skip all tokens except 'text', 'html_inline' or 'code_inline'`
Take into account adjacent tokens in smartquotes close https://github.com/markdown-it/markdown-it/issues/181 9 years ago
			`nextChar = tokens[j].content.charCodeAt(0);`
			`break;`
			`}`
			`}`
Splitted replacements & smartquotes to smaller functions 9 years ago
Tweak scanDelims algorithm - explicitly set nextChar and lastChar to 0x20 at the begin/end of line - `_` between punctuation characters can now close an emphasis, see: https://github.com/jgm/commonmark.js/issues/12#issuecomment-77421682 - `"` between punctuation characters can now be a closed quote in smartquotes rule. 9 years ago			`isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar));`
			`isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar));`
Splitted replacements & smartquotes to smaller functions 9 years ago
Tweak scanDelims algorithm - explicitly set nextChar and lastChar to 0x20 at the begin/end of line - `_` between punctuation characters can now close an emphasis, see: https://github.com/jgm/commonmark.js/issues/12#issuecomment-77421682 - `"` between punctuation characters can now be a closed quote in smartquotes rule. 9 years ago			`isLastWhiteSpace = isWhiteSpace(lastChar);`
			`isNextWhiteSpace = isWhiteSpace(nextChar);`
Splitted replacements & smartquotes to smaller functions 9 years ago
			`if (isNextWhiteSpace) {`
			`canOpen = false;`
			`} else if (isNextPunctChar) {`
			`if (!(isLastWhiteSpace || isLastPunctChar)) {`
Improve smartquotes handling Use implementation similar to the new emphasis one. 9 years ago			`canOpen = false;`
			`}`
Splitted replacements & smartquotes to smaller functions 9 years ago			`}`
Improve smartquotes handling Use implementation similar to the new emphasis one. 9 years ago
Splitted replacements & smartquotes to smaller functions 9 years ago			`if (isLastWhiteSpace) {`
			`canClose = false;`
			`} else if (isLastPunctChar) {`
			`if (!(isNextWhiteSpace || isNextPunctChar)) {`
Improve smartquotes handling Use implementation similar to the new emphasis one. 9 years ago			`canClose = false;`
			`}`
Splitted replacements & smartquotes to smaller functions 9 years ago			`}`
Improve smartquotes handling Use implementation similar to the new emphasis one. 9 years ago
Splitted replacements & smartquotes to smaller functions 9 years ago			`if (nextChar === 0x22 /* " */ && t[0] === '"') {`
			`if (lastChar >= 0x30 /* 0 */ && lastChar <= 0x39 /* 9 */) {`
			`// special case: 1"" - count first quote as an inch`
			`canClose = canOpen = false;`
Improve smartquotes handling Use implementation similar to the new emphasis one. 9 years ago			`}`
Splitted replacements & smartquotes to smaller functions 9 years ago			`}`
Improve smartquotes handling Use implementation similar to the new emphasis one. 9 years ago
Splitted replacements & smartquotes to smaller functions 9 years ago			`if (canOpen && canClose) {`
Allow opening quote after another punctuation char in typographer close https://github.com/markdown-it/markdown-it/issues/643 4 years ago			`// Replace quotes in the middle of punctuation sequence, but not`
			`// in the middle of the words, i.e.:`
			`//`
			`// 1. foo " bar " baz - not replaced`
			`// 2. foo-"-bar-"-baz - replaced`
			`// 3. foo"bar"baz - not replaced`
			`//`
			`canOpen = isLastPunctChar;`
Tweak scanDelims algorithm - explicitly set nextChar and lastChar to 0x20 at the begin/end of line - `_` between punctuation characters can now close an emphasis, see: https://github.com/jgm/commonmark.js/issues/12#issuecomment-77421682 - `"` between punctuation characters can now be a closed quote in smartquotes rule. 9 years ago			`canClose = isNextPunctChar;`
Splitted replacements & smartquotes to smaller functions 9 years ago			`}`
Improve smartquotes handling Use implementation similar to the new emphasis one. 9 years ago
Splitted replacements & smartquotes to smaller functions 9 years ago			`if (!canOpen && !canClose) {`
			`// middle of word`
			`if (isSingle) {`
			`token.content = replaceAt(token.content, t.index, APOSTROPHE);`
Moved scans from typorgapher directly to rules 10 years ago			`}`
Splitted replacements & smartquotes to smaller functions 9 years ago			`continue;`
			`}`
Add rule to replace quotes with typographic ones 10 years ago
Splitted replacements & smartquotes to smaller functions 9 years ago			`if (canClose) {`
			`// this could be a closing quote, rewind the stack to get a match`
			`for (j = stack.length - 1; j >= 0; j--) {`
			`item = stack[j];`
			`if (stack[j].level < thisLevel) { break; }`
			`if (item.single === isSingle && stack[j].level === thisLevel) {`
Add rule to replace quotes with typographic ones 10 years ago			`item = stack[j];`
Add multichar replacements in smartquotes fix #115 9 years ago
Splitted replacements & smartquotes to smaller functions 9 years ago			`if (isSingle) {`
Add multichar replacements in smartquotes fix #115 9 years ago			`openQuote = state.md.options.quotes[2];`
			`closeQuote = state.md.options.quotes[3];`
Splitted replacements & smartquotes to smaller functions 9 years ago			`} else {`
Add multichar replacements in smartquotes fix #115 9 years ago			`openQuote = state.md.options.quotes[0];`
			`closeQuote = state.md.options.quotes[1];`
Add rule to replace quotes with typographic ones 10 years ago			`}`
Add multichar replacements in smartquotes fix #115 9 years ago
			`// replace token.content *before* tokens[item.token].content,`
			`// because, if they are pointing at the same token, replaceAt`
			`// could mess up indices when quote length != 1`
			`token.content = replaceAt(token.content, t.index, closeQuote);`
			`tokens[item.token].content = replaceAt(`
			`tokens[item.token].content, item.pos, openQuote);`

			`pos += closeQuote.length - 1;`
			`if (item.token === i) { pos += openQuote.length - 1; }`

			`text = token.content;`
			`max = text.length;`

Splitted replacements & smartquotes to smaller functions 9 years ago			`stack.length = j;`
			`continue OUTER;`
Add rule to replace quotes with typographic ones 10 years ago			`}`
			`}`
Splitted replacements & smartquotes to smaller functions 9 years ago			`}`
Add rule to replace quotes with typographic ones 10 years ago
Splitted replacements & smartquotes to smaller functions 9 years ago			`if (canOpen) {`
			`stack.push({`
			`token: i,`
			`pos: t.index,`
			`single: isSingle,`
			`level: thisLevel`
			`});`
			`} else if (canClose && isSingle) {`
			`token.content = replaceAt(token.content, t.index, APOSTROPHE);`
Add rule to replace quotes with typographic ones 10 years ago			`}`
			`}`
			`}`
Splitted replacements & smartquotes to smaller functions 9 years ago			`}`


			`module.exports = function smartquotes(state) {`
			`/*eslint max-depth:0*/`
			`var blkIdx;`

			`if (!state.md.options.typographer) { return; }`

			`for (blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) {`

			`if (state.tokens[blkIdx].type !== 'inline' ||`
			`!QUOTE_TEST_RE.test(state.tokens[blkIdx].content)) {`
			`continue;`
			`}`

			`process_inlines(state.tokens[blkIdx].children, state);`
			`}`
Add rule to replace quotes with typographic ones 10 years ago			`};`