Merge 1321d2eaa0 into 0fe7ccb4b7

5 months ago · 1ce64950d6
2 changed files with 39 additions and 5 deletions
--- a/lib/rules_inline/state_inline.mjs
+++ b/lib/rules_inline/state_inline.mjs
@ -89,8 +89,7 @@ StateInline.prototype.scanDelims = function (start, canSplitWord) {
  const max = this.posMax
  const marker = this.src.charCodeAt(start)

-  // treat beginning of the line as a whitespace
-  const lastChar = start > 0 ? this.src.charCodeAt(start - 1) : 0x20
+  const lastChar = getLastCharCode(this.src, start)

  let pos = start
  while (pos < max && this.src.charCodeAt(pos) === marker) { pos++ }
@ -98,10 +97,10 @@ StateInline.prototype.scanDelims = function (start, canSplitWord) {
  const count = pos - start

  // treat end of the line as a whitespace
-  const nextChar = pos < max ? this.src.charCodeAt(pos) : 0x20
+  const nextChar = pos < max ? this.src.codePointAt(pos) : 0x20

-  const isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar))
-  const isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar))
+  const isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCodePoint(lastChar))
+  const isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCodePoint(nextChar))

  const isLastWhiteSpace = isWhiteSpace(lastChar)
  const isNextWhiteSpace = isWhiteSpace(nextChar)
@ -115,6 +114,19 @@ StateInline.prototype.scanDelims = function (start, canSplitWord) {
  const can_close = right_flanking && (canSplitWord || !left_flanking  || isNextPunctChar)

  return { can_open, can_close, length: count }
+
+  function getLastCharCode (str, pos) {
+    // treat the beginning of the line as whitespace
+    if (pos <= 0) { return 0x20 }
+    const charCode = str.charCodeAt(pos - 1)
+    // not a low surrogate (0xDC00..0xDFFF): the UTF-16 code unit is returned as is
+    if ((charCode & 0xFC00) !== 0xDC00) { return charCode }
+
+    // codePointAt() yields undefined when pos - 2 is out of range (stray low surrogate at index 0)
+    const codePoint = str.codePointAt(pos - 2)
+    // `undefined > 0xffff` is false, so both the out-of-range and the unpaired case fall back to charCode
+    return codePoint > 0xffff ? codePoint : charCode
+  }
 }

 // re-export Token class to use in block rules
--- a/test/fixtures/markdown-it/commonmark_extras.txt
+++ b/test/fixtures/markdown-it/commonmark_extras.txt
@ -740,3 +740,25 @@ Html in image description
 .
 <p><img src="image.png" alt="text &lt;textarea&gt; text"></p>
 .
+
+Issue #1071. Recognize non-BMP punctuation and symbols
+.
+a*a∇*a
+
+a*∇a*a
+
+a*a𝜵*a
+
+a*𝜵a*a
+
+a*𐬼a*a
+
+a*a𐬼*a
+.
+<p>a*a∇*a</p>
+<p>a*∇a*a</p>
+<p>a*a𝜵*a</p>
+<p>a*𝜵a*a</p>
+<p>a*𐬼a*a</p>
+<p>a*a𐬼*a</p>
+.