diff --git a/lib/rules_inline/state_inline.mjs b/lib/rules_inline/state_inline.mjs index 80cb3c5..8d33819 100644 --- a/lib/rules_inline/state_inline.mjs +++ b/lib/rules_inline/state_inline.mjs @@ -89,8 +89,7 @@ StateInline.prototype.scanDelims = function (start, canSplitWord) { const max = this.posMax const marker = this.src.charCodeAt(start) - // treat beginning of the line as a whitespace - const lastChar = start > 0 ? this.src.charCodeAt(start - 1) : 0x20 + const lastChar = getLastCharCode(this.src, start) let pos = start while (pos < max && this.src.charCodeAt(pos) === marker) { pos++ } @@ -98,10 +97,10 @@ StateInline.prototype.scanDelims = function (start, canSplitWord) { const count = pos - start // treat end of the line as a whitespace - const nextChar = pos < max ? this.src.charCodeAt(pos) : 0x20 + const nextChar = pos < max ? this.src.codePointAt(pos) : 0x20 - const isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar)) - const isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar)) + const isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCodePoint(lastChar)) + const isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCodePoint(nextChar)) const isLastWhiteSpace = isWhiteSpace(lastChar) const isNextWhiteSpace = isWhiteSpace(nextChar) @@ -115,6 +114,19 @@ StateInline.prototype.scanDelims = function (start, canSplitWord) { const can_close = right_flanking && (canSplitWord || !left_flanking || isNextPunctChar) return { can_open, can_close, length: count } + + function getLastCharCode (str, pos) { + // treat beginning of the line as a whitespace + if (pos <= 0) { return 0x20 } + const charCode = str.charCodeAt(pos - 1) + // not a low surrogate (BMP character or lone high surrogate): return the code unit as is + if ((charCode & 0xFC00) !== 0xDC00) { return charCode } + + // codePointAt returns undefined when pos - 2 is out of range (stray low surrogate at the string start) + const codePoint = str.codePointAt(pos - 2) + // undefined > 0xffff = false, so we don't 
need extra check here + return codePoint > 0xffff ? codePoint : charCode + } } // re-export Token class to use in block rules diff --git a/test/fixtures/markdown-it/commonmark_extras.txt b/test/fixtures/markdown-it/commonmark_extras.txt index 558c011..5e24726 100644 --- a/test/fixtures/markdown-it/commonmark_extras.txt +++ b/test/fixtures/markdown-it/commonmark_extras.txt @@ -740,3 +740,25 @@ Html in image description .
a*a∇*a
+a*∇a*a
+a*a𝜵*a
+a*𝜵a*a
+a*𐬼a*a
+a*a𐬼*a
+.