From 7421ecce679867af4e1345d53fc5a6dffca9311e Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Fri, 5 Jul 2019 17:31:53 +0300 Subject: [PATCH] Improve normalization for reference label matching Simple toUpperCase() does not work for various ligatures and legacy characters that are already in a different upper case form. See also: https://github.com/commonmark/commonmark-spec/commit/278ea515fe5ff3e0dd652ee2127ffcd5cbc91527 --- lib/common/utils.js | 26 ++++++++++++++++--- .../markdown-it/commonmark_extras.txt | 20 ++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/lib/common/utils.js b/lib/common/utils.js index a2f9540..17dd817 100644 --- a/lib/common/utils.js +++ b/lib/common/utils.js @@ -241,10 +241,28 @@ function isMdAsciiPunct(ch) { // Hepler to unify [reference labels]. // function normalizeReference(str) { - // use .toUpperCase() instead of .toLowerCase() - // here to avoid a conflict with Object.prototype - // members (most notably, `__proto__`) - return str.trim().replace(/\s+/g, ' ').toUpperCase(); + // Trim and collapse whitespace + // + str = str.trim().replace(/\s+/g, ' '); + + // In node v10 'ẞ'.toLowerCase() === 'Ṿ', which is presumed to be a bug + // fixed in v12 (couldn't find any details). + // + // So treat this one as a special case + // (remove this when node v10 is no longer supported). + // + if ('ẞ'.toLowerCase() === 'Ṿ') { + str = str.replace(/ẞ/g, 'ß'); + } + + // .toLowerCase().toUpperCase() should get rid of all differences + // between letter variants. 
+ // + // Final result should be uppercased, because it's later stored in an object + // (this avoids a conflict with Object.prototype members, + // most notably, `__proto__`) + // + return str.toLowerCase().toUpperCase(); } //////////////////////////////////////////////////////////////////////////////// diff --git a/test/fixtures/markdown-it/commonmark_extras.txt b/test/fixtures/markdown-it/commonmark_extras.txt index dcb86a9..369977d 100644 --- a/test/fixtures/markdown-it/commonmark_extras.txt +++ b/test/fixtures/markdown-it/commonmark_extras.txt @@ -23,6 +23,26 @@ Issue #55: . +Reference labels: 'i̇θωkå'.toUpperCase() is 'İΘΩKÅ', but these should still be equivalent +. +[İϴΩKÅ] + +[i̇θωkå]: /url +. +
<p><a href="/url">İϴΩKÅ</a></p>
+. + + +Reference labels: support ligatures (equivalent according to unicode case folding) +. +[ﬀﬃﬄ] + +[fffifl]: /url +. +<p><a href="/url">ﬀﬃﬄ</a></p>
+. + + Issue #35. `<` should work as punctuation . an **(:**