Browse Source

Improve normalization for reference label matching

Simple toUpperCase() does not work for various ligatures and legacy
characters that are already in a different upper case form.

See also:
278ea515fe
pull/570/head
Alex Kocharin 5 years ago
parent
commit
7421ecce67
  1. 26
      lib/common/utils.js
  2. 20
      test/fixtures/markdown-it/commonmark_extras.txt

26
lib/common/utils.js

@ -241,10 +241,28 @@ function isMdAsciiPunct(ch) {
// Helper to unify [reference labels].
//
// Takes a raw label string and returns its normalized form: surrounding
// whitespace removed, inner whitespace runs collapsed to a single space,
// and letters case-folded via lower-then-upper conversion, so that labels
// differing only in letter variants produce the same result.
//
function normalizeReference(str) {
  // Trim and collapse whitespace
  //
  str = str.trim().replace(/\s+/g, ' ');

  // In node v10 'ẞ'.toLowerCase() === 'Ṿ', which is presumed to be a bug
  // fixed in v12 (couldn't find any details).
  //
  // So treat this one as a special case
  // (remove this when node v10 is no longer supported).
  //
  if ('ẞ'.toLowerCase() === 'Ṿ') {
    str = str.replace(/ẞ/g, 'ß');
  }

  // .toLowerCase().toUpperCase() should get rid of all differences
  // between letter variants. A plain .toUpperCase() is not enough, because
  // some characters are already in a different upper case form.
  //
  // Final result should be uppercased, because it's later stored in an object
  // (this avoids a conflict with Object.prototype members,
  // most notably, `__proto__`)
  //
  return str.toLowerCase().toUpperCase();
}
////////////////////////////////////////////////////////////////////////////////

20
test/fixtures/markdown-it/commonmark_extras.txt

@ -23,6 +23,26 @@ Issue #55:
.
Reference labels: 'i̇θωkå'.toUpperCase() is 'İΘΩKÅ', but these should still be equivalent
.
[İϴΩKÅ]
[i̇θωkå]: /url
.
<p><a href="/url">İϴΩKÅ</a></p>
.
Reference labels: support ligatures (equivalent according to unicode case folding)
.
[ﬀﬁﬂ]
[fffifl]: /url
.
<p><a href="/url">ﬀﬁﬂ</a></p>
.
Issue #35. `<` should work as punctuation
.
an **(:**<br>

Loading…
Cancel
Save