|
@ -257,6 +257,31 @@ function normalizeReference(str) { |
|
|
// .toLowerCase().toUpperCase() should get rid of all differences
|
|
|
// .toLowerCase().toUpperCase() should get rid of all differences
|
|
|
// between letter variants.
|
|
|
// between letter variants.
|
|
|
//
|
|
|
//
|
|
|
|
|
|
// Simple .toLowerCase() doesn't normalize 125 code points correctly,
|
|
|
|
|
|
// and .toUpperCase doesn't normalize 6 of them (list of exceptions:
|
|
|
|
|
|
// İ, ϴ, ẞ, Ω, K, Å - those are already uppercased, but have differently
|
|
|
|
|
|
// uppercased versions).
|
|
|
|
|
|
//
|
|
|
|
|
|
// Here's an example showing how it happens. Let's take the Greek letter theta:
|
|
|
|
|
|
// uppercase U+0398 (Θ), U+03f4 (ϴ) and lowercase U+03b8 (θ), U+03d1 (ϑ)
|
|
|
|
|
|
//
|
|
|
|
|
|
// Unicode entries:
|
|
|
|
|
|
// 0398;GREEK CAPITAL LETTER THETA;Lu;0;L;;;;;N;;;;03B8;
|
|
|
|
|
|
// 03B8;GREEK SMALL LETTER THETA;Ll;0;L;;;;;N;;;0398;;0398
|
|
|
|
|
|
// 03D1;GREEK THETA SYMBOL;Ll;0;L;<compat> 03B8;;;;N;GREEK SMALL LETTER SCRIPT THETA;;0398;;0398
|
|
|
|
|
|
// 03F4;GREEK CAPITAL THETA SYMBOL;Lu;0;L;<compat> 0398;;;;N;;;;03B8;
|
|
|
|
|
|
//
|
|
|
|
|
|
// Case-insensitive comparison should treat all of them as equivalent.
|
|
|
|
|
|
//
|
|
|
|
|
|
// But .toLowerCase() doesn't change ϑ (it's already lowercase),
|
|
|
|
|
|
// and .toUpperCase() doesn't change ϴ (already uppercase).
|
|
|
|
|
|
//
|
|
|
|
|
|
// Applying first lower then upper case normalizes any character:
|
|
|
|
|
|
// '\u0398\u03f4\u03b8\u03d1'.toLowerCase().toUpperCase() === '\u0398\u0398\u0398\u0398'
|
|
|
|
|
|
//
|
|
|
|
|
|
// Note: this is equivalent to Unicode case folding; Unicode normalization
|
|
|
|
|
|
// is a different step that is not required here.
|
|
|
|
|
|
//
|
|
|
// Final result should be uppercased, because it's later stored in an object
|
|
|
// Final result should be uppercased, because it's later stored in an object
|
|
|
// (this avoids a conflict with Object.prototype members,
|
|
|
// (this avoids a conflict with Object.prototype members,
|
|
|
// most notably, `__proto__`)
|
|
|
// most notably, `__proto__`)
|
|
|