Browse Source

Fix surrogate pairs instead of processing encoder exceptions

pull/30/head
Vitaly Puzrin 10 years ago
parent
commit
0fa09e9cd2
  1. 42
      lib/common/utils.js

42
lib/common/utils.js

@ -130,6 +130,37 @@ function escapeHtml(str) {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Matches any single UTF-16 surrogate code unit (high or low half).
// Non-global on purpose: it is only used for a stateless `.test()` probe.
// (The "SURRORATE" spelling is kept because other code refers to these names.)
var SURRORATE_TEST_RE = /[\uD800-\uDFFF]/;

// Same pattern with the `g` flag, used with `String.prototype.replace` so that
// every surrogate code unit in the string is visited.
var SURRORATE_SEARCH_RE = new RegExp(SURRORATE_TEST_RE.source, 'g');
// Replacer callback for `String.prototype.replace` when matching single
// surrogate code units. Keeps a surrogate that is part of a well-formed
// high+low pair and substitutes U+FFFD (replacement character) for a lone one.
//
// - ch   - the matched surrogate code unit (one UTF-16 unit)
// - pos  - offset of `ch` inside `orig`
// - orig - the full string being processed
//
// Returns `ch` unchanged when it is correctly paired, '\uFFFD' otherwise.
function replaceBadSurrogate(ch, pos, orig) {
  var code = ch.charCodeAt(0);

  if (code >= 0xD800 && code <= 0xDBFF) {
    // High surrogate: valid only if immediately followed by a low surrogate.
    if (pos >= orig.length - 1) { return '\uFFFD'; }
    code = orig.charCodeAt(pos + 1);
    if (code < 0xDC00 || code > 0xDFFF) { return '\uFFFD'; }
    return ch;
  }

  // Low surrogate: valid only if immediately preceded by a high surrogate.
  if (pos === 0) { return '\uFFFD'; }
  code = orig.charCodeAt(pos - 1);
  // The high-surrogate range starts at 0xD800; using a larger lower bound
  // would reject valid pairs such as '\uD83D\uDE00' (U+1F600).
  if (code < 0xD800 || code > 0xDBFF) { return '\uFFFD'; }
  return ch;
}
// Returns `str` with every surrogate code unit passed through
// `replaceBadSurrogate`. Strings that contain no surrogate code units at all
// are returned as-is, skipping the replace pass.
function fixBrokenSurrogates(str) {
  var hasSurrogates = SURRORATE_TEST_RE.test(str);

  return hasSurrogates
    ? str.replace(SURRORATE_SEARCH_RE, replaceBadSurrogate)
    : str;
}
////////////////////////////////////////////////////////////////////////////////
// Incoming link can be partially encoded. Convert possible combinations to // Incoming link can be partially encoded. Convert possible combinations to
// unified form. // unified form.
// //
@ -148,14 +179,9 @@ function normalizeLink(url) {
normalized = decodeURI(normalized); normalized = decodeURI(normalized);
} catch (__) {} } catch (__) {}
// Encoder throws exception on broken unicode sequence. // Encoder throws exception on broken surrogate pairs.
// Kill suspicious data for the safety. // Fix those first.
// return encodeURI(fixBrokenSurrogates(normalized));
try {
return encodeURI(normalized);
} catch (__) {
return '';
}
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////

Loading…
Cancel
Save