Browse Source

Use mdurl module instead of decodeURI+encodeURI

pull/82/head
Alex Kocharin 10 years ago
parent
commit
3264f17291
  1. 53
      lib/common/utils.js
  2. 1
      package.json
  3. 2
      test/fixtures/markdown-it/fatal.txt
  4. 13
      test/utils.js

53
lib/common/utils.js

@ -130,36 +130,7 @@ function escapeHtml(str) {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
var SURRORATE_TEST_RE = /[\uD800-\uDFFF]/; var encode = require('mdurl/encode');
var SURRORATE_SEARCH_RE = /[\uD800-\uDFFF]/g;
/**
 * Replacer callback for `String.prototype.replace`: decides whether a single
 * UTF-16 surrogate code unit is part of a valid pair.
 *
 * @param {string} ch   - the matched code unit (expected to be a surrogate)
 * @param {number} pos  - index of `ch` inside `orig`
 * @param {string} orig - the full string being scanned
 * @returns {string} `ch` unchanged when it is correctly paired, otherwise
 *                   the replacement character U+FFFD
 */
function replaceBadSurrogate(ch, pos, orig) {
  var REPLACEMENT = '\uFFFD';
  var unit = ch.charCodeAt(0);
  var isHigh = unit >= 0xD800 && unit <= 0xDBFF;
  var neighbour;

  if (isHigh) {
    // A high surrogate must be followed by a low surrogate,
    // so it cannot be the last code unit of the string.
    if (pos >= orig.length - 1) {
      return REPLACEMENT;
    }
    neighbour = orig.charCodeAt(pos + 1);
    return (neighbour < 0xDC00 || neighbour > 0xDFFF) ? REPLACEMENT : ch;
  }

  // Anything else is treated as a low surrogate: it must be preceded
  // by a high surrogate, so it cannot be the first code unit.
  if (pos === 0) {
    return REPLACEMENT;
  }
  neighbour = orig.charCodeAt(pos - 1);
  return (neighbour < 0xD800 || neighbour > 0xDBFF) ? REPLACEMENT : ch;
}
/**
 * Replaces every unpaired UTF-16 surrogate in `str` with U+FFFD.
 *
 * @param {string} str - input text
 * @returns {string} `str` itself when it contains no surrogate code units,
 *                   otherwise a copy with broken surrogates replaced
 */
function fixBrokenSurrogates(str) {
  // Cheap pre-check so strings without surrogates skip the replace pass.
  var hasSurrogates = SURRORATE_TEST_RE.test(str);

  return hasSurrogates
    ? str.replace(SURRORATE_SEARCH_RE, replaceBadSurrogate)
    : str;
}
////////////////////////////////////////////////////////////////////////////////
// Incoming link can be partially encoded. Convert possible combinations to // Incoming link can be partially encoded. Convert possible combinations to
// unified form. // unified form.
@ -171,24 +142,7 @@ function fixBrokenSurrogates(str) {
// - (?) punycode for domain name (but encodeURI seems to work in real world) // - (?) punycode for domain name (but encodeURI seems to work in real world)
// //
function normalizeLink(url) { function normalizeLink(url) {
var normalized = replaceEntities(url); return encode(replaceEntities(url));
// We don't care much about the result for malformed URIs,
// but we should not throw an exception.
try {
normalized = decodeURI(normalized);
} catch (__) {}
// Encoder throws exception on broken surrogate pairs.
// Fix those first.
try {
return encodeURI(fixBrokenSurrogates(normalized));
} catch (__) {
// This should never happen and left for safety only.
/*istanbul ignore next*/
return '';
}
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -305,6 +259,3 @@ exports.isMdAsciiPunct = isMdAsciiPunct;
exports.isPunctChar = isPunctChar; exports.isPunctChar = isPunctChar;
exports.escapeRE = escapeRE; exports.escapeRE = escapeRE;
exports.normalizeReference = normalizeReference; exports.normalizeReference = normalizeReference;
// for testing only
exports.fixBrokenSurrogates = fixBrokenSurrogates;

1
package.json

@ -26,6 +26,7 @@
"dependencies": { "dependencies": {
"argparse": "~ 1.0.0", "argparse": "~ 1.0.0",
"linkify-it": "~ 0.1.1", "linkify-it": "~ 0.1.1",
"mdurl": "~ 0.0.1",
"uc.micro": "~ 0.1.0" "uc.micro": "~ 0.1.0"
}, },
"devDependencies": { "devDependencies": {

2
test/fixtures/markdown-it/fatal.txt

@ -10,7 +10,7 @@ Should not throw exception on broken utf-8 sequence in URL [mailformed URI]
. .
[foo](%C3) [foo](%C3)
. .
<p><a href="%25C3">foo</a></p> <p><a href="%C3">foo</a></p>
. .

13
test/utils.js

@ -48,19 +48,6 @@ describe('Utils', function () {
}); });
}); });
// Checks that fixBrokenSurrogates replaces unpaired surrogates with U+FFFD
// and leaves a valid surrogate pair untouched.
it('fixBrokenSurrogates', function () {
var fixBrokenSurrogates = require('../lib/common/utils').fixBrokenSurrogates;
// Bad: a lone high (\uD800) or low (\uDC00) surrogate, at the start or
// at the end of the string, is replaced with \uFFFD.
assert.strictEqual(fixBrokenSurrogates('\uD800foo'), '\uFFFDfoo');
assert.strictEqual(fixBrokenSurrogates('foo\uD800'), 'foo\uFFFD');
assert.strictEqual(fixBrokenSurrogates('\uDC00foo'), '\uFFFDfoo');
assert.strictEqual(fixBrokenSurrogates('foo\uDC00'), 'foo\uFFFD');
// Good: a high surrogate immediately followed by a low one is returned as is.
assert.strictEqual(fixBrokenSurrogates('\uD800\uDC00'), '\uD800\uDC00');
});
it('normalizeLink', function () { it('normalizeLink', function () {
var normalizeLink = require('../lib/common/utils').normalizeLink; var normalizeLink = require('../lib/common/utils').normalizeLink;

Loading…
Cancel
Save