Browse Source

Use mdurl module instead of decodeURI+encodeURI

pull/82/head
Alex Kocharin 10 years ago
parent
commit
3264f17291
  1. 53
      lib/common/utils.js
  2. 1
      package.json
  3. 4
      test/fixtures/markdown-it/fatal.txt
  4. 13
      test/utils.js

53
lib/common/utils.js

@ -130,36 +130,7 @@ function escapeHtml(str) {
////////////////////////////////////////////////////////////////////////////////
// Matches a single UTF-16 surrogate code unit (high or low). No `g` flag,
// so calling .test() on it carries no lastIndex state between calls.
var SURRORATE_TEST_RE = /[\uD800-\uDFFF]/;
// Same character class with the `g` flag, so String#replace visits every
// surrogate code unit in the string rather than only the first one.
var SURRORATE_SEARCH_RE = /[\uD800-\uDFFF]/g;
// Replacer callback for String#replace over individual surrogate code units.
//
// ch   - the matched code unit (one-character string)
// pos  - offset of the match inside `orig`
// orig - the whole string being processed
//
// Returns `ch` unchanged when it is one half of a well-formed surrogate pair
// (high immediately followed by low), otherwise the replacement character
// U+FFFD.
function replaceBadSurrogate(ch, pos, orig) {
  var REPLACEMENT = '\uFFFD';
  var unit = ch.charCodeAt(0);
  var isHigh = unit >= 0xD800 && unit <= 0xDBFF;
  var neighbour;

  if (isHigh) {
    // A high surrogate must be followed by a low surrogate.
    if (pos >= orig.length - 1) { return REPLACEMENT; }
    neighbour = orig.charCodeAt(pos + 1);
    return (neighbour < 0xDC00 || neighbour > 0xDFFF) ? REPLACEMENT : ch;
  }

  // Anything else is treated as a low surrogate, which must be preceded by
  // a high surrogate.
  if (pos === 0) { return REPLACEMENT; }
  neighbour = orig.charCodeAt(pos - 1);
  return (neighbour < 0xD800 || neighbour > 0xDBFF) ? REPLACEMENT : ch;
}
// Returns `str` unchanged when it contains no surrogate code units.
// Otherwise every surrogate code unit is passed through replaceBadSurrogate
// and the string is rebuilt from whatever that callback returns.
function fixBrokenSurrogates(str) {
  return SURRORATE_TEST_RE.test(str)
    ? str.replace(SURRORATE_SEARCH_RE, replaceBadSurrogate)
    : str;
}
////////////////////////////////////////////////////////////////////////////////
var encode = require('mdurl/encode');
// Incoming link can be partially encoded. Convert possible combinations to
// unified form.
@ -171,24 +142,7 @@ function fixBrokenSurrogates(str) {
// - (?) punycode for domain name (but encodeURI seems to work in real world)
//
function normalizeLink(url) {
// Start from the input after it has been passed through replaceEntities
// (defined outside this view).
var normalized = replaceEntities(url);
// We don't care much about the result for malformed URIs,
// but should not throw an exception. If decodeURI throws, the error is
// ignored and `normalized` keeps its pre-decoding value.
try {
normalized = decodeURI(normalized);
} catch (__) {}
// Encoder throws exception on broken surrogate pairs.
// Fix those first.
try {
return encodeURI(fixBrokenSurrogates(normalized));
} catch (__) {
// This should never happen and is left for safety only.
/*istanbul ignore next*/
return '';
}
// NOTE(review): unreachable as written - both the try and the catch above
// return. In this diff view the statement below appears to be the
// replacement for the decodeURI/encodeURI logic above; confirm against the
// actual file before relying on either path.
return encode(replaceEntities(url));
}
////////////////////////////////////////////////////////////////////////////////
@ -305,6 +259,3 @@ exports.isMdAsciiPunct = isMdAsciiPunct;
exports.isPunctChar = isPunctChar;
exports.escapeRE = escapeRE;
exports.normalizeReference = normalizeReference;
// for testing only
exports.fixBrokenSurrogates = fixBrokenSurrogates;

1
package.json

@ -26,6 +26,7 @@
"dependencies": {
"argparse": "~ 1.0.0",
"linkify-it": "~ 0.1.1",
"mdurl": "~ 0.0.1",
"uc.micro": "~ 0.1.0"
},
"devDependencies": {

4
test/fixtures/markdown-it/fatal.txt

@ -10,7 +10,7 @@ Should not throw exception on broken utf-8 sequence in URL [mailformed URI]
.
[foo](%C3)
.
<p><a href="%25C3">foo</a></p>
<p><a href="%C3">foo</a></p>
.
@ -38,4 +38,4 @@ Should not hang cdata regexp
foo <![CDATA[ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ]>
.
<p>foo &lt;![CDATA[ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ]&gt;</p>
.
.

13
test/utils.js

@ -48,19 +48,6 @@ describe('Utils', function () {
});
});
it('fixBrokenSurrogates', function () {
  var fix = require('../lib/common/utils').fixBrokenSurrogates;

  // Each entry is [input, expected output]; checked in order.
  var cases = [
    // Bad: lone surrogates at either end of the string
    [ '\uD800foo', '\uFFFDfoo' ],
    [ 'foo\uD800', 'foo\uFFFD' ],
    [ '\uDC00foo', '\uFFFDfoo' ],
    [ 'foo\uDC00', 'foo\uFFFD' ],
    // Good: a well-formed pair is left untouched
    [ '\uD800\uDC00', '\uD800\uDC00' ]
  ];

  cases.forEach(function (pair) {
    assert.strictEqual(fix(pair[0]), pair[1]);
  });
});
it('normalizeLink', function () {
var normalizeLink = require('../lib/common/utils').normalizeLink;

Loading…
Cancel
Save