Use mdurl module instead of decodeURI+encodeURI

10 years ago · 3264f17291
4 changed files with 5 additions and 66 deletions
--- a/lib/common/utils.js
+++ b/lib/common/utils.js
@ -130,36 +130,7 @@ function escapeHtml(str) {

 ////////////////////////////////////////////////////////////////////////////////

-var SURRORATE_TEST_RE   = /[\uD800-\uDFFF]/;
-var SURRORATE_SEARCH_RE = /[\uD800-\uDFFF]/g;
-
-function replaceBadSurrogate(ch, pos, orig) {
-  var code = ch.charCodeAt(0);
-
-  if (code >= 0xD800 && code <= 0xDBFF) {
-    // high surrogate
-    if (pos >= orig.length - 1) { return '\uFFFD'; }
-    code = orig.charCodeAt(pos + 1);
-    if (code < 0xDC00 || code > 0xDFFF) { return '\uFFFD'; }
-
-    return ch;
-  }
-
-  // low surrogate
-  if (pos === 0) { return '\uFFFD'; }
-  code = orig.charCodeAt(pos - 1);
-  if (code < 0xD800 || code > 0xDBFF) { return '\uFFFD'; }
-  return ch;
-}
-
-function fixBrokenSurrogates(str) {
-  if (!SURRORATE_TEST_RE.test(str)) { return str; }
-
-  return str.replace(SURRORATE_SEARCH_RE, replaceBadSurrogate);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
+var encode = require('mdurl/encode');

 // Incoming link can be partially encoded. Convert possible combinations to
 // unified form.
@ -171,24 +142,7 @@ function fixBrokenSurrogates(str) {
 // - (?) punycode for domain name (but encodeURI seems to work in real world)
 //
 function normalizeLink(url) {
-  var normalized = replaceEntities(url);
-
-  // We don't care much about result of mailformed URIs,
-  // but shoud not throw exception.
-  try {
-    normalized = decodeURI(normalized);
-  } catch (__) {}
-
-  // Encoder throws exception on broken surrogate pairs.
-  // Fix those first.
-
-  try {
-    return encodeURI(fixBrokenSurrogates(normalized));
-  } catch (__) {
-    // This should never happen and left for safety only.
-    /*istanbul ignore next*/
-    return '';
-  }
+  return encode(replaceEntities(url));
 }

 ////////////////////////////////////////////////////////////////////////////////
@ -305,6 +259,3 @@ exports.isMdAsciiPunct      = isMdAsciiPunct;
 exports.isPunctChar         = isPunctChar;
 exports.escapeRE            = escapeRE;
 exports.normalizeReference  = normalizeReference;
-
-// for testing only
-exports.fixBrokenSurrogates = fixBrokenSurrogates;
--- a/package.json
+++ b/package.json
@ -26,6 +26,7 @@
  "dependencies": {
    "argparse": "~ 1.0.0",
    "linkify-it": "~ 0.1.1",
+    "mdurl": "~ 0.0.1",
    "uc.micro": "~ 0.1.0"
  },
  "devDependencies": {
--- a/test/fixtures/markdown-it/fatal.txt
+++ b/test/fixtures/markdown-it/fatal.txt
@ -10,7 +10,7 @@ Should not throw exception on broken utf-8 sequence in URL [mailformed URI]
 .
 [foo](%C3)
 .
-<p><a href="%25C3">foo</a></p>
+<p><a href="%C3">foo</a></p>
 .


@ -38,4 +38,4 @@ Should not hang cdata regexp
 foo <![CDATA[ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ]>
 .
 <p>foo &lt;![CDATA[ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ]&gt;</p>
-.
+.
--- a/test/utils.js
+++ b/test/utils.js
@ -48,19 +48,6 @@ describe('Utils', function () {
    });
  });

-  it('fixBrokenSurrogates', function () {
-    var fixBrokenSurrogates = require('../lib/common/utils').fixBrokenSurrogates;
-
-    // Bad
-    assert.strictEqual(fixBrokenSurrogates('\uD800foo'), '\uFFFDfoo');
-    assert.strictEqual(fixBrokenSurrogates('foo\uD800'), 'foo\uFFFD');
-    assert.strictEqual(fixBrokenSurrogates('\uDC00foo'), '\uFFFDfoo');
-    assert.strictEqual(fixBrokenSurrogates('foo\uDC00'), 'foo\uFFFD');
-
-    // Good
-    assert.strictEqual(fixBrokenSurrogates('\uD800\uDC00'), '\uD800\uDC00');
-  });
-
  it('normalizeLink', function () {
    var normalizeLink = require('../lib/common/utils').normalizeLink;