From 3264f172913ca4b8372112388057dd5bf8fb0516 Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Mon, 2 Mar 2015 19:20:58 +0300 Subject: [PATCH] Use mdurl module instead of decodeURI+encodeURI --- lib/common/utils.js | 53 ++--------------------------- package.json | 1 + test/fixtures/markdown-it/fatal.txt | 4 +-- test/utils.js | 13 ------- 4 files changed, 5 insertions(+), 66 deletions(-) diff --git a/lib/common/utils.js b/lib/common/utils.js index 20bcca6..2faa6a1 100644 --- a/lib/common/utils.js +++ b/lib/common/utils.js @@ -130,36 +130,7 @@ function escapeHtml(str) { //////////////////////////////////////////////////////////////////////////////// -var SURRORATE_TEST_RE = /[\uD800-\uDFFF]/; -var SURRORATE_SEARCH_RE = /[\uD800-\uDFFF]/g; - -function replaceBadSurrogate(ch, pos, orig) { - var code = ch.charCodeAt(0); - - if (code >= 0xD800 && code <= 0xDBFF) { - // high surrogate - if (pos >= orig.length - 1) { return '\uFFFD'; } - code = orig.charCodeAt(pos + 1); - if (code < 0xDC00 || code > 0xDFFF) { return '\uFFFD'; } - - return ch; - } - - // low surrogate - if (pos === 0) { return '\uFFFD'; } - code = orig.charCodeAt(pos - 1); - if (code < 0xD800 || code > 0xDBFF) { return '\uFFFD'; } - return ch; -} - -function fixBrokenSurrogates(str) { - if (!SURRORATE_TEST_RE.test(str)) { return str; } - - return str.replace(SURRORATE_SEARCH_RE, replaceBadSurrogate); -} - -//////////////////////////////////////////////////////////////////////////////// - +var encode = require('mdurl/encode'); // Incoming link can be partially encoded. Convert possible combinations to // unified form. @@ -171,24 +142,7 @@ function fixBrokenSurrogates(str) { // - (?) punicode for domain mame (but encodeURI seems to work in real world) // function normalizeLink(url) { - var normalized = replaceEntities(url); - - // We don't care much about result of mailformed URIs, - // but shoud not throw exception. - try { - normalized = decodeURI(normalized); - } catch (__) {} - - // Encoder throws exception on broken surrogate pairs. - // Fix those first. - - try { - return encodeURI(fixBrokenSurrogates(normalized)); - } catch (__) { - // This should never happen and left for safety only. - /*istanbul ignore next*/ - return ''; - } + return encode(replaceEntities(url)); } //////////////////////////////////////////////////////////////////////////////// @@ -305,6 +259,3 @@ exports.isMdAsciiPunct = isMdAsciiPunct; exports.isPunctChar = isPunctChar; exports.escapeRE = escapeRE; exports.normalizeReference = normalizeReference; - -// for testing only -exports.fixBrokenSurrogates = fixBrokenSurrogates; diff --git a/package.json b/package.json index 5419613..44f1e38 100644 --- a/package.json +++ b/package.json @@ -26,6 +26,7 @@ "dependencies": { "argparse": "~ 1.0.0", "linkify-it": "~ 0.1.1", + "mdurl": "~ 0.0.1", "uc.micro": "~ 0.1.0" }, "devDependencies": { diff --git a/test/fixtures/markdown-it/fatal.txt b/test/fixtures/markdown-it/fatal.txt index 1ce843b..dfeeb2e 100644 --- a/test/fixtures/markdown-it/fatal.txt +++ b/test/fixtures/markdown-it/fatal.txt @@ -10,7 +10,7 @@ Should not throw exception on broken utf-8 sequence in URL [mailformed URI] . [foo](%C3) . -

foo

+

foo

. @@ -38,4 +38,4 @@ Should not hang cdata regexp foo .

foo <![CDATA[ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ]>

-. \ No newline at end of file +. diff --git a/test/utils.js b/test/utils.js index c3e6f0a..528f30d 100644 --- a/test/utils.js +++ b/test/utils.js @@ -48,19 +48,6 @@ describe('Utils', function () { }); }); - it('fixBrokenSurrogates', function () { - var fixBrokenSurrogates = require('../lib/common/utils').fixBrokenSurrogates; - - // Bad - assert.strictEqual(fixBrokenSurrogates('\uD800foo'), '\uFFFDfoo'); - assert.strictEqual(fixBrokenSurrogates('foo\uD800'), 'foo\uFFFD'); - assert.strictEqual(fixBrokenSurrogates('\uDC00foo'), '\uFFFDfoo'); - assert.strictEqual(fixBrokenSurrogates('foo\uDC00'), 'foo\uFFFD'); - - // Good - assert.strictEqual(fixBrokenSurrogates('\uD800\uDC00'), '\uD800\uDC00'); - }); - it('normalizeLink', function () { var normalizeLink = require('../lib/common/utils').normalizeLink;