From b05bfd6e8b379025359eff8f18987d1c262ecda9 Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Tue, 10 Mar 2015 18:23:37 +0300 Subject: [PATCH] Punycode domain names only in known schemas In URLs like `skype:XXX`, this prevents XXX from being parsed as a domain name and encoded with punycode. --- lib/index.js | 32 +++++++++++++++++++------ test/fixtures/markdown-it/normalize.txt | 16 +++++++++++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/lib/index.js b/lib/index.js index 6a0310e..7e1d237 100644 --- a/lib/index.js +++ b/lib/index.js @@ -21,7 +21,7 @@ var config = { }; -var BAD_PROTOCOLS = [ 'vbscript', 'javascript', 'file' ]; +var BAD_PROTOCOLS = [ 'vbscript', 'javascript', 'file' ]; function validateLink(url) { // url should be normalized at this point, and existing entities are decoded @@ -34,13 +34,23 @@ function validateLink(url) { return true; } +var RECODE_HOSTNAME_FOR = [ 'http:', 'https:', 'mailto:' ]; + function normalizeLink(url) { var parsed = mdurl.parse(url, true); if (parsed.hostname) { - try { - parsed.hostname = punycode.toASCII(parsed.hostname); - } catch(er) {} + // Encode hostnames in urls like: + // `http://host/`, `https://host/`, `mailto:user@host`, `//host/` + // + // We don't encode unknown schemas, because it's likely that we encode + // something we shouldn't (e.g. `skype:name` treated as `skype:host`) + // + if (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0) { + try { + parsed.hostname = punycode.toASCII(parsed.hostname); + } catch(er) {} + } } return mdurl.encode(mdurl.format(parsed)); @@ -50,9 +60,17 @@ function normalizeLinkText(url) { var parsed = mdurl.parse(url, true); if (parsed.hostname) { - try { - parsed.hostname = punycode.toUnicode(parsed.hostname); - } catch(er) {} + // Decode hostnames in urls like: + // `http://host/`, `https://host/`, `mailto:user@host`, `//host/` + // + // We don't decode unknown schemas, because it's likely that we decode + // something we shouldn't (e.g. 
`skype:name` treated as `skype:host`) + // + if (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0) { + try { + parsed.hostname = punycode.toUnicode(parsed.hostname); + } catch(er) {} + } } return mdurl.decode(mdurl.format(parsed)); diff --git a/test/fixtures/markdown-it/normalize.txt b/test/fixtures/markdown-it/normalize.txt index 71e1ad8..c59165d 100644 --- a/test/fixtures/markdown-it/normalize.txt +++ b/test/fixtures/markdown-it/normalize.txt @@ -43,6 +43,22 @@ Invalid punycode (non-ascii):

http://xn--γ.com/

. +Two slashes should start a domain: + +. +[](//☃.net/) +. +

+. + +Don't encode domains in unknown schemas: + +. +[](skype:γγγ) +. +

+. + Should auto-add protocol to autolinks: .