Browse Source

Add configurable url normalizers

- md.normalizeLink
 - md.normalizeLinkText
pull/82/head
Alex Kocharin 9 years ago
parent
commit
77e8b6cad0
  1. 18
      lib/common/utils.js
  2. 5
      lib/helpers/parse_link_destination.js
  3. 41
      lib/index.js
  4. 6
      lib/rules_block/reference.js
  5. 11
      lib/rules_core/linkify.js
  6. 14
      lib/rules_inline/autolink.js
  7. 14
      lib/rules_inline/image.js
  8. 14
      lib/rules_inline/link.js
  9. 2
      test/fixtures/markdown-it/commonmark_extras.txt
  10. 11
      test/utils.js

18
lib/common/utils.js

@ -140,23 +140,6 @@ function escapeHtml(str) {
////////////////////////////////////////////////////////////////////////////////
var encode = require('mdurl/encode');
// Incoming link can be partially encoded. Convert possible combinations to
// unified form.
//
// TODO: Rewrite it. Should use:
//
// - encodeURIComponent for query
// - encodeURI for path
// - (?) punycode for domain name (but encodeURI seems to work in real world)
//
// Bring a (possibly partially encoded) link to its unified form by handing
// it to `encode`, the helper loaded from 'mdurl/encode'.
function normalizeLink(url) {
  var unified = encode(url);

  return unified;
}
////////////////////////////////////////////////////////////////////////////////
var REGEXP_ESCAPE_RE = /[.?*+^$[\]\\(){}|-]/g;
function escapeRE (str) {
@ -272,7 +255,6 @@ exports.fromCodePoint = fromCodePoint;
exports.replaceEntities = replaceEntities;
exports.escapeHtml = escapeHtml;
exports.arrayReplaceAt = arrayReplaceAt;
exports.normalizeLink = normalizeLink;
exports.isWhiteSpace = isWhiteSpace;
exports.isMdAsciiPunct = isMdAsciiPunct;
exports.isPunctChar = isPunctChar;

5
lib/helpers/parse_link_destination.js

@ -3,7 +3,6 @@
'use strict';
var normalizeLink = require('../common/utils').normalizeLink;
var unescapeAll = require('../common/utils').unescapeAll;
@ -25,7 +24,7 @@ module.exports = function parseLinkDestination(str, pos, max) {
if (code === 0x0A /* \n */) { return result; }
if (code === 0x3E /* > */) {
result.pos = pos + 1;
result.str = normalizeLink(unescapeAll(str.slice(start + 1, pos)));
result.str = unescapeAll(str.slice(start + 1, pos));
result.ok = true;
return result;
}
@ -72,7 +71,7 @@ module.exports = function parseLinkDestination(str, pos, max) {
if (start === pos) { return result; }
result.str = normalizeLink(unescapeAll(str.slice(start, pos)));
result.str = unescapeAll(str.slice(start, pos));
result.lines = lines;
result.pos = pos;
result.ok = true;

41
lib/index.js

@ -10,6 +10,8 @@ var ParserCore = require('./parser_core');
var ParserBlock = require('./parser_block');
var ParserInline = require('./parser_inline');
var LinkifyIt = require('linkify-it');
var mdurl = require('mdurl');
var punycode = require('punycode');
var config = {
@ -34,6 +36,30 @@ function validateLink(url) {
return true;
}
/**
 * Convert a link url to its machine-readable form.
 *
 * The url is split with `mdurl.parse`; when a hostname is present it is
 * converted to punycode, then the url is re-assembled with `mdurl.format`
 * and passed through `mdurl.encode`.
 *
 * @param {String} url - link destination to normalize.
 * @returns {String} normalized url.
 */
function normalizeLink(url) {
  var parsed = mdurl.parse(url, true);

  if (parsed.hostname) {
    try {
      // The punycode API is spelled `toASCII`. The previous `toAscii` call
      // always threw a TypeError that the catch below swallowed, so the
      // hostname was never converted.
      parsed.hostname = punycode.toASCII(parsed.hostname);
    } catch (er) {
      // Best effort: a hostname punycode cannot convert is kept unchanged.
    }
  }

  return mdurl.encode(mdurl.format(parsed));
}
/**
 * Convert a link url to the form shown as link text.
 *
 * The url is split with `mdurl.parse`; a present hostname is passed through
 * `punycode.toUnicode`, and the re-assembled url goes through `mdurl.decode`.
 *
 * @param {String} url - link url to convert.
 * @returns {String} url for display.
 */
function normalizeLinkText(url) {
  var parts = mdurl.parse(url, true);
  var host = parts.hostname;

  if (host) {
    try {
      parts.hostname = punycode.toUnicode(host);
    } catch (er) {}
  }

  var rebuilt = mdurl.format(parts);

  return mdurl.decode(rebuilt);
}
/**
* class MarkdownIt
@ -234,6 +260,21 @@ function MarkdownIt(presetName, options) {
**/
this.validateLink = validateLink;
/**
* MarkdownIt#normalizeLink(url) -> String
*
* Function used to encode link url to a machine-readable format,
* which includes url-encoding, punycode, etc.
*/
this.normalizeLink = normalizeLink;
/**
* MarkdownIt#normalizeLinkText(url) -> String
*
* Function used to decode link url to a human-readable format.
*/
this.normalizeLinkText = normalizeLinkText;
// Expose utils & helpers for easy access from plugins

6
lib/rules_block/reference.js

@ -100,8 +100,10 @@ module.exports = function reference(state, startLine, _endLine, silent) {
// ^^^^^^^^^^^ parse this
res = parseLinkDestination(str, pos, max);
if (!res.ok) { return false; }
if (!state.md.validateLink(res.str)) { return false; }
href = res.str;
href = state.md.normalizeLink(res.str);
if (!state.md.validateLink(href)) { return false; }
pos = res.pos;
lines += res.lines;

11
lib/rules_core/linkify.js

@ -6,7 +6,6 @@
var arrayReplaceAt = require('../common/utils').arrayReplaceAt;
var normalizeLink = require('../common/utils').normalizeLink;
function isLinkOpen(str) {
@ -18,7 +17,7 @@ function isLinkClose(str) {
module.exports = function linkify(state) {
var i, j, l, tokens, token, currentToken, nodes, ln, text, pos, lastPos, level, htmlLinkLevel,
var i, j, l, tokens, token, currentToken, nodes, ln, text, pos, lastPos, level, htmlLinkLevel, url, fullUrl,
blockTokens = state.tokens,
links;
@ -71,7 +70,9 @@ module.exports = function linkify(state) {
for (ln = 0; ln < links.length; ln++) {
if (!state.md.validateLink(links[ln].url)) { continue; }
url = links[ln].url;
fullUrl = state.md.normalizeLink(url);
if (!state.md.validateLink(fullUrl)) { continue; }
pos = links[ln].index;
@ -83,12 +84,12 @@ module.exports = function linkify(state) {
}
token = new state.Token('link_open', 'a', 1);
token.attrs = [ [ 'href', normalizeLink(links[ln].url) ] ];
token.attrs = [ [ 'href', fullUrl ] ];
token.level = level++;
nodes.push(token);
token = new state.Token('text', '', 0);
token.content = links[ln].text;
token.content = state.md.normalizeLinkText(links[ln].text);
token.level = level;
nodes.push(token);

14
lib/rules_inline/autolink.js

@ -2,8 +2,7 @@
'use strict';
var url_schemas = require('../common/url_schemas');
var normalizeLink = require('../common/utils').normalizeLink;
var url_schemas = require('../common/url_schemas');
/*eslint max-len:0*/
@ -27,15 +26,15 @@ module.exports = function autolink(state, silent) {
if (url_schemas.indexOf(linkMatch[1].toLowerCase()) < 0) { return false; }
url = linkMatch[0].slice(1, -1);
fullUrl = normalizeLink(url);
if (!state.md.validateLink(url)) { return false; }
fullUrl = state.md.normalizeLink(url);
if (!state.md.validateLink(fullUrl)) { return false; }
if (!silent) {
token = state.push('link_open', 'a', 1);
token.attrs = [ [ 'href', fullUrl ] ];
token = state.push('text', '', 0);
token.content = url;
token.content = state.md.normalizeLinkText(url);
token = state.push('link_close', 'a', -1);
}
@ -48,8 +47,7 @@ module.exports = function autolink(state, silent) {
emailMatch = tail.match(EMAIL_RE);
url = emailMatch[0].slice(1, -1);
fullUrl = normalizeLink('mailto:' + url);
fullUrl = state.md.normalizeLink('mailto:' + url);
if (!state.md.validateLink(fullUrl)) { return false; }
if (!silent) {
@ -57,7 +55,7 @@ module.exports = function autolink(state, silent) {
token.attrs = [ [ 'href', fullUrl ] ];
token = state.push('text', '', 0);
token.content = url;
token.content = state.md.normalizeLinkText(url);
token = state.push('link_close', 'a', -1);
}

14
lib/rules_inline/image.js

@ -11,7 +11,6 @@ var normalizeReference = require('../common/utils').normalizeReference;
module.exports = function image(state, silent) {
var attrs,
code,
href,
label,
labelEnd,
labelStart,
@ -22,6 +21,7 @@ module.exports = function image(state, silent) {
token,
tokens,
start,
href = '',
oldPos = state.pos,
max = state.posMax;
@ -53,11 +53,13 @@ module.exports = function image(state, silent) {
// ^^^^^^ parsing link destination
start = pos;
res = parseLinkDestination(state.src, pos, state.posMax);
if (res.ok && state.md.validateLink(res.str)) {
href = res.str;
pos = res.pos;
} else {
href = '';
if (res.ok) {
href = state.md.normalizeLink(res.str);
if (state.md.validateLink(href)) {
pos = res.pos;
} else {
href = '';
}
}
// [link]( <href> "title" )

14
lib/rules_inline/link.js

@ -11,7 +11,6 @@ var normalizeReference = require('../common/utils').normalizeReference;
module.exports = function link(state, silent) {
var attrs,
code,
href,
label,
labelEnd,
labelStart,
@ -20,6 +19,7 @@ module.exports = function link(state, silent) {
ref,
title,
token,
href = '',
oldPos = state.pos,
max = state.posMax,
start = state.pos;
@ -51,11 +51,13 @@ module.exports = function link(state, silent) {
// ^^^^^^ parsing link destination
start = pos;
res = parseLinkDestination(state.src, pos, state.posMax);
if (res.ok && state.md.validateLink(res.str)) {
href = res.str;
pos = res.pos;
} else {
href = '';
if (res.ok) {
href = state.md.normalizeLink(res.str);
if (state.md.validateLink(href)) {
pos = res.pos;
} else {
href = '';
}
}
// [link]( <href> "title" )

2
test/fixtures/markdown-it/commonmark_extras.txt

@ -40,7 +40,7 @@ Normalize link destination, but not text inside it:
.
<http://example.com/α%CE%B2γ%CE%B4>
.
<p><a href="http://example.com/%CE%B1%CE%B2%CE%B3%CE%B4">http://example.com/α%CE%B2γ%CE%B4</a></p>
<p><a href="http://example.com/%CE%B1%CE%B2%CE%B3%CE%B4">http://example.com/αβγδ</a></p>
.

11
test/utils.js

@ -48,17 +48,6 @@ describe('Utils', function () {
});
});
it('normalizeLink', function () {
var normalizeLink = require('../lib/common/utils').normalizeLink;
// broken surrogates sequence (encodeURI should not throw)
assert.strictEqual(normalizeLink('/\uD800foo'), '/%EF%BF%BDfoo');
assert.strictEqual(normalizeLink('/\uD900foo'), '/%EF%BF%BDfoo');
// broken utf-8 encoding (catch decodeURI exception)
assert.strictEqual(normalizeLink('\u0025test'), '%25test');
});
it('escapeRE', function () {
var escapeRE = require('../lib/common/utils').escapeRE;

Loading…
Cancel
Save