Browse Source

Add configurable url normalizers

- md.normalizeLink
 - md.normalizeLinkText
pull/82/head
Alex Kocharin 10 years ago
parent
commit
77e8b6cad0
  1. 18
      lib/common/utils.js
  2. 5
      lib/helpers/parse_link_destination.js
  3. 41
      lib/index.js
  4. 6
      lib/rules_block/reference.js
  5. 11
      lib/rules_core/linkify.js
  6. 12
      lib/rules_inline/autolink.js
  7. 8
      lib/rules_inline/image.js
  8. 8
      lib/rules_inline/link.js
  9. 2
      test/fixtures/markdown-it/commonmark_extras.txt
  10. 11
      test/utils.js

18
lib/common/utils.js

@ -140,23 +140,6 @@ function escapeHtml(str) {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
var encode = require('mdurl/encode');
// Incoming link can be partially encoded. Convert possible combinations to
// unified form.
//
// TODO: Rewrite it. Should use:
//
// - encodeURIComponent for query
// - encodeURI for path
// - (?) punycode for domain name (but encodeURI seems to work in real world)
//
/**
 * Bring a (possibly partially encoded) link url to a unified encoded form.
 *
 * Thin wrapper: the whole job is delegated to the module-level `encode`.
 *
 * @param {String} url - link destination
 * @returns {String} result of `encode(url)`
 */
function normalizeLink(url) {
  var encodedUrl = encode(url);

  return encodedUrl;
}
////////////////////////////////////////////////////////////////////////////////
var REGEXP_ESCAPE_RE = /[.?*+^$[\]\\(){}|-]/g; var REGEXP_ESCAPE_RE = /[.?*+^$[\]\\(){}|-]/g;
function escapeRE (str) { function escapeRE (str) {
@ -272,7 +255,6 @@ exports.fromCodePoint = fromCodePoint;
exports.replaceEntities = replaceEntities; exports.replaceEntities = replaceEntities;
exports.escapeHtml = escapeHtml; exports.escapeHtml = escapeHtml;
exports.arrayReplaceAt = arrayReplaceAt; exports.arrayReplaceAt = arrayReplaceAt;
exports.normalizeLink = normalizeLink;
exports.isWhiteSpace = isWhiteSpace; exports.isWhiteSpace = isWhiteSpace;
exports.isMdAsciiPunct = isMdAsciiPunct; exports.isMdAsciiPunct = isMdAsciiPunct;
exports.isPunctChar = isPunctChar; exports.isPunctChar = isPunctChar;

5
lib/helpers/parse_link_destination.js

@ -3,7 +3,6 @@
'use strict'; 'use strict';
var normalizeLink = require('../common/utils').normalizeLink;
var unescapeAll = require('../common/utils').unescapeAll; var unescapeAll = require('../common/utils').unescapeAll;
@ -25,7 +24,7 @@ module.exports = function parseLinkDestination(str, pos, max) {
if (code === 0x0A /* \n */) { return result; } if (code === 0x0A /* \n */) { return result; }
if (code === 0x3E /* > */) { if (code === 0x3E /* > */) {
result.pos = pos + 1; result.pos = pos + 1;
result.str = normalizeLink(unescapeAll(str.slice(start + 1, pos))); result.str = unescapeAll(str.slice(start + 1, pos));
result.ok = true; result.ok = true;
return result; return result;
} }
@ -72,7 +71,7 @@ module.exports = function parseLinkDestination(str, pos, max) {
if (start === pos) { return result; } if (start === pos) { return result; }
result.str = normalizeLink(unescapeAll(str.slice(start, pos))); result.str = unescapeAll(str.slice(start, pos));
result.lines = lines; result.lines = lines;
result.pos = pos; result.pos = pos;
result.ok = true; result.ok = true;

41
lib/index.js

@ -10,6 +10,8 @@ var ParserCore = require('./parser_core');
var ParserBlock = require('./parser_block'); var ParserBlock = require('./parser_block');
var ParserInline = require('./parser_inline'); var ParserInline = require('./parser_inline');
var LinkifyIt = require('linkify-it'); var LinkifyIt = require('linkify-it');
var mdurl = require('mdurl');
var punycode = require('punycode');
var config = { var config = {
@ -34,6 +36,30 @@ function validateLink(url) {
return true; return true;
} }
/**
 * Encode a link url to its machine-readable form.
 *
 * The url is parsed with `mdurl.parse`; when a hostname is present it is
 * converted to its punycode (ASCII) form, then the url is re-assembled with
 * `mdurl.format` and passed through `mdurl.encode`.
 *
 * @param {String} url - link destination
 * @returns {String} normalized url
 */
function normalizeLink(url) {
  var parsed = mdurl.parse(url, true);

  if (parsed.hostname) {
    try {
      // The punycode API is `toASCII` (there is no `toAscii`); with the wrong
      // name the call throws a TypeError that the catch below would swallow,
      // leaving the hostname unconverted.
      parsed.hostname = punycode.toASCII(parsed.hostname);
    } catch (er) {
      // Best effort: keep the hostname unchanged if it cannot be converted.
    }
  }

  return mdurl.encode(mdurl.format(parsed));
}
/**
 * Decode a link url to a human-readable form.
 *
 * The url is parsed with `mdurl.parse`; a hostname, when present, is turned
 * into its unicode form via `punycode.toUnicode`. The url is then rebuilt
 * with `mdurl.format` and passed through `mdurl.decode`.
 *
 * @param {String} url - link url
 * @returns {String} decoded url
 */
function normalizeLinkText(url) {
  var urlParts = mdurl.parse(url, true),
      host = urlParts.hostname;

  if (host) {
    try {
      urlParts.hostname = punycode.toUnicode(host);
    } catch (er) {
      // Best effort: on failure the hostname is left exactly as parsed.
    }
  }

  var rebuilt = mdurl.format(urlParts);

  return mdurl.decode(rebuilt);
}
/** /**
* class MarkdownIt * class MarkdownIt
@ -234,6 +260,21 @@ function MarkdownIt(presetName, options) {
**/ **/
this.validateLink = validateLink; this.validateLink = validateLink;
/**
* MarkdownIt#normalizeLink(url) -> String
*
* Function used to encode link url to a machine-readable format,
* which includes url-encoding, punycode, etc.
*/
this.normalizeLink = normalizeLink;
/**
* MarkdownIt#normalizeLinkText(url) -> String
*
* Function used to decode link url to a human-readable format.
*/
this.normalizeLinkText = normalizeLinkText;
// Expose utils & helpers for easy access from plugins // Expose utils & helpers for easy access from plugins

6
lib/rules_block/reference.js

@ -100,8 +100,10 @@ module.exports = function reference(state, startLine, _endLine, silent) {
// ^^^^^^^^^^^ parse this // ^^^^^^^^^^^ parse this
res = parseLinkDestination(str, pos, max); res = parseLinkDestination(str, pos, max);
if (!res.ok) { return false; } if (!res.ok) { return false; }
if (!state.md.validateLink(res.str)) { return false; }
href = res.str; href = state.md.normalizeLink(res.str);
if (!state.md.validateLink(href)) { return false; }
pos = res.pos; pos = res.pos;
lines += res.lines; lines += res.lines;

11
lib/rules_core/linkify.js

@ -6,7 +6,6 @@
var arrayReplaceAt = require('../common/utils').arrayReplaceAt; var arrayReplaceAt = require('../common/utils').arrayReplaceAt;
var normalizeLink = require('../common/utils').normalizeLink;
function isLinkOpen(str) { function isLinkOpen(str) {
@ -18,7 +17,7 @@ function isLinkClose(str) {
module.exports = function linkify(state) { module.exports = function linkify(state) {
var i, j, l, tokens, token, currentToken, nodes, ln, text, pos, lastPos, level, htmlLinkLevel, var i, j, l, tokens, token, currentToken, nodes, ln, text, pos, lastPos, level, htmlLinkLevel, url, fullUrl,
blockTokens = state.tokens, blockTokens = state.tokens,
links; links;
@ -71,7 +70,9 @@ module.exports = function linkify(state) {
for (ln = 0; ln < links.length; ln++) { for (ln = 0; ln < links.length; ln++) {
if (!state.md.validateLink(links[ln].url)) { continue; } url = links[ln].url;
fullUrl = state.md.normalizeLink(url);
if (!state.md.validateLink(fullUrl)) { continue; }
pos = links[ln].index; pos = links[ln].index;
@ -83,12 +84,12 @@ module.exports = function linkify(state) {
} }
token = new state.Token('link_open', 'a', 1); token = new state.Token('link_open', 'a', 1);
token.attrs = [ [ 'href', normalizeLink(links[ln].url) ] ]; token.attrs = [ [ 'href', fullUrl ] ];
token.level = level++; token.level = level++;
nodes.push(token); nodes.push(token);
token = new state.Token('text', '', 0); token = new state.Token('text', '', 0);
token.content = links[ln].text; token.content = state.md.normalizeLinkText(links[ln].text);
token.level = level; token.level = level;
nodes.push(token); nodes.push(token);

12
lib/rules_inline/autolink.js

@ -3,7 +3,6 @@
'use strict'; 'use strict';
var url_schemas = require('../common/url_schemas'); var url_schemas = require('../common/url_schemas');
var normalizeLink = require('../common/utils').normalizeLink;
/*eslint max-len:0*/ /*eslint max-len:0*/
@ -27,15 +26,15 @@ module.exports = function autolink(state, silent) {
if (url_schemas.indexOf(linkMatch[1].toLowerCase()) < 0) { return false; } if (url_schemas.indexOf(linkMatch[1].toLowerCase()) < 0) { return false; }
url = linkMatch[0].slice(1, -1); url = linkMatch[0].slice(1, -1);
fullUrl = normalizeLink(url); fullUrl = state.md.normalizeLink(url);
if (!state.md.validateLink(url)) { return false; } if (!state.md.validateLink(fullUrl)) { return false; }
if (!silent) { if (!silent) {
token = state.push('link_open', 'a', 1); token = state.push('link_open', 'a', 1);
token.attrs = [ [ 'href', fullUrl ] ]; token.attrs = [ [ 'href', fullUrl ] ];
token = state.push('text', '', 0); token = state.push('text', '', 0);
token.content = url; token.content = state.md.normalizeLinkText(url);
token = state.push('link_close', 'a', -1); token = state.push('link_close', 'a', -1);
} }
@ -48,8 +47,7 @@ module.exports = function autolink(state, silent) {
emailMatch = tail.match(EMAIL_RE); emailMatch = tail.match(EMAIL_RE);
url = emailMatch[0].slice(1, -1); url = emailMatch[0].slice(1, -1);
fullUrl = state.md.normalizeLink('mailto:' + url);
fullUrl = normalizeLink('mailto:' + url);
if (!state.md.validateLink(fullUrl)) { return false; } if (!state.md.validateLink(fullUrl)) { return false; }
if (!silent) { if (!silent) {
@ -57,7 +55,7 @@ module.exports = function autolink(state, silent) {
token.attrs = [ [ 'href', fullUrl ] ]; token.attrs = [ [ 'href', fullUrl ] ];
token = state.push('text', '', 0); token = state.push('text', '', 0);
token.content = url; token.content = state.md.normalizeLinkText(url);
token = state.push('link_close', 'a', -1); token = state.push('link_close', 'a', -1);
} }

8
lib/rules_inline/image.js

@ -11,7 +11,6 @@ var normalizeReference = require('../common/utils').normalizeReference;
module.exports = function image(state, silent) { module.exports = function image(state, silent) {
var attrs, var attrs,
code, code,
href,
label, label,
labelEnd, labelEnd,
labelStart, labelStart,
@ -22,6 +21,7 @@ module.exports = function image(state, silent) {
token, token,
tokens, tokens,
start, start,
href = '',
oldPos = state.pos, oldPos = state.pos,
max = state.posMax; max = state.posMax;
@ -53,12 +53,14 @@ module.exports = function image(state, silent) {
// ^^^^^^ parsing link destination // ^^^^^^ parsing link destination
start = pos; start = pos;
res = parseLinkDestination(state.src, pos, state.posMax); res = parseLinkDestination(state.src, pos, state.posMax);
if (res.ok && state.md.validateLink(res.str)) { if (res.ok) {
href = res.str; href = state.md.normalizeLink(res.str);
if (state.md.validateLink(href)) {
pos = res.pos; pos = res.pos;
} else { } else {
href = ''; href = '';
} }
}
// [link]( <href> "title" ) // [link]( <href> "title" )
// ^^ skipping these spaces // ^^ skipping these spaces

8
lib/rules_inline/link.js

@ -11,7 +11,6 @@ var normalizeReference = require('../common/utils').normalizeReference;
module.exports = function link(state, silent) { module.exports = function link(state, silent) {
var attrs, var attrs,
code, code,
href,
label, label,
labelEnd, labelEnd,
labelStart, labelStart,
@ -20,6 +19,7 @@ module.exports = function link(state, silent) {
ref, ref,
title, title,
token, token,
href = '',
oldPos = state.pos, oldPos = state.pos,
max = state.posMax, max = state.posMax,
start = state.pos; start = state.pos;
@ -51,12 +51,14 @@ module.exports = function link(state, silent) {
// ^^^^^^ parsing link destination // ^^^^^^ parsing link destination
start = pos; start = pos;
res = parseLinkDestination(state.src, pos, state.posMax); res = parseLinkDestination(state.src, pos, state.posMax);
if (res.ok && state.md.validateLink(res.str)) { if (res.ok) {
href = res.str; href = state.md.normalizeLink(res.str);
if (state.md.validateLink(href)) {
pos = res.pos; pos = res.pos;
} else { } else {
href = ''; href = '';
} }
}
// [link]( <href> "title" ) // [link]( <href> "title" )
// ^^ skipping these spaces // ^^ skipping these spaces

2
test/fixtures/markdown-it/commonmark_extras.txt

@ -40,7 +40,7 @@ Normalize link destination, but not text inside it:
. .
<http://example.com/α%CE%B2γ%CE%B4> <http://example.com/α%CE%B2γ%CE%B4>
. .
<p><a href="http://example.com/%CE%B1%CE%B2%CE%B3%CE%B4">http://example.com/α%CE%B2γ%CE%B4</a></p> <p><a href="http://example.com/%CE%B1%CE%B2%CE%B3%CE%B4">http://example.com/αβγδ</a></p>
. .

11
test/utils.js

@ -48,17 +48,6 @@ describe('Utils', function () {
}); });
}); });
it('normalizeLink', function () {
var normalizeLink = require('../lib/common/utils').normalizeLink;
// broken surrogates sequence (encodeURI should not throw)
assert.strictEqual(normalizeLink('/\uD800foo'), '/%EF%BF%BDfoo');
assert.strictEqual(normalizeLink('/\uD900foo'), '/%EF%BF%BDfoo');
// broken utf-8 encoding (catch decodeURI exception)
assert.strictEqual(normalizeLink('\u0025test'), '%25test');
});
it('escapeRE', function () { it('escapeRE', function () {
var escapeRE = require('../lib/common/utils').escapeRE; var escapeRE = require('../lib/common/utils').escapeRE;

Loading…
Cancel
Save