Browse Source

Add linkifier rule to inline chain for full links

prevents emphasis from appearing in `http://example.org/foo._bar_.baz`
pull/864/head
Alex Kocharin 2 years ago
parent
commit
6b58ec4245
  1. 2
      CHANGELOG.md
  2. 1
      lib/parser_inline.js
  3. 11
      lib/rules_core/linkify.js
  4. 11
      lib/rules_inline/html_inline.js
  5. 2
      lib/rules_inline/link.js
  6. 58
      lib/rules_inline/linkify.js
  7. 4
      lib/rules_inline/state_inline.js
  8. 2
      package.json
  9. 112
      test/fixtures/markdown-it/linkify.txt

2
CHANGELOG.md

@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Smartquotes, typographic replacements and plain text links can now be escaped
with backslash (e.g. `\(c)` or `google\.com` are no longer replaced).
- Fixed collision of emphasis and linkifier (so `http://example.org/foo._bar_-_baz`
is now a single link, not emphasized). Emails and fuzzy links are not affected by this.
## [12.3.2] - 2022-01-08

1
lib/parser_inline.js

@ -14,6 +14,7 @@ var Ruler = require('./ruler');
var _rules = [
[ 'text', require('./rules_inline/text') ],
[ 'linkify', require('./rules_inline/linkify') ],
[ 'newline', require('./rules_inline/newline') ],
[ 'escape', require('./rules_inline/escape') ],
[ 'backticks', require('./rules_inline/backticks') ],

11
lib/rules_core/linkify.js

@ -69,8 +69,17 @@ module.exports = function linkify(state) {
level = currentToken.level;
lastPos = 0;
for (ln = 0; ln < links.length; ln++) {
// forbid escape sequence at the start of the string,
// this avoids http\://example.com/ from being linkified as
// http:<a href="//example.com/">//example.com/</a>
if (links.length > 0 &&
links[0].index === 0 &&
i > 0 &&
tokens[i - 1].type === 'text_special') {
links = links.slice(1);
}
for (ln = 0; ln < links.length; ln++) {
url = links[ln].url;
fullUrl = state.md.normalizeLink(url);
if (!state.md.validateLink(fullUrl)) { continue; }

11
lib/rules_inline/html_inline.js

@ -6,6 +6,14 @@
var HTML_TAG_RE = require('../common/html_re').HTML_TAG_RE;
/**
 * Check whether an inline HTML fragment starts with an opening anchor tag.
 *
 * The fragment matches when it begins with `<a` (in any letter case)
 * immediately followed by `>` or a whitespace character, so tags such as
 * `<abbr>` are not mistaken for links.
 *
 * @param {string} str - raw HTML tag text
 * @returns {boolean} true if `str` begins with an `<a ...>` / `<a>` tag
 */
function isLinkOpen(str) {
  var openingAnchor = /^<a[>\s]/i;

  return openingAnchor.exec(str) !== null;
}
/**
 * Check whether an inline HTML fragment starts with a closing anchor tag.
 *
 * The fragment matches when it begins with `</a` (in any letter case),
 * optionally followed by whitespace, and then `>`.
 *
 * @param {string} str - raw HTML tag text
 * @returns {boolean} true if `str` begins with a `</a>` tag
 */
function isLinkClose(str) {
  var closingAnchor = /^<\/a\s*>/i;

  return closingAnchor.exec(str) !== null;
}
function isLetter(ch) {
/*eslint no-bitwise:0*/
var lc = ch | 0x20; // to lower case
@ -41,6 +49,9 @@ module.exports = function html_inline(state, silent) {
if (!silent) {
token = state.push('html_inline', '', 0);
token.content = state.src.slice(pos, pos + match[0].length);
if (isLinkOpen(token.content)) state.linkLevel++;
if (isLinkClose(token.content)) state.linkLevel--;
}
state.pos += match[0].length;
return true;

2
lib/rules_inline/link.js

@ -137,7 +137,9 @@ module.exports = function link(state, silent) {
attrs.push([ 'title', title ]);
}
state.linkLevel++;
state.md.inline.tokenize(state);
state.linkLevel--;
token = state.push('link_close', 'a', -1);
}

58
lib/rules_inline/linkify.js

@ -0,0 +1,58 @@
// Process links like https://example.org/
'use strict';
// RFC3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
var SCHEME_RE = /(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$/i;
module.exports = function linkify(state, silent) {
var pos, max, match, proto, link, url, fullUrl, token;
if (!state.md.options.linkify) return false;
if (state.linkLevel > 0) return false;
pos = state.pos;
max = state.posMax;
if (pos + 3 > max) return false;
if (state.src.charCodeAt(pos) !== 0x3A/* : */) return false;
if (state.src.charCodeAt(pos + 1) !== 0x2F/* / */) return false;
if (state.src.charCodeAt(pos + 2) !== 0x2F/* / */) return false;
match = state.pending.match(SCHEME_RE);
if (!match) return false;
proto = match[1];
link = state.md.linkify.matchAtStart(state.src.slice(pos - proto.length));
if (!link) return false;
url = link.url;
// disallow '*' at the end of the link (conflicts with emphasis)
url = url.replace(/\*+$/, '');
fullUrl = state.md.normalizeLink(url);
if (!state.md.validateLink(fullUrl)) return false;
if (!silent) {
state.pending = state.pending.slice(0, -proto.length);
token = state.push('link_open', 'a', 1);
token.attrs = [ [ 'href', fullUrl ] ];
token.markup = 'linkify';
token.info = 'auto';
token = state.push('text', '', 0);
token.content = state.md.normalizeLinkText(url);
token = state.push('link_close', 'a', -1);
token.markup = 'linkify';
token.info = 'auto';
}
state.pos += url.length - proto.length;
return true;
};

4
lib/rules_inline/state_inline.js

@ -35,6 +35,10 @@ function StateInline(src, md, env, outTokens) {
// backtick length => last seen position
this.backticks = {};
this.backticksScanned = false;
// Counter used to disable inline linkify-it execution
// inside <a> and markdown links
this.linkLevel = 0;
}

2
package.json

@ -39,7 +39,7 @@
"dependencies": {
"argparse": "^2.0.1",
"entities": "~3.0.1",
"linkify-it": "^3.0.1",
"linkify-it": "markdown-it/linkify-it",
"mdurl": "^1.0.1",
"uc.micro": "^1.0.5"
},

112
test/fixtures/markdown-it/linkify.txt

@ -30,6 +30,86 @@ don't touch text in html <a> tags
.
entities inside raw links
.
https://example.com/foo&amp;bar
.
<p><a href="https://example.com/foo&amp;amp;bar">https://example.com/foo&amp;amp;bar</a></p>
.
emphasis inside raw links (asterisk, can happen in links with params)
.
https://example.com/foo*bar*baz
.
<p><a href="https://example.com/foo*bar*baz">https://example.com/foo*bar*baz</a></p>
.
emphasis inside raw links (underscore)
.
http://example.org/foo._bar_-_baz
.
<p><a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
.
backticks inside raw links
.
https://example.com/foo`bar`baz
.
<p><a href="https://example.com/foo%60bar%60baz">https://example.com/foo`bar`baz</a></p>
.
links inside raw links
.
https://example.com/foo[123](456)bar
.
<p><a href="https://example.com/foo%5B123%5D(456)bar">https://example.com/foo[123](456)bar</a></p>
.
escapes not allowed at the start
.
\https://example.com
.
<p>\https://example.com</p>
.
escapes not allowed at colon
.
https\://example.com
.
<p>https://example.com</p>
.
escapes not allowed at slashes
.
https:\//aa.org https://bb.org
.
<p>https://aa.org <a href="https://bb.org">https://bb.org</a></p>
.
fuzzy link shouldn't match cc.org
.
https:/\/cc.org
.
<p>https://cc.org</p>
.
bold links (exclude markup of pairs from link tail)
.
**http://example.com/foobar**
.
<p><strong><a href="http://example.com/foobar">http://example.com/foobar</a></strong></p>
.
match links without protocol
.
www.example.org
@ -55,3 +135,35 @@ http://example.com/(c)
.
<p><a href="http://example.com/(c)">http://example.com/(c)</a></p>
.
coverage, prefix not valid
.
http:/example.com/
.
<p>http:/example.com/</p>
.
coverage, negative link level
.
</a>[https://example.com](https://example.com)
.
<p></a><a href="https://example.com"><a href="https://example.com">https://example.com</a></a></p>
.
emphasis with '*', real link:
.
http://cdecl.ridiculousfish.com/?q=int+%28*f%29+%28float+*%29%3B
.
<p><a href="http://cdecl.ridiculousfish.com/?q=int+%28*f%29+%28float+*%29%3B">http://cdecl.ridiculousfish.com/?q=int+(*f)+(float+*)%3B</a></p>
.
emphasis with '_', real link:
.
https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf
.
<p><a href="https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf">https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf</a></p>
.

Loading…
Cancel
Save