Markdown parser, done right. 100% CommonMark support, extensions, syntax plugins & high speed
https://markdown-it.github.io/
551 lines
16 KiB
551 lines
16 KiB
// Main parser class
|
|
|
|
'use strict';
|
|
|
|
|
|
var utils = require('./common/utils');
|
|
var helpers = require('./helpers');
|
|
var Renderer = require('./renderer');
|
|
var ParserCore = require('./parser_core');
|
|
var ParserBlock = require('./parser_block');
|
|
var ParserInline = require('./parser_inline');
|
|
var LinkifyIt = require('linkify-it');
|
|
var mdurl = require('mdurl');
|
|
var punycode = require('punycode');
|
|
|
|
|
|
// Built-in presets, keyed by the name accepted by the constructor and by
// `MarkdownIt.configure()`. `default` is quoted because it is a reserved word.
var config = {
  'default': require('./presets/default'),
  zero: require('./presets/zero'),
  commonmark: require('./presets/commonmark')
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// This validator can prohibit more than really needed to prevent XSS. It's a
|
|
// tradeoff to keep code simple and to be secure by default.
|
|
//
|
|
// If you need different setup - override validator method as you wish. Or
|
|
// replace it with dummy function and use external sanitizer.
|
|
//
|
|
|
|
// Schemas that are rejected by default.
var BAD_PROTO_RE = /^(vbscript|javascript|file|data):/;
// The only `data:` payloads that are still allowed: a few embedded image types.
var GOOD_DATA_RE = /^data:image\/(gif|png|jpeg|webp);/;

// Default link validator.
//
// - url (String): link url; should be normalized at this point, with existing
//   entities already decoded.
//
// Returns `false` for urls starting with a blacklisted schema (unless it is a
// whitelisted `data:image/...` url), `true` for everything else.
//
function validateLink(url) {
  // Compare case-insensitively and ignore surrounding whitespace
  var str = url.trim().toLowerCase();

  return !BAD_PROTO_RE.test(str) || GOOD_DATA_RE.test(str);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
// Protocols for which the hostname part of an url is re-encoded.
var RECODE_HOSTNAME_FOR = [ 'http:', 'https:', 'mailto:' ];

// Default url normalizer: converts a link url to its machine-readable form.
//
// - url (String): link url to normalize.
//
// The hostname is converted with `punycode.toASCII()` (when applicable) and
// the re-assembled url is passed through `mdurl.encode()`.
//
function normalizeLink(url) {
  var parsed = mdurl.parse(url, true);

  if (parsed.hostname) {
    // Encode hostnames in urls like:
    // `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    //
    // We don't encode unknown schemas, because it's likely that we encode
    // something we shouldn't (e.g. `skype:name` treated as `skype:host`)
    //
    if (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0) {
      try {
        parsed.hostname = punycode.toASCII(parsed.hostname);
      } catch (er) { /* best effort: keep the hostname as is on failure */ }
    }
  }

  return mdurl.encode(mdurl.format(parsed));
}
|
|
|
|
// Default link text normalizer: converts a link url to its human-readable
// form.
//
// - url (String): link url to convert.
//
// The hostname is converted with `punycode.toUnicode()` (when applicable) and
// the re-assembled url is passed through `mdurl.decode()`.
//
function normalizeLinkText(url) {
  var parsed = mdurl.parse(url, true);

  if (parsed.hostname) {
    // Decode hostnames in urls like:
    // `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    //
    // We don't touch unknown schemas, because it's likely that we would
    // convert something we shouldn't (e.g. `skype:name` treated as
    // `skype:host`)
    //
    if (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0) {
      try {
        parsed.hostname = punycode.toUnicode(parsed.hostname);
      } catch (er) { /* best effort: keep the hostname as is on failure */ }
    }
  }

  return mdurl.decode(mdurl.format(parsed));
}
|
|
|
|
|
|
/**
|
|
* class MarkdownIt
|
|
*
|
|
* Main parser/renderer class.
|
|
*
|
|
* ##### Usage
|
|
*
|
|
* ```javascript
|
|
* // node.js, "classic" way:
|
|
* var MarkdownIt = require('markdown-it'),
|
|
* md = new MarkdownIt();
|
|
* var result = md.render('# markdown-it rulezz!');
|
|
*
|
|
* // node.js, the same, but with sugar:
|
|
* var md = require('markdown-it')();
|
|
* var result = md.render('# markdown-it rulezz!');
|
|
*
|
|
* // browser without AMD, added to "window" on script load
|
|
* // Note, there is no dash.
|
|
* var md = window.markdownit();
|
|
* var result = md.render('# markdown-it rulezz!');
|
|
* ```
|
|
*
|
|
* Single line rendering, without paragraph wrap:
|
|
*
|
|
* ```javascript
|
|
* var md = require('markdown-it')();
|
|
* var result = md.renderInline('__markdown-it__ rulezz!');
|
|
* ```
|
|
**/
|
|
|
|
/**
|
|
* new MarkdownIt([presetName, options])
|
|
* - presetName (String): optional, `commonmark` / `zero`
|
|
* - options (Object)
|
|
*
|
|
* Creates parser instance with given config. Can be called without `new`.
|
|
*
|
|
* ##### presetName
|
|
*
|
|
* MarkdownIt provides named presets as a convenience to quickly
|
|
* enable/disable active syntax rules and options for common use cases.
|
|
*
|
|
* - ["commonmark"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/commonmark.js) -
|
|
* configures parser to strict [CommonMark](http://commonmark.org/) mode.
|
|
* - [default](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/default.js) -
|
|
* similar to GFM, used when no preset name given. Enables all available rules,
|
|
* but still without html, typographer & autolinker.
|
|
* - ["zero"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/zero.js) -
|
|
* all rules disabled. Useful to quickly setup your config via `.enable()`.
|
|
* For example, when you need only `bold` and `italic` markup and nothing else.
|
|
*
|
|
* ##### options:
|
|
*
|
|
* - __html__ - `false`. Set `true` to enable HTML tags in source. Be careful!
|
|
* That's not safe! You may need external sanitizer to protect output from XSS.
|
|
* It's better to extend features via plugins, instead of enabling HTML.
|
|
* - __xhtmlOut__ - `false`. Set `true` to add '/' when closing single tags
|
|
* (`<br />`). This is needed only for full CommonMark compatibility. In real
|
|
* world you will need HTML output.
|
|
* - __breaks__ - `false`. Set `true` to convert `\n` in paragraphs into `<br>`.
|
|
* - __langPrefix__ - `language-`. CSS language class prefix for fenced blocks.
|
|
* Can be useful for external highlighters.
|
|
* - __linkify__ - `false`. Set `true` to autoconvert URL-like text to links.
|
|
* - __typographer__ - `false`. Set `true` to enable [some language-neutral
|
|
* replacement](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/replacements.js) +
|
|
* quotes beautification (smartquotes).
|
|
* - __quotes__ - `“”‘’`, String or Array. Double + single quotes replacement
|
|
* pairs, when typographer enabled and smartquotes on. For example, you can
|
|
* use `'«»„“'` for Russian, `'„“‚‘'` for German, and
|
|
* `['«\xA0', '\xA0»', '‹\xA0', '\xA0›']` for French (including nbsp).
|
|
* - __highlight__ - `null`. Highlighter function for fenced code blocks.
|
|
* Highlighter `function (str, lang)` should return escaped HTML. It can also
|
|
* return empty string if the source was not changed and should be escaped externally.
|
|
*
|
|
* ##### Example
|
|
*
|
|
* ```javascript
|
|
* // commonmark mode
|
|
* var md = require('markdown-it')('commonmark');
|
|
*
|
|
* // default mode
|
|
* var md = require('markdown-it')();
|
|
*
|
|
* // enable everything
|
|
* var md = require('markdown-it')({
|
|
* html: true,
|
|
* linkify: true,
|
|
* typographer: true
|
|
* });
|
|
* ```
|
|
*
|
|
* ##### Syntax highlighting
|
|
*
|
|
* ```js
|
|
* var hljs = require('highlight.js') // https://highlightjs.org/
|
|
*
|
|
* var md = require('markdown-it')({
|
|
* highlight: function (str, lang) {
|
|
* if (lang && hljs.getLanguage(lang)) {
|
|
* try {
|
|
* return hljs.highlight(lang, str).value;
|
|
* } catch (__) {}
|
|
* }
|
|
*
|
|
* try {
|
|
* return hljs.highlightAuto(str).value;
|
|
* } catch (__) {}
|
|
*
|
|
* return ''; // use external default escaping
|
|
* }
|
|
* });
|
|
* ```
|
|
**/
|
|
function MarkdownIt(presetName, options) {
  // Allow calling without `new`
  if (!(this instanceof MarkdownIt)) {
    return new MarkdownIt(presetName, options);
  }

  // Single non-string argument (or no arguments at all): treat it as
  // `options` and fall back to the `default` preset.
  if (!options && !utils.isString(presetName)) {
    options = presetName || {};
    presetName = 'default';
  }

  /**
   * MarkdownIt#inline -> ParserInline
   *
   * Instance of [[ParserInline]]. You may need it to add new rules when
   * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
   * [[MarkdownIt.enable]].
   **/
  this.inline = new ParserInline();

  /**
   * MarkdownIt#block -> ParserBlock
   *
   * Instance of [[ParserBlock]]. You may need it to add new rules when
   * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
   * [[MarkdownIt.enable]].
   **/
  this.block = new ParserBlock();

  /**
   * MarkdownIt#core -> Core
   *
   * Instance of [[Core]] chain executor. You may need it to add new rules when
   * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
   * [[MarkdownIt.enable]].
   **/
  this.core = new ParserCore();

  /**
   * MarkdownIt#renderer -> Renderer
   *
   * Instance of [[Renderer]]. Use it to modify output look. Or to add rendering
   * rules for new token types, generated by plugins.
   *
   * ##### Example
   *
   * ```javascript
   * var md = require('markdown-it')();
   *
   * function myToken(tokens, idx, options, env, self) {
   *   //...
   *   return result;
   * };
   *
   * md.renderer.rules['my_token'] = myToken
   * ```
   *
   * See [[Renderer]] docs and [source code](https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js).
   **/
  this.renderer = new Renderer();

  /**
   * MarkdownIt#linkify -> LinkifyIt
   *
   * [linkify-it](https://github.com/markdown-it/linkify-it) instance.
   * Used by [linkify](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/linkify.js)
   * rule.
   **/
  this.linkify = new LinkifyIt();

  /**
   * MarkdownIt#validateLink(url) -> Boolean
   *
   * Link validation function. CommonMark allows too much in links. By default
   * we disable `javascript:`, `vbscript:`, `file:` schemas, and almost all `data:...` schemas
   * except some embedded image types.
   *
   * You can change this behaviour:
   *
   * ```javascript
   * var md = require('markdown-it')();
   * // enable everything
   * md.validateLink = function () { return true; }
   * ```
   **/
  this.validateLink = validateLink;

  /**
   * MarkdownIt#normalizeLink(url) -> String
   *
   * Function used to encode link url to a machine-readable format,
   * which includes url-encoding, punycode, etc.
   **/
  this.normalizeLink = normalizeLink;

  /**
   * MarkdownIt#normalizeLinkText(url) -> String
   *
   * Function used to decode link url to a human-readable format.
   **/
  this.normalizeLinkText = normalizeLinkText;


  // Expose utils & helpers for easy access from plugins

  /**
   * MarkdownIt#utils -> utils
   *
   * Assorted utility functions, useful to write plugins. See details
   * [here](https://github.com/markdown-it/markdown-it/blob/master/lib/common/utils.js).
   **/
  this.utils = utils;

  /**
   * MarkdownIt#helpers -> helpers
   *
   * Link components parser functions, useful to write plugins. See details
   * [here](https://github.com/markdown-it/markdown-it/blob/master/lib/helpers).
   **/
  this.helpers = helpers;


  // Load the preset first, then let explicitly passed options override it
  this.options = {};
  this.configure(presetName);

  if (options) { this.set(options); }
}
|
|
|
|
|
|
/** chainable
 * MarkdownIt.set(options)
 * - options (Object): option values to set
 *
 * Set parser options (in the same format as in constructor). Probably, you
 * will never need it, but you can change options after constructor call.
 * Given values are copied onto the instance `options` with `utils.assign`.
 *
 * ##### Example
 *
 * ```javascript
 * var md = require('markdown-it')()
 *             .set({ html: true, breaks: true })
 *             .set({ typographer: true });
 * ```
 *
 * __Note:__ To achieve the best possible performance, don't modify a
 * `markdown-it` instance options on the fly. If you need multiple configurations
 * it's best to create multiple instances and initialize each with separate
 * config.
 **/
MarkdownIt.prototype.set = function (options) {
  utils.assign(this.options, options);
  return this;
};
|
|
|
|
|
|
/** chainable, internal
 * MarkdownIt.configure(presets)
 * - presets (String|Object): name of a built-in preset, or a preset object
 *   with optional `options` and `components` properties
 *
 * Batch load of all options and component settings. This is internal method,
 * and you probably will not need it. But if you wish - see available presets
 * and data structure [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)
 *
 * We strongly recommend to use presets instead of direct config loads. That
 * will give better compatibility with next versions.
 *
 * Throws `Error` when the preset name is unknown or the preset is empty.
 **/
MarkdownIt.prototype.configure = function (presets) {
  var self = this, presetName;

  if (utils.isString(presets)) {
    presetName = presets;
    // Look at own properties only. A plain `config[presetName]` would also
    // resolve inherited members (`toString`, `constructor`, `__proto__`, ...)
    // and silently accept them as a preset.
    presets = Object.prototype.hasOwnProperty.call(config, presetName) ?
      config[presetName] : null;
    if (!presets) { throw new Error('Wrong `markdown-it` preset "' + presetName + '", check name'); }
  }

  if (!presets) { throw new Error('Wrong `markdown-it` preset, can\'t be empty'); }

  if (presets.options) { self.set(presets.options); }

  if (presets.components) {
    // Component names are used as instance property names
    // (e.g. `core`, `block`, `inline`); each one gets its rule list restricted.
    Object.keys(presets.components).forEach(function (name) {
      if (presets.components[name].rules) {
        self[name].ruler.enableOnly(presets.components[name].rules);
      }
    });
  }

  return this;
};
|
|
|
|
|
|
/** chainable
 * MarkdownIt.enable(list, ignoreInvalid)
 * - list (String|Array): rule name or list of rule names to enable
 * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
 *
 * Enable list of rules. It will automatically find appropriate components,
 * containing rules with given names. If rule not found, and `ignoreInvalid`
 * not set - throws exception.
 *
 * ##### Example
 *
 * ```javascript
 * var md = require('markdown-it')()
 *             .enable(['sub', 'sup'])
 *             .disable('smartquotes');
 * ```
 **/
MarkdownIt.prototype.enable = function (list, ignoreInvalid) {
  var result = [];

  if (!Array.isArray(list)) { list = [ list ]; }

  // Ask every chain; `true` is passed so that a name unknown to one chain is
  // not an error there. Missing names are checked once, below.
  [ 'core', 'block', 'inline' ].forEach(function (chain) {
    result = result.concat(this[chain].ruler.enable(list, true));
  }, this);

  // Names that no chain reported back
  var missed = list.filter(function (name) { return result.indexOf(name) < 0; });

  if (missed.length && !ignoreInvalid) {
    throw new Error('MarkdownIt. Failed to enable unknown rule(s): ' + missed);
  }

  return this;
};
|
|
|
|
|
|
/** chainable
 * MarkdownIt.disable(list, ignoreInvalid)
 * - list (String|Array): rule name or list of rule names to disable.
 * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
 *
 * The same as [[MarkdownIt.enable]], but turn specified rules off.
 **/
MarkdownIt.prototype.disable = function (list, ignoreInvalid) {
  var result = [];

  if (!Array.isArray(list)) { list = [ list ]; }

  // Ask every chain; `true` is passed so that a name unknown to one chain is
  // not an error there. Missing names are checked once, below.
  [ 'core', 'block', 'inline' ].forEach(function (chain) {
    result = result.concat(this[chain].ruler.disable(list, true));
  }, this);

  // Names that no chain reported back
  var missed = list.filter(function (name) { return result.indexOf(name) < 0; });

  if (missed.length && !ignoreInvalid) {
    throw new Error('MarkdownIt. Failed to disable unknown rule(s): ' + missed);
  }

  return this;
};
|
|
|
|
|
|
/** chainable
 * MarkdownIt.use(plugin, params)
 * - plugin (Function): plugin function, called as `plugin(md, params, ...)`
 * - params: any number of extra arguments, passed to the plugin as is
 *
 * Load specified plugin with given params into current parser instance.
 * It's just a sugar to call `plugin(md, params)` with currying.
 *
 * ##### Example
 *
 * ```javascript
 * var iterator = require('markdown-it-for-inline');
 * var md = require('markdown-it')()
 *             .use(iterator, 'foo_replace', 'text', function (tokens, idx) {
 *               tokens[idx].content = tokens[idx].content.replace(/foo/g, 'bar');
 *             });
 * ```
 **/
MarkdownIt.prototype.use = function (plugin /*, params, ... */) {
  // Parser instance goes first, followed by everything after `plugin`
  var args = [ this ].concat(Array.prototype.slice.call(arguments, 1));
  plugin.apply(plugin, args);
  return this;
};
|
|
|
|
|
|
/** internal
 * MarkdownIt.parse(src, env) -> Array
 * - src (String): source string
 * - env (Object): environment sandbox
 *
 * Parse input string and returns list of block tokens (special token type
 * "inline" will contain list of inline tokens). You should not call this
 * method directly, until you write custom renderer (for example, to produce
 * AST).
 *
 * `env` is used to pass data between "distributed" rules and return additional
 * metadata like reference info, needed for the renderer. It also can be used to
 * inject data in specific cases. Usually, you will be ok to pass `{}`,
 * and then pass updated object to renderer.
 **/
MarkdownIt.prototype.parse = function (src, env) {
  var state = new this.core.State(src, this, env);

  this.core.process(state);

  return state.tokens;
};
|
|
|
|
|
|
/**
 * MarkdownIt.render(src [, env]) -> String
 * - src (String): source string
 * - env (Object): environment sandbox
 *
 * Render markdown string into html. It does all magic for you :).
 *
 * `env` can be used to inject additional metadata (`{}` by default).
 * But you will not need it with high probability. See also comment
 * in [[MarkdownIt.parse]].
 **/
MarkdownIt.prototype.render = function (src, env) {
  // The same `env` object is shared by the parse and the render steps
  env = env || {};

  return this.renderer.render(this.parse(src, env), this.options, env);
};
|
|
|
|
|
|
/** internal
 * MarkdownIt.parseInline(src, env) -> Array
 * - src (String): source string
 * - env (Object): environment sandbox
 *
 * The same as [[MarkdownIt.parse]] but skip all block rules. It returns the
 * block tokens list with the single `inline` element, containing parsed inline
 * tokens in `children` property. Also updates `env` object.
 **/
MarkdownIt.prototype.parseInline = function (src, env) {
  var state = new this.core.State(src, this, env);

  // Flag the state as inline-only before running the core chain
  state.inlineMode = true;
  this.core.process(state);

  return state.tokens;
};
|
|
|
|
|
|
/**
 * MarkdownIt.renderInline(src [, env]) -> String
 * - src (String): source string
 * - env (Object): environment sandbox
 *
 * Similar to [[MarkdownIt.render]] but for single paragraph content. Result
 * will NOT be wrapped into `<p>` tags.
 **/
MarkdownIt.prototype.renderInline = function (src, env) {
  // The same `env` object is shared by the parse and the render steps
  env = env || {};

  return this.renderer.render(this.parseInline(src, env), this.options, env);
};
|
|
|
|
|
|
module.exports = MarkdownIt;
|
|
|