Markdown parser, done right. 100% CommonMark support, extensions, syntax plugins & high speed https://markdown-it.github.io/
 
 
 

551 lines
16 KiB

// Main perser class
'use strict';
var utils = require('./common/utils');
var helpers = require('./helpers');
var Renderer = require('./renderer');
var ParserCore = require('./parser_core');
var ParserBlock = require('./parser_block');
var ParserInline = require('./parser_inline');
var LinkifyIt = require('linkify-it');
var mdurl = require('mdurl');
var punycode = require('punycode');
var config = {
'default': require('./presets/default'),
zero: require('./presets/zero'),
commonmark: require('./presets/commonmark')
};
////////////////////////////////////////////////////////////////////////////////
//
// This validator can prohibit more than really needed to prevent XSS. It's a
// tradeoff to keep code simple and to be secure by default.
//
// If you need different setup - override validator method as you wish. Or
// replace it with dummy function and use external sanitizer.
//
var BAD_PROTO_RE = /^(vbscript|javascript|file|data):/;
var GOOD_DATA_RE = /^data:image\/(gif|png|jpeg|webp);/;
function validateLink(url) {
// url should be normalized at this point, and existing entities are decoded
var str = url.trim().toLowerCase();
return BAD_PROTO_RE.test(str) ? (GOOD_DATA_RE.test(str) ? true : false) : true;
}
////////////////////////////////////////////////////////////////////////////////
var RECODE_HOSTNAME_FOR = [ 'http:', 'https:', 'mailto:' ];
function normalizeLink(url) {
var parsed = mdurl.parse(url, true);
if (parsed.hostname) {
// Encode hostnames in urls like:
// `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
//
// We don't encode unknown schemas, because it's likely that we encode
// something we shouldn't (e.g. `skype:name` treated as `skype:host`)
//
if (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0) {
try {
parsed.hostname = punycode.toASCII(parsed.hostname);
} catch(er) {}
}
}
return mdurl.encode(mdurl.format(parsed));
}
function normalizeLinkText(url) {
var parsed = mdurl.parse(url, true);
if (parsed.hostname) {
// Encode hostnames in urls like:
// `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
//
// We don't encode unknown schemas, because it's likely that we encode
// something we shouldn't (e.g. `skype:name` treated as `skype:host`)
//
if (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0) {
try {
parsed.hostname = punycode.toUnicode(parsed.hostname);
} catch(er) {}
}
}
return mdurl.decode(mdurl.format(parsed));
}
/**
* class MarkdownIt
*
* Main parser/renderer class.
*
* ##### Usage
*
* ```javascript
* // node.js, "classic" way:
* var MarkdownIt = require('markdown-it'),
* md = new MarkdownIt();
* var result = md.render('# markdown-it rulezz!');
*
* // node.js, the same, but with sugar:
* var md = require('markdown-it')();
* var result = md.render('# markdown-it rulezz!');
*
* // browser without AMD, added to "window" on script load
* // Note, there are no dash.
* var md = window.markdownit();
* var result = md.render('# markdown-it rulezz!');
* ```
*
* Single line rendering, without paragraph wrap:
*
* ```javascript
* var md = require('markdown-it')();
* var result = md.renderInline('__markdown-it__ rulezz!');
* ```
**/
/**
* new MarkdownIt([presetName, options])
* - presetName (String): optional, `commonmark` / `zero`
* - options (Object)
*
* Creates parser instanse with given config. Can be called without `new`.
*
* ##### presetName
*
* MarkdownIt provides named presets as a convenience to quickly
* enable/disable active syntax rules and options for common use cases.
*
* - ["commonmark"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/commonmark.js) -
* configures parser to strict [CommonMark](http://commonmark.org/) mode.
* - [default](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/default.js) -
* similar to GFM, used when no preset name given. Enables all available rules,
* but still without html, typographer & autolinker.
* - ["zero"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/zero.js) -
* all rules disabled. Useful to quickly setup your config via `.enable()`.
* For example, when you need only `bold` and `italic` markup and nothing else.
*
* ##### options:
*
* - __html__ - `false`. Set `true` to enable HTML tags in source. Be careful!
* That's not safe! You may need external sanitizer to protect output from XSS.
* It's better to extend features via plugins, instead of enabling HTML.
* - __xhtmlOut__ - `false`. Set `true` to add '/' when closing single tags
* (`<br />`). This is needed only for full CommonMark compatibility. In real
* world you will need HTML output.
* - __breaks__ - `false`. Set `true` to convert `\n` in paragraphs into `<br>`.
* - __langPrefix__ - `language-`. CSS language class prefix for fenced blocks.
* Can be useful for external highlighters.
* - __linkify__ - `false`. Set `true` to autoconvert URL-like text to links.
* - __typographer__ - `false`. Set `true` to enable [some language-neutral
* replacement](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/replacements.js) +
* quotes beautification (smartquotes).
* - __quotes__ - `“”‘’`, String or Array. Double + single quotes replacement
* pairs, when typographer enabled and smartquotes on. For example, you can
* use `'«»„“'` for Russian, `'„“‚‘'` for German, and
* `['«\xA0', '\xA0»', '‹\xA0', '\xA0›']` for French (including nbsp).
* - __highlight__ - `null`. Highlighter function for fenced code blocks.
* Highlighter `function (str, lang)` should return escaped HTML. It can also
* return empty string if the source was not changed and should be escaped externaly.
*
* ##### Example
*
* ```javascript
* // commonmark mode
* var md = require('markdown-it')('commonmark');
*
* // default mode
* var md = require('markdown-it')();
*
* // enable everything
* var md = require('markdown-it')({
* html: true,
* linkify: true,
* typographer: true
* });
* ```
*
* ##### Syntax highlighting
*
* ```js
* var hljs = require('highlight.js') // https://highlightjs.org/
*
* var md = require('markdown-it')({
* highlight: function (str, lang) {
* if (lang && hljs.getLanguage(lang)) {
* try {
* return hljs.highlight(lang, str).value;
* } catch (__) {}
* }
*
* try {
* return hljs.highlightAuto(str).value;
* } catch (__) {}
*
* return ''; // use external default escaping
* }
* });
* ```
**/
function MarkdownIt(presetName, options) {
if (!(this instanceof MarkdownIt)) {
return new MarkdownIt(presetName, options);
}
if (!options) {
if (!utils.isString(presetName)) {
options = presetName || {};
presetName = 'default';
}
}
/**
* MarkdownIt#inline -> ParserInline
*
* Instance of [[ParserInline]]. You may need it to add new rules when
* writing plugins. For simple rules control use [[MarkdownIt.disable]] and
* [[MarkdownIt.enable]].
**/
this.inline = new ParserInline();
/**
* MarkdownIt#block -> ParserBlock
*
* Instance of [[ParserBlock]]. You may need it to add new rules when
* writing plugins. For simple rules control use [[MarkdownIt.disable]] and
* [[MarkdownIt.enable]].
**/
this.block = new ParserBlock();
/**
* MarkdownIt#core -> Core
*
* Instance of [[Core]] chain executor. You may need it to add new rules when
* writing plugins. For simple rules control use [[MarkdownIt.disable]] and
* [[MarkdownIt.enable]].
**/
this.core = new ParserCore();
/**
* MarkdownIt#renderer -> Renderer
*
* Instance of [[Renderer]]. Use it to modify output look. Or to add rendering
* rules for new token types, generated by plugins.
*
* ##### Example
*
* ```javascript
* var md = require('markdown-it')();
*
* function myToken(tokens, idx, options, env, self) {
* //...
* return result;
* };
*
* md.renderer.rules['my_token'] = myToken
* ```
*
* See [[Renderer]] docs and [source code](https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js).
**/
this.renderer = new Renderer();
/**
* MarkdownIt#linkify -> LinkifyIt
*
* [linkify-it](https://github.com/markdown-it/linkify-it) instance.
* Used by [linkify](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/linkify.js)
* rule.
**/
this.linkify = new LinkifyIt();
/**
* MarkdownIt#validateLink(url) -> Boolean
*
* Link validation function. CommonMark allows too much in links. By default
* we disable `javascript:`, `vbscript:`, `file:` schemas, and almost all `data:...` schemas
* except some embedded image types.
*
* You can change this behaviour:
*
* ```javascript
* var md = require('markdown-it')();
* // enable everything
* md.validateLink = function () { return true; }
* ```
**/
this.validateLink = validateLink;
/**
* MarkdownIt#normalizeLink(url) -> String
*
* Function used to encode link url to a machine-readable format,
* which includes url-encoding, punycode, etc.
**/
this.normalizeLink = normalizeLink;
/**
* MarkdownIt#normalizeLinkText(url) -> String
*
* Function used to decode link url to a human-readable format`
**/
this.normalizeLinkText = normalizeLinkText;
// Expose utils & helpers for easy acces from plugins
/**
* MarkdownIt#utils -> utils
*
* Assorted utility functions, useful to write plugins. See details
* [here](https://github.com/markdown-it/markdown-it/blob/master/lib/common/utils.js).
**/
this.utils = utils;
/**
* MarkdownIt#helpers -> helpers
*
* Link components parser functions, useful to write plugins. See details
* [here](https://github.com/markdown-it/markdown-it/blob/master/lib/helpers).
**/
this.helpers = helpers;
this.options = {};
this.configure(presetName);
if (options) { this.set(options); }
}
/** chainable
* MarkdownIt.set(options)
*
* Set parser options (in the same format as in constructor). Probably, you
* will never need it, but you can change options after constructor call.
*
* ##### Example
*
* ```javascript
* var md = require('markdown-it')()
* .set({ html: true, breaks: true })
* .set({ typographer, true });
* ```
*
* __Note:__ To achieve the best possible performance, don't modify a
* `markdown-it` instance options on the fly. If you need multiple configurations
* it's best to create multiple instances and initialize each with separate
* config.
**/
MarkdownIt.prototype.set = function (options) {
utils.assign(this.options, options);
return this;
};
/** chainable, internal
* MarkdownIt.configure(presets)
*
* Batch load of all options and compenent settings. This is internal method,
* and you probably will not need it. But if you with - see available presets
* and data structure [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)
*
* We strongly recommend to use presets instead of direct config loads. That
* will give better compatibility with next versions.
**/
MarkdownIt.prototype.configure = function (presets) {
var self = this, presetName;
if (utils.isString(presets)) {
presetName = presets;
presets = config[presetName];
if (!presets) { throw new Error('Wrong `markdown-it` preset "' + presetName + '", check name'); }
}
if (!presets) { throw new Error('Wrong `markdown-it` preset, can\'t be empty'); }
if (presets.options) { self.set(presets.options); }
if (presets.components) {
Object.keys(presets.components).forEach(function (name) {
if (presets.components[name].rules) {
self[name].ruler.enableOnly(presets.components[name].rules);
}
});
}
return this;
};
/** chainable
* MarkdownIt.enable(list, ignoreInvalid)
* - list (String|Array): rule name or list of rule names to enable
* - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
*
* Enable list or rules. It will automatically find appropriate components,
* containing rules with given names. If rule not found, and `ignoreInvalid`
* not set - throws exception.
*
* ##### Example
*
* ```javascript
* var md = require('markdown-it')()
* .enable(['sub', 'sup'])
* .disable('smartquotes');
* ```
**/
MarkdownIt.prototype.enable = function (list, ignoreInvalid) {
var result = [];
if (!Array.isArray(list)) { list = [ list ]; }
[ 'core', 'block', 'inline' ].forEach(function (chain) {
result = result.concat(this[chain].ruler.enable(list, true));
}, this);
var missed = list.filter(function (name) { return result.indexOf(name) < 0; });
if (missed.length && !ignoreInvalid) {
throw new Error('MarkdownIt. Failed to enable unknown rule(s): ' + missed);
}
return this;
};
/** chainable
* MarkdownIt.disable(list, ignoreInvalid)
* - list (String|Array): rule name or list of rule names to disable.
* - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
*
* The same as [[MarkdownIt.enable]], but turn specified rules off.
**/
MarkdownIt.prototype.disable = function (list, ignoreInvalid) {
var result = [];
if (!Array.isArray(list)) { list = [ list ]; }
[ 'core', 'block', 'inline' ].forEach(function (chain) {
result = result.concat(this[chain].ruler.disable(list, true));
}, this);
var missed = list.filter(function (name) { return result.indexOf(name) < 0; });
if (missed.length && !ignoreInvalid) {
throw new Error('MarkdownIt. Failed to disable unknown rule(s): ' + missed);
}
return this;
};
/** chainable
* MarkdownIt.use(plugin, params)
*
* Load specified plugin with given params into current parser instance.
* It's just a sugar to call `plugin(md, params)` with curring.
*
* ##### Example
*
* ```javascript
* var iterator = require('markdown-it-for-inline');
* var md = require('markdown-it')()
* .use(iterator, 'foo_replace', 'text', function (tokens, idx) {
* tokens[idx].content = tokens[idx].content.replace(/foo/g, 'bar');
* });
* ```
**/
MarkdownIt.prototype.use = function (plugin /*, params, ... */) {
var args = [ this ].concat(Array.prototype.slice.call(arguments, 1));
plugin.apply(plugin, args);
return this;
};
/** internal
* MarkdownIt.parse(src, env) -> Array
* - src (String): source string
* - env (Object): environment sandbox
*
* Parse input string and returns list of block tokens (special token type
* "inline" will contain list of inline tokens). You should not call this
* method directly, until you write custom renderer (for example, to produce
* AST).
*
* `env` is used to pass data between "distributed" rules and return additional
* metadata like reference info, needed for for renderer. It also can be used to
* inject data in specific cases. Usually, you will be ok to pass `{}`,
* and then pass updated object to renderer.
**/
MarkdownIt.prototype.parse = function (src, env) {
var state = new this.core.State(src, this, env);
this.core.process(state);
return state.tokens;
};
/**
* MarkdownIt.render(src [, env]) -> String
* - src (String): source string
* - env (Object): environment sandbox
*
* Render markdown string into html. It does all magic for you :).
*
* `env` can be used to inject additional metadata (`{}` by default).
* But you will not need it with high probability. See also comment
* in [[MarkdownIt.parse]].
**/
MarkdownIt.prototype.render = function (src, env) {
env = env || {};
return this.renderer.render(this.parse(src, env), this.options, env);
};
/** internal
* MarkdownIt.parseInline(src, env) -> Array
* - src (String): source string
* - env (Object): environment sandbox
*
* The same as [[MarkdownIt.parse]] but skip all block rules. It returns the
* block tokens list with the single `inline` element, containing parsed inline
* tokens in `children` property. Also updates `env` object.
**/
MarkdownIt.prototype.parseInline = function (src, env) {
var state = new this.core.State(src, this, env);
state.inlineMode = true;
this.core.process(state);
return state.tokens;
};
/**
* MarkdownIt.renderInline(src [, env]) -> String
* - src (String): source string
* - env (Object): environment sandbox
*
* Similar to [[MarkdownIt.render]] but for single paragraph content. Result
* will NOT be wrapped into `<p>` tags.
**/
MarkdownIt.prototype.renderInline = function (src, env) {
env = env || {};
return this.renderer.render(this.parseInline(src, env), this.options, env);
};
module.exports = MarkdownIt;