// Utilities
//
'use strict';


// Returns the internal class tag of a value, e.g. '[object String]'.
function _class(obj) {
  return Object.prototype.toString.call(obj);
}

// True when `obj` is a string (primitive or String object).
function isString(obj) {
  return _class(obj) === '[object String]';
}

var _hasOwnProperty = Object.prototype.hasOwnProperty;

// Own-property check that does not rely on `object.hasOwnProperty`
// being present or untouched on the object itself.
function has(object, key) {
  return _hasOwnProperty.call(object, key);
}

// Merge objects
//
// Copies the own enumerable keys of every source (arguments after the
// first) into `obj`, left to right, and returns `obj`. Falsy sources are
// skipped; any other non-object source raises a TypeError.
//
function assign(obj /*from1, from2, from3, ...*/) {
  var sources = Array.prototype.slice.call(arguments, 1);

  sources.forEach(function (source) {
    if (!source) { return; }

    if (typeof source !== 'object') {
      // Leading space keeps the offending value and the text apart.
      throw new TypeError(source + ' must be object');
    }

    Object.keys(source).forEach(function (key) {
      obj[key] = source[key];
    });
  });

  return obj;
}

// Remove the element at `pos` from the array and put the elements of
// another array at that position. Returns a new array; `src` is not modified.
// Useful for some operations with tokens
function arrayReplaceAt(src, pos, newElements) {
  return [].concat(src.slice(0, pos), newElements, src.slice(pos + 1));
}

////////////////////////////////////////////////////////////////////////////////

// A backslash followed by one ASCII punctuation character.
var UNESCAPE_MD_RE = /\\([!"#$%&'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])/g;

// Drops the backslash from backslash-escaped ASCII punctuation.
function unescapeMd(str) {
  // Fast path: nothing to unescape without a backslash.
  if (str.indexOf('\\') < 0) { return str; }
  return str.replace(UNESCAPE_MD_RE, '$1');
}

////////////////////////////////////////////////////////////////////////////////

// Returns false for code points that must not be produced from a numeric
// character reference: surrogates, the listed never-used ranges, most
// control codes, and anything above 0x10FFFF.
function isValidEntityCode(c) {
  /*eslint no-bitwise:0*/
  // broken sequence
  if (c >= 0xD800 && c <= 0xDFFF) { return false; }
  // never used
  if (c >= 0xFDD0 && c <= 0xFDEF) { return false; }
  if ((c & 0xFFFF) === 0xFFFF || (c & 0xFFFF) === 0xFFFE) { return false; }
  // control codes
  if (c >= 0x00 && c <= 0x08) { return false; }
  if (c === 0x0B) { return false; }
  if (c >= 0x0E && c <= 0x1F) { return false; }
  if (c >= 0x7F && c <= 0x9F) { return false; }
  // out of range
  if (c > 0x10FFFF) { return false; }
  return true;
}

// Converts a code point to a string, emitting a surrogate pair for
// values above 0xFFFF.
function fromCodePoint(c) {
  /*eslint no-bitwise:0*/
  if (c > 0xffff) {
    c -= 0x10000;
    var surrogate1 = 0xd800 + (c >> 10),
        surrogate2 = 0xdc00 + (c & 0x3ff);

    return String.fromCharCode(surrogate1, surrogate2);
  }
  return String.fromCharCode(c);
}
// `&name;` where name starts with a letter or '#', followed by 1..31
// alphanumeric characters.
var NAMED_ENTITY_RE        = /&([a-z#][a-z0-9]{1,31});/gi;
// Whole-name test for a numeric reference: '#' + up to 8 decimal digits, or
// '#x' + up to 8 hex digits. Anchored at both ends so that a valid prefix
// followed by other characters (e.g. '#x41zz') is not decoded.
var DIGITAL_ENTITY_TEST_RE = /^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))$/i;
// NOTE(review): presumably a map of entity name -> replacement text;
// confirm against ./entities.
var entities = require('./entities');

// String#replace callback for NAMED_ENTITY_RE. Returns the replacement for a
// known named entity or a valid numeric reference; otherwise returns the
// matched text unchanged.
function replaceEntityPattern(match, name) {
  var code = 0;

  if (has(entities, name)) {
    return entities[name];
  } else if (name.charCodeAt(0) === 0x23/* # */ && DIGITAL_ENTITY_TEST_RE.test(name)) {
    code = name[1].toLowerCase() === 'x' ?
      parseInt(name.slice(2), 16)
    :
      parseInt(name.slice(1), 10);
    if (isValidEntityCode(code)) {
      return fromCodePoint(code);
    }
  }
  return match;
}

// Replaces entity references in `str` with the text they stand for.
function replaceEntities(str) {
  // Fast path: no '&' means no entity.
  if (str.indexOf('&') < 0) { return str; }

  return str.replace(NAMED_ENTITY_RE, replaceEntityPattern);
}

////////////////////////////////////////////////////////////////////////////////

var HTML_ESCAPE_TEST_RE = /[&<>"]/;
var HTML_ESCAPE_REPLACE_RE = /[&<>"]/g;
// Each HTML-significant character and the entity that stands in for it.
var HTML_REPLACEMENTS = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;'
};

function replaceUnsafeChar(ch) {
  return HTML_REPLACEMENTS[ch];
}

// Escapes &, <, > and " in `str` as HTML entities. Returns the input
// unchanged when it contains none of them.
function escapeHtml(str) {
  if (HTML_ESCAPE_TEST_RE.test(str)) {
    return str.replace(HTML_ESCAPE_REPLACE_RE, replaceUnsafeChar);
  }
  return str;
}

////////////////////////////////////////////////////////////////////////////////

var SURROGATE_TEST_RE   = /[\uD800-\uDFFF]/;
var SURROGATE_SEARCH_RE = /[\uD800-\uDFFF]/g;

// String#replace callback: `ch` is one surrogate code unit, `pos` its index
// in `orig`. Keeps the unit when it is half of a well-formed pair and
// returns U+FFFD otherwise.
function replaceBadSurrogate(ch, pos, orig) {
  var code = ch.charCodeAt(0);

  if (code >= 0xD800 && code <= 0xDBFF) {
    // high surrogate: must be followed by a low surrogate
    if (pos >= orig.length - 1) { return '\uFFFD'; }
    code = orig.charCodeAt(pos + 1);
    if (code < 0xDC00 || code > 0xDFFF) { return '\uFFFD'; }

    return ch;
  }

  // low surrogate: must be preceded by a high surrogate
  if (pos === 0) { return '\uFFFD'; }
  code = orig.charCodeAt(pos - 1);
  if (code < 0xD800 || code > 0xDBFF) { return '\uFFFD'; }
  return ch;
}

// Replaces every unpaired surrogate in `str` with U+FFFD.
function fixBrokenSurrogates(str) {
  if (!SURROGATE_TEST_RE.test(str)) { return str; }

  return str.replace(SURROGATE_SEARCH_RE, replaceBadSurrogate);
}

////////////////////////////////////////////////////////////////////////////////

// Incoming link can be partially encoded.
// Convert possible combinations to unified form.
//
// TODO: Rewrite it. Should use:
//
// - encodeURIComponent for query
// - encodeURI for path
// - (?) punycode for domain name (but encodeURI seems to work in real world)
//
function normalizeLink(url) {
  var decoded = replaceEntities(url);
  var encoded;

  // The result for malformed URIs does not matter much,
  // but decoding must not throw.
  try {
    decoded = decodeURI(decoded);
  } catch (__) {}

  // The encoder throws on broken surrogate pairs,
  // so repair those before encoding.
  try {
    encoded = encodeURI(fixBrokenSurrogates(decoded));
  } catch (__) {
    // This should never happen; kept for safety only.
    /*istanbul ignore next*/
    encoded = '';
  }

  return encoded;
}

////////////////////////////////////////////////////////////////////////////////

var REGEXP_ESCAPE_RE = /[.?*+^$[\]\\(){}|-]/g;

// Backslash-escapes the characters matched by REGEXP_ESCAPE_RE.
function escapeRE (str) {
  var escaped = str.replace(REGEXP_ESCAPE_RE, '\\$&');

  return escaped;
}

////////////////////////////////////////////////////////////////////////////////

// Zs (unicode class) || 09, 0A, 0D, 0C
function isWhiteSpace(code) {
  switch (code) {
    case 0x09:
    case 0x0A:
    case 0x0D:
    case 0x0C:
    case 0x20:
    case 0xA0:
    case 0x1680:
    case 0x202F:
    case 0x205F:
    case 0x3000:
      return true;
    default:
      // Remaining whitespace codes form one contiguous range.
      return code >= 0x2000 && code <= 0x200A;
  }
}

////////////////////////////////////////////////////////////////////////////////

/*eslint-disable max-len*/
var UNICODE_PUNCT_RE = require('uc.micro/categories/P/regex');

// Tests `char` against the unicode punctuation regex.
// Currently without astral characters support.
function isPunctChar(char) {
  var matched = UNICODE_PUNCT_RE.test(char);

  return matched;
}


// Markdown ASCII punctuation characters.
//
// !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
// http://spec.commonmark.org/0.15/#ascii-punctuation-character
//
// Don't confuse with unicode punctuation !!! It lacks some chars in ascii range.
//
// `ch` is compared as a character code, not as a one-character string.
//
function isMdAsciiPunct(ch) {
  switch (ch) {
    // 0x21..0x2F
    case 0x21/* ! */: case 0x22/* " */: case 0x23/* # */: case 0x24/* $ */:
    case 0x25/* % */: case 0x26/* & */: case 0x27/* ' */: case 0x28/* ( */:
    case 0x29/* ) */: case 0x2A/* * */: case 0x2B/* + */: case 0x2C/* , */:
    case 0x2D/* - */: case 0x2E/* . */: case 0x2F/* / */:
    // 0x3A..0x40
    case 0x3A/* : */: case 0x3B/* ; */: case 0x3C/* < */: case 0x3D/* = */:
    case 0x3E/* > */: case 0x3F/* ? */: case 0x40/* @ */:
    // 0x5B..0x60
    case 0x5B/* [ */: case 0x5C/* \ */: case 0x5D/* ] */: case 0x5E/* ^ */:
    case 0x5F/* _ */: case 0x60/* ` */:
    // 0x7B..0x7E
    case 0x7B/* { */: case 0x7C/* | */: case 0x7D/* } */: case 0x7E/* ~ */:
      return true;
    default:
      return false;
  }
}

// Helper to unify [reference labels].
//
function normalizeReference(str) {
  // Trim, then collapse every whitespace run to a single space.
  var collapsed = str.trim().replace(/\s+/g, ' ');

  // .toUpperCase() is used instead of .toLowerCase() to avoid
  // a conflict with Object.prototype members (most notably, `__proto__`).
  return collapsed.toUpperCase();
}

////////////////////////////////////////////////////////////////////////////////

exports.assign             = assign;
exports.isString           = isString;
exports.has                = has;
exports.unescapeMd         = unescapeMd;
exports.isValidEntityCode  = isValidEntityCode;
exports.fromCodePoint      = fromCodePoint;
exports.replaceEntities    = replaceEntities;
exports.escapeHtml         = escapeHtml;
exports.arrayReplaceAt     = arrayReplaceAt;
exports.normalizeLink      = normalizeLink;
exports.isWhiteSpace       = isWhiteSpace;
exports.isMdAsciiPunct     = isMdAsciiPunct;
exports.isPunctChar        = isPunctChar;
exports.escapeRE           = escapeRE;
exports.normalizeReference = normalizeReference;

// for testing only
exports.fixBrokenSurrogates = fixBrokenSurrogates;