Browse Source

Reworked html parse in block lexer + added `html` option

pull/14/head
Vitaly Puzrin 10 years ago
parent
commit
a8e6bab747
  1. 3
      bin/specsplit.js
  2. 58
      lib/common/html_blocks.js
  3. 48
      lib/common/html_re.js
  4. 172
      lib/common/url_schemas.js
  5. 2
      lib/lexer_block.js
  6. 75
      lib/lexer_block/htmlblock.js
  7. 2
      lib/lexer_block/paragraph.js
  8. 1
      test/stmd.js

3
bin/specsplit.js

@ -58,7 +58,8 @@ function readFile(filename, encoding, callback) {
readFile(options.spec, 'utf8', function (error, input) {
var good = [], bad = [],
markdown = new Remarkable({
xhtml: true, // write <hr /> instead of <hr> and so on
html: true,
xhtml: true,
codeLangPrefix: 'language-'
});

58
lib/common/html_blocks.js

@ -0,0 +1,58 @@
// List of valid html block names, according to the stmd spec
// http://jgm.github.io/stmd/spec.html#html-blocks
//
// All names are stored in lower case. The block lexer
// (lib/lexer_block/htmlblock.js) looks tag names up in this array with
// `indexOf`, so the entries have to match the tag name exactly.
'use strict';
module.exports = [
'article',
'aside',
'button',
'blockquote',
'body',
'canvas',
'caption',
'col',
'colgroup',
'dd',
'div',
'dl',
'dt',
'embed',
'fieldset',
'figcaption',
'figure',
'footer',
'form',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'header',
'hgroup',
'hr',
'iframe',
'li',
'map',
'object',
'ol',
'output',
'p',
'pre',
'progress',
'script',
'section',
'style',
'table',
'tbody',
'td',
'textarea',
'tfoot',
'th',
'tr',
'thead',
'ul',
'video'
];

48
lib/lexer_block/html.js → lib/common/html_re.js

@ -1,12 +1,8 @@
// HTML block
// Regexps to match html elements
'use strict';
var isEmpty = require('../helpers').isEmpty;
var getLines = require('../helpers').getLines;
function replace(regex, options) {
regex = regex.source;
options = options || '';
@ -16,14 +12,14 @@ function replace(regex, options) {
return new RegExp(regex, options);
}
val = val.source || val;
val = val.replace(/(^|[^\[])\^/g, '$1');
//val = val.replace(/(^|[^\[])\^/g, '$1');
regex = regex.replace(name, val);
return self;
};
}
var attr_name = /[a-zA-Z_:][a-zA-Z0-9:._-]*/;
var attr_name = /[a-zA-Z_:][a-zA-Z0-9:._-]*/;
var unquoted = /[^"'=<>`\x00-\x20]+/;
var single_quoted = /'[^']*'/;
@ -51,7 +47,7 @@ var processing = /<[?].*?[?]>/;
var declaration = /<![A-Z]\s+[^>]*>/;
var cdata = /<!\[CDATA\[([^\]]+|\][^\]]|\]\][^>])*\]\]>/;
var html_tag = replace(/^(?:open_tag|close_tag|comment|processing|declaration|cdata)/, 'i')
var HTML_TAG_RE = replace(/^(?:open_tag|close_tag|comment|processing|declaration|cdata)/, 'i')
('open_tag', open_tag)
('close_tag', close_tag)
('comment', comment)
@ -61,38 +57,4 @@ var html_tag = replace(/^(?:open_tag|close_tag|comment|processing|declaration|cd
();
module.exports = function html(state, startLine, endLine, silent) {
var nextLine,
pos = state.bMarks[startLine],
max = state.eMarks[startLine],
shift = state.tShift[startLine];
pos += shift;
if (pos + 3 >= max ||
shift > 3 ||
state.blkLevel > 0) { return false; }
if (state.src.charCodeAt(pos) !== 0x3C/* < */) { return false; }
// TODO: (?) optimize check.
nextLine = startLine + 1;
while (nextLine < state.lineMax && !isEmpty(state, nextLine)) {
nextLine++;
}
if (!html_tag.test(state.src.slice(pos, state.eMarks[nextLine - 1]).replace(/\n/g,' '))) {
return false;
}
if (silent) { return true; }
state.tokens.push({
type: 'html',
content: getLines(state, startLine, nextLine, 0, true)
});
state.line = nextLine;
return true;
};
module.exports.HTML_TAG_RE = HTML_TAG_RE;

172
lib/common/url_schemas.js

@ -0,0 +1,172 @@
// List of valid url schemas (schemes), according to the stmd spec
// http://jgm.github.io/stmd/spec.html#autolinks
//
// All names are stored in lower case.
// NOTE(review): the list includes 'javascript' and 'data'. A consumer that
// turns these into links from untrusted input may need extra filtering -
// confirm at the call site.
'use strict';
module.exports = [
'coap',
'doi',
'javascript',
'aaa',
'aaas',
'about',
'acap',
'cap',
'cid',
'crid',
'data',
'dav',
'dict',
'dns',
'file',
'ftp',
'geo',
'go',
'gopher',
'h323',
'http',
'https',
'iax',
'icap',
'im',
'imap',
'info',
'ipp',
'iris',
'iris.beep',
'iris.xpc',
'iris.xpcs',
'iris.lwz',
'ldap',
'mailto',
'mid',
'msrp',
'msrps',
'mtqp',
'mupdate',
'news',
'nfs',
'ni',
'nih',
'nntp',
'opaquelocktoken',
'pop',
'pres',
'rtsp',
'service',
'session',
'shttp',
'sieve',
'sip',
'sips',
'sms',
'snmp',
'soap.beep',
'soap.beeps',
'tag',
'tel',
'telnet',
'tftp',
'thismessage',
'tn3270',
'tip',
'tv',
'urn',
'vemmi',
'ws',
'wss',
'xcon',
'xcon-userid',
'xmlrpc.beep',
'xmlrpc.beeps',
'xmpp',
'z39.50r',
'z39.50s',
'adiumxtra',
'afp',
'afs',
'aim',
'apt',
'attachment',
'aw',
'beshare',
'bitcoin',
'bolo',
'callto',
'chrome',
'chrome-extension',
'com-eventbrite-attendee',
'content',
'cvs',
'dlna-playsingle',
'dlna-playcontainer',
'dtn',
'dvb',
'ed2k',
'facetime',
'feed',
'finger',
'fish',
'gg',
'git',
'gizmoproject',
'gtalk',
'hcp',
'icon',
'ipn',
'irc',
'irc6',
'ircs',
'itms',
'jar',
'jms',
'keyparc',
'lastfm',
'ldaps',
'magnet',
'maps',
'market',
'message',
'mms',
'ms-help',
'msnim',
'mumble',
'mvn',
'notes',
'oid',
'palm',
'paparazzi',
'platform',
'proxy',
'psyc',
'query',
'res',
'resource',
'rmi',
'rsync',
'rtmp',
'secondlife',
'sftp',
'sgn',
'skype',
'smb',
'soldat',
'spotify',
'ssh',
'steam',
'svn',
'teamspeak',
'things',
'udp',
'unreal',
'ut2004',
'ventrilo',
'view-source',
'webcal',
'wtai',
'wyciwyg',
'xfire',
'xri',
'ymsgr'
];

2
lib/lexer_block.js

@ -19,7 +19,7 @@ rules.push(require('./lexer_block/hr'));
rules.push(require('./lexer_block/list'));
rules.push(require('./lexer_block/heading'));
rules.push(require('./lexer_block/lheading'));
rules.push(require('./lexer_block/html'));
rules.push(require('./lexer_block/htmlblock'));
rules.push(require('./lexer_block/table'));
rules.push(require('./lexer_block/paragraph'));

75
lib/lexer_block/htmlblock.js

@ -0,0 +1,75 @@
// HTML block
'use strict';
var isEmpty = require('../helpers').isEmpty;
var getLines = require('../helpers').getLines;
var block_names = require('../common/html_blocks');
// Quick matchers for the tag at the very start of a line; group 1 captures
// the tag name. A name is one letter followed by up to 14 letters or digits
// (15 characters max), so heading tags such as `h1`..`h6` are recognised.
// The opening form must be followed by whitespace, `/` or `>`.
var HTML_TAG_OPEN_RE = /^<([a-zA-Z][a-zA-Z0-9]{0,14})[\s\/>]/;
// The closing form (`</name`) must be followed by whitespace or `>`.
var HTML_TAG_CLOSE_RE = /^<\/([a-zA-Z][a-zA-Z0-9]{0,14})[\s>]/;
// Tell whether a character code is an ASCII latin letter (A-Z or a-z).
function isLetter(ch) {
  /*eslint no-bitwise:0*/
  // Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z', so one range check is enough.
  var folded = ch | 0x20;

  if (folded < 0x61/* a */) { return false; }
  return folded <= 0x7a/* z */;
}
module.exports = function htmlblock(state, startLine, endLine, silent) {
var ch, match, nextLine,
pos = state.bMarks[startLine],
max = state.eMarks[startLine],
shift = state.tShift[startLine];
pos += shift;
if (!state.options.html) { return false; }
if (shift > 3 || pos + 2 >= max || state.blkLevel > 0) { return false; }
if (state.src.charCodeAt(pos) !== 0x3C/* < */) { return false; }
ch = state.src.charCodeAt(pos + 1);
if (ch === 0x21/* ! */ || ch === 0x3F/* ? */) {
// Directive start / comment start / processing instruction start
if (silent) { return true; }
} else if (ch === 0x2F/* / */ || isLetter(ch)) {
// Probably start or end of tag
if (ch === 0x2F/* \ */) {
// closing tag
match = state.src.slice(pos, max).match(HTML_TAG_CLOSE_RE);
if (!match) { return false; }
} else {
// opening tag
match = state.src.slice(pos, max).match(HTML_TAG_OPEN_RE);
if (!match) { return false; }
}
// Make sure tag name is valid
if (block_names.indexOf(match[1]) < 0) { return false; }
if (silent) { return true; }
} else {
return false;
}
// If we are here - we detected HTML block.
// Let's roll down till empty line (block end).
nextLine = startLine + 1;
while (nextLine < state.lineMax && !isEmpty(state, nextLine)) {
nextLine++;
}
state.tokens.push({
type: 'html',
content: getLines(state, startLine, nextLine, 0, true)
});
state.line = nextLine;
return true;
};

2
lib/lexer_block/paragraph.js

@ -28,7 +28,7 @@ module.exports = function paragraph(state, startLine/*, endLine*/) {
// setex header can't interrupt paragraph
// if (rules_named.lheading(state, nextLine, endLine, true)) { break; }
if (rules_named.blockquote(state, nextLine, endLine, true)) { break; }
if (rules_named.html(state, nextLine, endLine, true)) { break; }
if (rules_named.htmlblock(state, nextLine, endLine, true)) { break; }
if (rules_named.table(state, nextLine, endLine, true)) { break; }
//if (rules_named.tag(state, nextLine, endLine, true)) { break; }
//if (rules_named.def(state, nextLine, endLine, true)) { break; }

1
test/stmd.js

@ -11,6 +11,7 @@ var Remarked = require('../');
describe('stmd', function () {
var md = new Remarked({
html: true,
xhtml: true,
codeLangPrefix: 'language-'
});

Loading…
Cancel
Save