From 45da119338f34cf10c094017a7be92645a9932eb Mon Sep 17 00:00:00 2001
From: Alex Kocharin
Date: Sun, 28 Sep 2014 16:49:14 +0400
Subject: [PATCH] Parse link reference definitions

---
Notes (ignored by git-am):
- paragraph.js strips leading "[label]: destination 'title'" definitions from
  a paragraph and records them in env.references (the first definition of a
  label is kept); a paragraph left empty emits no tokens.
- links.js is split into parseLinkLabel / parseLinkDestination /
  parseLinkTitle so that lexer_inline.js can reuse them in parse_reference();
  the inline `links` rule looks the label up in env.references when the label
  is not followed by "(".
- links() tests labelEnd (parseLinkLabel returns -1 when "]" is missing);
  `pos` is not assigned yet at that point. parse_reference() accepts and
  forwards `env`, matching parse() and its caller in paragraph.js.

 lib/lexer_block.js               |   5 +-
 lib/lexer_block/paragraph.js     |  24 ++-
 lib/lexer_block/state_block.js   |   4 +-
 lib/lexer_inline.js              |  61 ++++++-
 lib/lexer_inline/links.js        | 283 +++++++++++++++++++------------
 lib/lexer_inline/state_inline.js |   3 +-
 lib/parser.js                    |  12 +-
 7 files changed, 267 insertions(+), 125 deletions(-)

diff --git a/lib/lexer_block.js b/lib/lexer_block.js
index 206d62a..d32bd64 100644
--- a/lib/lexer_block.js
+++ b/lib/lexer_block.js
@@ -163,7 +163,7 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
 };
 
 
-LexerBlock.prototype.parse = function (src, options) {
+LexerBlock.prototype.parse = function (src, options, env) {
   var state, lineStart = 0, lastTabPos = 0;
 
   if (!src) { return ''; }
@@ -201,7 +201,8 @@ LexerBlock.prototype.parse = function (src, options) {
     src,
     this,
     [],
-    options
+    options,
+    env
   );
 
   this.tokenize(state, state.line, state.lineMax);
diff --git a/lib/lexer_block/paragraph.js b/lib/lexer_block/paragraph.js
index 5175a95..fb5d316 100644
--- a/lib/lexer_block/paragraph.js
+++ b/lib/lexer_block/paragraph.js
@@ -8,7 +8,7 @@ var getLines = require('../helpers').getLines;
 
 
 module.exports = function paragraph(state, startLine/*, endLine*/) {
-  var endLine,
+  var endLine, content, ref, t,
       nextLine = startLine + 1,
       rules_named = state.lexer.rules_named;
 
@@ -34,12 +34,22 @@ module.exports = function paragraph(state, startLine/*, endLine*/) {
     //if (rules_named.def(state, nextLine, endLine, true)) { break; }
   }
 
-  state.tokens.push({ type: 'paragraph_open' });
-  state.tokens.push({
-    type: 'inline',
-    content: getLines(state, startLine, nextLine, state.blkIndent, false).trim()
-  });
-  state.tokens.push({ type: 'paragraph_close' });
+  content = getLines(state, startLine, nextLine, state.blkIndent, false).trim();
+
+  while ((ref = state.lexer.inline.parse_reference(content, state.options, state.env))) {
+    t = state.env.references;
+    t[ref.label] = t[ref.label] || { title: ref.title, href: ref.href };
+    content = ref.remaining;
+  }
+
+  if (content) {
+    state.tokens.push({ type: 'paragraph_open' });
+    state.tokens.push({
+      type: 'inline',
+      content: content
+    });
+    state.tokens.push({ type: 'paragraph_close' });
+  }
 
   state.line = nextLine;
   return true;
diff --git a/lib/lexer_block/state_block.js b/lib/lexer_block/state_block.js
index 08d356b..047fdaa 100644
--- a/lib/lexer_block/state_block.js
+++ b/lib/lexer_block/state_block.js
@@ -3,7 +3,7 @@
 'use strict';
 
 
-function State(src, lexer, tokens, options) {
+function State(src, lexer, tokens, options, env) {
   var ch, s, start, pos, len, indent, indent_found;
 
   // TODO: check if we can move string replaces to parser, to avoid
@@ -23,6 +23,8 @@ function State(src, lexer, tokens, options) {
   // TODO: (?) set directly for faster access.
   this.options = options;
 
+  this.env = env;
+
   //
   // Internal state vartiables
   //
diff --git a/lib/lexer_inline.js b/lib/lexer_inline.js
index e9c8755..a6875b3 100644
--- a/lib/lexer_inline.js
+++ b/lib/lexer_inline.js
@@ -4,6 +4,8 @@
 
 
 var StateInline = require('./lexer_inline/state_inline');
+var links = require('./lexer_inline/links');
+var skipSpaces = require('./helpers').skipSpaces;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer rules
@@ -154,13 +156,68 @@ LexerInline.prototype.tokenize = function (state) {
 
 // Parse input string.
 //
-LexerInline.prototype.parse = function (str, options) {
-  var state = new StateInline(str, this, options);
+LexerInline.prototype.parse = function (str, options, env) {
+  var state = new StateInline(str, this, options, env);
 
   this.tokenize(state);
 
   return state.tokens;
 };
 
+// Parse link reference definition.
+//
+LexerInline.prototype.parse_reference = function (str, options, env) {
+  var state, labelEnd, pos, max, code, start, href, title;
+
+  if (str.charCodeAt(0) !== 0x5B/* [ */) { return null; }
+
+  state = new StateInline(str, this, options, env);
+  labelEnd = links.parseLinkLabel(state, 0);
+
+  if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return null; }
+
+  max = state.posMax;
+
+  // [label]:   destination   'title'
+  //         ^^^ skip optional whitespace here
+  for (pos = labelEnd + 2; pos < max; pos++) {
+    code = state.src.charCodeAt(pos);
+    if (code !== 0x20 && code !== 0x0A) { break; }
+  }
+
+  // [label]:   destination   'title'
+  //            ^^^^^^^^^^^ parse this
+  href = links.parseLinkDestination(state, pos);
+  if (href === null) { return null; }
+  pos = state.pos;
+
+  // [label]:   destination   'title'
+  //                       ^^^ skipping those spaces
+  start = pos;
+  for (pos = pos + 1; pos < max; pos++) {
+    code = state.src.charCodeAt(pos);
+    if (code !== 0x20 && code !== 0x0A) { break; }
+  }
+
+  // [label]:   destination   'title'
+  //                          ^^^^^^^ parse this
+  if (pos < max && start !== pos && (title = links.parseLinkTitle(state, pos)) !== null) {
+    pos = state.pos;
+  } else {
+    title = '';
+  }
+
+  // ensure that the end of the line is empty
+  pos = skipSpaces(state, pos);
+  if (pos < max && state.src.charCodeAt(pos) !== 0x0A) { return null; }
+
+  return {
+    label: str.slice(1, labelEnd).trim().replace(/\s+/g, ' '),
+    title: title,
+    href: href,
+    remaining: str.slice(pos)
+  };
+};
+
 
 module.exports = LexerInline;
diff --git a/lib/lexer_inline/links.js b/lib/lexer_inline/links.js
index 9b32994..e1a30e3 100644
--- a/lib/lexer_inline/links.js
+++ b/lib/lexer_inline/links.js
@@ -5,39 +5,18 @@
 
 var skipSpaces = require('../helpers').skipSpaces;
 
-
-module.exports = function links(state) {
-  var oldLength,
-      oldPending,
-      level,
-      rules,
-      len,
-      i,
-      ok,
-      found,
-      labelStart,
-      labelEnd,
-      href,
-      title,
-      pos,
-      code,
-      isImage = false,
+//
+// Parse link label
+//
+// this function assumes that first character ("[") already matches;
+// returns the end of the label
+function parseLinkLabel(state, start) {
+  var level, rules, len, found, marker, i, ok,
+      labelEnd = -1,
       max = state.posMax,
-      start = state.pos,
-      marker = state.src.charCodeAt(start);
-
-  if (marker === 0x21/* ! */) {
-    isImage = true;
-    marker = state.src.charCodeAt(++start);
-  }
-
-  if (marker !== 0x5B/* [ */) { return false; }
-
-  //
-  // Parse link label
-  //
-  oldLength = state.tokens.length;
-  oldPending = state.pending;
+      oldPos = state.pos,
+      oldLength = state.tokens.length,
+      oldPending = state.pending;
 
   state.pos = start + 1;
   level = 1;
@@ -60,7 +39,7 @@ module.exports = function links(state) {
         // skip emphasis because it has lower priority, compare:
         //  [foo *bar]()*
         //  [foo `bar]()`
-        if (rules[i].name !== 'emphasis' && rules[i] !== links) {
+        if (rules[i].name !== 'emphasis' && rules[i].name !== 'links') {
           ok = rules[i](state);
         }
         if (ok) { break; }
@@ -69,38 +48,34 @@ module.exports = function links(state) {
     if (!ok) { state.pending += state.src[state.pos++]; }
   }
 
+  if (found) { labelEnd = state.pos; }
+
   // restore old state
-  labelStart = start + 1;
-  labelEnd = state.pos;
-  state.pos = start;
+  state.pos = oldPos;
   state.tokens.length = oldLength;
   state.pending = oldPending;
 
-  // parser failed to find ']', so it's not a valid link
-  if (!found) { return false; }
+  return labelEnd;
+}
 
-  //
-  // Parse link destination and title
-  //
-  pos = labelEnd + 1;
-  href = title = '';
-  if (pos >= max || state.src.charCodeAt(pos) !== 0x28/* ( */) { return false; }
-
-  // [link](  <href>  "title"  )
-  //        ^^ skipping these spaces
-  pos++;
-  if ((pos = skipSpaces(state, pos)) >= max) { return false; }
+//
+// Parse link destination
+//
+// on success it returns a string and updates state.pos;
+// on failure it returns null
+function parseLinkDestination(state, pos) {
+  var code, level,
+      max = state.posMax,
+      href = '';
 
-  // [link](  <href>  "title"  )
-  //          ^^^^^^ parsing link destination
   if (state.src.charCodeAt(pos) === 0x3C /* < */) {
     pos++;
     while (pos < max) {
       code = state.src.charCodeAt(pos);
-      if (code === 0x0A /* \n */) { return false; }
+      if (code === 0x0A /* \n */) { return null; }
       if (code === 0x3E /* > */) {
-        pos++;
-        break;
+        state.pos = pos + 1;
+        return href;
       }
       if (code === 0x5C /* \ */) {
         pos++;
@@ -110,74 +85,161 @@ module.exports = function links(state) {
 
       href += state.src[pos++];
     }
-  } else {
-    level = 0;
-    while (pos < max) {
-      code = state.src.charCodeAt(pos);
 
-      if (code === 0x20) { break; }
+    // no closing '>'
+    return null;
+  }
 
-      // ascii control characters
-      if (code < 0x20 || code === 0x7F) { return false; }
+  // this should be ... } else { ... branch
 
-      if (code === 0x5C /* \ */) {
-        pos++;
-        href += state.src[pos++];
-        continue;
-      }
+  level = 0;
+  while (pos < max) {
+    code = state.src.charCodeAt(pos);
 
-      if (code === 0x28 /* ( */) {
-        level++;
-        if (level > 1) { return false; }
-      }
+    if (code === 0x20) { break; }
 
-      if (code === 0x29 /* ) */) {
-        level--;
-        if (level < 0) {
-          break;
-        }
-      }
+    // ascii control characters
+    if (code < 0x20 || code === 0x7F) { return null; }
 
+    if (code === 0x5C /* \ */) {
+      pos++;
       href += state.src[pos++];
+      continue;
     }
+
+    if (code === 0x28 /* ( */) {
+      level++;
+      if (level > 1) { return null; }
+    }
+
+    if (code === 0x29 /* ) */) {
+      level--;
+      if (level < 0) {
+        break;
+      }
+    }
+
+    href += state.src[pos++];
   }
 
-  // [link](  <href>  "title"  )
-  //                ^^ skipping these spaces
-  start = pos;
-  if ((pos = skipSpaces(state, pos)) >= max) { return false; }
+  if (!href.length) { return null; }
 
-  // [link](  <href>  "title"  )
-  //                  ^^^^^^^ parsing link title
-  marker = state.src.charCodeAt(pos);
-  if (start !== pos) {
-    if (marker === 0x22 /* " */ || marker === 0x27 /* ' */ || marker === 0x28 /* ( */) {
-      pos++;
+  state.pos = pos;
+  return href;
+}
 
-      // if opening marker is "(", switch it to closing marker ")"
-      if (marker === 0x28) { marker = 0x29; }
-
-      while (pos < max) {
-        code = state.src.charCodeAt(pos);
-        if (code === marker) {
-          pos++;
-          break;
-        }
-        if (code === 0x5C /* \ */) {
-          pos++;
-          title += state.src[pos++];
-          continue;
-        }
-
-        title += state.src[pos++];
-      }
+//
+// Parse link title
+//
+// on success it returns a string and updates state.pos;
+// on failure it returns null
+function parseLinkTitle(state, pos) {
+  var title, code,
+      max = state.posMax,
+      marker = state.src.charCodeAt(pos);
+
+  if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return null; }
+
+  pos++;
+  title = '';
+
+  // if opening marker is "(", switch it to closing marker ")"
+  if (marker === 0x28) { marker = 0x29; }
+
+  while (pos < max) {
+    code = state.src.charCodeAt(pos);
+    if (code === marker) {
+      state.pos = pos + 1;
+      return title;
     }
+    if (code === 0x5C /* \ */) {
+      pos++;
+      title += state.src[pos++];
+      continue;
+    }
+
+    title += state.src[pos++];
+  }
+
+  return null;
+}
+
+function links(state) {
+  var labelStart,
+      labelEnd,
+      href,
+      title,
+      pos,
+      ref,
+      isImage = false,
+      max = state.posMax,
+      start = state.pos,
+      marker = state.src.charCodeAt(start);
+
+  if (marker === 0x21/* ! */) {
+    isImage = true;
+    marker = state.src.charCodeAt(++start);
   }
 
-  // [link](  <href>  "title"  )
-  //                         ^^ skipping these spaces
-  if ((pos = skipSpaces(state, pos)) >= max) { return false; }
-  if (state.src.charCodeAt(pos) !== 0x29/* ) */) { return false; }
+  if (marker !== 0x5B/* [ */) { return false; }
+
+  labelStart = start + 1;
+  labelEnd = parseLinkLabel(state, start);
+
+  // parser failed to find ']', so it's not a valid link
+  if (labelEnd < 0) { return false; }
+
+  pos = labelEnd + 1;
+  if (pos < max && state.src.charCodeAt(pos) === 0x28/* ( */) {
+    //
+    // Inline link
+    //
+
+    // [link](  <href>  "title"  )
+    //        ^^ skipping these spaces
+    pos++;
+    if ((pos = skipSpaces(state, pos)) >= max) { return false; }
+
+    // [link](  <href>  "title"  )
+    //          ^^^^^^ parsing link destination
+    start = pos;
+    href = parseLinkDestination(state, pos);
+    if (href !== null) {
+      pos = state.pos;
+    } else {
+      href = '';
+    }
+
+    // [link](  <href>  "title"  )
+    //                ^^ skipping these spaces
+    start = pos;
+    pos = skipSpaces(state, pos);
+
+    // [link](  <href>  "title"  )
+    //                  ^^^^^^^ parsing link title
+    if (pos < max && start !== pos && (title = parseLinkTitle(state, pos)) !== null) {
+      pos = state.pos;
+
+      // [link](  <href>  "title"  )
+      //                         ^^ skipping these spaces
+      pos = skipSpaces(state, pos);
+    } else {
+      title = '';
+    }
+
+    if (pos >= max || state.src.charCodeAt(pos) !== 0x29/* ) */) {
+      state.pos = labelStart - 1;
+      return false;
+    }
+  } else {
+    //
+    // Link reference
+    //
+    ref = state.env.references[state.src.slice(labelStart, labelEnd).trim().replace(/\s+/g, ' ')];
+    if (!ref) { return false; }
+    href = ref.href;
+    title = ref.title;
+  }
 
   //
   // We found the end of the link, and know for a fact it's a valid link;
@@ -201,4 +263,9 @@ module.exports = function links(state) {
   state.pos = pos + 1;
   state.posMax = max;
   return true;
-};
+}
+
+module.exports = links;
+module.exports.parseLinkLabel = parseLinkLabel;
+module.exports.parseLinkDestination = parseLinkDestination;
+module.exports.parseLinkTitle = parseLinkTitle;
diff --git a/lib/lexer_inline/state_inline.js b/lib/lexer_inline/state_inline.js
index 66ddcc9..e18750f 100644
--- a/lib/lexer_inline/state_inline.js
+++ b/lib/lexer_inline/state_inline.js
@@ -3,8 +3,9 @@
 'use strict';
 
 
-function StateInline(src, lexer, options) {
+function StateInline(src, lexer, options, env) {
   this.src = src;
+  this.env = env;
   this.options = options;
   this.lexer = lexer;
   this.tokens = [];
diff --git a/lib/parser.js b/lib/parser.js
index 239628d..44f0a97 100644
--- a/lib/parser.js
+++ b/lib/parser.js
@@ -21,6 +21,10 @@ function Parser(options) {
   this.block = new LexerBlock();
   this.renderer = new Renderer();
 
+  // a bunch of cross-references between parsers
+  // used for link reference definitions
+  this.block.inline = this.inline;
+
   if (options) { this.set(options); }
 }
 
@@ -31,21 +35,21 @@ Parser.prototype.set = function (options) {
 
 
 Parser.prototype.render = function (src) {
-  var tokens, tok, i, l;
+  var tokens, tok, i, l, env = { references: Object.create(null) };
 
   // Parse blocks
-  tokens = this.block.parse(src, this.options);
+  tokens = this.block.parse(src, this.options, env);
 
   // Parse inlines
   for (i = 0, l = tokens.length; i < l; i++) {
     tok = tokens[i];
     if (tok.type === 'inline') {
-      tok.children = this.inline.parse(tok.content, this.options);
+      tok.children = this.inline.parse(tok.content, this.options, env);
     }
   }
 
   // Render
-  return this.renderer.render(tokens, this.options);
+  return this.renderer.render(tokens, this.options, env);
 };
 
 