Browse Source

Parse link reference definitions

pull/14/head
Alex Kocharin 10 years ago
parent
commit
45da119338
  1. 5
      lib/lexer_block.js
  2. 24
      lib/lexer_block/paragraph.js
  3. 4
      lib/lexer_block/state_block.js
  4. 61
      lib/lexer_inline.js
  5. 283
      lib/lexer_inline/links.js
  6. 3
      lib/lexer_inline/state_inline.js
  7. 12
      lib/parser.js

5
lib/lexer_block.js

@ -163,7 +163,7 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
}; };
LexerBlock.prototype.parse = function (src, options) { LexerBlock.prototype.parse = function (src, options, env) {
var state, lineStart = 0, lastTabPos = 0; var state, lineStart = 0, lastTabPos = 0;
if (!src) { return ''; } if (!src) { return ''; }
@ -201,7 +201,8 @@ LexerBlock.prototype.parse = function (src, options) {
src, src,
this, this,
[], [],
options options,
env
); );
this.tokenize(state, state.line, state.lineMax); this.tokenize(state, state.line, state.lineMax);

24
lib/lexer_block/paragraph.js

@ -8,7 +8,7 @@ var getLines = require('../helpers').getLines;
module.exports = function paragraph(state, startLine/*, endLine*/) { module.exports = function paragraph(state, startLine/*, endLine*/) {
var endLine, var endLine, content, ref, t,
nextLine = startLine + 1, nextLine = startLine + 1,
rules_named = state.lexer.rules_named; rules_named = state.lexer.rules_named;
@ -34,12 +34,22 @@ module.exports = function paragraph(state, startLine/*, endLine*/) {
//if (rules_named.def(state, nextLine, endLine, true)) { break; } //if (rules_named.def(state, nextLine, endLine, true)) { break; }
} }
state.tokens.push({ type: 'paragraph_open' }); content = getLines(state, startLine, nextLine, state.blkIndent, false).trim();
state.tokens.push({
type: 'inline', while ((ref = state.lexer.inline.parse_reference(content, state.options, state.env))) {
content: getLines(state, startLine, nextLine, state.blkIndent, false).trim() t = state.env.references;
}); t[ref.label] = t[ref.label] || { title: ref.title, href: ref.href };
state.tokens.push({ type: 'paragraph_close' }); content = ref.remaining;
}
if (content) {
state.tokens.push({ type: 'paragraph_open' });
state.tokens.push({
type: 'inline',
content: content
});
state.tokens.push({ type: 'paragraph_close' });
}
state.line = nextLine; state.line = nextLine;
return true; return true;

4
lib/lexer_block/state_block.js

@ -3,7 +3,7 @@
'use strict'; 'use strict';
function State(src, lexer, tokens, options) { function State(src, lexer, tokens, options, env) {
var ch, s, start, pos, len, indent, indent_found; var ch, s, start, pos, len, indent, indent_found;
// TODO: check if we can move string replaces to parser, to avoid // TODO: check if we can move string replaces to parser, to avoid
@ -23,6 +23,8 @@ function State(src, lexer, tokens, options) {
// TODO: (?) set directly for faster access. // TODO: (?) set directly for faster access.
this.options = options; this.options = options;
this.env = env;
// //
// Internal state variables // Internal state variables
// //

61
lib/lexer_inline.js

@ -4,6 +4,8 @@
var StateInline = require('./lexer_inline/state_inline'); var StateInline = require('./lexer_inline/state_inline');
var links = require('./lexer_inline/links');
var skipSpaces = require('./helpers').skipSpaces;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer rules // Lexer rules
@ -154,13 +156,68 @@ LexerInline.prototype.tokenize = function (state) {
// Parse input string. // Parse input string.
// //
LexerInline.prototype.parse = function (str, options) { LexerInline.prototype.parse = function (str, options, env) {
var state = new StateInline(str, this, options); var state = new StateInline(str, this, options, env);
this.tokenize(state); this.tokenize(state);
return state.tokens; return state.tokens;
}; };
// Parse link reference definition.
//
// Recognizes `[label]: destination 'title'` at the very start of `str`.
//
// str     - text to scan; anything not starting with "[" is rejected
// options - parser options, handed to the inline state
// env     - (optional) shared environment, handed to the inline state the
//           same way `parse` does
//
// Returns null when `str` does not start with a definition. Otherwise returns
// `{ label, title, href, remaining }`: `label` is trimmed with inner
// whitespace runs collapsed to one space, `title` is '' when absent, and
// `remaining` is the unconsumed tail of `str`.
//
LexerInline.prototype.parse_reference = function (str, options, env) {
  var state, labelEnd, pos, max, code, start, href, title;

  if (str.charCodeAt(0) !== 0x5B/* [ */) { return null; }

  state = new StateInline(str, this, options, env);
  labelEnd = links.parseLinkLabel(state, 0);

  // the label must be closed and immediately followed by ":"
  if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return null; }

  max = state.posMax;

  // [label]:   destination   'title'
  //         ^^^ skip optional whitespace here
  for (pos = labelEnd + 2; pos < max; pos++) {
    code = state.src.charCodeAt(pos);
    if (code !== 0x20 && code !== 0x0A) { break; }
  }

  // [label]:   destination   'title'
  //            ^^^^^^^^^^^ parse this
  href = links.parseLinkDestination(state, pos);
  if (href === null) { return null; }
  pos = state.pos;

  // [label]:   destination   'title'
  //                       ^^^ skipping those spaces
  //
  // Start exactly at the end of the destination: the character that follows
  // it must be inspected, not skipped, otherwise trailing junk such as
  // "<url>x" would be silently swallowed.
  start = pos;
  for (; pos < max; pos++) {
    code = state.src.charCodeAt(pos);
    if (code !== 0x20 && code !== 0x0A) { break; }
  }

  // [label]:   destination   'title'
  //                          ^^^^^^^ parse this
  //
  // `start !== pos` requires whitespace between the destination and the title.
  if (pos < max && start !== pos && (title = links.parseLinkTitle(state, pos)) !== null) {
    pos = state.pos;
  } else {
    title = '';
  }

  // ensure that the end of the line is empty
  pos = skipSpaces(state, pos);
  if (pos < max && state.src.charCodeAt(pos) !== 0x0A) { return null; }

  return {
    label: str.slice(1, labelEnd).trim().replace(/\s+/g, ' '),
    title: title,
    href: href,
    remaining: str.slice(pos)
  };
};
module.exports = LexerInline; module.exports = LexerInline;

283
lib/lexer_inline/links.js

@ -5,39 +5,18 @@
var skipSpaces = require('../helpers').skipSpaces; var skipSpaces = require('../helpers').skipSpaces;
//
module.exports = function links(state) { // Parse link label
var oldLength, //
oldPending, // this function assumes that first character ("[") already matches;
level, // returns the end of the label
rules, function parseLinkLabel(state, start) {
len, var level, rules, len, found, marker, i, ok,
i, labelEnd = -1,
ok,
found,
labelStart,
labelEnd,
href,
title,
pos,
code,
isImage = false,
max = state.posMax, max = state.posMax,
start = state.pos, oldPos = state.pos,
marker = state.src.charCodeAt(start); oldLength = state.tokens.length,
oldPending = state.pending;
if (marker === 0x21/* ! */) {
isImage = true;
marker = state.src.charCodeAt(++start);
}
if (marker !== 0x5B/* [ */) { return false; }
//
// Parse link label
//
oldLength = state.tokens.length;
oldPending = state.pending;
state.pos = start + 1; state.pos = start + 1;
level = 1; level = 1;
@ -60,7 +39,7 @@ module.exports = function links(state) {
// skip emphasis because it has lower priority, compare: // skip emphasis because it has lower priority, compare:
// [foo *bar]()* // [foo *bar]()*
// [foo `bar]()` // [foo `bar]()`
if (rules[i].name !== 'emphasis' && rules[i] !== links) { if (rules[i].name !== 'emphasis' && rules[i].name !== 'links') {
ok = rules[i](state); ok = rules[i](state);
} }
if (ok) { break; } if (ok) { break; }
@ -69,38 +48,34 @@ module.exports = function links(state) {
if (!ok) { state.pending += state.src[state.pos++]; } if (!ok) { state.pending += state.src[state.pos++]; }
} }
if (found) { labelEnd = state.pos; }
// restore old state // restore old state
labelStart = start + 1; state.pos = oldPos;
labelEnd = state.pos;
state.pos = start;
state.tokens.length = oldLength; state.tokens.length = oldLength;
state.pending = oldPending; state.pending = oldPending;
// parser failed to find ']', so it's not a valid link return labelEnd;
if (!found) { return false; } }
// //
// Parse link destination and title // Parse link destination
// //
pos = labelEnd + 1; // on success it returns a string and updates state.pos;
href = title = ''; // on failure it returns null
if (pos >= max || state.src.charCodeAt(pos) !== 0x28/* ( */) { return false; } function parseLinkDestination(state, pos) {
var code, level,
// [link]( <href> "title" ) max = state.posMax,
// ^^ skipping these spaces href = '';
pos++;
if ((pos = skipSpaces(state, pos)) >= max) { return false; }
// [link]( <href> "title" )
// ^^^^^^ parsing link destination
if (state.src.charCodeAt(pos) === 0x3C /* < */) { if (state.src.charCodeAt(pos) === 0x3C /* < */) {
pos++; pos++;
while (pos < max) { while (pos < max) {
code = state.src.charCodeAt(pos); code = state.src.charCodeAt(pos);
if (code === 0x0A /* \n */) { return false; } if (code === 0x0A /* \n */) { return null; }
if (code === 0x3E /* > */) { if (code === 0x3E /* > */) {
pos++; state.pos = pos + 1;
break; return href;
} }
if (code === 0x5C /* \ */) { if (code === 0x5C /* \ */) {
pos++; pos++;
@ -110,74 +85,161 @@ module.exports = function links(state) {
href += state.src[pos++]; href += state.src[pos++];
} }
} else {
level = 0;
while (pos < max) {
code = state.src.charCodeAt(pos);
if (code === 0x20) { break; } // no closing '>'
return null;
}
// ascii control characters // this should be ... } else { ... branch
if (code < 0x20 || code === 0x7F) { return false; }
if (code === 0x5C /* \ */) { level = 0;
pos++; while (pos < max) {
href += state.src[pos++]; code = state.src.charCodeAt(pos);
continue;
}
if (code === 0x28 /* ( */) { if (code === 0x20) { break; }
level++;
if (level > 1) { return false; }
}
if (code === 0x29 /* ) */) { // ascii control characters
level--; if (code < 0x20 || code === 0x7F) { return null; }
if (level < 0) {
break;
}
}
if (code === 0x5C /* \ */) {
pos++;
href += state.src[pos++]; href += state.src[pos++];
continue;
} }
if (code === 0x28 /* ( */) {
level++;
if (level > 1) { return null; }
}
if (code === 0x29 /* ) */) {
level--;
if (level < 0) {
break;
}
}
href += state.src[pos++];
} }
// [link]( <href> "title" ) if (!href.length) { return null; }
// ^^ skipping these spaces
start = pos;
if ((pos = skipSpaces(state, pos)) >= max) { return false; }
// [link]( <href> "title" ) state.pos = pos;
// ^^^^^^^ parsing link title return href;
marker = state.src.charCodeAt(pos); }
if (start !== pos) {
if (marker === 0x22 /* " */ || marker === 0x27 /* ' */ || marker === 0x28 /* ( */) {
pos++;
// if opening marker is "(", switch it to closing marker ")" //
if (marker === 0x28) { marker = 0x29; } // Parse link title
//
while (pos < max) { // on success it returns a string and updates state.pos;
code = state.src.charCodeAt(pos); // on failure it returns null
if (code === marker) { function parseLinkTitle(state, pos) {
pos++; var title, code,
break; max = state.posMax,
} marker = state.src.charCodeAt(pos);
if (code === 0x5C /* \ */) {
pos++; if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return null; }
title += state.src[pos++];
continue; pos++;
} title = '';
title += state.src[pos++]; // if opening marker is "(", switch it to closing marker ")"
} if (marker === 0x28) { marker = 0x29; }
while (pos < max) {
code = state.src.charCodeAt(pos);
if (code === marker) {
state.pos = pos + 1;
return title;
} }
if (code === 0x5C /* \ */) {
pos++;
title += state.src[pos++];
continue;
}
title += state.src[pos++];
}
return null;
}
function links(state) {
var labelStart,
labelEnd,
href,
title,
pos,
ref,
isImage = false,
max = state.posMax,
start = state.pos,
marker = state.src.charCodeAt(start);
if (marker === 0x21/* ! */) {
isImage = true;
marker = state.src.charCodeAt(++start);
} }
// [link]( <href> "title" ) if (marker !== 0x5B/* [ */) { return false; }
// ^^ skipping these spaces
if ((pos = skipSpaces(state, pos)) >= max) { return false; } labelStart = start + 1;
if (state.src.charCodeAt(pos) !== 0x29/* ) */) { return false; } labelEnd = parseLinkLabel(state, start);
// parser failed to find ']', so it's not a valid link
if (pos < 0) { return false; }
pos = labelEnd + 1;
if (pos < max && state.src.charCodeAt(pos) === 0x28/* ( */) {
//
// Inline link
//
// [link]( <href> "title" )
// ^^ skipping these spaces
pos++;
if ((pos = skipSpaces(state, pos)) >= max) { return false; }
// [link]( <href> "title" )
// ^^^^^^ parsing link destination
start = pos;
href = parseLinkDestination(state, pos);
if (href !== null) {
pos = state.pos;
} else {
href = '';
}
// [link]( <href> "title" )
// ^^ skipping these spaces
start = pos;
pos = skipSpaces(state, pos);
// [link]( <href> "title" )
// ^^^^^^^ parsing link title
if (pos < max && start !== pos && (title = parseLinkTitle(state, pos)) !== null) {
pos = state.pos;
// [link]( <href> "title" )
// ^^ skipping these spaces
pos = skipSpaces(state, pos);
} else {
title = '';
}
if (pos >= max || state.src.charCodeAt(pos) !== 0x29/* ) */) {
state.pos = labelStart - 1;
return false;
}
} else {
//
// Link reference
//
ref = state.env.references[state.src.slice(labelStart, labelEnd).trim().replace(/\s+/g, ' ')];
if (!ref) { return false; }
href = ref.href;
title = ref.title;
}
// //
// We found the end of the link, and know for a fact it's a valid link; // We found the end of the link, and know for a fact it's a valid link;
@ -201,4 +263,9 @@ module.exports = function links(state) {
state.pos = pos + 1; state.pos = pos + 1;
state.posMax = max; state.posMax = max;
return true; return true;
}; }
module.exports = links;
module.exports.parseLinkLabel = parseLinkLabel;
module.exports.parseLinkDestination = parseLinkDestination;
module.exports.parseLinkTitle = parseLinkTitle;

3
lib/lexer_inline/state_inline.js

@ -3,8 +3,9 @@
'use strict'; 'use strict';
function StateInline(src, lexer, options) { function StateInline(src, lexer, options, env) {
this.src = src; this.src = src;
this.env = env;
this.options = options; this.options = options;
this.lexer = lexer; this.lexer = lexer;
this.tokens = []; this.tokens = [];

12
lib/parser.js

@ -21,6 +21,10 @@ function Parser(options) {
this.block = new LexerBlock(); this.block = new LexerBlock();
this.renderer = new Renderer(); this.renderer = new Renderer();
// a bunch of cross-references between parsers
// used for link reference definitions
this.block.inline = this.inline;
if (options) { this.set(options); } if (options) { this.set(options); }
} }
@ -31,21 +35,21 @@ Parser.prototype.set = function (options) {
Parser.prototype.render = function (src) { Parser.prototype.render = function (src) {
var tokens, tok, i, l; var tokens, tok, i, l, env = { references: Object.create(null) };
// Parse blocks // Parse blocks
tokens = this.block.parse(src, this.options); tokens = this.block.parse(src, this.options, env);
// Parse inlines // Parse inlines
for (i = 0, l = tokens.length; i < l; i++) { for (i = 0, l = tokens.length; i < l; i++) {
tok = tokens[i]; tok = tokens[i];
if (tok.type === 'inline') { if (tok.type === 'inline') {
tok.children = this.inline.parse(tok.content, this.options); tok.children = this.inline.parse(tok.content, this.options, env);
} }
} }
// Render // Render
return this.renderer.render(tokens, this.options); return this.renderer.render(tokens, this.options, env);
}; };

Loading…
Cancel
Save