Browse Source

Parse link reference definitions

pull/14/head
Alex Kocharin 10 years ago
parent
commit
45da119338
  1. 5
      lib/lexer_block.js
  2. 24
      lib/lexer_block/paragraph.js
  3. 4
      lib/lexer_block/state_block.js
  4. 61
      lib/lexer_inline.js
  5. 283
      lib/lexer_inline/links.js
  6. 3
      lib/lexer_inline/state_inline.js
  7. 12
      lib/parser.js

5
lib/lexer_block.js

@ -163,7 +163,7 @@ LexerBlock.prototype.tokenize = function (state, startLine, endLine) {
};
LexerBlock.prototype.parse = function (src, options) {
LexerBlock.prototype.parse = function (src, options, env) {
var state, lineStart = 0, lastTabPos = 0;
if (!src) { return ''; }
@ -201,7 +201,8 @@ LexerBlock.prototype.parse = function (src, options) {
src,
this,
[],
options
options,
env
);
this.tokenize(state, state.line, state.lineMax);

24
lib/lexer_block/paragraph.js

@ -8,7 +8,7 @@ var getLines = require('../helpers').getLines;
module.exports = function paragraph(state, startLine/*, endLine*/) {
var endLine,
var endLine, content, ref, t,
nextLine = startLine + 1,
rules_named = state.lexer.rules_named;
@ -34,12 +34,22 @@ module.exports = function paragraph(state, startLine/*, endLine*/) {
//if (rules_named.def(state, nextLine, endLine, true)) { break; }
}
state.tokens.push({ type: 'paragraph_open' });
state.tokens.push({
type: 'inline',
content: getLines(state, startLine, nextLine, state.blkIndent, false).trim()
});
state.tokens.push({ type: 'paragraph_close' });
content = getLines(state, startLine, nextLine, state.blkIndent, false).trim();
while ((ref = state.lexer.inline.parse_reference(content, state.options, state.env))) {
t = state.env.references;
t[ref.label] = t[ref.label] || { title: ref.title, href: ref.href };
content = ref.remaining;
}
if (content) {
state.tokens.push({ type: 'paragraph_open' });
state.tokens.push({
type: 'inline',
content: content
});
state.tokens.push({ type: 'paragraph_close' });
}
state.line = nextLine;
return true;

4
lib/lexer_block/state_block.js

@ -3,7 +3,7 @@
'use strict';
function State(src, lexer, tokens, options) {
function State(src, lexer, tokens, options, env) {
var ch, s, start, pos, len, indent, indent_found;
// TODO: check if we can move string replaces to parser, to avoid
@ -23,6 +23,8 @@ function State(src, lexer, tokens, options) {
// TODO: (?) set directly for faster access.
this.options = options;
this.env = env;
//
// Internal state variables
//

61
lib/lexer_inline.js

@ -4,6 +4,8 @@
var StateInline = require('./lexer_inline/state_inline');
var links = require('./lexer_inline/links');
var skipSpaces = require('./helpers').skipSpaces;
////////////////////////////////////////////////////////////////////////////////
// Lexer rules
@ -154,13 +156,68 @@ LexerInline.prototype.tokenize = function (state) {
// Parse input string.
//
LexerInline.prototype.parse = function (str, options) {
var state = new StateInline(str, this, options);
LexerInline.prototype.parse = function (str, options, env) {
  // Build a fresh inline state for this string; `env` is handed to the
  // state constructor unchanged, alongside the lexer itself and `options`.
  var inlineState = new StateInline(str, this, options, env);

  // Run the inline rules over the state, then hand back whatever tokens
  // ended up on it.
  this.tokenize(inlineState);

  return inlineState.tokens;
};
// Parse a link reference definition located at the very start of `str`:
//
//     [label]: destination 'title'
//
// Parameters:
//  - str     - text to examine; must begin with "[" to be considered
//  - options - parser options, forwarded to the scratch inline state
//  - env     - (optional) environment object, forwarded to the scratch
//              inline state. The paragraph rule already passes its `env`
//              as the third argument; it used to be dropped here, leaving
//              `state.env` undefined while the label was being scanned.
//
// Returns null when `str` does not start with a well-formed definition.
// Otherwise returns an object with:
//  - label     - label text, trimmed, whitespace runs collapsed to one space
//  - title     - title text, or '' when no title is present
//  - href      - link destination
//  - remaining - the part of `str` that follows the definition
//
LexerInline.prototype.parse_reference = function (str, options, env) {
  var state, labelEnd, pos, max, code, start, href, title;

  if (str.charCodeAt(0) !== 0x5B/* [ */) { return null; }

  state = new StateInline(str, this, options, env);
  labelEnd = links.parseLinkLabel(state, 0);

  // the label must be closed and immediately followed by ":"
  if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return null; }

  max = state.posMax;

  // [label]:   destination   'title'
  //         ^^^ skip optional whitespace here
  for (pos = labelEnd + 2; pos < max; pos++) {
    code = state.src.charCodeAt(pos);
    if (code !== 0x20 && code !== 0x0A) { break; }
  }

  // [label]:   destination   'title'
  //            ^^^^^^^^^^^ parse this
  href = links.parseLinkDestination(state, pos);
  if (href === null) { return null; }
  pos = state.pos;

  // [label]:   destination   'title'
  //                       ^^^ skipping those spaces
  //
  // The scan starts AT the first character after the destination (not one
  // past it): that character is not guaranteed to be whitespace (e.g. after
  // a closing ">"), and skipping it blindly would both swallow it and make
  // the `start !== pos` "title must be separated by whitespace" guard below
  // always true.
  start = pos;
  for (; pos < max; pos++) {
    code = state.src.charCodeAt(pos);
    if (code !== 0x20 && code !== 0x0A) { break; }
  }

  // [label]:   destination   'title'
  //                          ^^^^^^^ parse this
  if (pos < max && start !== pos && (title = links.parseLinkTitle(state, pos)) !== null) {
    pos = state.pos;
  } else {
    title = '';
  }

  // ensure that the end of the line is empty
  pos = skipSpaces(state, pos);
  if (pos < max && state.src.charCodeAt(pos) !== 0x0A) { return null; }

  return {
    label: str.slice(1, labelEnd).trim().replace(/\s+/g, ' '),
    title: title,
    href: href,
    remaining: str.slice(pos)
  };
};
module.exports = LexerInline;

283
lib/lexer_inline/links.js

@ -5,39 +5,18 @@
var skipSpaces = require('../helpers').skipSpaces;
module.exports = function links(state) {
var oldLength,
oldPending,
level,
rules,
len,
i,
ok,
found,
labelStart,
labelEnd,
href,
title,
pos,
code,
isImage = false,
//
// Parse link label
//
// this function assumes that first character ("[") already matches;
// returns the end of the label
function parseLinkLabel(state, start) {
var level, rules, len, found, marker, i, ok,
labelEnd = -1,
max = state.posMax,
start = state.pos,
marker = state.src.charCodeAt(start);
if (marker === 0x21/* ! */) {
isImage = true;
marker = state.src.charCodeAt(++start);
}
if (marker !== 0x5B/* [ */) { return false; }
//
// Parse link label
//
oldLength = state.tokens.length;
oldPending = state.pending;
oldPos = state.pos,
oldLength = state.tokens.length,
oldPending = state.pending;
state.pos = start + 1;
level = 1;
@ -60,7 +39,7 @@ module.exports = function links(state) {
// skip emphasis because it has lower priority, compare:
// [foo *bar]()*
// [foo `bar]()`
if (rules[i].name !== 'emphasis' && rules[i] !== links) {
if (rules[i].name !== 'emphasis' && rules[i].name !== 'links') {
ok = rules[i](state);
}
if (ok) { break; }
@ -69,38 +48,34 @@ module.exports = function links(state) {
if (!ok) { state.pending += state.src[state.pos++]; }
}
if (found) { labelEnd = state.pos; }
// restore old state
labelStart = start + 1;
labelEnd = state.pos;
state.pos = start;
state.pos = oldPos;
state.tokens.length = oldLength;
state.pending = oldPending;
// parser failed to find ']', so it's not a valid link
if (!found) { return false; }
return labelEnd;
}
//
// Parse link destination and title
//
pos = labelEnd + 1;
href = title = '';
if (pos >= max || state.src.charCodeAt(pos) !== 0x28/* ( */) { return false; }
// [link]( <href> "title" )
// ^^ skipping these spaces
pos++;
if ((pos = skipSpaces(state, pos)) >= max) { return false; }
//
// Parse link destination
//
// on success it returns a string and updates state.pos;
// on failure it returns null
function parseLinkDestination(state, pos) {
var code, level,
max = state.posMax,
href = '';
// [link]( <href> "title" )
// ^^^^^^ parsing link destination
if (state.src.charCodeAt(pos) === 0x3C /* < */) {
pos++;
while (pos < max) {
code = state.src.charCodeAt(pos);
if (code === 0x0A /* \n */) { return false; }
if (code === 0x0A /* \n */) { return null; }
if (code === 0x3E /* > */) {
pos++;
break;
state.pos = pos + 1;
return href;
}
if (code === 0x5C /* \ */) {
pos++;
@ -110,74 +85,161 @@ module.exports = function links(state) {
href += state.src[pos++];
}
} else {
level = 0;
while (pos < max) {
code = state.src.charCodeAt(pos);
if (code === 0x20) { break; }
// no closing '>'
return null;
}
// ascii control characters
if (code < 0x20 || code === 0x7F) { return false; }
// this should be ... } else { ... branch
if (code === 0x5C /* \ */) {
pos++;
href += state.src[pos++];
continue;
}
level = 0;
while (pos < max) {
code = state.src.charCodeAt(pos);
if (code === 0x28 /* ( */) {
level++;
if (level > 1) { return false; }
}
if (code === 0x20) { break; }
if (code === 0x29 /* ) */) {
level--;
if (level < 0) {
break;
}
}
// ascii control characters
if (code < 0x20 || code === 0x7F) { return null; }
if (code === 0x5C /* \ */) {
pos++;
href += state.src[pos++];
continue;
}
if (code === 0x28 /* ( */) {
level++;
if (level > 1) { return null; }
}
if (code === 0x29 /* ) */) {
level--;
if (level < 0) {
break;
}
}
href += state.src[pos++];
}
// [link]( <href> "title" )
// ^^ skipping these spaces
start = pos;
if ((pos = skipSpaces(state, pos)) >= max) { return false; }
if (!href.length) { return null; }
// [link]( <href> "title" )
// ^^^^^^^ parsing link title
marker = state.src.charCodeAt(pos);
if (start !== pos) {
if (marker === 0x22 /* " */ || marker === 0x27 /* ' */ || marker === 0x28 /* ( */) {
pos++;
state.pos = pos;
return href;
}
// if opening marker is "(", switch it to closing marker ")"
if (marker === 0x28) { marker = 0x29; }
while (pos < max) {
code = state.src.charCodeAt(pos);
if (code === marker) {
pos++;
break;
}
if (code === 0x5C /* \ */) {
pos++;
title += state.src[pos++];
continue;
}
title += state.src[pos++];
}
//
// Parse link title
//
// Expects `pos` to point at the opening delimiter of a title: a double
// quote, a single quote, or "(" (which is closed by ")").
//
// On success returns the title text (a backslash is dropped and the
// character after it is copied as-is) and sets `state.pos` to the position
// right after the closing delimiter.
// On failure (no opening delimiter at `pos`, or no closing delimiter before
// `state.posMax`) returns null and leaves `state.pos` untouched.
function parseLinkTitle(state, pos) {
  var limit = state.posMax,
      opener = state.src.charCodeAt(pos),
      closer,
      cursor,
      ch,
      text = '';

  switch (opener) {
    case 0x22 /* " */:
    case 0x27 /* ' */:
      closer = opener;
      break;
    case 0x28 /* ( */:
      // a parenthesised title is terminated by the matching ")"
      closer = 0x29;
      break;
    default:
      return null;
  }

  for (cursor = pos + 1; cursor < limit; cursor++) {
    ch = state.src.charCodeAt(cursor);

    if (ch === closer) {
      state.pos = cursor + 1;
      return text;
    }

    // escape: step over the backslash so the next character is taken literally
    if (ch === 0x5C /* \ */) { cursor++; }

    text += state.src[cursor];
  }

  // ran out of input before finding the closing delimiter
  return null;
}
function links(state) {
var labelStart,
labelEnd,
href,
title,
pos,
ref,
isImage = false,
max = state.posMax,
start = state.pos,
marker = state.src.charCodeAt(start);
if (marker === 0x21/* ! */) {
isImage = true;
marker = state.src.charCodeAt(++start);
}
// [link]( <href> "title" )
// ^^ skipping these spaces
if ((pos = skipSpaces(state, pos)) >= max) { return false; }
if (state.src.charCodeAt(pos) !== 0x29/* ) */) { return false; }
if (marker !== 0x5B/* [ */) { return false; }
labelStart = start + 1;
labelEnd = parseLinkLabel(state, start);
// parser failed to find ']', so it's not a valid link
if (pos < 0) { return false; }
pos = labelEnd + 1;
if (pos < max && state.src.charCodeAt(pos) === 0x28/* ( */) {
//
// Inline link
//
// [link]( <href> "title" )
// ^^ skipping these spaces
pos++;
if ((pos = skipSpaces(state, pos)) >= max) { return false; }
// [link]( <href> "title" )
// ^^^^^^ parsing link destination
start = pos;
href = parseLinkDestination(state, pos);
if (href !== null) {
pos = state.pos;
} else {
href = '';
}
// [link]( <href> "title" )
// ^^ skipping these spaces
start = pos;
pos = skipSpaces(state, pos);
// [link]( <href> "title" )
// ^^^^^^^ parsing link title
if (pos < max && start !== pos && (title = parseLinkTitle(state, pos)) !== null) {
pos = state.pos;
// [link]( <href> "title" )
// ^^ skipping these spaces
pos = skipSpaces(state, pos);
} else {
title = '';
}
if (pos >= max || state.src.charCodeAt(pos) !== 0x29/* ) */) {
state.pos = labelStart - 1;
return false;
}
} else {
//
// Link reference
//
ref = state.env.references[state.src.slice(labelStart, labelEnd).trim().replace(/\s+/g, ' ')];
if (!ref) { return false; }
href = ref.href;
title = ref.title;
}
//
// We found the end of the link, and know for a fact it's a valid link;
@ -201,4 +263,9 @@ module.exports = function links(state) {
state.pos = pos + 1;
state.posMax = max;
return true;
};
}
module.exports = links;
module.exports.parseLinkLabel = parseLinkLabel;
module.exports.parseLinkDestination = parseLinkDestination;
module.exports.parseLinkTitle = parseLinkTitle;

3
lib/lexer_inline/state_inline.js

@ -3,8 +3,9 @@
'use strict';
function StateInline(src, lexer, options) {
function StateInline(src, lexer, options, env) {
this.src = src;
this.env = env;
this.options = options;
this.lexer = lexer;
this.tokens = [];

12
lib/parser.js

@ -21,6 +21,10 @@ function Parser(options) {
this.block = new LexerBlock();
this.renderer = new Renderer();
// a bunch of cross-references between parsers
// used for link reference definitions
this.block.inline = this.inline;
if (options) { this.set(options); }
}
@ -31,21 +35,21 @@ Parser.prototype.set = function (options) {
Parser.prototype.render = function (src) {
var tokens, tok, i, l;
var tokens, tok, i, l, env = { references: Object.create(null) };
// Parse blocks
tokens = this.block.parse(src, this.options);
tokens = this.block.parse(src, this.options, env);
// Parse inlines
for (i = 0, l = tokens.length; i < l; i++) {
tok = tokens[i];
if (tok.type === 'inline') {
tok.children = this.inline.parse(tok.content, this.options);
tok.children = this.inline.parse(tok.content, this.options, env);
}
}
// Render
return this.renderer.render(tokens, this.options);
return this.renderer.render(tokens, this.options, env);
};

Loading…
Cancel
Save