Browse Source

Add abbreviations

pull/14/head
Alex Kocharin 10 years ago
parent
commit
2c286f5aed
  1. 2
      lib/parser_core.js
  2. 9
      lib/renderer.js
  3. 62
      lib/rules_core/abbr.js
  4. 86
      lib/rules_core/abbr2.js
  5. 87
      test/fixtures/remarkable/abbr.txt

2
lib/parser_core.js

@ -8,8 +8,10 @@ var Ruler = require('./ruler');
var _rules = [
[ 'block', require('./rules_core/block') ],
[ 'abbr', require('./rules_core/abbr') ],
[ 'references', require('./rules_core/references') ],
[ 'inline', require('./rules_core/inline') ],
[ 'abbr2', require('./rules_core/abbr2') ],
[ 'replacements', require('./rules_core/replacements') ],
[ 'smartquotes', require('./rules_core/smartquotes') ],
[ 'linkify', require('./rules_core/linkify') ]

9
lib/renderer.js

@ -71,6 +71,15 @@ function getBreak(tokens, idx) {
// Table of renderer callbacks; each one returns the HTML fragment for a
// single token.
var rules = {};

/**
 * Opening tag of an abbreviation.
 *
 * @param {Array} tokens - token list being rendered
 * @param {number} idx - index of the `abbr_open` token inside `tokens`
 * @returns {string} `<abbr>`, carrying an escaped `title` attribute when the
 *   token has a non-empty `title`
 */
rules.abbr_open = function (tokens, idx/*, options*/) {
  var rawTitle = tokens[idx].title;

  // No (or empty) title: emit the bare tag without the attribute.
  if (!rawTitle) {
    return '<abbr>';
  }
  return '<abbr title="' + escapeHtml(replaceEntities(rawTitle)) + '">';
};

/**
 * Closing tag of an abbreviation.
 *
 * @returns {string} always `</abbr>`
 */
rules.abbr_close = function (/*tokens, idx, options*/) {
  var closingTag = '</abbr>';
  return closingTag;
};

/**
 * Opening tag of a block quote, followed by a newline.
 *
 * @returns {string} always `<blockquote>\n`
 */
rules.blockquote_open = function (/*tokens, idx, options*/) {
  var openingTag = '<blockquote>\n';
  return openingTag;
};

62
lib/rules_core/abbr.js

@ -0,0 +1,62 @@
// Parse abbreviation definitions, i.e. `*[abbr]: description`
//
'use strict';
var StateInline = require('../rules_inline/state_inline');
var parseLinkLabel = require('../links').parseLinkLabel;
/**
 * Try to read one abbreviation definition (`*[label]: title`) located at the
 * very beginning of `str` and register it in `env.abbreviations`.
 *
 * The first definition of a label wins; later definitions of the same label
 * are consumed but do not overwrite the stored title.
 *
 * @param {string} str - text that may start with a definition
 * @param {Object} parser - inline parser, handed to `StateInline`
 * @param {Object} options - parser options, handed to `StateInline`
 * @param {Object} env - shared environment; `env.abbreviations` is created on
 *   demand and maps label -> title
 * @returns {number} position in `str` just past the definition's title (the
 *   index of the terminating "\n" or the end of the string), or -1 when `str`
 *   does not start with a valid definition
 */
function parseAbbr(str, parser, options, env) {
  var state, labelEnd, pos, max, label, title;

  // Cheap rejections before any parser state is allocated.
  if (str.charCodeAt(0) !== 0x2A/* * */) { return -1; }
  if (str.charCodeAt(1) !== 0x5B/* [ */) { return -1; }

  if (str.indexOf(']:') === -1) { return -1; }

  state = new StateInline(str, parser, options, env);
  labelEnd = parseLinkLabel(state, 1);

  // The label's closing bracket must be followed immediately by a colon.
  if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return -1; }

  max = state.posMax;

  // abbr title is always one line, so looking for ending "\n" here
  for (pos = labelEnd + 2; pos < max; pos++) {
    if (state.src.charCodeAt(pos) === 0x0A) { break; }
  }

  label = str.slice(2, labelEnd);
  title = str.slice(labelEnd + 2, pos).trim();

  // Neither part may be empty: an empty label would later become an empty
  // alternative in the abbreviation-matching regular expression.
  if (label.length === 0 || title.length === 0) { return -1; }

  // Prototype-less dictionary, so labels such as "toString" or "__proto__"
  // are ordinary keys rather than inherited Object.prototype members.
  if (!env.abbreviations) { env.abbreviations = Object.create(null); }

  // Own-property check (not truthiness) in case a caller supplied a regular
  // object as `env.abbreviations`.
  if (!Object.prototype.hasOwnProperty.call(env.abbreviations, label)) {
    env.abbreviations[label] = title;
  }

  return pos;
}
module.exports = function abbr(state) {
var tokens = state.tokens, i, l, content, pos;
// Parse inlines
for (i = 1, l = tokens.length - 1; i < l; i++) {
if (tokens[i - 1].type === 'paragraph_open' &&
tokens[i].type === 'inline' &&
tokens[i + 1].type === 'paragraph_close') {
content = tokens[i].content;
while (content.length) {
pos = parseAbbr(content, state.inline, state.options, state.env);
if (pos < 0) { break; }
content = content.slice(pos).trim();
}
tokens[i].content = content;
if (!content.length) {
tokens[i - 1].tight = true;
tokens[i + 1].tight = true;
}
}
}
};

86
lib/rules_core/abbr2.js

@ -0,0 +1,86 @@
// Enclose abbreviations in <abbr> tags
//
'use strict';
var PUNCT_CHARS = ' \n()[]\'".,!?-';
// Borrowed from the Google closure library, see
// http://closure-library.googlecode.com/git-history/docs/local_closure_goog_string_string.js.source.html#line1021
/**
 * Backslash-escape the characters of `s` that are listed in the pattern
 * below, so the result can be embedded in a regular expression source.
 *
 * @param {string} s - text to escape
 * @returns {string} `s` with each listed character preceded by a backslash
 */
function regEscape(s) {
  var specialChars = /([-()\[\]{}+?*.$\^|,:#<!\\])/g;

  return s.replace(specialChars, function (ch) {
    return '\\' + ch;
  });
}
module.exports = function abbr2(state) {
var i, j, l, tokens, token, text, nodes, pos, level, reg, m, regText,
blockTokens = state.tokens;
if (!state.env.abbreviations) { return; }
if (!state.env.abbrRegExp) {
regText = '(^|[' + PUNCT_CHARS.split('').map(regEscape).join('') + '])'
+ '(' + Object.keys(state.env.abbreviations).sort(function (a, b) {
return b.length - a.length;
}).map(regEscape).join('|') + ')'
+ '($|[' + PUNCT_CHARS.split('').map(regEscape).join('') + '])';
state.env.abbrRegExp = new RegExp(regText, 'g');
}
reg = state.env.abbrRegExp;
for (j = 0, l = blockTokens.length; j < l; j++) {
if (blockTokens[j].type !== 'inline') { continue; }
tokens = blockTokens[j].children;
// We scan from the end, to keep position when new tags added.
for (i = tokens.length - 1; i >= 0; i--) {
token = tokens[i];
if (token.type !== 'text') { continue; }
pos = 0;
text = token.content;
reg.lastIndex = 0;
level = token.level;
nodes = [];
while ((m = reg.exec(text))) {
if (reg.lastIndex > pos) {
nodes.push({
type: 'text',
content: text.slice(pos, m.index + m[1].length),
level: level
});
}
nodes.push({
type: 'abbr_open',
title: state.env.abbreviations[m[2]],
level: level++
});
nodes.push({
type: 'text',
content: m[2],
level: level
});
nodes.push({
type: 'abbr_close',
level: --level
});
pos = reg.lastIndex - m[3].length;
}
if (!nodes.length) { continue; }
if (pos < text.length) {
nodes.push({
type: 'text',
content: text.slice(pos),
level: level
});
}
// replace current node
blockTokens[j].children = tokens = [].concat(tokens.slice(0, i), nodes, tokens.slice(i + 1));
}
}
};

87
test/fixtures/remarkable/abbr.txt

@ -0,0 +1,87 @@
An example from php markdown readme:
.
*[HTML]: Hyper Text Markup Language
*[W3C]: World Wide Web Consortium
The HTML specification
is maintained by the W3C.
.
<p>The <abbr title="Hyper Text Markup Language">HTML</abbr> specification
is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.</p>
.
They can be multiline (see pandoc implementation). Not sure about newlines, but we should at least skip those definitions:
.
*[
foo
bar
]: desc
foo
.
<p>foo</p>
.
They can contain arbitrary markup (see pandoc implementation):
.
*[`]:`]: foo
\`]:\`
.
<p><abbr title="foo">`]:`</abbr></p>
.
Can contain matched brackets:
.
*[[abbr]]: foo
[abbr]
.
<p><abbr title="foo">[abbr]</abbr></p>
.
No empty abbreviations:
.
*[foo]:
foo
.
<p>*[foo]:
foo</p>
.
Intersecting abbreviations (first should match):
.
*[Bar Foo]: 123
*[Foo Bar]: 456
Foo Bar Foo
Bar Foo Bar
.
<p><abbr title="456">Foo Bar</abbr> Foo</p>
<p><abbr title="123">Bar Foo</abbr> Bar</p>
.
Don't bother with nested abbreviations (yet?):
.
*[JS]: javascript
*[HTTP]: hyper text blah blah
*[JS HTTP]: is awesome
JS HTTP is a collection of low-level javascript HTTP-related modules
.
<p><abbr title="is awesome">JS HTTP</abbr> is a collection of low-level javascript <abbr title="hyper text blah blah">HTTP</abbr>-related modules</p>
.
Don't match the middle of the string:
.
*[foo]: blah
*[bar]: blah
foobar
.
<p>foobar</p>
.
Loading…
Cancel
Save