diff --git a/lib/parser_core.js b/lib/parser_core.js index 7f69b98..512c926 100644 --- a/lib/parser_core.js +++ b/lib/parser_core.js @@ -8,8 +8,10 @@ var Ruler = require('./ruler'); var _rules = [ [ 'block', require('./rules_core/block') ], + [ 'abbr', require('./rules_core/abbr') ], [ 'references', require('./rules_core/references') ], [ 'inline', require('./rules_core/inline') ], + [ 'abbr2', require('./rules_core/abbr2') ], [ 'replacements', require('./rules_core/replacements') ], [ 'smartquotes', require('./rules_core/smartquotes') ], [ 'linkify', require('./rules_core/linkify') ] diff --git a/lib/renderer.js b/lib/renderer.js index 615a484..68c3981 100644 --- a/lib/renderer.js +++ b/lib/renderer.js @@ -71,6 +71,15 @@ function getBreak(tokens, idx) { var rules = {}; +rules.abbr_open = function (tokens, idx/*, options*/) { + var title = tokens[idx].title ? (' title="' + escapeHtml(replaceEntities(tokens[idx].title)) + '"') : ''; + return ''; +}; +rules.abbr_close = function (/*tokens, idx, options*/) { + return ''; +}; + + rules.blockquote_open = function (/*tokens, idx, options*/) { return '
\n'; }; diff --git a/lib/rules_core/abbr.js b/lib/rules_core/abbr.js new file mode 100644 index 0000000..12a983e --- /dev/null +++ b/lib/rules_core/abbr.js @@ -0,0 +1,62 @@ +// Parse abbreviation definitions, i.e. `*[abbr]: description` +// + +'use strict'; + +var StateInline = require('../rules_inline/state_inline'); +var parseLinkLabel = require('../links').parseLinkLabel; + + +function parseAbbr(str, parser, options, env) { + var state, labelEnd, pos, max, label, title; + + if (str.charCodeAt(0) !== 0x2A/* * */) { return -1; } + if (str.charCodeAt(1) !== 0x5B/* [ */) { return -1; } + + if (str.indexOf(']:') === -1) { return -1; } + + state = new StateInline(str, parser, options, env); + labelEnd = parseLinkLabel(state, 1); + + if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return -1; } + + max = state.posMax; + + // abbr title is always one line, so looking for ending "\n" here + for (pos = labelEnd + 2; pos < max; pos++) { + if (state.src.charCodeAt(pos) === 0x0A) { break; } + } + + label = str.slice(2, labelEnd); + title = str.slice(labelEnd + 2, pos).trim(); + if (title.length === 0) { return -1; } + if (!env.abbreviations) { env.abbreviations = {}; } + env.abbreviations[label] = env.abbreviations[label] || title; + + return pos; +} + +module.exports = function abbr(state) { + var tokens = state.tokens, i, l, content, pos; + + // Parse inlines + for (i = 1, l = tokens.length - 1; i < l; i++) { + if (tokens[i - 1].type === 'paragraph_open' && + tokens[i].type === 'inline' && + tokens[i + 1].type === 'paragraph_close') { + + content = tokens[i].content; + while (content.length) { + pos = parseAbbr(content, state.inline, state.options, state.env); + if (pos < 0) { break; } + content = content.slice(pos).trim(); + } + + tokens[i].content = content; + if (!content.length) { + tokens[i - 1].tight = true; + tokens[i + 1].tight = true; + } + } + } +}; diff --git a/lib/rules_core/abbr2.js b/lib/rules_core/abbr2.js new file mode 100644 index 
0000000..ec76f38 --- /dev/null +++ b/lib/rules_core/abbr2.js @@ -0,0 +1,86 @@ +// Enclose abbreviations in tags +// +'use strict'; + + +var PUNCT_CHARS = ' \n()[]\'".,!?-'; + + +// from Google closure library +// http://closure-library.googlecode.com/git-history/docs/local_closure_goog_string_string.js.source.html#line1021 +function regEscape(s) { + return s.replace(/([-()\[\]{}+?*.$\^|,:#= 0; i--) { + token = tokens[i]; + if (token.type !== 'text') { continue; } + + pos = 0; + text = token.content; + reg.lastIndex = 0; + level = token.level; + nodes = []; + + while ((m = reg.exec(text))) { + if (reg.lastIndex > pos) { + nodes.push({ + type: 'text', + content: text.slice(pos, m.index + m[1].length), + level: level + }); + } + + nodes.push({ + type: 'abbr_open', + title: state.env.abbreviations[m[2]], + level: level++ + }); + nodes.push({ + type: 'text', + content: m[2], + level: level + }); + nodes.push({ + type: 'abbr_close', + level: --level + }); + pos = reg.lastIndex - m[3].length; + } + + if (!nodes.length) { continue; } + + if (pos < text.length) { + nodes.push({ + type: 'text', + content: text.slice(pos), + level: level + }); + } + + // replace current node + blockTokens[j].children = tokens = [].concat(tokens.slice(0, i), nodes, tokens.slice(i + 1)); + } + } +}; diff --git a/test/fixtures/remarkable/abbr.txt b/test/fixtures/remarkable/abbr.txt new file mode 100644 index 0000000..a52fc66 --- /dev/null +++ b/test/fixtures/remarkable/abbr.txt @@ -0,0 +1,87 @@ + +An example from php markdown readme: + +. +*[HTML]: Hyper Text Markup Language +*[W3C]: World Wide Web Consortium +The HTML specification +is maintained by the W3C. +. +

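<p>The <abbr title="Hyper Text Markup Language">HTML</abbr> specification +is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.</p>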

+. + +They can be multiline (see pandoc implementation). Not sure about newlines, but we should at least skip those definitions: + +. +*[ +foo +bar +]: desc +foo +. +

<p>foo</p>

+. + +They can contain arbitrary markup (see pandoc implementation): + +. +*[`]:`]: foo +\`]:\` +. +

<p><abbr title="foo">`]:`</abbr></p>

+. + +Can contain matched brackets: + +. +*[[abbr]]: foo +[abbr] +. +

<p><abbr title="foo">[abbr]</abbr></p>

+. + +No empty abbreviations: + +. +*[foo]: +foo +. +

<p>*[foo]: +foo</p>

+. + +Intersecting abbreviations (first should match): + +. +*[Bar Foo]: 123 +*[Foo Bar]: 456 + +Foo Bar Foo + +Bar Foo Bar +. +

<p><abbr title="456">Foo Bar</abbr> Foo</p>

+

<p><abbr title="123">Bar Foo</abbr> Bar</p>

+. + +Don't bother with nested abbreviations (yet?): + +. +*[JS]: javascript +*[HTTP]: hyper text blah blah +*[JS HTTP]: is awesome +JS HTTP is a collection of low-level javascript HTTP-related modules +. +

JS HTTP is a collection of low-level javascript HTTP-related modules

+. + +Don't match the middle of the string: + +. +*[foo]: blah +*[bar]: blah +foobar +. +

<p>foobar</p>

+.