Alex Kocharin
10 years ago
5 changed files with 246 additions and 0 deletions
@ -0,0 +1,62 @@ |
|||||
|
// Parse abbreviation definitions, i.e. `*[abbr]: description`
|
||||
|
//
|
||||
|
|
||||
|
'use strict'; |
||||
|
|
||||
|
var StateInline = require('../rules_inline/state_inline'); |
||||
|
var parseLinkLabel = require('../links').parseLinkLabel; |
||||
|
|
||||
|
|
||||
|
/**
 * Try to parse a single abbreviation definition (`*[label]: title`) that
 * starts at index 0 of `str`, and record it in `env.abbreviations`.
 *
 * The first definition of a label wins; later definitions of the same label
 * are still consumed but do not overwrite the stored title.
 *
 * @param {string} str      text to parse; the definition must start at index 0
 * @param {Object} parser   forwarded unchanged to the StateInline constructor
 * @param {Object} options  forwarded unchanged to the StateInline constructor
 * @param {Object} env      `env.abbreviations` is created on demand and updated
 * @returns {number} index in `str` where the definition line ends (position of
 *   the "\n", or end of input), or -1 when `str` does not start with a valid
 *   definition
 */
function parseAbbr(str, parser, options, env) {
  var state, labelEnd, pos, max, label, title;

  if (str.charCodeAt(0) !== 0x2A/* * */) { return -1; }
  if (str.charCodeAt(1) !== 0x5B/* [ */) { return -1; }

  // Cheap pre-check so that no parser state is built for obvious non-matches.
  if (str.indexOf(']:') === -1) { return -1; }

  state = new StateInline(str, parser, options, env);
  // labelEnd is used below as the index of the bracket closing the label.
  labelEnd = parseLinkLabel(state, 1);

  if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return -1; }

  max = state.posMax;

  // abbr title is always one line, so looking for ending "\n" here
  for (pos = labelEnd + 2; pos < max; pos++) {
    if (state.src.charCodeAt(pos) === 0x0A) { break; }
  }

  label = str.slice(2, labelEnd);
  title = str.slice(labelEnd + 2, pos).trim();

  // Neither part may be empty: an empty label would become an empty
  // alternative when the labels are later joined into a search pattern.
  if (label.length === 0 || title.length === 0) { return -1; }

  if (!env.abbreviations) { env.abbreviations = {}; }

  // Own-property check: a plain `abbreviations[label] || title` would pick up
  // inherited members such as `constructor` or `toString` and silently drop
  // the definition.
  if (!Object.prototype.hasOwnProperty.call(env.abbreviations, label)) {
    env.abbreviations[label] = title;
  }

  return pos;
}
||||
|
|
||||
|
module.exports = function abbr(state) { |
||||
|
var tokens = state.tokens, i, l, content, pos; |
||||
|
|
||||
|
// Parse inlines
|
||||
|
for (i = 1, l = tokens.length - 1; i < l; i++) { |
||||
|
if (tokens[i - 1].type === 'paragraph_open' && |
||||
|
tokens[i].type === 'inline' && |
||||
|
tokens[i + 1].type === 'paragraph_close') { |
||||
|
|
||||
|
content = tokens[i].content; |
||||
|
while (content.length) { |
||||
|
pos = parseAbbr(content, state.inline, state.options, state.env); |
||||
|
if (pos < 0) { break; } |
||||
|
content = content.slice(pos).trim(); |
||||
|
} |
||||
|
|
||||
|
tokens[i].content = content; |
||||
|
if (!content.length) { |
||||
|
tokens[i - 1].tight = true; |
||||
|
tokens[i + 1].tight = true; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
}; |
@ -0,0 +1,86 @@ |
|||||
|
// Enclose abbreviations in <abbr> tags
|
||||
|
//
|
||||
|
'use strict'; |
||||
|
|
||||
|
|
||||
|
var PUNCT_CHARS = ' \n()[]\'".,!?-'; |
||||
|
|
||||
|
|
||||
|
// from Google closure library
|
||||
|
// http://closure-library.googlecode.com/git-history/docs/local_closure_goog_string_string.js.source.html#line1021
|
||||
|
/**
 * Backslash-escape the characters of `s` that are listed in the pattern
 * below, so the result can be embedded in a regular expression source.
 *
 * @param {string} s  text to escape
 * @returns {string} `s` with each listed character prefixed by a backslash
 */
function regEscape(s) {
  var specialChars = /([-()\[\]{}+?*.$\^|,:#<!\\])/g;

  return s.replace(specialChars, '\\$1');
}
||||
|
|
||||
|
|
||||
|
module.exports = function abbr2(state) { |
||||
|
var i, j, l, tokens, token, text, nodes, pos, level, reg, m, regText, |
||||
|
blockTokens = state.tokens; |
||||
|
|
||||
|
if (!state.env.abbreviations) { return; } |
||||
|
if (!state.env.abbrRegExp) { |
||||
|
regText = '(^|[' + PUNCT_CHARS.split('').map(regEscape).join('') + '])' |
||||
|
+ '(' + Object.keys(state.env.abbreviations).sort(function (a, b) { |
||||
|
return b.length - a.length; |
||||
|
}).map(regEscape).join('|') + ')' |
||||
|
+ '($|[' + PUNCT_CHARS.split('').map(regEscape).join('') + '])'; |
||||
|
state.env.abbrRegExp = new RegExp(regText, 'g'); |
||||
|
} |
||||
|
reg = state.env.abbrRegExp; |
||||
|
|
||||
|
for (j = 0, l = blockTokens.length; j < l; j++) { |
||||
|
if (blockTokens[j].type !== 'inline') { continue; } |
||||
|
tokens = blockTokens[j].children; |
||||
|
|
||||
|
// We scan from the end, to keep position when new tags added.
|
||||
|
for (i = tokens.length - 1; i >= 0; i--) { |
||||
|
token = tokens[i]; |
||||
|
if (token.type !== 'text') { continue; } |
||||
|
|
||||
|
pos = 0; |
||||
|
text = token.content; |
||||
|
reg.lastIndex = 0; |
||||
|
level = token.level; |
||||
|
nodes = []; |
||||
|
|
||||
|
while ((m = reg.exec(text))) { |
||||
|
if (reg.lastIndex > pos) { |
||||
|
nodes.push({ |
||||
|
type: 'text', |
||||
|
content: text.slice(pos, m.index + m[1].length), |
||||
|
level: level |
||||
|
}); |
||||
|
} |
||||
|
|
||||
|
nodes.push({ |
||||
|
type: 'abbr_open', |
||||
|
title: state.env.abbreviations[m[2]], |
||||
|
level: level++ |
||||
|
}); |
||||
|
nodes.push({ |
||||
|
type: 'text', |
||||
|
content: m[2], |
||||
|
level: level |
||||
|
}); |
||||
|
nodes.push({ |
||||
|
type: 'abbr_close', |
||||
|
level: --level |
||||
|
}); |
||||
|
pos = reg.lastIndex - m[3].length; |
||||
|
} |
||||
|
|
||||
|
if (!nodes.length) { continue; } |
||||
|
|
||||
|
if (pos < text.length) { |
||||
|
nodes.push({ |
||||
|
type: 'text', |
||||
|
content: text.slice(pos), |
||||
|
level: level |
||||
|
}); |
||||
|
} |
||||
|
|
||||
|
// replace current node
|
||||
|
blockTokens[j].children = tokens = [].concat(tokens.slice(0, i), nodes, tokens.slice(i + 1)); |
||||
|
} |
||||
|
} |
||||
|
}; |
@ -0,0 +1,87 @@ |
|||||
|
|
||||
|
An example from the PHP Markdown readme: |
||||
|
|
||||
|
. |
||||
|
*[HTML]: Hyper Text Markup Language |
||||
|
*[W3C]: World Wide Web Consortium |
||||
|
The HTML specification |
||||
|
is maintained by the W3C. |
||||
|
. |
||||
|
<p>The <abbr title="Hyper Text Markup Language">HTML</abbr> specification |
||||
|
is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.</p> |
||||
|
. |
||||
|
|
||||
|
They can be multiline (see the pandoc implementation). It is unclear how newlines inside the label should be handled, but we should at least skip such definitions: |
||||
|
|
||||
|
. |
||||
|
*[ |
||||
|
foo |
||||
|
bar |
||||
|
]: desc |
||||
|
foo |
||||
|
. |
||||
|
<p>foo</p> |
||||
|
. |
||||
|
|
||||
|
They can contain arbitrary markup (see pandoc implementation): |
||||
|
|
||||
|
. |
||||
|
*[`]:`]: foo |
||||
|
\`]:\` |
||||
|
. |
||||
|
<p><abbr title="foo">`]:`</abbr></p> |
||||
|
. |
||||
|
|
||||
|
Can contain matched brackets: |
||||
|
|
||||
|
. |
||||
|
*[[abbr]]: foo |
||||
|
[abbr] |
||||
|
. |
||||
|
<p><abbr title="foo">[abbr]</abbr></p> |
||||
|
. |
||||
|
|
||||
|
No empty abbreviations: |
||||
|
|
||||
|
. |
||||
|
*[foo]: |
||||
|
foo |
||||
|
. |
||||
|
<p>*[foo]: |
||||
|
foo</p> |
||||
|
. |
||||
|
|
||||
|
Intersecting abbreviations (first should match): |
||||
|
|
||||
|
. |
||||
|
*[Bar Foo]: 123 |
||||
|
*[Foo Bar]: 456 |
||||
|
|
||||
|
Foo Bar Foo |
||||
|
|
||||
|
Bar Foo Bar |
||||
|
. |
||||
|
<p><abbr title="456">Foo Bar</abbr> Foo</p> |
||||
|
<p><abbr title="123">Bar Foo</abbr> Bar</p> |
||||
|
. |
||||
|
|
||||
|
Don't bother with nested abbreviations (yet?): |
||||
|
|
||||
|
. |
||||
|
*[JS]: javascript |
||||
|
*[HTTP]: hyper text blah blah |
||||
|
*[JS HTTP]: is awesome |
||||
|
JS HTTP is a collection of low-level javascript HTTP-related modules |
||||
|
. |
||||
|
<p><abbr title="is awesome">JS HTTP</abbr> is a collection of low-level javascript <abbr title="hyper text blah blah">HTTP</abbr>-related modules</p> |
||||
|
. |
||||
|
|
||||
|
Don't match the middle of the string: |
||||
|
|
||||
|
. |
||||
|
*[foo]: blah |
||||
|
*[bar]: blah |
||||
|
foobar |
||||
|
. |
||||
|
<p>foobar</p> |
||||
|
. |
Loading…
Reference in new issue