Alex Kocharin
10 years ago
5 changed files with 246 additions and 0 deletions
@ -0,0 +1,62 @@ |
|||
// Parse abbreviation definitions, i.e. `*[abbr]: description`
|
|||
//
|
|||
|
|||
'use strict'; |
|||
|
|||
var StateInline = require('../rules_inline/state_inline'); |
|||
var parseLinkLabel = require('../links').parseLinkLabel; |
|||
|
|||
|
|||
/**
 * Try to parse a single abbreviation definition (`*[label]: title`) located
 * at the very beginning of `str` and, on success, record it in
 * `env.abbreviations`.
 *
 * The first definition of a label wins; later definitions of the same label
 * are parsed (and consumed) but do not overwrite the stored title.
 *
 * @param {string} str     Text to parse; must start with `*[`.
 * @param {Object} parser  Passed through to the `StateInline` constructor.
 * @param {Object} options Passed through to the `StateInline` constructor.
 * @param {Object} env     Environment object; `env.abbreviations` is created
 *                         on demand and maps label -> title.
 * @returns {number} -1 if `str` does not start with a valid definition,
 *                   otherwise the index in `str` where the title scan stopped
 *                   (the terminating "\n", or `state.posMax` if none found).
 */
function parseAbbr(str, parser, options, env) {
  var state, labelEnd, pos, max, label, title, known;

  if (str.charCodeAt(0) !== 0x2A/* * */) { return -1; }
  if (str.charCodeAt(1) !== 0x5B/* [ */) { return -1; }

  // Cheap pre-check before building an inline state.
  if (str.indexOf(']:') === -1) { return -1; }

  state = new StateInline(str, parser, options, env);
  // The result is used below as the index of the closing "]" (negative on
  // failure) - presumably parseLinkLabel handles nested brackets / escapes.
  labelEnd = parseLinkLabel(state, 1);

  if (labelEnd < 0 || str.charCodeAt(labelEnd + 1) !== 0x3A/* : */) { return -1; }

  max = state.posMax;

  // abbr title is always one line, so looking for ending "\n" here
  for (pos = labelEnd + 2; pos < max; pos++) {
    if (state.src.charCodeAt(pos) === 0x0A) { break; }
  }

  label = str.slice(2, labelEnd);
  title = str.slice(labelEnd + 2, pos).trim();

  // An empty label would later turn into an empty regexp alternative that
  // matches between any two delimiters, so it is not a usable definition.
  if (label.length === 0) { return -1; }
  if (title.length === 0) { return -1; }

  if (!env.abbreviations) { env.abbreviations = {}; }

  // Only own properties count as "already defined": a plain lookup would
  // also see inherited members such as `constructor` or `toString`, and
  // store that inherited function instead of the title.
  known = Object.prototype.hasOwnProperty.call(env.abbreviations, label);
  if (!known || !env.abbreviations[label]) {
    env.abbreviations[label] = title;
  }

  return pos;
}
|||
|
|||
module.exports = function abbr(state) { |
|||
var tokens = state.tokens, i, l, content, pos; |
|||
|
|||
// Parse inlines
|
|||
for (i = 1, l = tokens.length - 1; i < l; i++) { |
|||
if (tokens[i - 1].type === 'paragraph_open' && |
|||
tokens[i].type === 'inline' && |
|||
tokens[i + 1].type === 'paragraph_close') { |
|||
|
|||
content = tokens[i].content; |
|||
while (content.length) { |
|||
pos = parseAbbr(content, state.inline, state.options, state.env); |
|||
if (pos < 0) { break; } |
|||
content = content.slice(pos).trim(); |
|||
} |
|||
|
|||
tokens[i].content = content; |
|||
if (!content.length) { |
|||
tokens[i - 1].tight = true; |
|||
tokens[i + 1].tight = true; |
|||
} |
|||
} |
|||
} |
|||
}; |
@ -0,0 +1,86 @@ |
|||
// Enclose abbreviations in <abbr> tags
|
|||
//
|
|||
'use strict'; |
|||
|
|||
|
|||
// Characters that may directly precede or follow an abbreviation for it to
// count as a standalone word (space, newline, brackets, quotes, punctuation).
var PUNCT_CHARS = ' \n()[]\'".,!?-';
|||
|
|||
|
|||
// from Google closure library
// http://closure-library.googlecode.com/git-history/docs/local_closure_goog_string_string.js.source.html#line1021
/**
 * Escape characters that are special in a regular expression so that `s`
 * can be embedded in a pattern and match literally.
 *
 * @param {string} s Raw text.
 * @returns {string} `s` with each listed special character prefixed by a
 *                   backslash.
 */
function regEscape(s) {
  return s.replace(/([-()\[\]{}+?*.$\^|,:#<!\\])/g, '\\$1');
}
|||
|
|||
|
|||
module.exports = function abbr2(state) { |
|||
var i, j, l, tokens, token, text, nodes, pos, level, reg, m, regText, |
|||
blockTokens = state.tokens; |
|||
|
|||
if (!state.env.abbreviations) { return; } |
|||
if (!state.env.abbrRegExp) { |
|||
regText = '(^|[' + PUNCT_CHARS.split('').map(regEscape).join('') + '])' |
|||
+ '(' + Object.keys(state.env.abbreviations).sort(function (a, b) { |
|||
return b.length - a.length; |
|||
}).map(regEscape).join('|') + ')' |
|||
+ '($|[' + PUNCT_CHARS.split('').map(regEscape).join('') + '])'; |
|||
state.env.abbrRegExp = new RegExp(regText, 'g'); |
|||
} |
|||
reg = state.env.abbrRegExp; |
|||
|
|||
for (j = 0, l = blockTokens.length; j < l; j++) { |
|||
if (blockTokens[j].type !== 'inline') { continue; } |
|||
tokens = blockTokens[j].children; |
|||
|
|||
// We scan from the end, to keep position when new tags added.
|
|||
for (i = tokens.length - 1; i >= 0; i--) { |
|||
token = tokens[i]; |
|||
if (token.type !== 'text') { continue; } |
|||
|
|||
pos = 0; |
|||
text = token.content; |
|||
reg.lastIndex = 0; |
|||
level = token.level; |
|||
nodes = []; |
|||
|
|||
while ((m = reg.exec(text))) { |
|||
if (reg.lastIndex > pos) { |
|||
nodes.push({ |
|||
type: 'text', |
|||
content: text.slice(pos, m.index + m[1].length), |
|||
level: level |
|||
}); |
|||
} |
|||
|
|||
nodes.push({ |
|||
type: 'abbr_open', |
|||
title: state.env.abbreviations[m[2]], |
|||
level: level++ |
|||
}); |
|||
nodes.push({ |
|||
type: 'text', |
|||
content: m[2], |
|||
level: level |
|||
}); |
|||
nodes.push({ |
|||
type: 'abbr_close', |
|||
level: --level |
|||
}); |
|||
pos = reg.lastIndex - m[3].length; |
|||
} |
|||
|
|||
if (!nodes.length) { continue; } |
|||
|
|||
if (pos < text.length) { |
|||
nodes.push({ |
|||
type: 'text', |
|||
content: text.slice(pos), |
|||
level: level |
|||
}); |
|||
} |
|||
|
|||
// replace current node
|
|||
blockTokens[j].children = tokens = [].concat(tokens.slice(0, i), nodes, tokens.slice(i + 1)); |
|||
} |
|||
} |
|||
}; |
@ -0,0 +1,87 @@ |
|||
|
|||
An example from the PHP Markdown readme:
|||
|
|||
. |
|||
*[HTML]: Hyper Text Markup Language |
|||
*[W3C]: World Wide Web Consortium |
|||
The HTML specification |
|||
is maintained by the W3C. |
|||
. |
|||
<p>The <abbr title="Hyper Text Markup Language">HTML</abbr> specification |
|||
is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.</p> |
|||
. |
|||
|
|||
They can be multiline (see pandoc implementation). Not sure about newlines, but we should at least skip those definitions: |
|||
|
|||
. |
|||
*[ |
|||
foo |
|||
bar |
|||
]: desc |
|||
foo |
|||
. |
|||
<p>foo</p> |
|||
. |
|||
|
|||
They can contain arbitrary markup (see pandoc implementation): |
|||
|
|||
. |
|||
*[`]:`]: foo |
|||
\`]:\` |
|||
. |
|||
<p><abbr title="foo">`]:`</abbr></p> |
|||
. |
|||
|
|||
Can contain matched brackets: |
|||
|
|||
. |
|||
*[[abbr]]: foo |
|||
[abbr] |
|||
. |
|||
<p><abbr title="foo">[abbr]</abbr></p> |
|||
. |
|||
|
|||
No empty abbreviations: |
|||
|
|||
. |
|||
*[foo]: |
|||
foo |
|||
. |
|||
<p>*[foo]: |
|||
foo</p> |
|||
. |
|||
|
|||
Intersecting abbreviations (first should match): |
|||
|
|||
. |
|||
*[Bar Foo]: 123 |
|||
*[Foo Bar]: 456 |
|||
|
|||
Foo Bar Foo |
|||
|
|||
Bar Foo Bar |
|||
. |
|||
<p><abbr title="456">Foo Bar</abbr> Foo</p> |
|||
<p><abbr title="123">Bar Foo</abbr> Bar</p> |
|||
. |
|||
|
|||
Don't bother with nested abbreviations (yet?): |
|||
|
|||
. |
|||
*[JS]: javascript |
|||
*[HTTP]: hyper text blah blah |
|||
*[JS HTTP]: is awesome |
|||
JS HTTP is a collection of low-level javascript HTTP-related modules |
|||
. |
|||
<p><abbr title="is awesome">JS HTTP</abbr> is a collection of low-level javascript <abbr title="hyper text blah blah">HTTP</abbr>-related modules</p> |
|||
. |
|||
|
|||
Don't match the middle of the string: |
|||
|
|||
. |
|||
*[foo]: blah |
|||
*[bar]: blah |
|||
foobar |
|||
. |
|||
<p>foobar</p> |
|||
. |
Loading…
Reference in new issue