Browse Source

Update CommonMark spec to 0.28

pull/403/head
Alex Kocharin 7 years ago
parent
commit
2959f8c27c
  1. 2
      lib/common/html_blocks.js
  2. 4
      lib/helpers/parse_link_destination.js
  3. 20
      lib/rules_inline/emphasis.js
  4. 1305
      test/fixtures/commonmark/good.txt
  5. 144
      test/fixtures/commonmark/spec.txt
  6. 4
      test/misc.js

2
lib/common/html_blocks.js

@ -55,10 +55,8 @@ module.exports = [
'option', 'option',
'p', 'p',
'param', 'param',
'pre',
'section', 'section',
'source', 'source',
'title',
'summary', 'summary',
'table', 'table',
'tbody', 'tbody',

4
lib/helpers/parse_link_destination.js

@ -59,18 +59,18 @@ module.exports = function parseLinkDestination(str, pos, max) {
if (code === 0x28 /* ( */) { if (code === 0x28 /* ( */) {
level++; level++;
if (level > 1) { break; }
} }
if (code === 0x29 /* ) */) { if (code === 0x29 /* ) */) {
if (level === 0) { break; }
level--; level--;
if (level < 0) { break; }
} }
pos++; pos++;
} }
if (start === pos) { return result; } if (start === pos) { return result; }
if (level !== 0) { return result; }
result.str = unescapeAll(str.slice(start, pos)); result.str = unescapeAll(str.slice(start, pos));
result.lines = lines; result.lines = lines;

20
lib/rules_inline/emphasis.js

@ -77,7 +77,7 @@ module.exports.postProcess = function emphasis(state) {
delimiters = state.delimiters, delimiters = state.delimiters,
max = state.delimiters.length; max = state.delimiters.length;
for (i = 0; i < max; i++) { for (i = max - 1; i >= 0; i--) {
startDelim = delimiters[i]; startDelim = delimiters[i];
if (startDelim.marker !== 0x5F/* _ */ && startDelim.marker !== 0x2A/* * */) { if (startDelim.marker !== 0x5F/* _ */ && startDelim.marker !== 0x2A/* * */) {
@ -91,16 +91,16 @@ module.exports.postProcess = function emphasis(state) {
endDelim = delimiters[startDelim.end]; endDelim = delimiters[startDelim.end];
// If the next delimiter has the same marker and is adjacent to this one, // If the previous delimiter has the same marker and is adjacent to this one,
// merge those into one strong delimiter. // merge those into one strong delimiter.
// //
// `<em><em>whatever</em></em>` -> `<strong>whatever</strong>` // `<em><em>whatever</em></em>` -> `<strong>whatever</strong>`
// //
isStrong = i + 1 < max && isStrong = i > 0 &&
delimiters[i + 1].end === startDelim.end - 1 && delimiters[i - 1].end === startDelim.end + 1 &&
delimiters[i + 1].token === startDelim.token + 1 && delimiters[i - 1].token === startDelim.token - 1 &&
delimiters[startDelim.end - 1].token === endDelim.token - 1 && delimiters[startDelim.end + 1].token === endDelim.token + 1 &&
delimiters[i + 1].marker === startDelim.marker; delimiters[i - 1].marker === startDelim.marker;
ch = String.fromCharCode(startDelim.marker); ch = String.fromCharCode(startDelim.marker);
@ -119,9 +119,9 @@ module.exports.postProcess = function emphasis(state) {
token.content = ''; token.content = '';
if (isStrong) { if (isStrong) {
state.tokens[delimiters[i + 1].token].content = ''; state.tokens[delimiters[i - 1].token].content = '';
state.tokens[delimiters[startDelim.end - 1].token].content = ''; state.tokens[delimiters[startDelim.end + 1].token].content = '';
i++; i--;
} }
} }
}; };

1305
test/fixtures/commonmark/good.txt

File diff suppressed because it is too large

144
test/fixtures/commonmark/spec.txt

@ -1,8 +1,8 @@
--- ---
title: CommonMark Spec title: CommonMark Spec
author: John MacFarlane author: John MacFarlane
version: 0.27 version: 0.28
date: '2016-11-18' date: '2017-08-01'
license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
... ...
@ -11,10 +11,12 @@ license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
## What is Markdown? ## What is Markdown?
Markdown is a plain text format for writing structured documents, Markdown is a plain text format for writing structured documents,
based on conventions used for indicating formatting in email and based on conventions for indicating formatting in email
usenet posts. It was developed in 2004 by John Gruber, who wrote and usenet posts. It was developed by John Gruber (with
the first Markdown-to-HTML converter in Perl, and it soon became help from Aaron Swartz) and released in 2004 in the form of a
ubiquitous. In the next decade, dozens of implementations were [syntax description](http://daringfireball.net/projects/markdown/syntax)
and a Perl script (`Markdown.pl`) for converting Markdown to
HTML. In the next decade, dozens of implementations were
developed in many languages. Some extended the original developed in many languages. Some extended the original
Markdown syntax with conventions for footnotes, tables, and Markdown syntax with conventions for footnotes, tables, and
other document elements. Some allowed Markdown documents to be other document elements. Some allowed Markdown documents to be
@ -312,7 +314,7 @@ form feed (`U+000C`), or carriage return (`U+000D`).
characters]. characters].
A [Unicode whitespace character](@) is A [Unicode whitespace character](@) is
any code point in the Unicode `Zs` class, or a tab (`U+0009`), any code point in the Unicode `Zs` general category, or a tab (`U+0009`),
carriage return (`U+000D`), newline (`U+000A`), or form feed carriage return (`U+000D`), newline (`U+000A`), or form feed
(`U+000C`). (`U+000C`).
@ -331,7 +333,7 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
A [punctuation character](@) is an [ASCII A [punctuation character](@) is an [ASCII
punctuation character] or anything in punctuation character] or anything in
the Unicode classes `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
## Tabs ## Tabs
@ -402,8 +404,8 @@ as indentation with four spaces would:
Normally the `>` that begins a block quote may be followed Normally the `>` that begins a block quote may be followed
optionally by a space, which is not considered part of the optionally by a space, which is not considered part of the
content. In the following case `>` is followed by a tab, content. In the following case `>` is followed by a tab,
which is treated as if it were expanded into spaces. which is treated as if it were expanded into three spaces.
Since one of theses spaces is considered part of the Since one of these spaces is considered part of the
delimiter, `foo` is considered to be indented six spaces delimiter, `foo` is considered to be indented six spaces
inside the block quote context, so we get an indented inside the block quote context, so we get an indented
code block starting with two spaces. code block starting with two spaces.
@ -481,7 +483,7 @@ We can think of a document as a sequence of
quotations, lists, headings, rules, and code blocks. Some blocks (like quotations, lists, headings, rules, and code blocks. Some blocks (like
block quotes and list items) contain other blocks; others (like block quotes and list items) contain other blocks; others (like
headings and paragraphs) contain [inline](@) content---text, headings and paragraphs) contain [inline](@) content---text,
links, emphasized text, images, code, and so on. links, emphasized text, images, code spans, and so on.
## Precedence ## Precedence
@ -1643,6 +1645,15 @@ With tildes:
</code></pre> </code></pre>
```````````````````````````````` ````````````````````````````````
Fewer than three backticks is not enough:
```````````````````````````````` example
``
foo
``
.
<p><code>foo</code></p>
````````````````````````````````
The closing code fence must use the same character as the opening The closing code fence must use the same character as the opening
fence: fence:
@ -2031,6 +2042,37 @@ or [closing tag] (with any [tag name] other than `script`,
or the end of the line.\ or the end of the line.\
**End condition:** line is followed by a [blank line]. **End condition:** line is followed by a [blank line].
HTML blocks continue until they are closed by their appropriate
[end condition], or the last line of the document or other [container block].
This means any HTML **within an HTML block** that might otherwise be recognised
as a start condition will be ignored by the parser and passed through as-is,
without changing the parser's state.
For instance, `<pre>` within an HTML block started by `<table>` will not affect
the parser state; as the HTML block was started by start condition 6, it
will end at any blank line. This can be surprising:
```````````````````````````````` example
<table><tr><td>
<pre>
**Hello**,
_world_.
</pre>
</td></tr></table>
.
<table><tr><td>
<pre>
**Hello**,
<p><em>world</em>.
</pre></p>
</td></tr></table>
````````````````````````````````
In this case, the HTML block is terminated by the blank line — the `**Hello**`
text remains verbatim — and regular parsing resumes, with a paragraph,
emphasised `world` and inline and block HTML following.
All types of [HTML blocks] except type 7 may interrupt All types of [HTML blocks] except type 7 may interrupt
a paragraph. Blocks of type 7 may not interrupt a paragraph. a paragraph. Blocks of type 7 may not interrupt a paragraph.
(This restriction is intended to prevent unwanted interpretation (This restriction is intended to prevent unwanted interpretation
@ -3637,11 +3679,15 @@ The following rules define [list items]:
If the list item is ordered, then it is also assigned a start If the list item is ordered, then it is also assigned a start
number, based on the ordered list marker. number, based on the ordered list marker.
Exceptions: When the first list item in a [list] interrupts Exceptions:
1. When the first list item in a [list] interrupts
a paragraph---that is, when it starts on a line that would a paragraph---that is, when it starts on a line that would
otherwise count as [paragraph continuation text]---then (a) otherwise count as [paragraph continuation text]---then (a)
the lines *Ls* must not begin with a blank line, and (b) if the lines *Ls* must not begin with a blank line, and (b) if
the list item is ordered, the start number must be 1. the list item is ordered, the start number must be 1.
2. If any line is a [thematic break][thematic breaks] then
that line is not a list item.
For example, let *Ls* be the lines For example, let *Ls* be the lines
@ -5796,6 +5842,15 @@ we just have literal backticks:
<p>`foo</p> <p>`foo</p>
```````````````````````````````` ````````````````````````````````
The following case also illustrates the need for opening and
closing backtick strings to be equal in length:
```````````````````````````````` example
`foo``bar``
.
<p>`foo<code>bar</code></p>
````````````````````````````````
## Emphasis and strong emphasis ## Emphasis and strong emphasis
@ -5845,19 +5900,20 @@ for efficient parsing strategies that do not backtrack.
First, some definitions. A [delimiter run](@) is either First, some definitions. A [delimiter run](@) is either
a sequence of one or more `*` characters that is not preceded or a sequence of one or more `*` characters that is not preceded or
followed by a `*` character, or a sequence of one or more `_` followed by a non-backslash-escaped `*` character, or a sequence
characters that is not preceded or followed by a `_` character. of one or more `_` characters that is not preceded or followed by
a non-backslash-escaped `_` character.
A [left-flanking delimiter run](@) is A [left-flanking delimiter run](@) is
a [delimiter run] that is (a) not followed by [Unicode whitespace], a [delimiter run] that is (a) not followed by [Unicode whitespace],
and (b) either not followed by a [punctuation character], or and (b) not followed by a [punctuation character], or
preceded by [Unicode whitespace] or a [punctuation character]. preceded by [Unicode whitespace] or a [punctuation character].
For purposes of this definition, the beginning and the end of For purposes of this definition, the beginning and the end of
the line count as Unicode whitespace. the line count as Unicode whitespace.
A [right-flanking delimiter run](@) is A [right-flanking delimiter run](@) is
a [delimiter run] that is (a) not preceded by [Unicode whitespace], a [delimiter run] that is (a) not preceded by [Unicode whitespace],
and (b) either not preceded by a [punctuation character], or and (b) not preceded by a [punctuation character], or
followed by [Unicode whitespace] or a [punctuation character]. followed by [Unicode whitespace] or a [punctuation character].
For purposes of this definition, the beginning and the end of For purposes of this definition, the beginning and the end of
the line count as Unicode whitespace. the line count as Unicode whitespace.
@ -5936,7 +5992,7 @@ The following rules define emphasis and strong emphasis:
7. A double `**` [can close strong emphasis](@) 7. A double `**` [can close strong emphasis](@)
iff it is part of a [right-flanking delimiter run]. iff it is part of a [right-flanking delimiter run].
8. A double `__` [can close strong emphasis] 8. A double `__` [can close strong emphasis] iff
it is part of a [right-flanking delimiter run] it is part of a [right-flanking delimiter run]
and either (a) not part of a [left-flanking delimiter run] and either (a) not part of a [left-flanking delimiter run]
or (b) part of a [left-flanking delimiter run] or (b) part of a [left-flanking delimiter run]
@ -5976,8 +6032,8 @@ the following principles resolve ambiguity:
an interpretation `<strong>...</strong>` is always preferred to an interpretation `<strong>...</strong>` is always preferred to
`<em><em>...</em></em>`. `<em><em>...</em></em>`.
14. An interpretation `<strong><em>...</em></strong>` is always 14. An interpretation `<em><strong>...</strong></em>` is always
preferred to `<em><strong>..</strong></em>`. preferred to `<strong><em>...</em></strong>`.
15. When two potential emphasis or strong emphasis spans overlap, 15. When two potential emphasis or strong emphasis spans overlap,
so that the second begins before the first ends and ends after so that the second begins before the first ends and ends after
@ -7000,14 +7056,14 @@ Rule 14:
```````````````````````````````` example ```````````````````````````````` example
***foo*** ***foo***
. .
<p><strong><em>foo</em></strong></p> <p><em><strong>foo</strong></em></p>
```````````````````````````````` ````````````````````````````````
```````````````````````````````` example ```````````````````````````````` example
_____foo_____ _____foo_____
. .
<p><strong><strong><em>foo</em></strong></strong></p> <p><em><strong><strong>foo</strong></strong></em></p>
```````````````````````````````` ````````````````````````````````
@ -7148,8 +7204,9 @@ A [link destination](@) consists of either
- a nonempty sequence of characters that does not include - a nonempty sequence of characters that does not include
ASCII space or control characters, and includes parentheses ASCII space or control characters, and includes parentheses
only if (a) they are backslash-escaped or (b) they are part of only if (a) they are backslash-escaped or (b) they are part of
a balanced pair of unescaped parentheses that is not itself a balanced pair of unescaped parentheses. (Implementations
inside a balanced pair of unescaped parentheses. may impose limits on parentheses nesting to avoid performance
issues, but at least three levels of nesting should be supported.)
A [link title](@) consists of either A [link title](@) consists of either
@ -7255,35 +7312,29 @@ Parentheses inside the link destination may be escaped:
<p><a href="(foo)">link</a></p> <p><a href="(foo)">link</a></p>
```````````````````````````````` ````````````````````````````````
One level of balanced parentheses is allowed without escaping: Any number of parentheses are allowed without escaping, as long as they are
balanced:
```````````````````````````````` example
[link]((foo)and(bar))
.
<p><a href="(foo)and(bar)">link</a></p>
````````````````````````````````
However, if you have parentheses within parentheses, you need to escape
or use the `<...>` form:
```````````````````````````````` example ```````````````````````````````` example
[link](foo(and(bar))) [link](foo(and(bar)))
. .
<p>[link](foo(and(bar)))</p> <p><a href="foo(and(bar))">link</a></p>
```````````````````````````````` ````````````````````````````````
However, if you have unbalanced parentheses, you need to escape or use the
`<...>` form:
```````````````````````````````` example ```````````````````````````````` example
[link](foo(and\(bar\))) [link](foo\(and\(bar\))
. .
<p><a href="foo(and(bar))">link</a></p> <p><a href="foo(and(bar)">link</a></p>
```````````````````````````````` ````````````````````````````````
```````````````````````````````` example ```````````````````````````````` example
[link](<foo(and(bar))>) [link](<foo(and(bar)>)
. .
<p><a href="foo(and(bar))">link</a></p> <p><a href="foo(and(bar)">link</a></p>
```````````````````````````````` ````````````````````````````````
@ -7567,13 +7618,16 @@ that [matches] a [link reference definition] elsewhere in the document.
A [link label](@) begins with a left bracket (`[`) and ends A [link label](@) begins with a left bracket (`[`) and ends
with the first right bracket (`]`) that is not backslash-escaped. with the first right bracket (`]`) that is not backslash-escaped.
Between these brackets there must be at least one [non-whitespace character]. Between these brackets there must be at least one [non-whitespace character].
Unescaped square bracket characters are not allowed in Unescaped square bracket characters are not allowed inside the
[link labels]. A link label can have at most 999 opening and closing square brackets of [link labels]. A link
characters inside the square brackets. label can have at most 999 characters inside the square
brackets.
One label [matches](@) One label [matches](@)
another just in case their normalized forms are equal. To normalize a another just in case their normalized forms are equal. To normalize a
label, perform the *Unicode case fold* and collapse consecutive internal label, strip off the opening and closing brackets,
perform the *Unicode case fold*, strip leading and trailing
[whitespace] and collapse consecutive internal
[whitespace] to a single space. If there are multiple [whitespace] to a single space. If there are multiple
matching reference link definitions, the one that comes first in the matching reference link definitions, the one that comes first in the
document is used. (It is desirable in such cases to emit a warning.) document is used. (It is desirable in such cases to emit a warning.)
@ -8326,11 +8380,11 @@ The link labels are case-insensitive:
```````````````````````````````` ````````````````````````````````
If you just want bracketed text, you can backslash-escape the If you just want a literal `!` followed by bracketed text, you can
opening `!` and `[`: backslash-escape the opening `[`:
```````````````````````````````` example ```````````````````````````````` example
\!\[foo] !\[foo]
[foo]: /url "title" [foo]: /url "title"
. .

4
test/misc.js

@ -203,8 +203,8 @@ describe('Misc', function () {
md.enable('emphasis'); md.enable('emphasis');
assert.strictEqual(md.render('___foo___'), '<p><strong><em>foo</em></strong></p>\n'); assert.strictEqual(md.render('___foo___'), '<p><em><strong>foo</strong></em></p>\n');
assert.strictEqual(md.renderInline('___foo___'), '<strong><em>foo</em></strong>'); assert.strictEqual(md.renderInline('___foo___'), '<em><strong>foo</strong></em>');
}); });
it('Should correctly check block termination rules when those are disabled (#13)', function () { it('Should correctly check block termination rules when those are disabled (#13)', function () {

Loading…
Cancel
Save