Browse Source

Update CommonMark spec to 0.28

pull/403/head
Alex Kocharin 7 years ago
parent
commit
2959f8c27c
  1. 2
      lib/common/html_blocks.js
  2. 4
      lib/helpers/parse_link_destination.js
  3. 20
      lib/rules_inline/emphasis.js
  4. 1305
      test/fixtures/commonmark/good.txt
  5. 144
      test/fixtures/commonmark/spec.txt
  6. 4
      test/misc.js

2
lib/common/html_blocks.js

@ -55,10 +55,8 @@ module.exports = [
'option',
'p',
'param',
'pre',
'section',
'source',
'title',
'summary',
'table',
'tbody',

4
lib/helpers/parse_link_destination.js

@ -59,18 +59,18 @@ module.exports = function parseLinkDestination(str, pos, max) {
if (code === 0x28 /* ( */) {
level++;
if (level > 1) { break; }
}
if (code === 0x29 /* ) */) {
if (level === 0) { break; }
level--;
if (level < 0) { break; }
}
pos++;
}
if (start === pos) { return result; }
if (level !== 0) { return result; }
result.str = unescapeAll(str.slice(start, pos));
result.lines = lines;

20
lib/rules_inline/emphasis.js

@ -77,7 +77,7 @@ module.exports.postProcess = function emphasis(state) {
delimiters = state.delimiters,
max = state.delimiters.length;
for (i = 0; i < max; i++) {
for (i = max - 1; i >= 0; i--) {
startDelim = delimiters[i];
if (startDelim.marker !== 0x5F/* _ */ && startDelim.marker !== 0x2A/* * */) {
@ -91,16 +91,16 @@ module.exports.postProcess = function emphasis(state) {
endDelim = delimiters[startDelim.end];
// If the next delimiter has the same marker and is adjacent to this one,
// If the previous delimiter has the same marker and is adjacent to this one,
// merge those into one strong delimiter.
//
// `<em><em>whatever</em></em>` -> `<strong>whatever</strong>`
//
isStrong = i + 1 < max &&
delimiters[i + 1].end === startDelim.end - 1 &&
delimiters[i + 1].token === startDelim.token + 1 &&
delimiters[startDelim.end - 1].token === endDelim.token - 1 &&
delimiters[i + 1].marker === startDelim.marker;
isStrong = i > 0 &&
delimiters[i - 1].end === startDelim.end + 1 &&
delimiters[i - 1].token === startDelim.token - 1 &&
delimiters[startDelim.end + 1].token === endDelim.token + 1 &&
delimiters[i - 1].marker === startDelim.marker;
ch = String.fromCharCode(startDelim.marker);
@ -119,9 +119,9 @@ module.exports.postProcess = function emphasis(state) {
token.content = '';
if (isStrong) {
state.tokens[delimiters[i + 1].token].content = '';
state.tokens[delimiters[startDelim.end - 1].token].content = '';
i++;
state.tokens[delimiters[i - 1].token].content = '';
state.tokens[delimiters[startDelim.end + 1].token].content = '';
i--;
}
}
};

1305
test/fixtures/commonmark/good.txt

File diff suppressed because it is too large

144
test/fixtures/commonmark/spec.txt

@ -1,8 +1,8 @@
---
title: CommonMark Spec
author: John MacFarlane
version: 0.27
date: '2016-11-18'
version: 0.28
date: '2017-08-01'
license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
...
@ -11,10 +11,12 @@ license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
## What is Markdown?
Markdown is a plain text format for writing structured documents,
based on conventions used for indicating formatting in email and
usenet posts. It was developed in 2004 by John Gruber, who wrote
the first Markdown-to-HTML converter in Perl, and it soon became
ubiquitous. In the next decade, dozens of implementations were
based on conventions for indicating formatting in email
and usenet posts. It was developed by John Gruber (with
help from Aaron Swartz) and released in 2004 in the form of a
[syntax description](http://daringfireball.net/projects/markdown/syntax)
and a Perl script (`Markdown.pl`) for converting Markdown to
HTML. In the next decade, dozens of implementations were
developed in many languages. Some extended the original
Markdown syntax with conventions for footnotes, tables, and
other document elements. Some allowed Markdown documents to be
@ -312,7 +314,7 @@ form feed (`U+000C`), or carriage return (`U+000D`).
characters].
A [Unicode whitespace character](@) is
any code point in the Unicode `Zs` class, or a tab (`U+0009`),
any code point in the Unicode `Zs` general category, or a tab (`U+0009`),
carriage return (`U+000D`), newline (`U+000A`), or form feed
(`U+000C`).
@ -331,7 +333,7 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
A [punctuation character](@) is an [ASCII
punctuation character] or anything in
the Unicode classes `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
## Tabs
@ -402,8 +404,8 @@ as indentation with four spaces would:
Normally the `>` that begins a block quote may be followed
optionally by a space, which is not considered part of the
content. In the following case `>` is followed by a tab,
which is treated as if it were expanded into spaces.
Since one of theses spaces is considered part of the
which is treated as if it were expanded into three spaces.
Since one of these spaces is considered part of the
delimiter, `foo` is considered to be indented six spaces
inside the block quote context, so we get an indented
code block starting with two spaces.
@ -481,7 +483,7 @@ We can think of a document as a sequence of
quotations, lists, headings, rules, and code blocks. Some blocks (like
block quotes and list items) contain other blocks; others (like
headings and paragraphs) contain [inline](@) content---text,
links, emphasized text, images, code, and so on.
links, emphasized text, images, code spans, and so on.
## Precedence
@ -1643,6 +1645,15 @@ With tildes:
</code></pre>
````````````````````````````````
Fewer than three backticks is not enough:
```````````````````````````````` example
``
foo
``
.
<p><code>foo</code></p>
````````````````````````````````
The closing code fence must use the same character as the opening
fence:
@ -2031,6 +2042,37 @@ or [closing tag] (with any [tag name] other than `script`,
or the end of the line.\
**End condition:** line is followed by a [blank line].
HTML blocks continue until they are closed by their appropriate
[end condition], or the last line of the document or other [container block].
This means any HTML **within an HTML block** that might otherwise be recognised
as a start condition will be ignored by the parser and passed through as-is,
without changing the parser's state.
For instance, `<pre>` within an HTML block started by `<table>` will not affect
the parser state; as the HTML block was started by start condition 6, it
will end at any blank line. This can be surprising:
```````````````````````````````` example
<table><tr><td>
<pre>
**Hello**,

_world_.
</pre>
</td></tr></table>
.
<table><tr><td>
<pre>
**Hello**,
<p><em>world</em>.
</pre></p>
</td></tr></table>
````````````````````````````````
In this case, the HTML block is terminated by the blank line — the `**Hello**`
text remains verbatim — and regular parsing resumes, with a paragraph,
emphasised `world` and inline and block HTML following.
All types of [HTML blocks] except type 7 may interrupt
a paragraph. Blocks of type 7 may not interrupt a paragraph.
(This restriction is intended to prevent unwanted interpretation
@ -3637,11 +3679,15 @@ The following rules define [list items]:
If the list item is ordered, then it is also assigned a start
number, based on the ordered list marker.
Exceptions: When the first list item in a [list] interrupts
Exceptions:
1. When the first list item in a [list] interrupts
a paragraph---that is, when it starts on a line that would
otherwise count as [paragraph continuation text]---then (a)
the lines *Ls* must not begin with a blank line, and (b) if
the list item is ordered, the start number must be 1.
2. If any line is a [thematic break][thematic breaks] then
that line is not a list item.
For example, let *Ls* be the lines
@ -5796,6 +5842,15 @@ we just have literal backticks:
<p>`foo</p>
````````````````````````````````
The following case also illustrates the need for opening and
closing backtick strings to be equal in length:
```````````````````````````````` example
`foo``bar``
.
<p>`foo<code>bar</code></p>
````````````````````````````````
## Emphasis and strong emphasis
@ -5845,19 +5900,20 @@ for efficient parsing strategies that do not backtrack.
First, some definitions. A [delimiter run](@) is either
a sequence of one or more `*` characters that is not preceded or
followed by a `*` character, or a sequence of one or more `_`
characters that is not preceded or followed by a `_` character.
followed by a non-backslash-escaped `*` character, or a sequence
of one or more `_` characters that is not preceded or followed by
a non-backslash-escaped `_` character.
A [left-flanking delimiter run](@) is
a [delimiter run] that is (a) not followed by [Unicode whitespace],
and (b) either not followed by a [punctuation character], or
and (b) not followed by a [punctuation character], or
preceded by [Unicode whitespace] or a [punctuation character].
For purposes of this definition, the beginning and the end of
the line count as Unicode whitespace.
A [right-flanking delimiter run](@) is
a [delimiter run] that is (a) not preceded by [Unicode whitespace],
and (b) either not preceded by a [punctuation character], or
and (b) not preceded by a [punctuation character], or
followed by [Unicode whitespace] or a [punctuation character].
For purposes of this definition, the beginning and the end of
the line count as Unicode whitespace.
@ -5936,7 +5992,7 @@ The following rules define emphasis and strong emphasis:
7. A double `**` [can close strong emphasis](@)
iff it is part of a [right-flanking delimiter run].
8. A double `__` [can close strong emphasis]
8. A double `__` [can close strong emphasis] iff
it is part of a [right-flanking delimiter run]
and either (a) not part of a [left-flanking delimiter run]
or (b) part of a [left-flanking delimiter run]
@ -5976,8 +6032,8 @@ the following principles resolve ambiguity:
an interpretation `<strong>...</strong>` is always preferred to
`<em><em>...</em></em>`.
14. An interpretation `<strong><em>...</em></strong>` is always
preferred to `<em><strong>..</strong></em>`.
14. An interpretation `<em><strong>...</strong></em>` is always
preferred to `<strong><em>...</em></strong>`.
15. When two potential emphasis or strong emphasis spans overlap,
so that the second begins before the first ends and ends after
@ -7000,14 +7056,14 @@ Rule 14:
```````````````````````````````` example
***foo***
.
<p><strong><em>foo</em></strong></p>
<p><em><strong>foo</strong></em></p>
````````````````````````````````
```````````````````````````````` example
_____foo_____
.
<p><strong><strong><em>foo</em></strong></strong></p>
<p><em><strong><strong>foo</strong></strong></em></p>
````````````````````````````````
@ -7148,8 +7204,9 @@ A [link destination](@) consists of either
- a nonempty sequence of characters that does not include
ASCII space or control characters, and includes parentheses
only if (a) they are backslash-escaped or (b) they are part of
a balanced pair of unescaped parentheses that is not itself
inside a balanced pair of unescaped parentheses.
a balanced pair of unescaped parentheses. (Implementations
may impose limits on parentheses nesting to avoid performance
issues, but at least three levels of nesting should be supported.)
A [link title](@) consists of either
@ -7255,35 +7312,29 @@ Parentheses inside the link destination may be escaped:
<p><a href="(foo)">link</a></p>
````````````````````````````````
One level of balanced parentheses is allowed without escaping:
```````````````````````````````` example
[link]((foo)and(bar))
.
<p><a href="(foo)and(bar)">link</a></p>
````````````````````````````````
However, if you have parentheses within parentheses, you need to escape
or use the `<...>` form:
Any number of parentheses are allowed without escaping, as long as they are
balanced:
```````````````````````````````` example
[link](foo(and(bar)))
.
<p>[link](foo(and(bar)))</p>
<p><a href="foo(and(bar))">link</a></p>
````````````````````````````````
However, if you have unbalanced parentheses, you need to escape or use the
`<...>` form:
```````````````````````````````` example
[link](foo(and\(bar\)))
[link](foo\(and\(bar\))
.
<p><a href="foo(and(bar))">link</a></p>
<p><a href="foo(and(bar)">link</a></p>
````````````````````````````````
```````````````````````````````` example
[link](<foo(and(bar))>)
[link](<foo(and(bar)>)
.
<p><a href="foo(and(bar))">link</a></p>
<p><a href="foo(and(bar)">link</a></p>
````````````````````````````````
@ -7567,13 +7618,16 @@ that [matches] a [link reference definition] elsewhere in the document.
A [link label](@) begins with a left bracket (`[`) and ends
with the first right bracket (`]`) that is not backslash-escaped.
Between these brackets there must be at least one [non-whitespace character].
Unescaped square bracket characters are not allowed in
[link labels]. A link label can have at most 999
characters inside the square brackets.
Unescaped square bracket characters are not allowed inside the
opening and closing square brackets of [link labels]. A link
label can have at most 999 characters inside the square
brackets.
One label [matches](@)
another just in case their normalized forms are equal. To normalize a
label, perform the *Unicode case fold* and collapse consecutive internal
label, strip off the opening and closing brackets,
perform the *Unicode case fold*, strip leading and trailing
[whitespace] and collapse consecutive internal
[whitespace] to a single space. If there are multiple
matching reference link definitions, the one that comes first in the
document is used. (It is desirable in such cases to emit a warning.)
@ -8326,11 +8380,11 @@ The link labels are case-insensitive:
````````````````````````````````
If you just want bracketed text, you can backslash-escape the
opening `!` and `[`:
If you just want a literal `!` followed by bracketed text, you can
backslash-escape the opening `[`:
```````````````````````````````` example
\!\[foo]
!\[foo]
[foo]: /url "title"
.

4
test/misc.js

@ -203,8 +203,8 @@ describe('Misc', function () {
md.enable('emphasis');
assert.strictEqual(md.render('___foo___'), '<p><strong><em>foo</em></strong></p>\n');
assert.strictEqual(md.renderInline('___foo___'), '<strong><em>foo</em></strong>');
assert.strictEqual(md.render('___foo___'), '<p><em><strong>foo</strong></em></p>\n');
assert.strictEqual(md.renderInline('___foo___'), '<em><strong>foo</strong></em>');
});
it('Should correctly check block termination rules when those are disabled (#13)', function () {

Loading…
Cancel
Save