Browse Source

Update to comply with spec 0.31.2

The spec update changes these things:

* It simplifies the HTML comment regex so that `<!-- a -- b -->` is
  recognized as an HTML comment. HTML5 reports this as a parse error, but
  still parses it as a comment.
* It changes the set of known HTML block elements to match HTML5, adding
  `search` and removing `source`.
* It adds Unicode symbols (general category S) to the set of punctuation
  characters used to decide whether a delimiter run is left- or
  right-flanking.

This commit also changes the HTML declaration regex to accept lowercase
letters as well as uppercase, even though that change was technically made
in spec version 0.30.
pull/1009/head
Michael Howell 10 months ago
parent
commit
cd2477863f
  1. 2
      lib/common/html_blocks.mjs
  2. 4
      lib/common/html_re.mjs
  3. 2
      lib/common/utils.mjs
  4. 2
      package.json
  5. 2
      test/fixtures/markdown-it/fatal.txt

2
lib/common/html_blocks.mjs

@ -51,8 +51,8 @@ export default [
'option', 'option',
'p', 'p',
'param', 'param',
'search',
'section', 'section',
'source',
'summary', 'summary',
'table', 'table',
'tbody', 'tbody',

4
lib/common/html_re.mjs

@ -13,9 +13,9 @@ const attribute = '(?:\\s+' + attr_name + '(?:\\s*=\\s*' + attr_value + ')?)'
const open_tag = '<[A-Za-z][A-Za-z0-9\\-]*' + attribute + '*\\s*\\/?>' const open_tag = '<[A-Za-z][A-Za-z0-9\\-]*' + attribute + '*\\s*\\/?>'
const close_tag = '<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>' const close_tag = '<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>'
const comment = '<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->' const comment = '<!---?>|<!--(?:[^-]|-[^-]|--[^>])*-->'
const processing = '<[?][\\s\\S]*?[?]>' const processing = '<[?][\\s\\S]*?[?]>'
const declaration = '<![A-Z]+\\s+[^>]*>' const declaration = '<![A-Za-z][^>]*>'
const cdata = '<!\\[CDATA\\[[\\s\\S]*?\\]\\]>' const cdata = '<!\\[CDATA\\[[\\s\\S]*?\\]\\]>'
const HTML_TAG_RE = new RegExp('^(?:' + open_tag + '|' + close_tag + '|' + comment + const HTML_TAG_RE = new RegExp('^(?:' + open_tag + '|' + close_tag + '|' + comment +

2
lib/common/utils.mjs

@ -176,7 +176,7 @@ function isWhiteSpace (code) {
// Currently without astral characters support. // Currently without astral characters support.
function isPunctChar (ch) { function isPunctChar (ch) {
return ucmicro.P.test(ch) return ucmicro.P.test(ch) || ucmicro.S.test(ch)
} }
// Markdown ASCII punctuation characters. // Markdown ASCII punctuation characters.

2
package.json

@ -50,7 +50,7 @@
"linkify-it": "^5.0.0", "linkify-it": "^5.0.0",
"mdurl": "^2.0.0", "mdurl": "^2.0.0",
"punycode.js": "^2.3.1", "punycode.js": "^2.3.1",
"uc.micro": "^2.0.0" "uc.micro": "^2.1.0"
}, },
"devDependencies": { "devDependencies": {
"@rollup/plugin-babel": "^6.0.4", "@rollup/plugin-babel": "^6.0.4",

2
test/fixtures/markdown-it/fatal.txt

@ -29,7 +29,7 @@ foo <!--- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ->
foo <!-------------------------------------------------------------------> foo <!------------------------------------------------------------------->
. .
<p>foo &lt;!— xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -&gt;</p> <p>foo &lt;!— xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -&gt;</p>
<p>foo &lt;!-------------------------------------------------------------------&gt;</p> <p>foo <!-------------------------------------------------------------------></p>
. .

Loading…
Cancel
Save