From cd2477863fdcc182cc8739e9bedc7363acb344d8 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 2 Mar 2024 12:31:42 -0700 Subject: [PATCH] Update to comply with spec 0.31.2 The spec update changes these things: * It simplifies the HTML regex so that `<!-->` is an HTML comment. HTML5 reports this as an error, but still parses it. * It changes the set of known HTML block elements to match HTML5, adding `search` and removing `source`. * It adds Unicode Symbols to the set of punctuation characters that are used to evaluate flankingness. This commit also changes the declaration HTML regex to match lowercase, even though that change was technically made in spec version 0.30. --- lib/common/html_blocks.mjs | 2 +- lib/common/html_re.mjs | 4 ++-- lib/common/utils.mjs | 2 +- package.json | 2 +- test/fixtures/markdown-it/fatal.txt | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/common/html_blocks.mjs b/lib/common/html_blocks.mjs index b72d152..1e27a7f 100644 --- a/lib/common/html_blocks.mjs +++ b/lib/common/html_blocks.mjs @@ -51,8 +51,8 @@ export default [ 'option', 'p', 'param', + 'search', 'section', - 'source', 'summary', 'table', 'tbody', diff --git a/lib/common/html_re.mjs b/lib/common/html_re.mjs index cb466d8..ccfbf87 100644 --- a/lib/common/html_re.mjs +++ b/lib/common/html_re.mjs @@ -13,9 +13,9 @@ const attribute = '(?:\\s+' + attr_name + '(?:\\s*=\\s*' + attr_value + ')?)' const open_tag = '<[A-Za-z][A-Za-z0-9\\-]*' + attribute + '*\\s*\\/?>' const close_tag = '<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>' -const comment = '<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->' +const comment = '<!---?>|<!--(?:[^-]|-[^-]|--[^>])*-->' const processing = '<[?][\\s\\S]*?[?]>' -const declaration = '<![A-Z]+\\s+[^>]*>' +const declaration = '<![A-Za-z][^>]*>' const cdata = '<!\\[CDATA\\[[\\s\\S]*?\\]\\]>' const HTML_TAG_RE = new RegExp('^(?:' + open_tag + '|' + close_tag + '|' + comment + diff --git a/lib/common/utils.mjs b/lib/common/utils.mjs index 0fbc183..b78c9b0 100644 --- a/lib/common/utils.mjs +++ b/lib/common/utils.mjs @@ -176,7 +176,7 @@ function isWhiteSpace (code) { // Currently without 
astral characters support. function isPunctChar (ch) { - return ucmicro.P.test(ch) + return ucmicro.P.test(ch) || ucmicro.S.test(ch) } // Markdown ASCII punctuation characters. diff --git a/package.json b/package.json index 8aeb2df..3be144e 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,7 @@ "linkify-it": "^5.0.0", "mdurl": "^2.0.0", "punycode.js": "^2.3.1", - "uc.micro": "^2.0.0" + "uc.micro": "^2.1.0" }, "devDependencies": { "@rollup/plugin-babel": "^6.0.4", diff --git a/test/fixtures/markdown-it/fatal.txt b/test/fixtures/markdown-it/fatal.txt index dfeeb2e..57e085d 100644 --- a/test/fixtures/markdown-it/fatal.txt +++ b/test/fixtures/markdown-it/fatal.txt @@ -29,7 +29,7 @@ foo .

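 <p>foo &lt;!— xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -&gt;</p>
-<p>foo &lt;!-------------------------------------------------------------------&gt;</p>
+<p>foo <!-------------------------------------------------------------------></p>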

.