From 003134a723a9166e8ea80efab4818ca97bdbc3f1 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Wed, 2 Sep 2020 22:56:32 -0700 Subject: [PATCH] Markdown.pl: correct comment sanitation The XML standard section 2.5 is quite specific: the string "--" (double-hyphen) MUST NOT occur within comments. In fact, xmllint will complain about any comments that incorrectly contain an internal "--" sequence as they are not valid XML. Adjust the sanitation code to only pass through valid XML comments using the same pattern that _HashHTMLBlocks uses to recognize them. With this change, invalid XML comments will be treated as literal text by the sanitizer and have the initial "<" escaped to &lt; thus rendering them as not a comment at all. Also take this opportunity to correct the comments in the _HashHTMLBlocks function from "HTML" to "XML" to reflect what it actually matches. Signed-off-by: Kyle J. McKay --- Markdown.pl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Markdown.pl b/Markdown.pl index 9a53557..1ea3c30 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -834,7 +834,7 @@ sub _HashHTMLBlocks { "\n\n" . $key . "\n\n"; }eigx; - # Special case for standalone HTML comments: + # Special case for standalone XML comments: $text =~ s{ (?: (?<=\n\n) # Starting after a blank line @@ -2535,12 +2535,13 @@ sub _SanitizeTags { next; } my $tstart = pos($text); + if ($text =~ /\G()/gc) { + # pass "comments" through + $ans .= $1; + next; + } if ($text =~ /\G(<[^>]*>)/gc) { my $tag = $1; - if ($tag =~ /^