From c3cfcf92d6661be5575cce5b64a91126728817cc Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Fri, 2 Oct 2020 18:18:18 -0700 Subject: [PATCH] Markdown.pl: improve standalone XML comment stripping When stripping XML comments, if any XML comments are recognized as a standalone block, strip that entire block when forming paragraphs the final time. This provides a much cleaner output as it results in many superfluous blank lines being suppressed that the XML parser would not otherwise remove when it strips out XML comments. Signed-off-by: Kyle J. McKay --- Markdown.pl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/Markdown.pl b/Markdown.pl index 8f65beb..714624c 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -86,6 +86,7 @@ my %g_block_ids; my %g_code_block_ids; my %g_html_blocks; my %g_code_blocks; +my @g_xml_comments; my %opt; # Return a "block id" to use to identify the block that does not contain @@ -741,6 +742,7 @@ sub Markdown { %g_code_block_ids = (); %g_html_blocks = (); %g_code_blocks = (); + @g_xml_comments = (); $g_list_level = 0; # Make sure $text ends with a couple of newlines: @@ -1005,6 +1007,8 @@ sub _HashHTMLBlocks { ) }{ my $key = block_id($1); + push(@g_xml_comments, $key) + if $opt{stripcomments} && !exists($g_html_blocks{$key}); $g_html_blocks{$key} = $1; "\n\n" . $key . "\n\n"; }egx; @@ -1038,7 +1042,7 @@ sub _RunBlockGamut { #

<p> tags around block-level tags. $text = _HashHTMLBlocks($text); - $text = _FormParagraphs($text); + $text = _FormParagraphs($text, $anchors); return $text; } @@ -2469,7 +2473,7 @@ sub _FormParagraphs { # Params: # $text - string to process with html

<p> tags # - my $text = shift; + my ($text, $anchors) = @_; # Strip leading and trailing lines: $text =~ s/\A\n+//; @@ -2488,6 +2492,17 @@ sub _FormParagraphs { } } + # + # Strip standalone XML comments if requested + # + if ($anchors && $opt{stripcomments} && @g_xml_comments) { + my %xml_comment = (); + $xml_comment{$_} = 1 foreach @g_xml_comments; + my @grafs2 = (); + do { push(@grafs2, $_) unless $xml_comment{$_} } foreach @grafs; + @grafs = @grafs2; + } + # # Unhashify HTML blocks #