From 417428129368309d97aa6d3f48553bde787072d0 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Wed, 8 Jul 2020 03:54:22 -0700 Subject: [PATCH] Markdown.pl: remove markup from <title> value When using `--stub` and picking up the value of the first "H1" tag to use as the title, remove markup (such as links, italic, bold, etc.) from the value before using it. Since <title>...</title> value cannot contain links or other markup this makes the displayed title look much better where such markup is present in the original document. Signed-off-by: Kyle J. McKay --- Markdown.pl | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/Markdown.pl b/Markdown.pl index 4241c1e..186fd57 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -1442,6 +1442,8 @@ sub _DoHeaders { my $geth1 = $anchors && !defined($opt{h1}) ? sub { return unless !defined($h1); my $h = shift; + return unless defined($h) && $h !~ /^\s*$/; + $h = _StripTags(_UnescapeSpecialChars($h)); $h =~ s/^\s+//; $h =~ s/\s+$//; $h =~ s/\s+/ /g; @@ -1467,9 +1469,10 @@ sub _DoHeaders { $h =~ s/#+$//; $h =~ s/\s+$//; my $id = $h eq "" ? "" : _GetNewAnchorId($h); - &$geth1($h) if $h_level == 1 && $h ne ""; $id = " id=\"$id\"" if $id ne ""; - "" . _RunSpanGamut($h) . "\n\n"; + my $rsg = _RunSpanGamut($h); + &$geth1($rsg) if $h_level == 1 && $h ne ""; + "" . $rsg . "\n\n"; }egmx; # Setext-style headers: @@ -1485,9 +1488,10 @@ sub _DoHeaders { $text =~ s{ ^(?:=+[ ]*\n)?[ ]*(.+?)[ ]*\n=+[ ]*\n+ }{ my $h = $1; my $id = _GetNewAnchorId($h); - &$geth1($h); $id = " id=\"$id\"" if $id ne ""; - "" . _RunSpanGamut($h) . "\n\n"; + my $rsg = _RunSpanGamut($h); + &$geth1($rsg); + "" . $rsg . 
"\n\n"; }egmx; $text =~ s{ ^(?:-+[ ]*\n)?[ ]*(.+?)[ ]*\n-+[ ]*\n+ }{ @@ -2399,6 +2403,22 @@ sub _DoTag { return $tag; } +# Strip out all tags that _DoTag would match +sub _StripTags { + my $text = shift; + my $_StripTag = sub { + my $tag = shift; + return $tag if $tag =~ /^<[?\$!]/; + if (($tag =~ m{^<($g_possible_tag_name)(?:[\s>]|/>$)} || $tag =~ m{^}) && + $ok_tag_name{lc($1)}) { + + return ""; # strip it out + } + return $tag; + }; + $text =~ s{(<[^>]*>)}{&$_StripTag($1)}ige; + return $text; +} my %univatt; # universally allowed attribute names my %tagatt; # per-element allowed attribute names