From 06b8bccb120a14ad4cba2a5af1c2a4b37d5d7ad8 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Thu, 14 Nov 2019 15:53:57 -0700 Subject: [PATCH] Markdown.pl: ignore likely non-tag tags Certain start tags (a, area, img, map) do not make sense unless they have at least one attribute present. If a completely attribute barren start tag for one of these elements is found, treat it as invalid and escape the leading '<'. This is an heuristic that shouldn't cause too many problems while silently "correcting" incorrect input. Either way (leaving the bare start tag with no attributes or escaping it and potentially causing a fault as its end tag no longer has anything to match up with), it's broken. The question becomes then which breakage is more common in order to handle that one in preference to the other. With this change, the "it wasn't really a tag after all" situation will now be considered more common than the "it was deliberately an invalid start tag with a matching end tag" situation. Signed-off-by: Kyle J. McKay --- Markdown.pl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Markdown.pl b/Markdown.pl index 8999faa..737a226 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -2251,6 +2251,7 @@ my %tagmt; # empty element tags my %tagocl; # non-empty elements with optional closing tag my %tagacl; # which %tagocl an opening %tagocl will close my %tagblk; # block elements +my %taga1p; # open tags which require at least one attribute my %lcattval; # names of attribute values to lowercase my %impatt; # names of "implied" attributes BEGIN { @@ -2312,6 +2313,7 @@ BEGIN { compact coords height hspace ismap nohref noshade nowrap rowspan size span shape valign vspace width )); + %taga1p = map({$_ => 1} qw(a area img map)); } @@ -2493,8 +2495,9 @@ sub _Sanitize { $out .= "" and $typ = 3 if $tag =~ m,/>$,; } return ($out,$typ); - } elsif ($tag =~ /^<([^\s<\/>]+)/gs) { + } elsif ($tag =~ /^<([^\s<\/>]+)/s) { my $tt = lc($1); + return ("<" . 
substr($tag,1), 0) if $taga1p{$tt}; if ($tagmt{$tt}) { return ("<" . $tt . $opt{empty_element_suffix}, 3); } elsif ($tag =~ m,/>$,) {