diff --git a/Markdown.pl b/Markdown.pl index eec3bec..03d5957 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -3197,6 +3197,7 @@ sub _Sanitize { my $out = "<" . $tt . " "; my $ok = $tagatt{$tt}; ref($ok) eq "HASH" or $ok = {}; + my $atc = 0; while ($tag =~ /\G\s*([^\s\042\047<\/>=]+)((?>=)|\s*)/gcs) { my ($a,$s) = ($1, $2); if ($s eq "" && substr($tag, pos($tag), 1) =~ /^[\042\047]/) { @@ -3207,10 +3208,12 @@ sub _Sanitize { # it's one of "those" attributes (e.g. compact) or not # _SanitizeAtt will fix it up if it is $out .= _SanitizeAtt($a, '""', $ok, $seenatt); + ++$atc; next; } if ($tag =~ /\G([\042\047])((?:(?!\1)(?!<).)*)\1\s*/gcs) { $out .= _SanitizeAtt($a, $1.$2.$1, $ok, $seenatt); + ++$atc; next; } if ($tag =~ /\G([\042\047])((?:(?!\1)(?![<>])(?![\/][>]).)*)/gcs) { @@ -3219,6 +3222,7 @@ sub _Sanitize { my ($q, $v) = ($1, $2); $v =~ s/\s+$//; $out .= _SanitizeAtt($a, $q.$v.$q, $ok, $seenatt); + ++$atc; next; } if ($tag =~ /\G([^\s<\/>]+)\s*/gcs) { @@ -3226,10 +3230,12 @@ sub _Sanitize { my $v = $1; $v =~ s/\042/"/go; $out .= _SanitizeAtt($a, '"'.$v.'"', $ok, $seenatt); + ++$atc; next; } # give it an empty value $out .= _SanitizeAtt($a, '""', $ok, $seenatt); + ++$atc; } my $sfx = substr($tag, pos($tag)); $out =~ s/\s+$//; @@ -3237,9 +3243,16 @@ sub _Sanitize { if ($tagmt{$tt}) { $typ = ($tag =~ m,/>$,) ? 3 : -3; $out .= $opt{empty_element_suffix}; + return ("<" . substr($tag,1), 0) if !$atc && $taga1p{$tt}; } else { + if ($tag =~ m,/>$,) { + return ("<" . substr($tag,1), 0) if !$atc && $taga1p{$tt}; + $typ = 3; + } else { + return ("<" . substr($tag,1), 0) if !$atc && $taga1p{$tt}; + } $out .= ">"; - $out .= "" and $typ = 3 if $tag =~ m,/>$,; + $out .= "" if $typ == 3; } return ($out,$typ,$autocloseflag); } elsif ($tag =~ /^<([^\s<\/>]+)/s) { @@ -3914,6 +3927,22 @@ Combines adjacent (whitespace separated only) opening and closing tags for the same HTML empty element into a single minimized tag. For example, C<<
<br></br> >> will become C<< <br />
>>. +Tags that require at least one attribute to be present to be meaningful +(e.g. C<a>, C<area>, C<img>, C<map>) but have none will be treated as non-tags +potentially creating unexpected errors. For example, the sequence +C<< <a>text here</a> >> will be sanitized to C<< &lt;a>text here</a> >> since +an C<a> tag without any attributes is meaningless, but then the trailing +close tag C<< </a> >> will become an error because it has no matching open +C<< <a> >> tag. + +The point of this check is not to cause undue frustration, but to allow +such constructs to be used as text without the need for escaping since they +are meaningless as tags. For example, C<< >> works just fine +as plain text and so does C<< >> because the +C<< >>/C<< >> will be treated as a non-tag automatically. In fact, +they can even appear inside links too such as +C<< Link to article >>. + Problematic C<&> characters are fixed up such as standalone C<&>s (or those not part of a valid entity reference) are turned into C<&amp;>. Within attribute values, single and double quotes are turned into C<&> entity refs.