diff --git a/Markdown.pl b/Markdown.pl
index f86763f..8999faa 100755
--- a/Markdown.pl
+++ b/Markdown.pl
@@ -2245,6 +2245,76 @@ sub _DoTag {
}
+my %univatt; # universally allowed attribute names (set: name => 1)
+my %tagatt; # per-element allowed attribute names (tag => { attribute => 1 })
+my %tagmt; # empty element tags (set: tag => 1)
+my %tagocl; # non-empty elements with optional closing tag (set: tag => 1)
+my %tagacl; # which %tagocl an opening %tagocl will close (tag => { closable tag => 1 })
+my %tagblk; # block elements (set: tag => 1)
+my %lcattval; # names of attribute values to lowercase (set: name => 1)
+my %impatt; # names of "implied" attributes (set: name => 1)
+BEGIN { # populate the file-scoped lexicals declared above during compilation
+ %univatt = map({$_ => 1} qw(class dir id lang style title xml:lang));
+ %tagatt = ( # each value is a set built with map({$_ => 1} ...)
+ 'a' => { map({$_ => 1} qw(href name)) },
+ 'area' => { map({$_ => 1} qw(alt coords href nohref shape)) },
+ 'basefont' => { map({$_ => 1} qw(color face size)) },
+ 'br' => { map({$_ => 1} qw(clear)) },
+ 'caption' => { map({$_ => 1} qw(align)) },
+ 'col' => { map({$_ => 1} qw(align span width valign)) },
+ 'colgroup' => { map({$_ => 1} qw(align span width valign)) },
+ 'dir' => { map({$_ => 1} qw(compact)) },
+ 'div' => { map({$_ => 1} qw(align)) },
+ 'dl' => { map({$_ => 1} qw(compact)) },
+ 'font' => { map({$_ => 1} qw(color face size)) },
+ 'h1' => { map({$_ => 1} qw(align)) },
+ 'h2' => { map({$_ => 1} qw(align)) },
+ 'h3' => { map({$_ => 1} qw(align)) },
+ 'h4' => { map({$_ => 1} qw(align)) },
+ 'h5' => { map({$_ => 1} qw(align)) },
+ 'h6' => { map({$_ => 1} qw(align)) },
+ 'hr' => { map({$_ => 1} qw(align noshade size width)) },
+ # NO server-side image maps, therefore NOT ismap !
+ 'img' => { map({$_ => 1} qw(align alt border height hspace src usemap vspace width)) },
+ 'li' => { map({$_ => 1} qw(compact type value)) },
+ 'map' => { map({$_ => 1} qw(name)) },
+ 'menu' => { map({$_ => 1} qw(compact)) },
+ 'ol' => { map({$_ => 1} qw(compact start type)) },
+ 'p' => { map({$_ => 1} qw(align)) },
+ 'pre' => { map({$_ => 1} qw(width)) },
+ 'table' => { map({$_ => 1} qw(align border cellpadding cellspacing summary width)) },
+ 'tbody' => { map({$_ => 1} qw(align valign)) },
+ 'tfoot' => { map({$_ => 1} qw(align valign)) },
+ 'thead' => { map({$_ => 1} qw(align valign)) },
+ 'td' => { map({$_ => 1} qw(align colspan height nowrap rowspan valign width)) },
+ 'th' => { map({$_ => 1} qw(align colspan height nowrap rowspan valign width)) },
+ 'tr' => { map({$_ => 1} qw(align valign)) },
+ 'ul' => { map({$_ => 1} qw(compact type)) }
+ );
+ %tagmt = map({$_ => 1} qw(area basefont br col hr img));
+ %tagocl = map({$_ => 1} qw(colgroup dd dt li p tbody td tfoot th thead tr));
+ %tagacl = ( # most entries share the whole %tagocl set by reference
+ 'colgroup' => \%tagocl,
+ 'dd' => \%tagocl,
+ 'dt' => \%tagocl,
+ 'li' => \%tagocl,
+ 'tbody' => \%tagocl,
+ 'td' => { map({$_ => 1} qw(colgroup dd dt li p td tfoot th thead)) }, # narrower set: tbody and tr are absent
+ 'tfoot' => \%tagocl,
+ 'th' => { map({$_ => 1} qw(colgroup dd dt li p td tfoot th thead)) }, # same set as 'td'
+ 'thead' => \%tagocl,
+ 'tr' => { map({$_ => 1} qw(colgroup dd dt li p td tfoot th thead tr)) }, # narrower set: tbody is absent
+ ); # no 'p' key: $autoclopen in _SanitizeTags tests %tagblk (which contains p) before consulting %tagacl
+ %tagblk = map({$_ => 1} qw(address blockquote div dl h1 h2 h3 h4 h5 h6 hr ol p pre table)); # opening one of these closes an open p in _SanitizeTags
+ %impatt = map({$_ => 1} qw(checked compact ismap nohref noshade nowrap));
+ %lcattval = map({$_ => 1} qw(
+ align border cellpadding cellspacing checked clear color colspan
+ compact coords height hspace ismap nohref noshade nowrap rowspan size
+ span shape valign vspace width
+ )); # no comments inside the qw() list above: a '#' there would become a list word
+}
+
+
# _SanitizeTags
#
# Inspect all '<'...'>' tags in the input and HTML encode those things
@@ -2254,13 +2324,37 @@ sub _DoTag {
# <= sanitized text
sub _SanitizeTags {
my ($text, $validate) = @_;
+ $text =~ s/\s+$//;
+ $text ne "" or return "";
my @stack = ();
my $ans = "";
my $end = length($text);
pos($text) = 0;
+ my ($autoclose, $autoclopen);
+ my $lastmt = "";
+ $autoclose = sub { # emit closing tags for open optional-close elements, innermost first
+ my $s = $_[0] || ""; # tag name to stop at; "" when no stop tag is given
+ while (@stack && $stack[$#stack]->[0] ne $s && # stop at an empty stack, at $s,
+ $tagocl{$stack[$#stack]->[0]}) { # or at an element whose closing tag is required
+ $ans .= "</" . $stack[$#stack]->[0] . ">"; # a closing tag needs the "</" prefix, not just "name>"
+ pop(@stack);
+ }
+ } if $validate; # the closure is only defined when validating
+ $autoclopen = sub { # close open elements that the opening tag $s implicitly ends
+ my $s = $_[0] || ""; # name of the tag being opened
+ my $c; # set of open tag names that $s closes
+ if ($tagblk{$s}) {$c = {p=>1}} # a block element closes only an open p
+ elsif ($tagocl{$s}) {$c = $tagacl{$s}} # optional-close elements use their %tagacl set
+ else {return} # any other tag closes nothing
+ while (@stack && $c->{$stack[$#stack]->[0]}) { # pop while the innermost open tag is in the set
+ $ans .= "</" . $stack[$#stack]->[0] . ">"; # a closing tag needs the "</" prefix, not just "name>"
+ pop(@stack);
+ }
+ } if $validate; # the closure is only defined when validating
while (pos($text) < $end) {
if ($text =~ /\G([^<]+)/gc) {
$ans .= $1;
+ $lastmt = "" if $1 =~ /\S/;
next;
}
my $tstart = pos($text);
@@ -2270,17 +2364,23 @@ sub _SanitizeTags {
$ans .= $tag;
next;
}
+ my $tt;
if (($tag =~ m{^<($g_possible_tag_name)(?:[\s>]|/>$)} ||
$tag =~ m{^($g_possible_tag_name)\s*>}) &&
- $ok_tag_name{lc($1)})
+ $ok_tag_name{$tt=lc($1)})
{
my ($stag, $styp) = _Sanitize($tag);
- $ans .= $stag;
- if ($validate && ($styp == 1 || $styp == 2) && $stag =~ m{^?([^/\s>]+)}) {
- my $tt = $1;
+ if ($styp == 2 && $lastmt eq $tt) {
+ $lastmt = "";
+ next;
+ }
+ $lastmt = $styp == 3 ? $tt : "";
+ if ($validate && $styp) {
+ &$autoclopen($tt) if $styp == 1 || $styp == 3;
if ($styp == 1) {
push(@stack,[$tt,$tstart]);
- } else {
+ } elsif ($styp == 2) {
+ &$autoclose($tt) unless $tt eq "p";
!@stack and _xmlfail("closing tag $tt without matching open at " .
_linecol($tstart, $text));
if ($stack[$#stack]->[0] eq $tt) {
@@ -2292,28 +2392,32 @@ sub _SanitizeTags {
}
}
}
+ $ans .= $stag;
next;
} else {
$tag =~ s/^</;
$ans .= $tag;
+ $lastmt = "";
next;
}
}
# can only get here if "\G" char is an unmatched "<"
pos($text) += 1;
$ans .= "<";
+ $lastmt = "";
}
+ &$autoclose if $validate;
if ($validate && @stack) {
my @errs;
my $j;
for ($j = 0; $j <= $#stack; ++$j) {
my @i = @{$stack[$j]};
- push(@errs, "opening tag $i[0] without matching close at " .
+ unshift(@errs, "opening tag $i[0] without matching close at " .
_linecol($i[1], $text));
}
_xmlfail(@errs);
}
- return $ans;
+ return $ans."\n";
}
@@ -2332,59 +2436,6 @@ sub _xmlfail {
}
-my %univatt; # universally allowed attribute names
-my %tagatt; # per-element allowed attribute names
-my %tagmt; # empty element tags
-my %lcattval; # names of attribute values to lowercase
-my %impatt; # names of "implied" attributes
-BEGIN {
- %univatt = map({$_ => 1} qw(class dir id lang style title xml:lang));
- %tagatt = (
- 'a' => { map({$_ => 1} qw(href name)) },
- 'area' => { map({$_ => 1} qw(alt coords href nohref shape)) },
- 'basefont' => { map({$_ => 1} qw(color face size)) },
- 'br' => { map({$_ => 1} qw(clear)) },
- 'caption' => { map({$_ => 1} qw(align)) },
- 'col' => { map({$_ => 1} qw(align span width valign)) },
- 'colgroup' => { map({$_ => 1} qw(align span width valign)) },
- 'dir' => { map({$_ => 1} qw(compact)) },
- 'div' => { map({$_ => 1} qw(align)) },
- 'dl' => { map({$_ => 1} qw(compact)) },
- 'font' => { map({$_ => 1} qw(color face size)) },
- 'h1' => { map({$_ => 1} qw(align)) },
- 'h2' => { map({$_ => 1} qw(align)) },
- 'h3' => { map({$_ => 1} qw(align)) },
- 'h4' => { map({$_ => 1} qw(align)) },
- 'h5' => { map({$_ => 1} qw(align)) },
- 'h6' => { map({$_ => 1} qw(align)) },
- 'hr' => { map({$_ => 1} qw(align noshade size width)) },
- # NO server-side image maps, therefore NOT ismap !
- 'img' => { map({$_ => 1} qw(align alt border height hspace src usemap vspace width)) },
- 'li' => { map({$_ => 1} qw(compact type value)) },
- 'map' => { map({$_ => 1} qw(name)) },
- 'menu' => { map({$_ => 1} qw(compact)) },
- 'ol' => { map({$_ => 1} qw(compact start type)) },
- 'p' => { map({$_ => 1} qw(align)) },
- 'pre' => { map({$_ => 1} qw(width)) },
- 'table' => { map({$_ => 1} qw(align border cellpadding cellspacing summary width)) },
- 'tbody' => { map({$_ => 1} qw(align valign)) },
- 'tfoot' => { map({$_ => 1} qw(align valign)) },
- 'thead' => { map({$_ => 1} qw(align valign)) },
- 'td' => { map({$_ => 1} qw(align colspan height nowrap rowspan valign width)) },
- 'th' => { map({$_ => 1} qw(align colspan height nowrap rowspan valign width)) },
- 'tr' => { map({$_ => 1} qw(align valign)) },
- 'ul' => { map({$_ => 1} qw(compact type)) }
- );
- %tagmt = map({$_ => 1} qw(area basefont br col hr img));
- %impatt = map({$_ => 1} qw(checked compact ismap nohref noshade nowrap));
- %lcattval = map({$_ => 1} qw(
- align border cellpadding cellspacing checked clear color colspan
- compact coords height hspace ismap nohref noshade nowrap rowspan size
- span shape valign vspace width
- ));
-}
-
-
sub _Sanitize {
my $tag = shift;
my $seenatt = {};
@@ -3057,15 +3108,20 @@ to be recognized and passed through even without using this option.
=item B<--sanitize>
-Remove troublesome tag attributes from embedded tags. Only a very strictly
+Removes troublesome tag attributes from embedded tags. Only a very strictly
limited set of tag attributes will be permitted, other attributes will be
silently discarded. The set of allowed attributes varies by tag.
-This is enabled by default.
-Also split empty minimized elements that are not one of the HTML allowed
-empty elements (C C
C
C
C