diff --git a/Markdown.pl b/Markdown.pl index 66098f2..ec5cf36 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -609,13 +609,13 @@ sub _main { 'validate-xml' => sub {$cli_opts{'validate-xml'} = 1}, 'validate-xml-internal' => sub {$cli_opts{'validate-xml'} = 2}, 'no-validate-xml' => sub {$cli_opts{'validate-xml'} = 0}, - 'stripcomments|strip-comments' => sub - {!$cli_opts{'stripcomments'} and $cli_opts{'stripcomments'} = 1}, - 'stripcommentslax|stripcomments-lax|strip-comments-lax' => + 'stripcommentsstrict|stripcomments-strict|strip-comments-strict' => + sub {$cli_opts{'stripcomments'} = 1}, + 'stripcomments|stripcommentslax|stripcomments-lax|strip-comments|strip-comments-lax' => sub {$cli_opts{'stripcomments'} = 2}, 'stripcommentslaxonly|stripcomments-laxonly|stripcomments-lax-only|strip-comments-lax-only' => sub {$cli_opts{'stripcomments'} = 3}, - 'no-stripcomments|no-strip-comments' => sub {$cli_opts{'stripcomments'} = 0}, + 'nostripcomments|no-stripcomments|no-strip-comments' => sub {$cli_opts{'stripcomments'} = 0}, 'keepabs|keep-abs|k' => \$cli_opts{'keepabs'}, 'absroot|a=s' => \$cli_opts{'absroot'}, 'base|b=s' => \$cli_opts{'base'}, @@ -995,16 +995,20 @@ sub ProcessRaw { # which gets turned into "

" which then # no longer validates). # stripcomments => any-false-value (no action), any-true-value (strip). -# => 1 (strip), 2 (strip-lax), 3 (strip-lax-only) -# a non-integer true value will be forced to 1. -# an integer value < 0 will be forced to 1. -# an integer value > 3 will be forced to 3. +# => 1 (strip-strict), 2 (strip-lax), 3 (strip-lax-only) +# a non-numeric true value will be forced to 2. +# a numeric value < 0 will be forced to 2. +# a numeric value > 0 and < 1 will be forced to 2. +# a numeric value > 3 will be forced to 3. +# a non-integer value will forced to an integral value. # 1, 2, and 3 correspond to the command line options -# --strip-comments, --strip-comments-lax and +# --strip-comments-strict, --strip-comments-lax and # --strip-comments-lax-only respectively. # since the strip comments mechanism is a function of the # sanitizer, if stripcomments is set to any-true-value then # tag attributes will also always be sanitized. +# if stripcomments is not set or is set to the empty string, +# then it will be set to 3 if sanitize is true and 0 otherwise. # effective for both ProcessRaw and Markdown. # empty_element_suffix => " />" or ">" # will be forced to " />" if not valid or defined. @@ -1172,13 +1176,15 @@ sub _SanitizeOpts { $o->{keep_named_character_entities} = 0 unless defined($o->{keep_named_character_entities}) && $o->{keep_named_character_entities} eq "1"; $o->{xmlcheck} = looks_like_number($o->{xmlcheck}) && $o->{xmlcheck} == 0 ? 0 : 2; - !looks_like_number($o->{stripcomments}) and $o->{stripcomments} = $o->{stripcomments} ? 1 : 0; - $o->{stripcomments} && $o->{stripcomments} < 2 and $o->{stripcomments} = 1; - $o->{stripcomments} = int($o->{stripcomments}); - $o->{stripcomments} > 3 and $o->{stripcomments} = 3; - $o->{sanitize} = 1 if $o->{stripcomments} && !$o->{sanitize}; $o->{sanitize} = 1 if $o->{xmlcheck} && !$o->{sanitize}; $o->{sanitize} = 1 if ref($o->{urlfunc}) eq 'CODE' && !$o->{sanitize}; + !looks_like_number($o->{stripcomments}) and + $o->{stripcomments} = $o->{stripcomments} ? 2 : + ($o->{sanitize} && (!defined($o->{stripcomments}) || $o->{stripcomments} eq "") ? 3 : 0); + $o->{stripcomments} && $o->{stripcomments} < 1 and $o->{stripcomments} = 2; + $o->{stripcomments} = int($o->{stripcomments}); + $o->{stripcomments} > 3 and $o->{stripcomments} = 3; + $o->{stripcomments} && !$o->{sanitize} and $o->{sanitize} = 1; # this is gross, but having the globals avoids unnecessary slowdown if ($o->{sanitize} && $o->{xmlcheck}) { @@ -4503,10 +4509,11 @@ B [B<--help>] [B<--html4tags>] [B<--htmlroot>=I] --validate-xml check if output is valid XML --validate-xml-internal fast basic check if output is valid XML --no-validate-xml do not check output for valid XML - --strip-comments remove XML comments from output + --strip-comments remove XML-like comments from output --strip-comments-lax remove XML-like comments from output + --strip-comments-strict remove only strictly valid XML comments --strip-comments-lax-only remove only invalid XML-like comments - --no-strip-comments do not remove XML comments (default) + --no-strip-comments do not remove any XML/XML-like comments --tabwidth=num expand tabs to num instead of 8 --auto-number automatically number h1-h6 headers -k | --keep-abs keep abspath URLs despite -r/-i @@ -4715,15 +4722,31 @@ B<--no-sanitize> is used in which case B<--no-validate-xml> is the default option. -=item B<--strip-comments> +=item B<--strip-comments>/B<--strip-comments-lax> + +(N.B. B<--strip-comments> is just a short form of B<--strip-comments-lax>) + +Strip XML and XML-like comments from the output. Any XML or XML-like +comments encountered will be omitted from the output if either of these +options is given. -Strip XML comments from the output. Any XML comments encountered will -be omitted from the output if this option is given. +Unlike the B<--strip-comments-strict> option, these options I +strip any XML-like comments that contain internal double hyphen +(i.e. C<-->) sequences. This option requires the B<--sanitize> option to be used (which is the default). -However, note that the XML standard section 2.5 specifically prohibits +If either of these options is given, it will supersede any previous +B<--strip-comments-strict>, B<--strip-comments-lax-only> or +B<--no-strip-comments> options. + + +=item B<--strip-comments-strict> + +Strip only strictly XML standard compliant comments from the output. + +Note that the XML standard section 2.5 specifically prohibits a C<--> sequence within an XML comment (i.e. C<--> cannot occur after the comment start tag C<< >>). @@ -4737,56 +4760,56 @@ option), any invalid tags have their leading C<< < >> escaped (to C<< &#lt; >>) thus making them ordinary text and this I invalid XML comments. -What this means is that the B<--strip-comments> option I remove -invalid XML comments (such as S >>>)! +What this means is that the B<--strip-comments-strict> option I +remove invalid XML comments (such as S >>>)! But see the B<--strip-comments-lax> option for a solution. - -=item B<--strip-comments-lax> - -Strip XML-like comments from the output. Any XML-like comments encountered -will be omitted from the output if this option is given. Supersedes the -B<--strip-comments> option if both are given. - -While the syntax of XML comments cannot be relaxed (that would require -altering the XML standard), if they are being stripped out anyway, then the -standard isn't quite so relevant since they will not be present in the output. - -The B<--strip-comments-lax> option acts just like the B<--strip-comments> -option EXCEPT that the content between the starting comment tag S>> -and then ending comment tag S >>> is I restricted since it will be -stripped out of the final result which will therefore remain XML compliant. - -The only restriction, of course, is that the content between the XML comment -start tag and the XML comment end tag cannot contain the XML comment end tag -itself. - -With the B<--strip-comments-lax> option, strictly invalid XML comments -(such as S >>>) I be stripped as well as all -strictly valid XML comments. +If this option is given, it will supersede any previous +B<--strip-comments>, B<--strip-comments-lax>, B<--strip-comments-lax-only> +or B<--no-strip-comments> options. =item B<--strip-comments-lax-only> -This is a compromise option. It works just like B<--strip-comments-lax>, but -I on strictly invalid XML-like comments. Supersedes the -B<--strip-comments> option if both are given. +This is the default option if no other strip comments options are given +AND the B<--sanitize> option is active (the default). + +This is a compromise option. It works just like the B<--strip-comments-lax> +option, but I on strictly invalid XML-like comments. In other words, if a strictly valid XML comment is present, it will be retained in the output. If a strictly invalid XML comment is present which would have been stripped by B<--strip-comments-lax> but would have had its leading C<< < >> -escaped automatically by the default B<--no-strip-comments> mode (because it's -not a strictly valid XML comment), then it will be stripped by this mode. +escaped automatically by the B<--no-strip-comments> or B<--strip-comments-strict> +modes (because it's not a strictly valid XML comment), then it I be stripped +by this mode. This option prevents ugly invalid XML comments from slipping through into the output as escaped plain text while still passing through valid XML comments without stripping them. +If this option is given, it will supersede any previous +B<--strip-comments>, B<--strip-comments-lax>, B<--strip-comments-lax-only> +or B<--no-strip-comments> options. + =item B<--no-strip-comments> -Do not strip XML comments from the output. This is the default. +Do not strip XML or XML-like comments from the output. + +This is the default option I when no other strip comments options have +been give I the B<--no-sanitize> option is in effect (which is I the +default). + +When B<--no-strip-comments> is active, strictly invalid XML comments such +as those that contain an internal double hyphen (C<-->) sequence will end +up having their leading C<< < >> escaped automatically and end up as plain +text in the output! + +If this option is given, it will supersede any previous +B<--strip-comments>, B<--strip-comments-lax>, B<--strip-comments-lax-only> +or B<--no-strip-comments> options. =item B<--tabwidth>=I