@ -609,13 +609,13 @@ sub _main {
'validate-xml' = > sub { $ cli_opts { 'validate-xml' } = 1 } ,
'validate-xml-internal' = > sub { $ cli_opts { 'validate-xml' } = 2 } ,
'no-validate-xml' = > sub { $ cli_opts { 'validate-xml' } = 0 } ,
'stripcomments|strip-comments' = > sub
{ ! $ cli_opts { 'stripcomments' } and $ cli_opts { 'stripcomments' } = 1 } ,
'stripcommentslax|stripcomments-lax|strip-comments-lax' = >
'stripcommentsstrict|stripcomments-strict |strip-comments-strict ' = >
sub { $ cli_opts { 'stripcomments' } = 1 } ,
'stripcomments|stripcomments lax|stripcomments-lax|strip-comments |strip-comments-lax' = >
sub { $ cli_opts { 'stripcomments' } = 2 } ,
'stripcommentslaxonly|stripcomments-laxonly|stripcomments-lax-only|strip-comments-lax-only' = >
sub { $ cli_opts { 'stripcomments' } = 3 } ,
'no-stripcomments|no-strip-comments' = > sub { $ cli_opts { 'stripcomments' } = 0 } ,
'nostripcomments|no -stripcomments|no-strip-comments' = > sub { $ cli_opts { 'stripcomments' } = 0 } ,
'keepabs|keep-abs|k' = > \ $ cli_opts { 'keepabs' } ,
'absroot|a=s' = > \ $ cli_opts { 'absroot' } ,
'base|b=s' = > \ $ cli_opts { 'base' } ,
@ -995,16 +995,20 @@ sub ProcessRaw {
# which gets turned into "<p></p><pre></pre></p>" which then
# no longer validates).
# stripcomments => any-false-value (no action), any-true-value (strip).
# => 1 (strip), 2 (strip-lax), 3 (strip-lax-only)
# a non-integer true value will be forced to 1.
# an integer value < 0 will be forced to 1.
# an integer value > 3 will be forced to 3.
# => 1 (strip-strict), 2 (strip-lax), 3 (strip-lax-only)
# a non-numeric true value will be forced to 2.
# a numeric value < 0 will be forced to 2.
# a numeric value > 0 and < 1 will be forced to 2.
# a numeric value > 3 will be forced to 3.
# a non-integer value will forced to an integral value.
# 1, 2, and 3 correspond to the command line options
# --strip-comments, --strip-comments-lax and
# --strip-comments-strict , --strip-comments-lax and
# --strip-comments-lax-only respectively.
# since the strip comments mechanism is a function of the
# sanitizer, if stripcomments is set to any-true-value then
# tag attributes will also always be sanitized.
# if stripcomments is not set or is set to the empty string,
# then it will be set to 3 if sanitize is true and 0 otherwise.
# effective for both ProcessRaw and Markdown.
# empty_element_suffix => " />" or ">"
# will be forced to " />" if not valid or defined.
@ -1172,13 +1176,15 @@ sub _SanitizeOpts {
$ o - > { keep_named_character_entities } = 0 unless
defined ( $ o - > { keep_named_character_entities } ) && $ o - > { keep_named_character_entities } eq "1" ;
$ o - > { xmlcheck } = looks_like_number ( $ o - > { xmlcheck } ) && $ o - > { xmlcheck } == 0 ? 0 : 2 ;
! looks_like_number ( $ o - > { stripcomments } ) and $ o - > { stripcomments } = $ o - > { stripcomments } ? 1 : 0 ;
$ o - > { stripcomments } && $ o - > { stripcomments } < 2 and $ o - > { stripcomments } = 1 ;
$ o - > { stripcomments } = int ( $ o - > { stripcomments } ) ;
$ o - > { stripcomments } > 3 and $ o - > { stripcomments } = 3 ;
$ o - > { sanitize } = 1 if $ o - > { stripcomments } && ! $ o - > { sanitize } ;
$ o - > { sanitize } = 1 if $ o - > { xmlcheck } && ! $ o - > { sanitize } ;
$ o - > { sanitize } = 1 if ref ( $ o - > { urlfunc } ) eq 'CODE' && ! $ o - > { sanitize } ;
! looks_like_number ( $ o - > { stripcomments } ) and
$ o - > { stripcomments } = $ o - > { stripcomments } ? 2 :
( $ o - > { sanitize } && ( ! defined ( $ o - > { stripcomments } ) || $ o - > { stripcomments } eq "" ) ? 3 : 0 ) ;
$ o - > { stripcomments } && $ o - > { stripcomments } < 1 and $ o - > { stripcomments } = 2 ;
$ o - > { stripcomments } = int ( $ o - > { stripcomments } ) ;
$ o - > { stripcomments } > 3 and $ o - > { stripcomments } = 3 ;
$ o - > { stripcomments } && ! $ o - > { sanitize } and $ o - > { sanitize } = 1 ;
# this is gross, but having the globals avoids unnecessary slowdown
if ( $ o - > { sanitize } && $ o - > { xmlcheck } ) {
@ -4503,10 +4509,11 @@ B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
- - validate - xml check if output is valid XML
- - validate - xml - internal fast basic check if output is valid XML
- - no - validate - xml do not check output for valid XML
- - strip - comments remove XML comments from output
- - strip - comments remove XML - like comments from output
- - strip - comments - lax remove XML - like comments from output
- - strip - comments - strict remove only strictly valid XML comments
- - strip - comments - lax - only remove only invalid XML - like comments
- - no - strip - comments do not remove XML comments ( default )
- - no - strip - comments do not remove any XML / XML - like comments
- - tabwidth = num expand tabs to num instead of 8
- - auto - number automatically number h1 - h6 headers
- k | - - keep - abs keep abspath URLs despite - r / - i
@ -4715,15 +4722,31 @@ B<--no-sanitize> is used in which case B<--no-validate-xml> is the
default option .
= item B <--strip-comments>
= item B <--strip-comments> / B <--strip-comments-lax>
( N . B . B <--strip-comments> is just a short form of B <--strip-comments-lax> )
Strip XML and XML - like comments from the output . Any XML or XML - like
comments encountered will be omitted from the output if either of these
options is given .
Strip XML comments from the output . Any XML comments encountered will
be omitted from the output if this option is given .
Unlike the B <--strip-comments-strict> option , these options I <will>
strip any XML - like comments that contain internal double hyphen
( i . e . C <--> ) sequences .
This option requires the B <--sanitize> option to be used ( which is
the default ) .
However , note that the XML standard section 2.5 specifically prohibits
If either of these options is given , it will supersede any previous
B <--strip-comments-strict> , B <--strip-comments-lax-only> or
B <--no-strip-comments> options .
= item B <--strip-comments-strict>
Strip only strictly XML standard compliant comments from the output .
Note that the XML standard section 2.5 specifically prohibits
a C <--> sequence within an XML comment ( i . e . C <--> cannot occur after
the comment start tag C << < ! - - >> unless it is immediately followed
by C << > >> which makes it the comment end tag C << - - > >> ) .
@ -4737,56 +4760,56 @@ option), any invalid tags have their leading C<< < >> escaped (to
C << & #lt; >>) thus making them ordinary text and this I<includes>
invalid XML comments .
What this means is that the B <--strip-comments> option I < will not > remove
invalid XML comments ( such as S < C << <!-----> >> > ) !
What this means is that the B <--strip-comments-strict > option I < will not >
remove invalid XML comments ( such as S < C << <!-----> >> > ) !
But see the B <--strip-comments-lax> option for a solution .
= item B <--strip-comments-lax>
Strip XML - like comments from the output . Any XML - like comments encountered
will be omitted from the output if this option is given . Supersedes the
B <--strip-comments> option if both are given .
While the syntax of XML comments cannot be relaxed ( that would require
altering the XML standard ) , if they are being stripped out anyway , then the
standard isn ' t quite so relevant since they will not be present in the output .
The B <--strip-comments-lax> option acts just like the B <--strip-comments>
option EXCEPT that the content between the starting comment tag S < C << < ! - - >> >
and then ending comment tag S < C << - - > >> > is I <NOT> restricted since it will be
stripped out of the final result which will therefore remain XML compliant .
The only restriction , of course , is that the content between the XML comment
start tag and the XML comment end tag cannot contain the XML comment end tag
itself .
With the B <--strip-comments-lax> option , strictly invalid XML comments
( such as S < C << < ! - - - - - - - - - - > >> > ) I <WILL> be stripped as well as all
strictly valid XML comments .
If this option is given , it will supersede any previous
B <--strip-comments> , B <--strip-comments-lax> , B <--strip-comments-lax-only>
or B <--no-strip-comments> options .
= item B <--strip-comments-lax-only>
This is a compromise option . It works just like B <--strip-comments-lax> , but
I <ONLY> on strictly invalid XML - like comments . Supersedes the
B <--strip-comments> option if both are given .
This is the default option if no other strip comments options are given
AND the B <--sanitize> option is active ( the default ) .
This is a compromise option . It works just like the B <--strip-comments-lax>
option , but I <ONLY> on strictly invalid XML - like comments .
In other words , if a strictly valid XML comment is present , it will be retained
in the output . If a strictly invalid XML comment is present which would have
been stripped by B <--strip-comments-lax> but would have had its leading C << < >>
escaped automatically by the default B <--no-strip-comments> mode ( because it ' s
not a strictly valid XML comment ) , then it will be stripped by this mode .
escaped automatically by the B <--no-strip-comments> or B <--strip-comments-strict>
modes ( because it ' s not a strictly valid XML comment ) , then it I <will> be stripped
by this mode .
This option prevents ugly invalid XML comments from slipping through into the
output as escaped plain text while still passing through valid XML comments
without stripping them .
If this option is given , it will supersede any previous
B <--strip-comments> , B <--strip-comments-lax> , B <--strip-comments-lax-only>
or B <--no-strip-comments> options .
= item B <--no-strip-comments>
Do not strip XML comments from the output . This is the default .
Do not strip XML or XML - like comments from the output .
This is the default option I <ONLY> when no other strip comments options have
been give I <and> the B <--no-sanitize> option is in effect ( which is I <not> the
default ) .
When B <--no-strip-comments> is active , strictly invalid XML comments such
as those that contain an internal double hyphen ( C <--> ) sequence will end
up having their leading C << < >> escaped automatically and end up as plain
text in the output !
If this option is given , it will supersede any previous
B <--strip-comments> , B <--strip-comments-lax> , B <--strip-comments-lax-only>
or B <--no-strip-comments> options .
= item B <--tabwidth> = I <num>