@ -609,13 +609,13 @@ sub _main {
'validate-xml' = > sub { $ cli_opts { 'validate-xml' } = 1 } ,
'validate-xml' = > sub { $ cli_opts { 'validate-xml' } = 1 } ,
'validate-xml-internal' = > sub { $ cli_opts { 'validate-xml' } = 2 } ,
'validate-xml-internal' = > sub { $ cli_opts { 'validate-xml' } = 2 } ,
'no-validate-xml' = > sub { $ cli_opts { 'validate-xml' } = 0 } ,
'no-validate-xml' = > sub { $ cli_opts { 'validate-xml' } = 0 } ,
'stripcomments|strip-comments' = > sub
'stripcommentsstrict|stripcomments-strict |strip-comments-strict ' = >
{ ! $ cli_opts { 'stripcomments' } and $ cli_opts { 'stripcomments' } = 1 } ,
sub { $ cli_opts { 'stripcomments' } = 1 } ,
'stripcommentslax|stripcomments-lax|strip-comments-lax' = >
'stripcomments|stripcomments lax|stripcomments-lax|strip-comments |strip-comments-lax' = >
sub { $ cli_opts { 'stripcomments' } = 2 } ,
sub { $ cli_opts { 'stripcomments' } = 2 } ,
'stripcommentslaxonly|stripcomments-laxonly|stripcomments-lax-only|strip-comments-lax-only' = >
'stripcommentslaxonly|stripcomments-laxonly|stripcomments-lax-only|strip-comments-lax-only' = >
sub { $ cli_opts { 'stripcomments' } = 3 } ,
sub { $ cli_opts { 'stripcomments' } = 3 } ,
'no-stripcomments|no-strip-comments' = > sub { $ cli_opts { 'stripcomments' } = 0 } ,
'nostripcomments|no -stripcomments|no-strip-comments' = > sub { $ cli_opts { 'stripcomments' } = 0 } ,
'keepabs|keep-abs|k' = > \ $ cli_opts { 'keepabs' } ,
'keepabs|keep-abs|k' = > \ $ cli_opts { 'keepabs' } ,
'absroot|a=s' = > \ $ cli_opts { 'absroot' } ,
'absroot|a=s' = > \ $ cli_opts { 'absroot' } ,
'base|b=s' = > \ $ cli_opts { 'base' } ,
'base|b=s' = > \ $ cli_opts { 'base' } ,
@ -995,16 +995,20 @@ sub ProcessRaw {
# which gets turned into "<p></p><pre></pre></p>" which then
# which gets turned into "<p></p><pre></pre></p>" which then
# no longer validates).
# no longer validates).
# stripcomments => any-false-value (no action), any-true-value (strip).
# stripcomments => any-false-value (no action), any-true-value (strip).
# => 1 (strip), 2 (strip-lax), 3 (strip-lax-only)
# => 1 (strip-strict), 2 (strip-lax), 3 (strip-lax-only)
# a non-integer true value will be forced to 1.
# a non-numeric true value will be forced to 2.
# an integer value < 0 will be forced to 1.
# a numeric value < 0 will be forced to 2.
# an integer value > 3 will be forced to 3.
# a numeric value > 0 and < 1 will be forced to 2.
# a numeric value > 3 will be forced to 3.
# a non-integer value will be forced to an integral value.
# 1, 2, and 3 correspond to the command line options
# 1, 2, and 3 correspond to the command line options
# --strip-comments, --strip-comments-lax and
# --strip-comments-strict , --strip-comments-lax and
# --strip-comments-lax-only respectively.
# --strip-comments-lax-only respectively.
# since the strip comments mechanism is a function of the
# since the strip comments mechanism is a function of the
# sanitizer, if stripcomments is set to any-true-value then
# sanitizer, if stripcomments is set to any-true-value then
# tag attributes will also always be sanitized.
# tag attributes will also always be sanitized.
# if stripcomments is not set or is set to the empty string,
# then it will be set to 3 if sanitize is true and 0 otherwise.
# effective for both ProcessRaw and Markdown.
# effective for both ProcessRaw and Markdown.
# empty_element_suffix => " />" or ">"
# empty_element_suffix => " />" or ">"
# will be forced to " />" if not valid or defined.
# will be forced to " />" if not valid or defined.
@ -1172,13 +1176,15 @@ sub _SanitizeOpts {
$ o - > { keep_named_character_entities } = 0 unless
$ o - > { keep_named_character_entities } = 0 unless
defined ( $ o - > { keep_named_character_entities } ) && $ o - > { keep_named_character_entities } eq "1" ;
defined ( $ o - > { keep_named_character_entities } ) && $ o - > { keep_named_character_entities } eq "1" ;
$ o - > { xmlcheck } = looks_like_number ( $ o - > { xmlcheck } ) && $ o - > { xmlcheck } == 0 ? 0 : 2 ;
$ o - > { xmlcheck } = looks_like_number ( $ o - > { xmlcheck } ) && $ o - > { xmlcheck } == 0 ? 0 : 2 ;
! looks_like_number ( $ o - > { stripcomments } ) and $ o - > { stripcomments } = $ o - > { stripcomments } ? 1 : 0 ;
$ o - > { stripcomments } && $ o - > { stripcomments } < 2 and $ o - > { stripcomments } = 1 ;
$ o - > { stripcomments } = int ( $ o - > { stripcomments } ) ;
$ o - > { stripcomments } > 3 and $ o - > { stripcomments } = 3 ;
$ o - > { sanitize } = 1 if $ o - > { stripcomments } && ! $ o - > { sanitize } ;
$ o - > { sanitize } = 1 if $ o - > { xmlcheck } && ! $ o - > { sanitize } ;
$ o - > { sanitize } = 1 if $ o - > { xmlcheck } && ! $ o - > { sanitize } ;
$ o - > { sanitize } = 1 if ref ( $ o - > { urlfunc } ) eq 'CODE' && ! $ o - > { sanitize } ;
$ o - > { sanitize } = 1 if ref ( $ o - > { urlfunc } ) eq 'CODE' && ! $ o - > { sanitize } ;
! looks_like_number ( $ o - > { stripcomments } ) and
$ o - > { stripcomments } = $ o - > { stripcomments } ? 2 :
( $ o - > { sanitize } && ( ! defined ( $ o - > { stripcomments } ) || $ o - > { stripcomments } eq "" ) ? 3 : 0 ) ;
$ o - > { stripcomments } && $ o - > { stripcomments } < 1 and $ o - > { stripcomments } = 2 ;
$ o - > { stripcomments } = int ( $ o - > { stripcomments } ) ;
$ o - > { stripcomments } > 3 and $ o - > { stripcomments } = 3 ;
$ o - > { stripcomments } && ! $ o - > { sanitize } and $ o - > { sanitize } = 1 ;
# this is gross, but having the globals avoids unnecessary slowdown
# this is gross, but having the globals avoids unnecessary slowdown
if ( $ o - > { sanitize } && $ o - > { xmlcheck } ) {
if ( $ o - > { sanitize } && $ o - > { xmlcheck } ) {
@ -4503,10 +4509,11 @@ B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
- - validate - xml check if output is valid XML
- - validate - xml check if output is valid XML
- - validate - xml - internal fast basic check if output is valid XML
- - validate - xml - internal fast basic check if output is valid XML
- - no - validate - xml do not check output for valid XML
- - no - validate - xml do not check output for valid XML
- - strip - comments remove XML comments from output
- - strip - comments remove XML - like comments from output
- - strip - comments - lax remove XML - like comments from output
- - strip - comments - lax remove XML - like comments from output
- - strip - comments - strict remove only strictly valid XML comments
- - strip - comments - lax - only remove only invalid XML - like comments
- - strip - comments - lax - only remove only invalid XML - like comments
- - no - strip - comments do not remove XML comments ( default )
- - no - strip - comments do not remove any XML / XML - like comments
- - tabwidth = num expand tabs to num instead of 8
- - tabwidth = num expand tabs to num instead of 8
- - auto - number automatically number h1 - h6 headers
- - auto - number automatically number h1 - h6 headers
- k | - - keep - abs keep abspath URLs despite - r / - i
- k | - - keep - abs keep abspath URLs despite - r / - i
@ -4715,15 +4722,31 @@ B<--no-sanitize> is used in which case B<--no-validate-xml> is the
default option .
default option .
= item B <--strip-comments>
= item B <--strip-comments> / B <--strip-comments-lax>
( N . B . B <--strip-comments> is just a short form of B <--strip-comments-lax> )
Strip XML and XML - like comments from the output . Any XML or XML - like
comments encountered will be omitted from the output if either of these
options is given .
Strip XML comments from the output . Any XML comments encountered will
Unlike the B <--strip-comments-strict> option , these options I <will>
be omitted from the output if this option is given .
strip any XML - like comments that contain internal double hyphen
( i . e . C <--> ) sequences .
This option requires the B <--sanitize> option to be used ( which is
This option requires the B <--sanitize> option to be used ( which is
the default ) .
the default ) .
However , note that the XML standard section 2.5 specifically prohibits
If either of these options is given , it will supersede any previous
B <--strip-comments-strict> , B <--strip-comments-lax-only> or
B <--no-strip-comments> options .
= item B <--strip-comments-strict>
Strip only strictly XML standard compliant comments from the output .
Note that the XML standard section 2.5 specifically prohibits
a C <--> sequence within an XML comment ( i . e . C <--> cannot occur after
a C <--> sequence within an XML comment ( i . e . C <--> cannot occur after
the comment start tag C << < ! - - >> unless it is immediately followed
the comment start tag C << < ! - - >> unless it is immediately followed
by C << > >> which makes it the comment end tag C << - - > >> ) .
by C << > >> which makes it the comment end tag C << - - > >> ) .
@ -4737,56 +4760,56 @@ option), any invalid tags have their leading C<< < >> escaped (to
C << & #lt; >>) thus making them ordinary text and this I<includes>
C << & #lt; >>) thus making them ordinary text and this I<includes>
invalid XML comments .
invalid XML comments .
What this means is that the B <--strip-comments> option I < will not > remove
What this means is that the B <--strip-comments-strict > option I < will not >
invalid XML comments ( such as S < C << <!-----> >> > ) !
remove invalid XML comments ( such as S < C << <!-----> >> > ) !
But see the B <--strip-comments-lax> option for a solution .
But see the B <--strip-comments-lax> option for a solution .
If this option is given , it will supersede any previous
= item B <--strip-comments-lax>
B <--strip-comments> , B <--strip-comments-lax> , B <--strip-comments-lax-only>
or B <--no-strip-comments> options .
Strip XML - like comments from the output . Any XML - like comments encountered
will be omitted from the output if this option is given . Supersedes the
B <--strip-comments> option if both are given .
While the syntax of XML comments cannot be relaxed ( that would require
altering the XML standard ) , if they are being stripped out anyway , then the
standard isn ' t quite so relevant since they will not be present in the output .
The B <--strip-comments-lax> option acts just like the B <--strip-comments>
option EXCEPT that the content between the starting comment tag S < C << < ! - - >> >
and the ending comment tag S < C << - - > >> > is I <NOT> restricted since it will be
stripped out of the final result which will therefore remain XML compliant .
The only restriction , of course , is that the content between the XML comment
start tag and the XML comment end tag cannot contain the XML comment end tag
itself .
With the B <--strip-comments-lax> option , strictly invalid XML comments
( such as S < C << < ! - - - - - - - - - - > >> > ) I <WILL> be stripped as well as all
strictly valid XML comments .
= item B <--strip-comments-lax-only>
= item B <--strip-comments-lax-only>
This is a compromise option . It works just like B <--strip-comments-lax> , but
This is the default option if no other strip comments options are given
I <ONLY> on strictly invalid XML - like comments . Supersedes the
AND the B <--sanitize> option is active ( the default ) .
B <--strip-comments> option if both are given .
This is a compromise option . It works just like the B <--strip-comments-lax>
option , but I <ONLY> on strictly invalid XML - like comments .
In other words , if a strictly valid XML comment is present , it will be retained
In other words , if a strictly valid XML comment is present , it will be retained
in the output . If a strictly invalid XML comment is present which would have
in the output . If a strictly invalid XML comment is present which would have
been stripped by B <--strip-comments-lax> but would have had its leading C << < >>
been stripped by B <--strip-comments-lax> but would have had its leading C << < >>
escaped automatically by the default B <--no-strip-comments> mode ( because it ' s
escaped automatically by the B <--no-strip-comments> or B <--strip-comments-strict>
not a strictly valid XML comment ) , then it will be stripped by this mode .
modes ( because it ' s not a strictly valid XML comment ) , then it I <will> be stripped
by this mode .
This option prevents ugly invalid XML comments from slipping through into the
This option prevents ugly invalid XML comments from slipping through into the
output as escaped plain text while still passing through valid XML comments
output as escaped plain text while still passing through valid XML comments
without stripping them .
without stripping them .
If this option is given , it will supersede any previous
B <--strip-comments> , B <--strip-comments-lax> , B <--strip-comments-lax-only>
or B <--no-strip-comments> options .
= item B <--no-strip-comments>
= item B <--no-strip-comments>
Do not strip XML comments from the output . This is the default .
Do not strip XML or XML - like comments from the output .
This is the default option I <ONLY> when no other strip comments options have
been given I <and> the B <--no-sanitize> option is in effect ( which is I <not> the
default ) .
When B <--no-strip-comments> is active , strictly invalid XML comments such
as those that contain an internal double hyphen ( C <--> ) sequence will end
up having their leading C << < >> escaped automatically and end up as plain
text in the output !
If this option is given , it will supersede any previous
B <--strip-comments> , B <--strip-comments-lax> , B <--strip-comments-lax-only>
or B <--no-strip-comments> options .
= item B <--tabwidth> = I <num>
= item B <--tabwidth> = I <num>