@ -303,6 +303,7 @@ sub _main {
'validate-xml' ,
'validate-xml' ,
'validate-xml-internal' ,
'validate-xml-internal' ,
'no-validate-xml' ,
'no-validate-xml' ,
'base|b=s' ,
'htmlroot|r=s' ,
'htmlroot|r=s' ,
'imageroot|i=s' ,
'imageroot|i=s' ,
'wiki|w:s' ,
'wiki|w:s' ,
@ -374,6 +375,10 @@ sub _main {
die "invalid tab width (must be >= 2 and <= 32)\n" unless $ tw >= 2 && $ tw <= 32 ;
die "invalid tab width (must be >= 2 and <= 32)\n" unless $ tw >= 2 && $ tw <= 32 ;
$ options { tab_width } = int ( 0 + $ tw ) ;
$ options { tab_width } = int ( 0 + $ tw ) ;
}
}
$ options { base_prefix } = "" ; # no base prefix by default
if ( $ cli_opts { 'base' } ) { # Use base prefix for fragment URLs
$ options { base_prefix } = $ cli_opts { 'base' } ;
}
if ( $ cli_opts { 'htmlroot' } ) { # Use URL prefix
if ( $ cli_opts { 'htmlroot' } ) { # Use URL prefix
$ options { url_prefix } = $ cli_opts { 'htmlroot' } ;
$ options { url_prefix } = $ cli_opts { 'htmlroot' } ;
}
}
@ -936,7 +941,7 @@ sub _ProcessWikiLink {
if ( defined ( $ link_loc ) &&
if ( defined ( $ link_loc ) &&
( $ link_loc =~ m {^#\S*$} || $ link_loc =~ m {^(?:http|ftp)s?://\S+$}i ) ) {
( $ link_loc =~ m {^#\S*$} || $ link_loc =~ m {^(?:http|ftp)s?://\S+$}i ) ) {
# Return the new link
# Return the new link
return _MakeATag ( $ link_loc , $ link_text ) ;
return _MakeATag ( _FindFragmentMatch ( $ link_loc ) , $ link_text ) ;
}
}
if ( ! defined ( $ link_loc ) &&
if ( ! defined ( $ link_loc ) &&
( $ link_loc = _strip ( $ link_text ) ) =~ m {^(?:http|ftp)s?://\S+$}i ) {
( $ link_loc = _strip ( $ link_text ) ) =~ m {^(?:http|ftp)s?://\S+$}i ) {
@ -1007,21 +1012,22 @@ sub _wxform {
# On input NONE of $url, $text or $title should be xmlencoded
# On input NONE of $url, $text or $title should be xmlencoded
# but $url should already be url-encoded if needed, but NOT g_escape_table'd
# but $url should already be url-encoded if needed, but NOT g_escape_table'd
sub _MakeATag {
sub _MakeATag {
my ( $ url , $ text , $ title ) = @ _ ;
my ( $ url , $ text , $ title ) = @ _ ;
defined ( $ url ) or $ url = "" ;
defined ( $ url ) or $ url = "" ;
defined ( $ text ) or $ text = "" ;
defined ( $ text ) or $ text = "" ;
defined ( $ title ) or $ title = "" ;
defined ( $ title ) or $ title = "" ;
my $ result = $ g_escape_table { '<' } . "a href=\"" . _EncodeAttText ( $ url ) . "\"" ;
$ url =~ m "^#" and $ url = $ opt { base_prefix } . $ url ;
$ title = _strip ( $ title ) ;
my $ result = $ g_escape_table { '<' } . "a href=\"" . _EncodeAttText ( $ url ) . "\"" ;
$ text =~ s{<(/?a)} {<$1}sogi ;
$ title = _strip ( $ title ) ;
$ text = _DoItalicsAndBoldAndStrike ( $ text ) ;
$ text =~ s{<(/?a)} {<$1}sogi ;
# We've got to encode any of these remaining to avoid
$ text = _DoItalicsAndBoldAndStrike ( $ text ) ;
# conflicting with other italics, bold and strike through.
# We've got to encode any of these remaining to avoid
$ text =~ s!([*_~])!$g_escape_table{$1}!g ;
# conflicting with other italics, bold and strike through.
$ result . = " title=\"" . _EncodeAttText ( $ title ) . "\"" if $ title ne "" ;
$ text =~ s!([*_~])!$g_escape_table{$1}!g ;
return $ result . $ g_escape_table { '>' } .
$ result . = " title=\"" . _EncodeAttText ( $ title ) . "\"" if $ title ne "" ;
$ text . $ g_escape_table { '<' } . "/a" . $ g_escape_table { '>' } ;
return $ result . $ g_escape_table { '>' } .
$ text . $ g_escape_table { '<' } . "/a" . $ g_escape_table { '>' } ;
}
}
@ -1111,37 +1117,13 @@ sub _DoAnchors {
my $ link_text = $ 2 ;
my $ link_text = $ 2 ;
my ( $ url , $ title ) = _SplitUrlTitlePart ( $ 3 ) ;
my ( $ url , $ title ) = _SplitUrlTitlePart ( $ 3 ) ;
if ( defined ( $ url ) && $ url =~ /^#\S/ ) {
# try very hard to find a match
my $ idbase = _strip ( lc ( substr ( $ url , 1 ) ) ) ;
my $ idbase0 = $ idbase ;
my $ id = _MakeAnchorId ( $ idbase ) ;
if ( defined ( $ g_anchors_id { $ id } ) ) {
$ url = $ g_anchors_id { $ id } ;
} else {
$ idbase =~ s/-/_/gs ;
$ id = _MakeAnchorId ( $ idbase ) ;
if ( defined ( $ g_anchors_id { $ id } ) ) {
$ url = $ g_anchors_id { $ id } ;
} else {
$ id = _MakeAnchorId ( $ idbase0 , 1 ) ;
if ( defined ( $ g_anchors_id { $ id } ) ) {
$ url = $ g_anchors_id { $ id } ;
} else {
$ id = _MakeAnchorId ( $ idbase , 1 ) ;
if ( defined ( $ g_anchors_id { $ id } ) ) {
$ url = $ g_anchors_id { $ id } ;
}
}
}
}
}
if ( defined ( $ url ) ) {
if ( defined ( $ url ) ) {
$ link_text = '[' . $ link_text . ']' if $ link_text =~ /^\d{1,3}$/ ;
$ url = _FindFragmentMatch ( $ url ) ;
_MakeATag ( _PrefixURL ( $ url ) , $ link_text , $ title ) ;
$ link_text = '[' . $ link_text . ']' if $ link_text =~ /^\d{1,3}$/ ;
_MakeATag ( _PrefixURL ( $ url ) , $ link_text , $ title ) ;
} else {
} else {
# The href/title part didn't match the pattern
# The href/title part didn't match the pattern
$ whole_match ;
$ whole_match ;
}
}
} xsge ;
} xsge ;
@ -1177,36 +1159,67 @@ sub _DoAnchors {
sub _PeelWrapped {
sub _PeelWrapped {
defined ( $ _ [ 0 ] ) or return undef ;
defined ( $ _ [ 0 ] ) or return undef ;
if ( substr ( $ _ [ 0 ] , 0 , 1 ) eq "(" ) {
if ( substr ( $ _ [ 0 ] , 0 , 1 ) eq "(" ) {
return substr ( $ _ [ 0 ] , 1 , length ( $ _ [ 0 ] ) - ( substr ( $ _ [ 0 ] , - 1 , 1 ) eq ")" ? 2 : 1 ) ) ;
return substr ( $ _ [ 0 ] , 1 , length ( $ _ [ 0 ] ) - ( substr ( $ _ [ 0 ] , - 1 , 1 ) eq ")" ? 2 : 1 ) ) ;
}
}
return $ _ [ 0 ] ;
return $ _ [ 0 ] ;
}
}
sub _SplitUrlTitlePart {
sub _SplitUrlTitlePart {
return ( "" , undef ) if $ _ [ 0 ] =~ m {^\s*$} ; # explicitly allowed
return ( "" , undef ) if $ _ [ 0 ] =~ m {^\s*$} ; # explicitly allowed
my $ u = $ _ [ 0 ] ;
my $ u = $ _ [ 0 ] ;
$ u =~ s/^\s*(['\042])/# $1/ ;
$ u =~ s/^\s*(['\042])/# $1/ ;
if ( $ u =~ m {
if ( $ u =~ m {
^ # match beginning
^ # match beginning
\ s * ?
\ s * ?
<?([^\s'\042]\S*?)> ? # URL = $1
<?([^\s'\042]\S*?)> ? # URL = $1
( ? : # optional grouping
( ? : # optional grouping
\ s + # must be distinct from URL
\ s + # must be distinct from URL
( [ ' \ 042 ] ? ) # quote char = $2
( [ ' \ 042 ] ? ) # quote char = $2
( . * ? ) # Title = $3
( . * ? ) # Title = $3
\ 2 ? # matching quote
\ 2 ? # matching quote
) ? # title is optional
) ? # title is optional
\ s *
\ s *
\ z # match end
\ z # match end
} osx ) {
} osx ) {
return ( undef , undef ) if $ _ [ 1 ] && ( $ 1 eq "" || $ 1 eq "#" ) ;
return ( undef , undef ) if $ _ [ 1 ] && ( $ 1 eq "" || $ 1 eq "#" ) ;
return ( _PeelWrapped ( $ 1 ) , $ 2 ? $ 3 : _PeelWrapped ( $ 3 ) ) ;
return ( _PeelWrapped ( $ 1 ) , $ 2 ? $ 3 : _PeelWrapped ( $ 3 ) ) ;
} else {
return ( undef , undef ) ;
}
}
sub _FindFragmentMatch {
my $ url = shift ;
if ( defined ( $ url ) && $ url =~ /^#\S/ ) {
# try very hard to find a match
my $ idbase = _strip ( lc ( substr ( $ url , 1 ) ) ) ;
my $ idbase0 = $ idbase ;
my $ id = _MakeAnchorId ( $ idbase ) ;
if ( defined ( $ g_anchors_id { $ id } ) ) {
$ url = $ g_anchors_id { $ id } ;
} else {
} else {
return ( undef , undef ) ;
$ idbase =~ s/-/_/gs ;
$ id = _MakeAnchorId ( $ idbase ) ;
if ( defined ( $ g_anchors_id { $ id } ) ) {
$ url = $ g_anchors_id { $ id } ;
} else {
$ id = _MakeAnchorId ( $ idbase0 , 1 ) ;
if ( defined ( $ g_anchors_id { $ id } ) ) {
$ url = $ g_anchors_id { $ id } ;
} else {
$ id = _MakeAnchorId ( $ idbase , 1 ) ;
if ( defined ( $ g_anchors_id { $ id } ) ) {
$ url = $ g_anchors_id { $ id } ;
}
}
}
}
}
}
return $ url ;
}
}
@ -2345,7 +2358,7 @@ sub _DoTag {
if ( ( $ tag =~ m {^<($g_possible_tag_name)(?:[\s>]|/>$)} || $ tag =~ m {^</($g_possible_tag_name)\s*>} ) &&
if ( ( $ tag =~ m {^<($g_possible_tag_name)(?:[\s>]|/>$)} || $ tag =~ m {^</($g_possible_tag_name)\s*>} ) &&
$ ok_tag_name { lc ( $ 1 ) } ) {
$ ok_tag_name { lc ( $ 1 ) } ) {
return _ProcessURLTag ( "href" , $ tag ) if $ tag =~ /^<a\s/i ;
return _ProcessURLTag ( "href" , $ tag , 1 ) if $ tag =~ /^<a\s/i ;
return _ProcessURLTag ( "src" , $ tag ) if $ tag =~ /^<img\s/i ;
return _ProcessURLTag ( "src" , $ tag ) if $ tag =~ /^<img\s/i ;
return $ tag ;
return $ tag ;
}
}
@ -2635,23 +2648,31 @@ sub _SanitizeAtt {
sub _ProcessURLTag {
sub _ProcessURLTag {
my $ att = shift ;
my ( $ att , $ tag , $ dofrag ) = @ _ ;
my $ tag = shift ;
$ att = lc ( $ att ) . "=" ;
$ att = lc ( $ att ) . "=" ;
if ( $ tag =~ /^(<[^\s>]+\s+)/g ) {
if ( $ tag =~ /^(<[^\s>]+\s+)/g ) {
my $ out = $ 1 ;
my $ out = $ 1 ;
while ( $ tag =~ /\G([^\s\042\047<\/>=]+=)([\042\047])((?:(?!\2)(?!<).)*)(\2\s*)/gcs ) {
while ( $ tag =~ /\G([^\s\042\047<\/>=]+=)([\042\047])((?:(?!\2)(?!<).)*)(\2\s*)/gcs ) {
my ( $ p , $ q , $ v , $ s ) = ( $ 1 , $ 2 , $ 3 , $ 4 ) ;
my ( $ p , $ q , $ v , $ s ) = ( $ 1 , $ 2 , $ 3 , $ 4 ) ;
if ( lc ( $ p ) eq $ att && $ v ne "" ) {
if ( lc ( $ p ) eq $ att && $ v ne "" ) {
$ v = _EncodeAttText ( _PrefixURL ( $ v ) ) ;
if ( $ dofrag && $ v =~ m "^#" ) {
$ v = _FindFragmentMatch ( $ v ) ;
my $ bp ;
if ( ( $ bp = $ opt { base_prefix } ) ne "" ) {
$ v = "\2\3" . $ bp . $ v ;
}
}
$ out . = $ p . $ q . $ v . $ s ;
} else {
$ v = _PrefixURL ( $ v ) ;
}
$ v = _EncodeAttText ( $ v ) ;
}
}
$ out . = substr ( $ tag , pos ( $ tag ) ) ;
$ out . = $ p . $ q . $ v . $ s ;
substr ( $ out , 0 , 1 ) = $ g_escape_table { '<' } ;
}
substr ( $ out , - 1 , 1 ) = $ g_escape_table { '>' } ;
$ out . = substr ( $ tag , pos ( $ tag ) ) ;
return $ out ;
substr ( $ out , 0 , 1 ) = $ g_escape_table { '<' } ;
substr ( $ out , - 1 , 1 ) = $ g_escape_table { '>' } ;
return $ out ;
}
}
return $ tag ;
return $ tag ;
@ -3134,10 +3155,9 @@ B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
- - validate - xml - internal fast basic check if output is valid XML
- - validate - xml - internal fast basic check if output is valid XML
- - no - validate - xml do not check output for valid XML
- - no - validate - xml do not check output for valid XML
- - tabwidth = num expand tabs to num instead of 8
- - tabwidth = num expand tabs to num instead of 8
- r prefix | - - htmlroot = prefix append relative non - img URLs
- b prefix | - - base = prefix prepend prefix to fragment - only URLs
to prefix
- r prefix | - - htmlroot = prefix append relative non - img URLs to prefix
- i prefix | - - imageroot = prefix append relative img URLs to
- i prefix | - - imageroot = prefix append relative img URLs to prefix
prefix
- w [ wikipat ] | - - wiki [ = wikipat ] activate wiki links using wikipat
- w [ wikipat ] | - - wiki [ = wikipat ] activate wiki links using wikipat
- V | - - version show version , authors , license
- V | - - version show version , authors , license
and copyright
and copyright
@ -3231,7 +3251,7 @@ Do not sanitize tag attributes. This option does not allow any tags that
would not be allowed without this option , but it does completely suppress
would not be allowed without this option , but it does completely suppress
the attribute sanitation process . If this option is specified , no
the attribute sanitation process . If this option is specified , no
attributes will be removed from any tag ( although C <img> and C <a> tags will
attributes will be removed from any tag ( although C <img> and C <a> tags will
still be affected by B <--imageroot> and / or B <--htmlroot > options ) .
still be affected by B <--imageroot> , B <--htmlroot> and / or B <--base > options ) .
Use of this option is I < NOT RECOMMENDED > .
Use of this option is I < NOT RECOMMENDED > .
@ -3328,6 +3348,15 @@ stop positions no matter what value is used for this option.
The value must be S < 2 <= I <num> <= 32 > .
The value must be S < 2 <= I <num> <= 32 > .
= item B <-b> I <prefix> , B <--base> = I <prefix>
Any fragment - only URLs have I <prefix> prepended . The default is to prepend
nothing and leave them as bare fragment URLs . Use of this option may be
necessary when embedding the output of Markdown . pl into a document that makes
use of the C << <base> >> tag in order for intra - document fragment URL links to
work properly in such a document .
= item B <-r> I <prefix> , B <--htmlroot> = I <prefix>
= item B <-r> I <prefix> , B <--htmlroot> = I <prefix>
Any non - absolute URLs have I <prefix> prepended .
Any non - absolute URLs have I <prefix> prepended .