@ -279,6 +279,7 @@ sub _main {
#### Check for command-line switches: #################
#### Check for command-line switches: #################
my % options = ( ) ;
my % options = ( ) ;
my % cli_opts ;
my % cli_opts ;
my $ raw = 0 ;
use Getopt::Long ;
use Getopt::Long ;
Getopt::Long:: Configure ( qw( bundling require_order pass_through ) ) ;
Getopt::Long:: Configure ( qw( bundling require_order pass_through ) ) ;
GetOptions ( \ % cli_opts ,
GetOptions ( \ % cli_opts ,
@ -295,6 +296,7 @@ sub _main {
'imageroot|i=s' ,
'imageroot|i=s' ,
'wiki|w:s' ,
'wiki|w:s' ,
'tabwidth|tab-width=s' ,
'tabwidth|tab-width=s' ,
'raw' ,
'stylesheet|style-sheet' ,
'stylesheet|style-sheet' ,
'no-stylesheet|no-style-sheet' ,
'no-stylesheet|no-style-sheet' ,
'stub' ,
'stub' ,
@ -367,6 +369,9 @@ sub _main {
}
}
$ options { wikiopt } = { map ( { $ _ = > 1 } split ( // , lc ( $ wopt ) ) ) } ;
$ options { wikiopt } = { map ( { $ _ = > 1 } split ( // , lc ( $ wopt ) ) ) } ;
}
}
if ( $ cli_opts { 'raw' } ) {
$ raw = 1 ;
}
if ( $ cli_opts { 'stylesheet' } ) { # Display the style sheet
if ( $ cli_opts { 'stylesheet' } ) { # Display the style sheet
$ options { show_styles } = 1 ;
$ options { show_styles } = 1 ;
}
}
@ -428,21 +433,21 @@ HTML4
}
}
defined ( $ contents ) or fauxdie "could not read \"$_\": $!\n" ;
defined ( $ contents ) or fauxdie "could not read \"$_\": $!\n" ;
$ _ eq "-" or close ( $ fh ) ;
$ _ eq "-" or close ( $ fh ) ;
$ oneresult = Markdown ( $ contents , \ % options ) ;
$ oneresult = $ raw ? ProcessRaw ( $ contents , \ % options ) : Markdown ( $ contents , \ % options ) ;
$ oneresult =~ s/\s+$//os ;
$ oneresult =~ s/\s+$//os ;
if ( $ oneresult ne "" ) {
if ( $ oneresult ne "" ) {
if ( ! $ didhdr ) {
if ( ! $ didhdr && ! $ raw ) {
$ hdr = & $ hdrf ( ) ;
$ hdr = & $ hdrf ( ) ;
$ didhdr = 1 ;
$ didhdr = 1 ;
}
}
$ result . = $ oneresult . "\n" ;
$ result . = $ oneresult . "\n" ;
}
}
}
}
$ hdr = & $ hdrf ( ) unless $ didhdr ;
$ hdr = & $ hdrf ( ) unless $ didhdr || $ raw ;
$ ftr = "</div>\n</body>\n</html>\n" if $ stub ;
$ ftr = "</div>\n</body>\n</html>\n" if $ stub && ! $ raw ;
if ( $ options { xmlcheck } ) {
if ( $ options { xmlcheck } ) {
my ( $ good , $ errs ) ;
my ( $ good , $ errs ) ;
if ( $ stub ) {
if ( $ stub && ! $ raw ) {
( $ good , $ errs ) = _xmlcheck ( $ hdr . $ result . $ ftr ) ;
( $ good , $ errs ) = _xmlcheck ( $ hdr . $ result . $ ftr ) ;
} else {
} else {
( $ good , $ errs ) = _xmlcheck ( "<div>" . $ result . "</div>" ) ;
( $ good , $ errs ) = _xmlcheck ( "<div>" . $ result . "</div>" ) ;
@ -475,6 +480,50 @@ sub _trimerr {
}
}
sub _PrepareInput {
my $ input = shift ;
defined $ input or $ input = "" ;
{
use bytes ;
$ input =~ s/[\x00-\x08\x0B\x0E-\x1F\x7F]+//gso ;
}
my $ output ;
if ( Encode:: is_utf8 ( $ input ) || utf8:: decode ( $ input ) ) {
$ output = $ input ;
} else {
$ output = $ encoder - > decode ( $ input , Encode:: FB_DEFAULT ) ;
}
# Standardize line endings:
$ output =~ s{\r\n} {\n}g ; # DOS to Unix
$ output =~ s{\r} {\n}g ; # Mac to Unix
return $ output ;
}
sub ProcessRaw {
my $ text = _PrepareInput ( shift ) ;
% opt = (
empty_element_suffix = > $ g_empty_element_suffix ,
) ;
my % args = ( ) ;
if ( ref ( $ _ [ 0 ] ) eq "HASH" ) {
% args = % { $ _ [ 0 ] } ;
} else {
% args = @ _ ;
}
while ( my ( $ k , $ v ) = each % args ) {
$ opt { $ k } = $ v ;
}
# Sanitize all '<'...'>' tags if requested
$ text = _SanitizeTags ( $ text ) if $ opt { sanitize } ;
utf8:: encode ( $ text ) ;
return $ text ;
}
sub Markdown {
sub Markdown {
#
#
# Primary function. The order in which other subs are called here is
# Primary function. The order in which other subs are called here is
@ -482,20 +531,7 @@ sub Markdown {
# _EscapeSpecialChars(), so that any *'s or _'s in the <a>
# _EscapeSpecialChars(), so that any *'s or _'s in the <a>
# and <img> tags get encoded.
# and <img> tags get encoded.
#
#
my $ _text = shift ;
my $ text = _PrepareInput ( shift ) ;
defined $ _text or $ _text = '' ;
{
use bytes ;
$ _text =~ s/[\x00-\x08\x0B\x0E-\x1F\x7F]+//gso ;
}
my $ text ;
if ( Encode:: is_utf8 ( $ _text ) || utf8:: decode ( $ _text ) ) {
$ text = $ _text ;
} else {
$ text = $ encoder - > decode ( $ _text , Encode:: FB_DEFAULT ) ;
}
$ _text = undef ;
# Any remaining arguments after the first are options; either a single
# Any remaining arguments after the first are options; either a single
# hashref or a list of name, value paurs.
# hashref or a list of name, value paurs.
@ -531,10 +567,6 @@ sub Markdown {
% g_code_blocks = ( ) ;
% g_code_blocks = ( ) ;
$ g_list_level = 0 ;
$ g_list_level = 0 ;
# Standardize line endings:
$ text =~ s{\r\n} {\n}g ; # DOS to Unix
$ text =~ s{\r} {\n}g ; # Mac to Unix
# Make sure $text ends with a couple of newlines:
# Make sure $text ends with a couple of newlines:
$ text . = "\n\n" ;
$ text . = "\n\n" ;
@ -2897,6 +2929,7 @@ B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
- V | - - version show version , authors , license
- V | - - version show version , authors , license
and copyright
and copyright
- s | - - shortversion show just the version number
- s | - - shortversion show just the version number
- - raw input contains only raw html
- - stylesheet output the fancy style sheet
- - stylesheet output the fancy style sheet
- - no - stylesheet do not output fancy style sheet
- - no - stylesheet do not output fancy style sheet
- - stub wrap output in stub document
- - stub wrap output in stub document
@ -3127,6 +3160,24 @@ Display Markdown's version number and copyright information.
Display the short - form version number .
Display the short - form version number .
= item B <--raw>
Input contains only raw HTML / XHTML . All options other than
B <--html4tags> , B <--deprecated> , B <--sanitize> ( on by default ) and
B <--validate-xml> ( and their B <--no-...> variants ) are ignored .
With this option , arbitrary HTML / XHTML input can be passed through
the sanitizer and / or validator . If sanitation is requested ( the
default ) , input must only contain the contents of the "<body>"
section ( i . e . no "<head>" or "<html>" ) . Output I <will> be converted
to UTF - 8 regardless of the input encoding . All line endings will
be normalized to C <\n> and input encodings other than UTF - 8 or
ISO - 8859 - 1 or US - ASCII will end up mangled .
Remember that any B <--stub> and / or B <--stylesheet> options are
I < completely ignored > when B <--raw> is given .
= item B <--stylesheet>
= item B <--stylesheet>
Include the fancy style sheet at the beginning of the output ( or in the
Include the fancy style sheet at the beginning of the output ( or in the