Browse Source

Markdown.pl: introduce --raw mode

With `--raw`, no actual Markdown processing takes place, but the
input will still be sanitized (by default) and may optionally also
have --html4tags or --validate-xml used on it too.

The output's line endings will be normalized and the encoding
converted to UTF-8.

Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
master
Kyle J. McKay 5 years ago
parent
commit
cfa6b427dc
  1. 97
      Markdown.pl

97
Markdown.pl

@ -279,6 +279,7 @@ sub _main {
#### Check for command-line switches: ################# #### Check for command-line switches: #################
my %options = (); my %options = ();
my %cli_opts; my %cli_opts;
my $raw = 0;
use Getopt::Long; use Getopt::Long;
Getopt::Long::Configure(qw(bundling require_order pass_through)); Getopt::Long::Configure(qw(bundling require_order pass_through));
GetOptions(\%cli_opts, GetOptions(\%cli_opts,
@ -295,6 +296,7 @@ sub _main {
'imageroot|i=s', 'imageroot|i=s',
'wiki|w:s', 'wiki|w:s',
'tabwidth|tab-width=s', 'tabwidth|tab-width=s',
'raw',
'stylesheet|style-sheet', 'stylesheet|style-sheet',
'no-stylesheet|no-style-sheet', 'no-stylesheet|no-style-sheet',
'stub', 'stub',
@ -367,6 +369,9 @@ sub _main {
} }
$options{wikiopt} = { map({$_ => 1} split(//,lc($wopt))) }; $options{wikiopt} = { map({$_ => 1} split(//,lc($wopt))) };
} }
if ($cli_opts{'raw'}) {
$raw = 1;
}
if ($cli_opts{'stylesheet'}) { # Display the style sheet if ($cli_opts{'stylesheet'}) { # Display the style sheet
$options{show_styles} = 1; $options{show_styles} = 1;
} }
@ -428,21 +433,21 @@ HTML4
} }
defined($contents) or fauxdie "could not read \"$_\": $!\n"; defined($contents) or fauxdie "could not read \"$_\": $!\n";
$_ eq "-" or close($fh); $_ eq "-" or close($fh);
$oneresult = Markdown($contents, \%options); $oneresult = $raw ? ProcessRaw($contents, \%options) : Markdown($contents, \%options);
$oneresult =~ s/\s+$//os; $oneresult =~ s/\s+$//os;
if ($oneresult ne "") { if ($oneresult ne "") {
if (!$didhdr) { if (!$didhdr && !$raw) {
$hdr = &$hdrf(); $hdr = &$hdrf();
$didhdr = 1; $didhdr = 1;
} }
$result .= $oneresult . "\n"; $result .= $oneresult . "\n";
} }
} }
$hdr = &$hdrf() unless $didhdr; $hdr = &$hdrf() unless $didhdr || $raw;
$ftr = "</div>\n</body>\n</html>\n" if $stub; $ftr = "</div>\n</body>\n</html>\n" if $stub && !$raw;
if ($options{xmlcheck}) { if ($options{xmlcheck}) {
my ($good, $errs); my ($good, $errs);
if ($stub) { if ($stub && !$raw) {
($good, $errs) = _xmlcheck($hdr.$result.$ftr); ($good, $errs) = _xmlcheck($hdr.$result.$ftr);
} else { } else {
($good, $errs) = _xmlcheck("<div>".$result."</div>"); ($good, $errs) = _xmlcheck("<div>".$result."</div>");
@ -475,6 +480,50 @@ sub _trimerr {
} }
# _PrepareInput($input)
#
# Turns raw input into a cleaned-up character string:
#   1. undef is treated as the empty string
#   2. runs of control bytes are removed (everything in 0x00-0x1F plus
#      0x7F EXCEPT tab, line feed, form feed and carriage return)
#   3. the data is used as-is if already flagged as UTF-8 or if it can be
#      decoded in place as UTF-8; otherwise it is decoded via the
#      file-level $encoder object (NOTE(review): which fallback encoding
#      $encoder represents is not visible here -- confirm at its definition)
#   4. CR LF pairs and any remaining lone CRs become LF
#
# Returns the resulting string.
sub _PrepareInput {
    my ($raw) = @_;
    $raw = "" unless defined $raw;

    {
        # Byte semantics so the class matches individual octets.
        use bytes;
        $raw =~ s/[\x00-\x08\x0B\x0E-\x1F\x7F]+//gso;
    }

    # utf8::decode works in place, so on success $raw already holds the
    # decoded characters by the time the true branch is evaluated.
    my $decoded = (Encode::is_utf8($raw) || utf8::decode($raw))
        ? $raw
        : $encoder->decode($raw, Encode::FB_DEFAULT);

    # Standardize line endings (order matters: pairs first, then strays).
    for ($decoded) {
        s{\r\n}{\n}g; # DOS to Unix
        s{\r}{\n}g; # Mac to Unix
    }

    return $decoded;
}
# ProcessRaw($input, \%options)
# ProcessRaw($input, name => value, ...)
#
# Raw-mode counterpart to Markdown(): no Markdown processing is applied.
# The input is run through _PrepareInput, then passed through
# _SanitizeTags when the "sanitize" option is true, and finally encoded
# to UTF-8 in place before being returned.
#
# Options may be supplied either as a single hash reference or as a flat
# list of name/value pairs.  As a side effect the file-level %opt hash is
# reset to { empty_element_suffix => $g_empty_element_suffix } overlaid
# with the supplied options.
sub ProcessRaw {
    my $html = _PrepareInput(shift);

    # Whatever follows the text is the option set, in one of two shapes.
    my %overrides = ref($_[0]) eq "HASH" ? %{$_[0]} : @_;

    # Later pairs win, so caller-supplied values replace the default.
    %opt = (
        empty_element_suffix => $g_empty_element_suffix,
        %overrides,
    );

    # Sanitize all '<'...'>' tags if requested
    if ($opt{sanitize}) {
        $html = _SanitizeTags($html);
    }

    utf8::encode($html);
    return $html;
}
sub Markdown { sub Markdown {
# #
# Primary function. The order in which other subs are called here is # Primary function. The order in which other subs are called here is
@ -482,20 +531,7 @@ sub Markdown {
# _EscapeSpecialChars(), so that any *'s or _'s in the <a> # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
# and <img> tags get encoded. # and <img> tags get encoded.
# #
my $_text = shift; my $text = _PrepareInput(shift);
defined $_text or $_text='';
{
use bytes;
$_text =~ s/[\x00-\x08\x0B\x0E-\x1F\x7F]+//gso;
}
my $text;
if (Encode::is_utf8($_text) || utf8::decode($_text)) {
$text = $_text;
} else {
$text = $encoder->decode($_text, Encode::FB_DEFAULT);
}
$_text = undef;
# Any remaining arguments after the first are options; either a single # Any remaining arguments after the first are options; either a single
# hashref or a list of name, value pairs. # hashref or a list of name, value pairs.
@ -531,10 +567,6 @@ sub Markdown {
%g_code_blocks = (); %g_code_blocks = ();
$g_list_level = 0; $g_list_level = 0;
# Standardize line endings:
$text =~ s{\r\n}{\n}g; # DOS to Unix
$text =~ s{\r}{\n}g; # Mac to Unix
# Make sure $text ends with a couple of newlines: # Make sure $text ends with a couple of newlines:
$text .= "\n\n"; $text .= "\n\n";
@ -2897,6 +2929,7 @@ B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
-V | --version show version, authors, license -V | --version show version, authors, license
and copyright and copyright
-s | --shortversion show just the version number -s | --shortversion show just the version number
--raw input contains only raw html
--stylesheet output the fancy style sheet --stylesheet output the fancy style sheet
--no-stylesheet do not output fancy style sheet --no-stylesheet do not output fancy style sheet
--stub wrap output in stub document --stub wrap output in stub document
@ -3127,6 +3160,24 @@ Display Markdown's version number and copyright information.
Display the short-form version number. Display the short-form version number.
=item B<--raw>
Input contains only raw HTML/XHTML. All options other than
B<--html4tags>, B<--deprecated>, B<--sanitize> (on by default) and
B<--validate-xml> (and their B<--no-...> variants) are ignored.
With this option, arbitrary HTML/XHTML input can be passed through
the sanitizer and/or validator. If sanitization is requested (the
default), input must only contain the contents of the "<body>"
section (i.e. no "<head>" or "<html>"). Output I<will> be converted
to UTF-8 regardless of the input encoding. All line endings will
be normalized to C<\n> and input encodings other than UTF-8 or
ISO-8859-1 or US-ASCII will end up mangled.
Remember that any B<--stub> and/or B<--stylesheet> options are
I<completely ignored> when B<--raw> is given.
=item B<--stylesheet> =item B<--stylesheet>
Include the fancy style sheet at the beginning of the output (or in the Include the fancy style sheet at the beginning of the output (or in the

Loading…
Cancel
Save