diff --git a/Markdown.pl b/Markdown.pl
index 9d8ee9a..f912da1 100755
--- a/Markdown.pl
+++ b/Markdown.pl
@@ -26,8 +26,6 @@ All rights reserved.
     *VERSION = \"1.0.4+" # Sun 05 Jun 2016+
 }
 
-close(DATA) if fileno(DATA);
-
 require Exporter;
 use Digest::MD5 qw(md5);
 use File::Basename qw(basename);
@@ -37,28 +35,57 @@ use Pod::Usage;
 @EXPORT_OK = qw(Markdown);
 $INC{__PACKAGE__.'.pm'} = $INC{basename(__FILE__)} unless exists $INC{__PACKAGE__.'.pm'};
 
+close(DATA) if fileno(DATA);
+exit(&_main(@ARGV)||0) unless caller;
 ## Disabled; causes problems under Perl 5.6.1:
 # use utf8;
 # binmode( STDOUT, ":utf8" );
 # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
 
-sub block_id;
 
 #
 # Global default settings:
 #
-my $g_empty_element_suffix = " />"; # Change to ">" for HTML output
-my $g_tab_width = 4; # Legacy even though it's wrong
+my ($g_empty_element_suffix, $g_tab_width);
+BEGIN {
+    $g_empty_element_suffix = " />"; # Change to ">" for HTML output
+    $g_tab_width = 4; # Legacy even though it's wrong
+}
 
 
 #
 # Globals:
 #
 
+# Permanent block id table
+my %g_perm_block_ids;
+
+# Global hashes, used by various utility routines
+my %g_urls;
+my %g_titles;
+my %g_block_ids;
+my %g_html_blocks;
+my %opt;
+
+# Return a "block id" to use to identify the block that does not contain
+# any characters that could be misinterpreted by the rest of the code
+# Originally this used md5_hex but that's unnecessarily slow
+# Instead just use the refaddr of the scalar ref of the entry for that
+# key in either the global or, if the optional second argument is true,
+# permanent table. To avoid the result being confused with anything
+# else, it's prefixed with a control character and suffixed with another
+# both of which are not allowed by the XML standard or Unicode.
+sub block_id {
+    $_[1] ?
+	"\2".refaddr(\$g_perm_block_ids{$_[0]})."\3" :
+	"\5".refaddr(\$g_block_ids{$_[0]})."\6";
+}
+
 # Regex to match balanced [brackets]. See Friedl's
 # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
 my $g_nested_brackets;
-$g_nested_brackets = qr{
+BEGIN {
+    $g_nested_brackets = qr{
 	(?>								# Atomic matching
 	   [^\[\]]+							# Anything other than brackets
 	 |
@@ -66,37 +93,41 @@ $g_nested_brackets = qr{
 		 (??{ $g_nested_brackets })		# Recursive set of nested brackets
 	   \]
 	)*
-}x;
+    }ox
+}
 
 
 # Table of hash values for escaped characters:
 my %g_escape_table;
-foreach my $char (split //, "\\\`*_~{}[]()>#+-.!") {
-    $g_escape_table{$char} = block_id($char,1);
+BEGIN {
+    foreach my $char (split //, "\\\`*_~{}[]()>#+-.!") {
+	$g_escape_table{$char} = block_id($char,1);
+    }
 }
 
-# Permanent block id table
-my %g_perm_block_ids;
-
-# Global hashes, used by various utility routines
-my %g_urls;
-my %g_titles;
-my %g_block_ids;
-my %g_html_blocks;
-my %opt;
-
 # Used to track when we're inside an ordered or unordered list
 # (see _ProcessListItems() for details):
-my $g_list_level = 0;
+my $g_list_level;
+BEGIN {
+    $g_list_level = 0;
+}
 
 
 #### Blosxom plug-in interface ##########################################
+my $_haveBX;
+BEGIN {
+    no warnings 'once';
+    $_haveBX = defined($blosxom::version);
+}
 
 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
 # which posts Markdown should process, using a "meta-markup: markdown"
 # header. If it's set to 0 (the default), Markdown will process all
 # entries.
-my $g_blosxom_use_meta = 0;
+my $g_blosxom_use_meta;
+BEGIN {
+    $g_blosxom_use_meta = 0;
+}
 
 sub start { 1; }
 sub story {
@@ -112,15 +143,16 @@ sub story {
 
 
 #### Movable Type plug-in interface #####################################
-eval {require MT}; # Test to see if we're running in MT.
-unless ($@) {
+my $_haveMT = eval {require MT; 1;}; # Test to see if we're running in MT
+my $_haveMT3 = $_haveMT && eval {require MT::Plugin; 1;}; # and MT >= MT 3.0.
+
+if ($_haveMT) {
     require MT;
     import MT;
     require MT::Template::Context;
     import MT::Template::Context;
 
-    eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0.
-    unless ($@) {
+    if ($_haveMT3) {
 	require MT::Plugin;
 	import MT::Plugin;
 	my $plugin = new MT::Plugin({
@@ -203,88 +235,71 @@ unless ($@) {
 	});
     }
 }
-elsif (!caller) {
-#### BBEdit/command-line text filter interface ##########################
-# Needs to be hidden from MT (and Blosxom when running in static mode).
 
-    # We're only using $blosxom::version once; tell Perl not to warn us:
-    no warnings 'once';
-    unless ( defined($blosxom::version) ) {
-	use warnings;
-
-	#### Check for command-line switches: #################
-	my %options = ();
-	my %cli_opts;
-	use Getopt::Long;
-	Getopt::Long::Configure(qw(bundling require_order pass_through));
-	GetOptions(\%cli_opts,
-	    'help','h',
-	    'version|V',
-	    'shortversion|short-version|s',
-	    'html4tags',
-	    'htmlroot|r=s',
-	    'imageroot|i=s',
-	);
-	if ($cli_opts{'help'}) {
-	    pod2usage(-verbose => 2, -exitval => 0);
-	}
-	if ($cli_opts{'h'}) {
-	    pod2usage(-verbose => 0, -exitval => 0);
-	}
-	if ($cli_opts{'version'}) { # Version info
-	    print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT;
-	    print "License is Modified BSD (aka 3-clause BSD) License\n";
-	    print "\n";
-	    exit 0;
-	}
-	if ($cli_opts{'shortversion'}) { # Just the version number string.
-	    print $VERSION;
-	    exit 0;
-	}
-	if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
-	    $options{empty_element_suffix} = ">";
-	}
-	if ($cli_opts{'htmlroot'}) { # Use URL prefix
-	    $options{url_prefix} = $cli_opts{'htmlroot'};
-	}
-	if ($cli_opts{'imageroot'}) { # Use image URL prefix
-	    $options{img_prefix} = $cli_opts{'imageroot'};
-	}
+#### BBEdit/command-line text filter interface ##########################
+sub _main {
+    local *ARGV = \@_;
+
+
+    #### Check for command-line switches: #################
+    my %options = ();
+    my %cli_opts;
+    use Getopt::Long;
+    Getopt::Long::Configure(qw(bundling require_order pass_through));
+    GetOptions(\%cli_opts,
+	'help','h',
+	'version|V',
+	'shortversion|short-version|s',
+	'html4tags',
+	'htmlroot|r=s',
+	'imageroot|i=s',
+    );
+    if ($cli_opts{'help'}) {
+	pod2usage(-verbose => 2, -exitval => 0);
+    }
+    if ($cli_opts{'h'}) {
+	pod2usage(-verbose => 0, -exitval => 0);
+    }
+    if ($cli_opts{'version'}) { # Version info
+	print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT;
+	print "License is Modified BSD (aka 3-clause BSD) License\n";
+	print "\n";
+	exit 0;
+    }
+    if ($cli_opts{'shortversion'}) { # Just the version number string.
+	print $VERSION;
+	exit 0;
+    }
+    if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
+	$options{empty_element_suffix} = ">";
+    }
+    if ($cli_opts{'htmlroot'}) { # Use URL prefix
+	$options{url_prefix} = $cli_opts{'htmlroot'};
+    }
+    if ($cli_opts{'imageroot'}) { # Use image URL prefix
+	$options{img_prefix} = $cli_opts{'imageroot'};
+    }
 
 
-	#### Process incoming text: ###########################
-	for (;;) {
-	    local $_;
-	    {
-		local $/; # Slurp the whole file
-		$_ = <>;
-	    }
-	    defined($_) or last;
-	    print Markdown($_, \%options);
+    #### Process incoming text: ###########################
+    for (;;) {
+	local $_;
+	{
+	    local $/; # Slurp the whole file
+	    $_ = <>;
 	}
-	exit 0;
+	defined($_) or last;
+	print Markdown($_, \%options);
     }
-}
 
-# Return a "block id" to use to identify the block that does not contain
-# any characters that could be misinterpreted by the rest of the code
-# Originally this used md5_hex but that's unnecessarily slow
-# Instead just use the refaddr of the scalar ref of the entry for that
-# key in either the global or, if the optional second argument is true,
-# permanent table. To avoid the result being confused with anything
-# else, it's prefixed with a control character and suffixed with another
-# both of which are not allowed by the XML standard or Unicode.
-sub block_id {
-    $_[1] ?
-	"\2".refaddr(\$g_perm_block_ids{$_[0]})."\3" :
-	"\5".refaddr(\$g_block_ids{$_[0]})."\6";
+    exit 0;
 }
 
 
 
 sub Markdown {
 #
-# Main function. The order in which other subs are called here is
+# Primary function. The order in which other subs are called here is
 # essential. Link and image substitutions need to happen before
 # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
 # and <img> tags get encoded.
@@ -426,6 +441,11 @@ sub _StripLinkDefinitions {
 	return $text;
 }
 
+my ($block_tags_a, $block_tags_b);
+BEGIN {
+    $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/o;
+    $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/o;
+}
 
 sub _HashHTMLBlocks {
 	my $text = shift;
@@ -437,8 +457,6 @@ sub _HashHTMLBlocks {
 	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 	# phrase emphasis, and spans. The list of tags we're looking for is
 	# hard-coded:
-	my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
-	my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
 
 	# First, look for nested blocks, e.g.:
 	# 	<div>
@@ -882,6 +900,14 @@ sub _DoHeaders {
 }
 
 
+my ($marker_ul, $marker_ol, $marker_any);
+BEGIN {
+    # Re-usable patterns to match list item bullets and number markers:
+    $marker_ul = qr/[*+-]/o;
+    $marker_ol = qr/\d+[.]/o;
+    $marker_any = qr/(?:$marker_ul|$marker_ol)/o;
+}
+
 sub _DoLists {
 #
 # Form HTML ordered (numbered) and unordered (bulleted) lists.
@@ -889,11 +915,6 @@ sub _DoLists {
 	my $text = shift;
 	my $less_than_tab = $opt{tab_width} - 1;
 
-	# Re-usable patterns to match list item bullets and number markers:
-	my $marker_ul = qr/[*+-]/;
-	my $marker_ol = qr/\d+[.]/;
-	my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
-
 	# Re-usable pattern to match any entirel ul or ol list:
 	my $whole_list = qr{
 		(								# $1 = whole list
@@ -1132,13 +1153,7 @@ sub _EncodeCode {
 
 	# Encode $'s, but only if we're running under Blosxom.
 	# (Blosxom interpolates Perl variables in article bodies.)
-	{
-		no warnings 'once';
-		if (defined($blosxom::version)) {
-			s/\$/&#036;/g;
-		}
-	}
-
+	s/\$/&#036;/g if $_haveBX;
 
 	# Do the angle bracket song and dance:
 	s! <  !&lt;!gx;
@@ -1396,7 +1411,7 @@ sub _TokenizeHTML {
 	my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
 	my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
 				   (?s: <\? .*? \?> ) |              # processing instruction
-				   $nested_tags/ix;                   # nested tags
+				   $nested_tags/iox;                   # nested tags
 
 	while ($str =~ m/($match)/g) {
 		my $whole_tag = $1;