From f46d48232ae2021d092a84155b266df056b394fa Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Mon, 9 Jan 2017 00:48:17 -0800 Subject: [PATCH] Markdown.pl: refactor initialization and command line Move one-time initialization into BEGIN blocks. Avoid running qr(...) more than once on expressions that do not change (actually Perl should mostly already do this). Get rid of the kludgy check for command-line and move all that code into a new _main function and call it only when being run from the comamnd line. This seems to have resulted in a very very very tiny speed boost as well. Signed-off-by: Kyle J. McKay --- Markdown.pl | 235 ++++++++++++++++++++++++++++------------------------ 1 file changed, 125 insertions(+), 110 deletions(-) diff --git a/Markdown.pl b/Markdown.pl index 9d8ee9a..f912da1 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -26,8 +26,6 @@ All rights reserved. *VERSION = \"1.0.4+" # Sun 05 Jun 2016+ } -close(DATA) if fileno(DATA); - require Exporter; use Digest::MD5 qw(md5); use File::Basename qw(basename); @@ -37,28 +35,57 @@ use Pod::Usage; @EXPORT_OK = qw(Markdown); $INC{__PACKAGE__.'.pm'} = $INC{basename(__FILE__)} unless exists $INC{__PACKAGE__.'.pm'}; +close(DATA) if fileno(DATA); +exit(&_main(@ARGV)||0) unless caller; ## Disabled; causes problems under Perl 5.6.1: # use utf8; # binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html -sub block_id; # # Global default settings: # -my $g_empty_element_suffix = " />"; # Change to ">" for HTML output -my $g_tab_width = 4; # Legacy even though it's wrong +my ($g_empty_element_suffix, $g_tab_width); +BEGIN { + $g_empty_element_suffix = " />"; # Change to ">" for HTML output + $g_tab_width = 4; # Legacy even though it's wrong +} # # Globals: # +# Permanent block id table +my %g_perm_block_ids; + +# Global hashes, used by various utility routines +my %g_urls; +my %g_titles; +my %g_block_ids; +my %g_html_blocks; +my %opt; + +# Return a "block id" to use to identify the block that does not contain +# any characters that could be misinterpreted by the rest of the code +# Originally this used md5_hex but that's unnecessarily slow +# Instead just use the refaddr of the scalar ref of the entry for that +# key in either the global or, if the optional second argument is true, +# permanent table. To avoid the result being confused with anything +# else, it's prefixed with a control character and suffixed with another +# both of which are not allowed by the XML standard or Unicode. +sub block_id { + $_[1] ? + "\2".refaddr(\$g_perm_block_ids{$_[0]})."\3" : + "\5".refaddr(\$g_block_ids{$_[0]})."\6"; +} + # Regex to match balanced [brackets]. See Friedl's # "Mastering Regular Expressions", 2nd Ed., pp. 328-331. my $g_nested_brackets; -$g_nested_brackets = qr{ +BEGIN { + $g_nested_brackets = qr{ (?> # Atomic matching [^\[\]]+ # Anything other than brackets | @@ -66,37 +93,41 @@ $g_nested_brackets = qr{ (??{ $g_nested_brackets }) # Recursive set of nested brackets \] )* -}x; + }ox +} # Table of hash values for escaped characters: my %g_escape_table; -foreach my $char (split //, "\\\`*_~{}[]()>#+-.!") { - $g_escape_table{$char} = block_id($char,1); +BEGIN { + foreach my $char (split //, "\\\`*_~{}[]()>#+-.!") { + $g_escape_table{$char} = block_id($char,1); + } } -# Permanent block id table -my %g_perm_block_ids; - -# Global hashes, used by various utility routines -my %g_urls; -my %g_titles; -my %g_block_ids; -my %g_html_blocks; -my %opt; - # Used to track when we're inside an ordered or unordered list # (see _ProcessListItems() for details): -my $g_list_level = 0; +my $g_list_level; +BEGIN { + $g_list_level = 0; +} #### Blosxom plug-in interface ########################################## +my $_haveBX; +BEGIN { + no warnings 'once'; + $_haveBX = defined($blosxom::version); +} # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine # which posts Markdown should process, using a "meta-markup: markdown" # header. If it's set to 0 (the default), Markdown will process all # entries. -my $g_blosxom_use_meta = 0; +my $g_blosxom_use_meta; +BEGIN { + $g_blosxom_use_meta = 0; +} sub start { 1; } sub story { @@ -112,15 +143,16 @@ sub story { #### Movable Type plug-in interface ##################################### -eval {require MT}; # Test to see if we're running in MT. -unless ($@) { +my $_haveMT = eval {require MT; 1;}; # Test to see if we're running in MT +my $_haveMT3 = $_haveMT && eval {require MT::Plugin; 1;}; # and MT >= MT 3.0. + +unless ($_haveMT) { require MT; import MT; require MT::Template::Context; import MT::Template::Context; - eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. - unless ($@) { + unless ($_haveMT3) { require MT::Plugin; import MT::Plugin; my $plugin = new MT::Plugin({ @@ -203,88 +235,71 @@ unless ($@) { }); } } -elsif (!caller) { -#### BBEdit/command-line text filter interface ########################## -# Needs to be hidden from MT (and Blosxom when running in static mode). - # We're only using $blosxom::version once; tell Perl not to warn us: - no warnings 'once'; - unless ( defined($blosxom::version) ) { - use warnings; - - #### Check for command-line switches: ################# - my %options = (); - my %cli_opts; - use Getopt::Long; - Getopt::Long::Configure(qw(bundling require_order pass_through)); - GetOptions(\%cli_opts, - 'help','h', - 'version|V', - 'shortversion|short-version|s', - 'html4tags', - 'htmlroot|r=s', - 'imageroot|i=s', - ); - if ($cli_opts{'help'}) { - pod2usage(-verbose => 2, -exitval => 0); - } - if ($cli_opts{'h'}) { - pod2usage(-verbose => 0, -exitval => 0); - } - if ($cli_opts{'version'}) { # Version info - print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT; - print "License is Modified BSD (aka 3-clause BSD) License\n"; - print "\n"; - exit 0; - } - if ($cli_opts{'shortversion'}) { # Just the version number string. - print $VERSION; - exit 0; - } - if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML - $options{empty_element_suffix} = ">"; - } - if ($cli_opts{'htmlroot'}) { # Use URL prefix - $options{url_prefix} = $cli_opts{'htmlroot'}; - } - if ($cli_opts{'imageroot'}) { # Use image URL prefix - $options{img_prefix} = $cli_opts{'imageroot'}; - } +#### BBEdit/command-line text filter interface ########################## +sub _main { + local *ARGV = \@_; + + + #### Check for command-line switches: ################# + my %options = (); + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure(qw(bundling require_order pass_through)); + GetOptions(\%cli_opts, + 'help','h', + 'version|V', + 'shortversion|short-version|s', + 'html4tags', + 'htmlroot|r=s', + 'imageroot|i=s', + ); + if ($cli_opts{'help'}) { + pod2usage(-verbose => 2, -exitval => 0); + } + if ($cli_opts{'h'}) { + pod2usage(-verbose => 0, -exitval => 0); + } + if ($cli_opts{'version'}) { # Version info + print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT; + print "License is Modified BSD (aka 3-clause BSD) License\n"; + print "\n"; + exit 0; + } + if ($cli_opts{'shortversion'}) { # Just the version number string. + print $VERSION; + exit 0; + } + if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML + $options{empty_element_suffix} = ">"; + } + if ($cli_opts{'htmlroot'}) { # Use URL prefix + $options{url_prefix} = $cli_opts{'htmlroot'}; + } + if ($cli_opts{'imageroot'}) { # Use image URL prefix + $options{img_prefix} = $cli_opts{'imageroot'}; + } - #### Process incoming text: ########################### - for (;;) { - local $_; - { - local $/; # Slurp the whole file - $_ = <>; - } - defined($_) or last; - print Markdown($_, \%options); + #### Process incoming text: ########################### + for (;;) { + local $_; + { + local $/; # Slurp the whole file + $_ = <>; } - exit 0; + defined($_) or last; + print Markdown($_, \%options); } -} -# Return a "block id" to use to identify the block that does not contain -# any characters that could be misinterpreted by the rest of the code -# Originally this used md5_hex but that's unnecessarily slow -# Instead just use the refaddr of the scalar ref of the entry for that -# key in either the global or, if the optional second argument is true, -# permanent table. To avoid the result being confused with anything -# else, it's prefixed with a control character and suffixed with another -# both of which are not allowed by the XML standard or Unicode. -sub block_id { - $_[1] ? - "\2".refaddr(\$g_perm_block_ids{$_[0]})."\3" : - "\5".refaddr(\$g_block_ids{$_[0]})."\6"; + exit 0; } sub Markdown { # -# Main function. The order in which other subs are called here is +# Primary function. The order in which other subs are called here is # essential. Link and image substitutions need to happen before # _EscapeSpecialChars(), so that any *'s or _'s in the # and tags get encoded. @@ -426,6 +441,11 @@ sub _StripLinkDefinitions { return $text; } +my ($block_tags_a, $block_tags_b); +BEGIN { + $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/o; + $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/o; +} sub _HashHTMLBlocks { my $text = shift; @@ -437,8 +457,6 @@ sub _HashHTMLBlocks { # "paragraphs" that are wrapped in non-block-level tags, such as anchors, # phrase emphasis, and spans. The list of tags we're looking for is # hard-coded: - my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; - my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; # First, look for nested blocks, e.g.: #
@@ -882,6 +900,14 @@ sub _DoHeaders { } +my ($marker_ul, $marker_ol, $marker_any); +BEGIN { + # Re-usable patterns to match list item bullets and number markers: + $marker_ul = qr/[*+-]/o; + $marker_ol = qr/\d+[.]/o; + $marker_any = qr/(?:$marker_ul|$marker_ol)/o; +} + sub _DoLists { # # Form HTML ordered (numbered) and unordered (bulleted) lists. @@ -889,11 +915,6 @@ sub _DoLists { my $text = shift; my $less_than_tab = $opt{tab_width} - 1; - # Re-usable patterns to match list item bullets and number markers: - my $marker_ul = qr/[*+-]/; - my $marker_ol = qr/\d+[.]/; - my $marker_any = qr/(?:$marker_ul|$marker_ol)/; - # Re-usable pattern to match any entirel ul or ol list: my $whole_list = qr{ ( # $1 = whole list @@ -1132,13 +1153,7 @@ sub _EncodeCode { # Encode $'s, but only if we're running under Blosxom. # (Blosxom interpolates Perl variables in article bodies.) - { - no warnings 'once'; - if (defined($blosxom::version)) { - s/\$/$/g; - } - } - + s/\$/$/g if $_haveBX; # Do the angle bracket song and dance: s! < !<!gx; @@ -1396,7 +1411,7 @@ sub _TokenizeHTML { my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); my $match = qr/(?s: ) | # comment (?s: <\? .*? \?> ) | # processing instruction - $nested_tags/ix; # nested tags + $nested_tags/iox; # nested tags while ($str =~ m/($match)/g) { my $whole_tag = $1;